From 8cf2ee57216b4dffc419e1762ff1fe4dfd30e227 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sun, 1 Feb 2015 13:43:10 +0100
Subject: 500px unit test

---
 searx/engines/www500px.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/www500px.py b/searx/engines/www500px.py
index f25678c24..99dba4abf 100644
--- a/searx/engines/www500px.py
+++ b/searx/engines/www500px.py
@@ -15,6 +15,7 @@ from urllib import urlencode
 from urlparse import urljoin
 from lxml import html
 import re
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['images']
@@ -22,7 +23,7 @@ paging = True
 
 # search-url
 base_url = 'https://500px.com'
-search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
+search_url = base_url + '/search?search?page={pageno}&type=photos&{query}'
 
 
 # do search-request
@@ -44,11 +45,11 @@ def response(resp):
     for result in dom.xpath('//div[@class="photo"]'):
         link = result.xpath('.//a')[0]
         url = urljoin(base_url, link.attrib.get('href'))
-        title = result.xpath('.//div[@class="title"]//text()')[0]
-        thumbnail_src = link.xpath('.//img')[0].attrib['src']
+        title = extract_text(result.xpath('.//div[@class="title"]'))
+        thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
         # To have a bigger thumbnail, uncomment the next line
-        #thumbnail_src = regex.sub('4.jpg', thumbnail_src)
-        content = result.xpath('.//div[@class="info"]//text()')[0]
+        # thumbnail_src = regex.sub('4.jpg', thumbnail_src)
+        content = extract_text(result.xpath('.//div[@class="info"]'))
         img_src = regex.sub('2048.jpg', thumbnail_src)
 
         # append result
-- 
cgit v1.2.3