From 44c9216c497862293318a48ad5c39f373cee95e6 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sun, 25 Jan 2015 20:04:44 +0100
Subject: Sanitize extract_text

---
 searx/engines/xpath.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 72120304e..1a599dc0a 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -28,13 +28,13 @@ def extract_text(xpath_results):
         result = ''
         for e in xpath_results:
             result = result + extract_text(e)
-        return result
+        return result.strip()
     elif type(xpath_results) in [_ElementStringResult, _ElementUnicodeResult]:
         # it's a string
         return ''.join(xpath_results)
     else:
         # it's a element
-        return html_to_text(xpath_results.text_content())
+        return html_to_text(xpath_results.text_content()).strip()
 
 
 def extract_url(xpath_results, search_url):
-- 
cgit v1.2.3


From 525af2a031b787e22c3e310e61bfcd5fd1737bca Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sun, 25 Jan 2015 20:14:37 +0100
Subject: Add bing in the test units

---
 searx/engines/bing.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 5de461cfe..f9c323d05 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -14,6 +14,7 @@
 from urllib import urlencode
 from cgi import escape
 from lxml import html
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['general']
@@ -55,8 +56,8 @@ def response(resp):
     for result in dom.xpath('//div[@class="sa_cc"]'):
         link = result.xpath('.//h3/a')[0]
         url = link.attrib.get('href')
-        title = ' '.join(link.xpath('.//text()'))
-        content = escape(' '.join(result.xpath('.//p//text()')))
+        title = extract_text(link)
+        content = escape(extract_text(result.xpath('.//p')))
 
         # append result
         results.append({'url': url,
@@ -71,8 +72,8 @@ def response(resp):
     for result in dom.xpath('//li[@class="b_algo"]'):
         link = result.xpath('.//h2/a')[0]
         url = link.attrib.get('href')
-        title = ' '.join(link.xpath('.//text()'))
-        content = escape(' '.join(result.xpath('.//p//text()')))
+        title = extract_text(link)
+        content = escape(extract_text(result.xpath('.//p')))
 
         # append result
         results.append({'url': url,
-- 
cgit v1.2.3


From 4dba3739fb3b98572cbd51adab226376b5844105 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Mon, 26 Jan 2015 18:24:08 +0100
Subject: Youtube's unit test

---
 searx/engines/youtube.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/youtube.py b/searx/engines/youtube.py
index 59f07c574..1375538a8 100644
--- a/searx/engines/youtube.py
+++ b/searx/engines/youtube.py
@@ -57,7 +57,7 @@ def response(resp):
         url = [x['href'] for x in result['link'] if x['type'] == 'text/html']
 
         if not url:
-            return
+            continue
 
         # remove tracking
         url = url[0].replace('feature=youtube_gdata', '')
@@ -73,7 +73,7 @@ def response(resp):
         pubdate = result['published']['$t']
         publishedDate = parser.parse(pubdate)
 
-        if result['media$group']['media$thumbnail']:
+        if 'media$thumbnail' in result['media$group']:
             thumbnail = result['media$group']['media$thumbnail'][0]['url']
 
         content = result['content']['$t']
-- 
cgit v1.2.3


From 3282e62ff92f1c2158cb169d2a21a5988766450c Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Tue, 27 Jan 2015 22:39:25 +0100
Subject: Searchcode engines corrections

---
 searx/engines/searchcode_code.py | 2 +-
 searx/engines/searchcode_doc.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py
index 655818da2..f276697b1 100644
--- a/searx/engines/searchcode_code.py
+++ b/searx/engines/searchcode_code.py
@@ -42,7 +42,7 @@ def response(resp):
     search_results = loads(resp.text)
 
     # parse results
-    for result in search_results['results']:
+    for result in search_results.get('results', []):
         href = result['url']
         title = "" + result['name'] + " - " + result['filename']
         repo = result['repo']
diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py
index b5b7159be..76da8d752 100644
--- a/searx/engines/searchcode_doc.py
+++ b/searx/engines/searchcode_doc.py
@@ -35,7 +35,7 @@ def response(resp):
     search_results = loads(resp.text)
 
     # parse results
-    for result in search_results['results']:
+    for result in search_results.get('results', []):
         href = result['url']
         title = "[" + result['type'] + "] " +\
                 result['namespace'] +\
-- 
cgit v1.2.3


From 1d255061c7422045ef912a471500513832e0319f Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Thu, 29 Jan 2015 00:26:12 +0100
Subject: Digg's unit test

---
 searx/engines/digg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'searx/engines')

diff --git a/searx/engines/digg.py b/searx/engines/digg.py
index 8c457d6b9..1b5f2c8e4 100644
--- a/searx/engines/digg.py
+++ b/searx/engines/digg.py
@@ -44,7 +44,7 @@ def response(resp):
 
     search_result = loads(resp.text)
 
-    if search_result['html'] == '':
+    if 'html' not in search_result or search_result['html'] == '':
         return results
 
     dom = html.fromstring(search_result['html'])
-- 
cgit v1.2.3


From d4957045513d6fb32dcffbc7ea87483479a8cb6e Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Thu, 29 Jan 2015 01:13:33 +0100
Subject: Deviant Art's unit test

---
 searx/engines/deviantart.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py
index 6284cf598..4198e8c76 100644
--- a/searx/engines/deviantart.py
+++ b/searx/engines/deviantart.py
@@ -14,6 +14,7 @@ from urllib import urlencode
 from urlparse import urljoin
 from lxml import html
 import re
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['images']
@@ -50,9 +51,9 @@ def response(resp):
     for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):
         link = result.xpath('.//a[contains(@class, "thumb")]')[0]
         url = urljoin(base_url, link.attrib.get('href'))
-        title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')  # noqa
-        title = ''.join(title_links[0].xpath('.//text()'))
-        thumbnail_src = link.xpath('.//img')[0].attrib['src']
+        title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')
+        title = extract_text(title_links[0])
+        thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
         img_src = regex.sub('/', thumbnail_src)
 
         # append result
-- 
cgit v1.2.3


From dad0434f34f04ada2b4b0961bbb714e25c752677 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Thu, 29 Jan 2015 20:15:52 +0100
Subject: Bing images' unit test

---
 searx/engines/bing_images.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'searx/engines')

diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 9ae498427..9d1c22f5a 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -33,7 +33,10 @@ def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1
 
     # required for cookie
-    language = 'en-US'
+    if params['language'] == 'all':
+        language = 'en-US'
+    else:
+        language = params['language'].replace('_', '-')
 
     search_path = search_string.format(
         query=urlencode({'q': query}),
-- 
cgit v1.2.3


From efde2c21c8656ad21b24980b516ddbbf2e209523 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Thu, 29 Jan 2015 20:56:57 +0100
Subject: Bing news' unit test. I have no idea why coverage reports 97% and 2
 misses in branches. If anyone has an idea...

---
 searx/engines/bing_news.py | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 789a23b89..182bd36b5 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -15,6 +15,7 @@ from lxml import html
 from datetime import datetime, timedelta
 from dateutil import parser
 import re
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['news']
@@ -42,6 +43,7 @@ def request(query, params):
     params['cookies']['_FP'] = "ui=en-US"
 
     params['url'] = base_url + search_path
+
     return params
 
 
@@ -55,44 +57,37 @@ def response(resp):
     for result in dom.xpath('//div[@class="sn_r"]'):
         link = result.xpath('.//div[@class="newstitle"]/a')[0]
         url = link.attrib.get('href')
-        title = ' '.join(link.xpath('.//text()'))
-        contentXPath = result.xpath('.//div[@class="sn_txt"]/div'
-                                    '//span[@class="sn_snip"]//text()')
+        title = extract_text(link)
+        contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]')
         if contentXPath is not None:
-            content = escape(' '.join(contentXPath))
+            content = escape(extract_text(contentXPath))
 
         # parse publishedDate
         publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div'
                                           '//span[contains(@class,"sn_ST")]'
-                                          '//span[contains(@class,"sn_tm")]'
-                                          '//text()')
+                                          '//span[contains(@class,"sn_tm")]')
+
         if publishedDateXPath is not None:
-            publishedDate = escape(' '.join(publishedDateXPath))
+            publishedDate = escape(extract_text(publishedDateXPath))
 
         if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
             timeNumbers = re.findall(r'\d+', publishedDate)
-            publishedDate = datetime.now()\
-                - timedelta(minutes=int(timeNumbers[0]))
+            publishedDate = datetime.now() - timedelta(minutes=int(timeNumbers[0]))
         elif re.match("^[0-9]+ hour(s|) ago$", publishedDate):
             timeNumbers = re.findall(r'\d+', publishedDate)
-            publishedDate = datetime.now()\
-                - timedelta(hours=int(timeNumbers[0]))
-        elif re.match("^[0-9]+ hour(s|),"
-                      " [0-9]+ minute(s|) ago$", publishedDate):
+            publishedDate = datetime.now() - timedelta(hours=int(timeNumbers[0]))
+        elif re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", publishedDate):
             timeNumbers = re.findall(r'\d+', publishedDate)
             publishedDate = datetime.now()\
                 - timedelta(hours=int(timeNumbers[0]))\
                 - timedelta(minutes=int(timeNumbers[1]))
         elif re.match("^[0-9]+ day(s|) ago$", publishedDate):
             timeNumbers = re.findall(r'\d+', publishedDate)
-            publishedDate = datetime.now()\
-                - timedelta(days=int(timeNumbers[0]))
+            publishedDate = datetime.now() - timedelta(days=int(timeNumbers[0]))
         else:
             try:
-                # FIXME use params['language'] to parse either mm/dd or dd/mm
                 publishedDate = parser.parse(publishedDate, dayfirst=False)
             except TypeError:
-                # FIXME
                 publishedDate = datetime.now()
 
         # append result
-- 
cgit v1.2.3


From 5761d6f0ab071bdae05ecef1966dd3e4cbec6eee Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Thu, 29 Jan 2015 21:19:59 +0100
Subject: Bing news engine corrections: XPath *never* returns None.

(I found the HTML report of coverage)
---
 searx/engines/bing_news.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 182bd36b5..e6adb2644 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -59,16 +59,14 @@ def response(resp):
         url = link.attrib.get('href')
         title = extract_text(link)
         contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]')
-        if contentXPath is not None:
-            content = escape(extract_text(contentXPath))
+        content = escape(extract_text(contentXPath))
 
         # parse publishedDate
         publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div'
                                           '//span[contains(@class,"sn_ST")]'
                                           '//span[contains(@class,"sn_tm")]')
 
-        if publishedDateXPath is not None:
-            publishedDate = escape(extract_text(publishedDateXPath))
+        publishedDate = escape(extract_text(publishedDateXPath))
 
         if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
             timeNumbers = re.findall(r'\d+', publishedDate)
-- 
cgit v1.2.3


From a3d444ab85dbb85dc3200c686ec3323dbb7008cb Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Fri, 30 Jan 2015 19:52:44 +0100
Subject: BTDigg's unit test

---
 searx/engines/btdigg.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py
index 973ede9ac..944250628 100644
--- a/searx/engines/btdigg.py
+++ b/searx/engines/btdigg.py
@@ -23,11 +23,6 @@ paging = True
 url = 'https://btdigg.org'
 search_url = url + '/search?q={search_term}&p={pageno}'
 
-# specific xpath variables
-magnet_xpath = './/a[@title="Torrent magnet link"]'
-torrent_xpath = './/a[@title="Download torrent file"]'
-content_xpath = './/span[@class="font11px lightgrey block"]'
-
 
 # do search-request
 def request(query, params):
@@ -52,8 +47,8 @@ def response(resp):
     # parse results
     for result in search_res:
         link = result.xpath('.//td[@class="torrent_name"]//a')[0]
-        href = urljoin(url, link.attrib['href'])
-        title = escape(extract_text(link.xpath('.//text()')))
+        href = urljoin(url, link.attrib.get('href'))
+        title = escape(extract_text(link))
         content = escape(extract_text(result.xpath('.//pre[@class="snippet"]')[0]))
         content = "<br />".join(content.split("\n"))
 
@@ -81,7 +76,7 @@ def response(resp):
                 filesize = int(filesize * 1024 * 1024 * 1024)
             elif filesize_multiplier == 'MB':
                 filesize = int(filesize * 1024 * 1024)
-            elif filesize_multiplier == 'kb':
+            elif filesize_multiplier == 'KB':
                 filesize = int(filesize * 1024)
         except:
             filesize = None
-- 
cgit v1.2.3


From 8ea749d6ec0b711c516f3dbdb34a1bd17ae7d945 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Fri, 30 Jan 2015 21:02:17 +0100
Subject: Kickass' unit test

---
 searx/engines/kickass.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py
index ac349283d..8b89e1f47 100644
--- a/searx/engines/kickass.py
+++ b/searx/engines/kickass.py
@@ -13,6 +13,7 @@ from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['videos', 'music', 'files']
@@ -56,9 +57,8 @@ def response(resp):
     for result in search_res[1:]:
         link = result.xpath('.//a[@class="cellMainLink"]')[0]
         href = urljoin(url, link.attrib['href'])
-        title = ' '.join(link.xpath('.//text()'))
-        content = escape(html.tostring(result.xpath(content_xpath)[0],
-                                       method="text"))
+        title = extract_text(link)
+        content = escape(extract_text(result.xpath(content_xpath)))
         seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]
         leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]
         filesize = result.xpath('.//td[contains(@class, "nobr")]/text()')[0]
@@ -88,7 +88,7 @@ def response(resp):
                 filesize = int(filesize * 1024 * 1024 * 1024)
             elif filesize_multiplier == 'MB':
                 filesize = int(filesize * 1024 * 1024)
-            elif filesize_multiplier == 'kb':
+            elif filesize_multiplier == 'KB':
                 filesize = int(filesize * 1024)
         except:
             filesize = None
-- 
cgit v1.2.3


From d5b8005ee10054b5260f57c1800ddebfa03c39cf Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sat, 31 Jan 2015 16:16:30 +0100
Subject: Google images' unit test

---
 searx/engines/google_images.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index cc62a4fd2..092ae6639 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -18,7 +18,7 @@ paging = True
 
 # search-url
 url = 'https://ajax.googleapis.com/'
-search_url = url + 'ajax/services/search/images?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}'  # noqa
+search_url = url + 'ajax/services/search/images?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}'
 
 
 # do search-request
@@ -45,14 +45,14 @@ def response(resp):
     for result in search_res['responseData']['results']:
         href = result['originalContextUrl']
         title = result['title']
-        if not result['url']:
+        if 'url' not in result:
             continue
         thumbnail_src = result['tbUrl']
 
         # append result
         results.append({'url': href,
                         'title': title,
-                        'content': '',
+                        'content': result['content'],
                         'thumbnail_src': thumbnail_src,
                         'img_src': unquote(result['url']),
                         'template': 'images.html'})
-- 
cgit v1.2.3


From b7dc1fb9d572d53d04c0120d96c76a20a418cc94 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sat, 31 Jan 2015 16:38:03 +0100
Subject: Google news' unit test

---
 searx/engines/google_news.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index eb114f9c9..3e4371b99 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -20,7 +20,7 @@ language_support = True
 
 # engine dependent config
 url = 'https://ajax.googleapis.com/'
-search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}'  # noqa
+search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={lang}'
 
 
 # do search-request
@@ -33,7 +33,7 @@ def request(query, params):
 
     params['url'] = search_url.format(offset=offset,
                                       query=urlencode({'q': query}),
-                                      language=language)
+                                      lang=language)
 
     return params
 
@@ -52,6 +52,8 @@ def response(resp):
     for result in search_res['responseData']['results']:
         # parse publishedDate
         publishedDate = parser.parse(result['publishedDate'])
+        if 'url' not in result:
+            continue
 
         # append result
         results.append({'url': result['unescapedUrl'],
-- 
cgit v1.2.3


From d20ddf9da147647710127385a3ee95ff273d4fea Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sat, 31 Jan 2015 17:29:22 +0100
Subject: Stackoverflow's unit test

---
 searx/engines/stackoverflow.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
index dcbb1890c..78dba9f68 100644
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@@ -12,6 +12,7 @@ from urlparse import urljoin
 from cgi import escape
 from urllib import urlencode
 from lxml import html
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['it']
@@ -24,8 +25,7 @@ search_url = url+'search?{query}&page={pageno}'
 # specific xpath variables
 results_xpath = '//div[contains(@class,"question-summary")]'
 link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
-title_xpath = './/text()'
-content_xpath = './/div[@class="excerpt"]//text()'
+content_xpath = './/div[@class="excerpt"]'
 
 
 # do search-request
@@ -46,8 +46,8 @@ def response(resp):
     for result in dom.xpath(results_xpath):
         link = result.xpath(link_xpath)[0]
         href = urljoin(url, link.attrib.get('href'))
-        title = escape(' '.join(link.xpath(title_xpath)))
-        content = escape(' '.join(result.xpath(content_xpath)))
+        title = escape(extract_text(link))
+        content = escape(extract_text(result.xpath(content_xpath)))
 
         # append result
         results.append({'url': href,
-- 
cgit v1.2.3


From 04fa31b7f4d45182fa4ced6d6e23fd9ec4960d2e Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sat, 31 Jan 2015 19:49:54 +0100
Subject: Vimeo's unit test

---
 searx/engines/vimeo.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py
index 39033c591..7577d12e1 100644
--- a/searx/engines/vimeo.py
+++ b/searx/engines/vimeo.py
@@ -59,8 +59,7 @@ def response(resp):
         url = base_url + videoid
         title = p.unescape(extract_text(result.xpath(title_xpath)))
         thumbnail = extract_text(result.xpath(content_xpath)[0])
-        publishedDate = parser.parse(extract_text(
-            result.xpath(publishedDate_xpath)[0]))
+        publishedDate = parser.parse(extract_text(result.xpath(publishedDate_xpath)[0]))
         embedded = embedded_url.format(videoid=videoid)
 
         # append result
-- 
cgit v1.2.3


From f18807955beceb86a99963feedee8355f31c481c Mon Sep 17 00:00:00 2001
From: Adam Tauber <asciimoo@gmail.com>
Date: Sat, 31 Jan 2015 22:05:13 +0100
Subject: [mod] python importable engine names

---
 searx/engines/500px.py        |  63 -------------------------
 searx/engines/flickr-noapi.py | 104 ------------------------------------------
 searx/engines/flickr_noapi.py | 104 ++++++++++++++++++++++++++++++++++++++++++
 searx/engines/www500px.py     |  63 +++++++++++++++++++++++++
 4 files changed, 167 insertions(+), 167 deletions(-)
 delete mode 100644 searx/engines/500px.py
 delete mode 100644 searx/engines/flickr-noapi.py
 create mode 100644 searx/engines/flickr_noapi.py
 create mode 100644 searx/engines/www500px.py

(limited to 'searx/engines')

diff --git a/searx/engines/500px.py b/searx/engines/500px.py
deleted file mode 100644
index f25678c24..000000000
--- a/searx/engines/500px.py
+++ /dev/null
@@ -1,63 +0,0 @@
-## 500px (Images)
-#
-# @website     https://500px.com
-# @provide-api yes (https://developers.500px.com/)
-#
-# @using-api   no
-# @results     HTML
-# @stable      no (HTML can change)
-# @parse       url, title, thumbnail, img_src, content
-#
-# @todo        rewrite to api
-
-
-from urllib import urlencode
-from urlparse import urljoin
-from lxml import html
-import re
-
-# engine dependent config
-categories = ['images']
-paging = True
-
-# search-url
-base_url = 'https://500px.com'
-search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
-
-
-# do search-request
-def request(query, params):
-    params['url'] = search_url.format(pageno=params['pageno'],
-                                      query=urlencode({'q': query}))
-
-    return params
-
-
-# get response from search-request
-def response(resp):
-    results = []
-
-    dom = html.fromstring(resp.text)
-    regex = re.compile('3\.jpg.*$')
-
-    # parse results
-    for result in dom.xpath('//div[@class="photo"]'):
-        link = result.xpath('.//a')[0]
-        url = urljoin(base_url, link.attrib.get('href'))
-        title = result.xpath('.//div[@class="title"]//text()')[0]
-        thumbnail_src = link.xpath('.//img')[0].attrib['src']
-        # To have a bigger thumbnail, uncomment the next line
-        #thumbnail_src = regex.sub('4.jpg', thumbnail_src)
-        content = result.xpath('.//div[@class="info"]//text()')[0]
-        img_src = regex.sub('2048.jpg', thumbnail_src)
-
-        # append result
-        results.append({'url': url,
-                        'title': title,
-                        'img_src': img_src,
-                        'content': content,
-                        'thumbnail_src': thumbnail_src,
-                        'template': 'images.html'})
-
-    # return results
-    return results
diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py
deleted file mode 100644
index 66c6f4027..000000000
--- a/searx/engines/flickr-noapi.py
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/usr/bin/env python
-
-#  Flickr (Images)
-#
-# @website     https://www.flickr.com
-# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
-#
-# @using-api   no
-# @results     HTML
-# @stable      no
-# @parse       url, title, thumbnail, img_src
-
-from urllib import urlencode
-from json import loads
-import re
-
-categories = ['images']
-
-url = 'https://secure.flickr.com/'
-search_url = url+'search/?{query}&page={page}'
-photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
-regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
-image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
-
-paging = True
-
-
-def build_flickr_url(user_id, photo_id):
-    return photo_url.format(userid=user_id, photoid=photo_id)
-
-
-def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'text': query}),
-                                      page=params['pageno'])
-    return params
-
-
-def response(resp):
-    results = []
-
-    matches = regex.search(resp.text)
-
-    if matches is None:
-        return results
-
-    match = matches.group(1)
-    search_results = loads(match)
-
-    if '_data' not in search_results:
-        return []
-
-    photos = search_results['_data']
-
-    for photo in photos:
-
-        # In paged configuration, the first pages' photos
-        # are represented by a None object
-        if photo is None:
-            continue
-
-        img_src = None
-        # From the biggest to the lowest format
-        for image_size in image_sizes:
-            if image_size in photo['sizes']:
-                img_src = photo['sizes'][image_size]['displayUrl']
-                break
-
-        if not img_src:
-            continue
-
-        if 'id' not in photo['owner']:
-            continue
-
-# For a bigger thumbnail, keep only the url_z, not the url_n
-        if 'n' in photo['sizes']:
-            thumbnail_src = photo['sizes']['n']['displayUrl']
-        elif 'z' in photo['sizes']:
-            thumbnail_src = photo['sizes']['z']['displayUrl']
-        else:
-            thumbnail_src = img_src
-
-        url = build_flickr_url(photo['owner']['id'], photo['id'])
-
-        title = photo.get('title', '')
-
-        content = '<span class="photo-author">' +\
-                  photo['owner']['username'] +\
-                  '</span><br />'
-
-        if 'description' in photo:
-            content = content +\
-                '<span class="description">' +\
-                photo['description'] +\
-                '</span>'
-
-        # append result
-        results.append({'url': url,
-                        'title': title,
-                        'img_src': img_src,
-                        'thumbnail_src': thumbnail_src,
-                        'content': content,
-                        'template': 'images.html'})
-
-    return results
diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py
new file mode 100644
index 000000000..66c6f4027
--- /dev/null
+++ b/searx/engines/flickr_noapi.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+
+#  Flickr (Images)
+#
+# @website     https://www.flickr.com
+# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
+#
+# @using-api   no
+# @results     HTML
+# @stable      no
+# @parse       url, title, thumbnail, img_src
+
+from urllib import urlencode
+from json import loads
+import re
+
+categories = ['images']
+
+url = 'https://secure.flickr.com/'
+search_url = url+'search/?{query}&page={page}'
+photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
+regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
+image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
+
+paging = True
+
+
+def build_flickr_url(user_id, photo_id):
+    return photo_url.format(userid=user_id, photoid=photo_id)
+
+
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'text': query}),
+                                      page=params['pageno'])
+    return params
+
+
+def response(resp):
+    results = []
+
+    matches = regex.search(resp.text)
+
+    if matches is None:
+        return results
+
+    match = matches.group(1)
+    search_results = loads(match)
+
+    if '_data' not in search_results:
+        return []
+
+    photos = search_results['_data']
+
+    for photo in photos:
+
+        # In paged configuration, the first pages' photos
+        # are represented by a None object
+        if photo is None:
+            continue
+
+        img_src = None
+        # From the biggest to the lowest format
+        for image_size in image_sizes:
+            if image_size in photo['sizes']:
+                img_src = photo['sizes'][image_size]['displayUrl']
+                break
+
+        if not img_src:
+            continue
+
+        if 'id' not in photo['owner']:
+            continue
+
+# For a bigger thumbnail, keep only the url_z, not the url_n
+        if 'n' in photo['sizes']:
+            thumbnail_src = photo['sizes']['n']['displayUrl']
+        elif 'z' in photo['sizes']:
+            thumbnail_src = photo['sizes']['z']['displayUrl']
+        else:
+            thumbnail_src = img_src
+
+        url = build_flickr_url(photo['owner']['id'], photo['id'])
+
+        title = photo.get('title', '')
+
+        content = '<span class="photo-author">' +\
+                  photo['owner']['username'] +\
+                  '</span><br />'
+
+        if 'description' in photo:
+            content = content +\
+                '<span class="description">' +\
+                photo['description'] +\
+                '</span>'
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'img_src': img_src,
+                        'thumbnail_src': thumbnail_src,
+                        'content': content,
+                        'template': 'images.html'})
+
+    return results
diff --git a/searx/engines/www500px.py b/searx/engines/www500px.py
new file mode 100644
index 000000000..f25678c24
--- /dev/null
+++ b/searx/engines/www500px.py
@@ -0,0 +1,63 @@
+## 500px (Images)
+#
+# @website     https://500px.com
+# @provide-api yes (https://developers.500px.com/)
+#
+# @using-api   no
+# @results     HTML
+# @stable      no (HTML can change)
+# @parse       url, title, thumbnail, img_src, content
+#
+# @todo        rewrite to api
+
+
+from urllib import urlencode
+from urlparse import urljoin
+from lxml import html
+import re
+
+# engine dependent config
+categories = ['images']
+paging = True  # presumably makes searx supply params['pageno'] - confirm
+
+# search-url
+base_url = 'https://500px.com'
+search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
+
+
+# do search-request
+def request(query, params):  # builds the 500px search URL for one page
+    params['url'] = search_url.format(pageno=params['pageno'],
+                                      query=urlencode({'q': query}))
+    # NOTE(review): search_url has a doubled "search?" - confirm intended
+    return params  # the same dict, now carrying 'url'
+
+
+# get response from search-request
+def response(resp):  # turns the 500px result page into image results
+    results = []
+
+    dom = html.fromstring(resp.text)
+    regex = re.compile('3\.jpg.*$')  # "3.jpg" plus the rest of the URL
+    # parse results: one entry per <div class="photo"> element; the [0]
+    # lookups raise IndexError if an expected child node is missing
+    for result in dom.xpath('//div[@class="photo"]'):
+        link = result.xpath('.//a')[0]
+        url = urljoin(base_url, link.attrib.get('href'))
+        title = result.xpath('.//div[@class="title"]//text()')[0]
+        thumbnail_src = link.xpath('.//img')[0].attrib['src']
+        # To have a bigger thumbnail, uncomment the next line
+        #thumbnail_src = regex.sub('4.jpg', thumbnail_src)
+        content = result.xpath('.//div[@class="info"]//text()')[0]
+        img_src = regex.sub('2048.jpg', thumbnail_src)
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'img_src': img_src,
+                        'content': content,
+                        'thumbnail_src': thumbnail_src,
+                        'template': 'images.html'})
+
+    # return results (an empty list when no photo div matched)
+    return results
-- 
cgit v1.2.3


From 8cf2ee57216b4dffc419e1762ff1fe4dfd30e227 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sun, 1 Feb 2015 13:43:10 +0100
Subject: 500px unit test

---
 searx/engines/www500px.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/www500px.py b/searx/engines/www500px.py
index f25678c24..99dba4abf 100644
--- a/searx/engines/www500px.py
+++ b/searx/engines/www500px.py
@@ -15,6 +15,7 @@ from urllib import urlencode
 from urlparse import urljoin
 from lxml import html
 import re
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['images']
@@ -22,7 +23,7 @@ paging = True
 
 # search-url
 base_url = 'https://500px.com'
-search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
+search_url = base_url + '/search?search?page={pageno}&type=photos&{query}'
 
 
 # do search-request
@@ -44,11 +45,11 @@ def response(resp):
     for result in dom.xpath('//div[@class="photo"]'):
         link = result.xpath('.//a')[0]
         url = urljoin(base_url, link.attrib.get('href'))
-        title = result.xpath('.//div[@class="title"]//text()')[0]
-        thumbnail_src = link.xpath('.//img')[0].attrib['src']
+        title = extract_text(result.xpath('.//div[@class="title"]'))
+        thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
         # To have a bigger thumbnail, uncomment the next line
-        #thumbnail_src = regex.sub('4.jpg', thumbnail_src)
-        content = result.xpath('.//div[@class="info"]//text()')[0]
+        # thumbnail_src = regex.sub('4.jpg', thumbnail_src)
+        content = extract_text(result.xpath('.//div[@class="info"]'))
         img_src = regex.sub('2048.jpg', thumbnail_src)
 
         # append result
-- 
cgit v1.2.3


From c6535dd65ebf110d00d633db1170f35cf60b8df0 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sun, 1 Feb 2015 14:31:04 +0100
Subject: Flickr Noapi unit test

---
 searx/engines/flickr_noapi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'searx/engines')

diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py
index 66c6f4027..73dff44c4 100644
--- a/searx/engines/flickr_noapi.py
+++ b/searx/engines/flickr_noapi.py
@@ -17,7 +17,7 @@ import re
 categories = ['images']
 
 url = 'https://secure.flickr.com/'
-search_url = url+'search/?{query}&page={page}'
+search_url = url + 'search/?{query}&page={page}'
 photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
 regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
 image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
-- 
cgit v1.2.3


From 5a16077455ef9e821a2b5f5f7e975be8a37ce83d Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sun, 1 Feb 2015 15:23:26 +0100
Subject: PirateBay unit test + reactivation in Settings

---
 searx/engines/piratebay.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py
index f6144faa2..207df276c 100644
--- a/searx/engines/piratebay.py
+++ b/searx/engines/piratebay.py
@@ -13,6 +13,7 @@ from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['videos', 'music', 'files']
@@ -29,7 +30,8 @@ search_types = {'files': '0',
 
 # specific xpath variables
 magnet_xpath = './/a[@title="Download this torrent using magnet"]'
-content_xpath = './/font[@class="detDesc"]//text()'
+torrent_xpath = './/a[@title="Download this torrent"]'
+content_xpath = './/font[@class="detDesc"]'
 
 
 # do search-request
@@ -59,8 +61,8 @@ def response(resp):
     for result in search_res[1:]:
         link = result.xpath('.//div[@class="detName"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
-        title = ' '.join(link.xpath('.//text()'))
-        content = escape(' '.join(result.xpath(content_xpath)))
+        title = extract_text(link)
+        content = escape(extract_text(result.xpath(content_xpath)))
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
 
         # convert seed to int if possible
@@ -76,6 +78,7 @@ def response(resp):
             leech = 0
 
         magnetlink = result.xpath(magnet_xpath)[0]
+        torrentfile = result.xpath(torrent_xpath)[0]
 
         # append result
         results.append({'url': href,
@@ -83,7 +86,8 @@ def response(resp):
                         'content': content,
                         'seed': seed,
                         'leech': leech,
-                        'magnetlink': magnetlink.attrib['href'],
+                        'magnetlink': magnetlink.attrib.get('href'),
+                        'torrentfile': torrentfile.attrib.get('href'),
                         'template': 'torrent.html'})
 
     # return results sorted by seeder
-- 
cgit v1.2.3