From b2492c94f422e18cb8954ec983134f4fa5c7cdc0 Mon Sep 17 00:00:00 2001
From: asciimoo <asciimoo@gmail.com>
Date: Mon, 20 Jan 2014 02:31:20 +0100
Subject: [fix] pep8/flake8 compatibility

---
 searx/engines/xpath.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

(limited to 'searx/engines/xpath.py')

diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 5e2c3c38b..a7d24e2a2 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -1,21 +1,24 @@
 from lxml import html
 from urllib import urlencode, unquote
 from urlparse import urlparse, urljoin
-from cgi import escape
 from lxml.etree import _ElementStringResult
 
-search_url    = None
-url_xpath     = None
+search_url = None
+url_xpath = None
 content_xpath = None
-title_xpath   = None
+title_xpath = None
 suggestion_xpath = ''
 results_xpath = ''
 
+
 '''
 if xpath_results is list, extract the text from each result and concat the list
-if xpath_results is a xml element, extract all the text node from it ( text_content() method from lxml )
+if xpath_results is an xml element, extract all the text nodes from it
+   ( text_content() method from lxml )
 if xpath_results is a string element, then it's already done
 '''
+
+
 def extract_text(xpath_results):
     if type(xpath_results) == list:
         # it's list of result : concat everything using recursive call
@@ -60,7 +63,8 @@ def normalize_url(url):
         url += '/'
 
     # FIXME : hack for yahoo
-    if parsed_url.hostname == 'search.yahoo.com' and parsed_url.path.startswith('/r'):
+    if parsed_url.hostname == 'search.yahoo.com'\
+       and parsed_url.path.startswith('/r'):
         p = parsed_url.path
         mark = p.find('/**')
         if mark != -1:
@@ -82,15 +86,15 @@ def response(resp):
     if results_xpath:
         for result in dom.xpath(results_xpath):
             url = extract_url(result.xpath(url_xpath))
-            title = extract_text(result.xpath(title_xpath)[0 ])
+            title = extract_text(result.xpath(title_xpath)[0])
             content = extract_text(result.xpath(content_xpath)[0])
             results.append({'url': url, 'title': title, 'content': content})
     else:
         for url, title, content in zip(
-            map(extract_url, dom.xpath(url_xpath)), \
-            map(extract_text, dom.xpath(title_xpath)), \
-            map(extract_text, dom.xpath(content_xpath)), \
-                ):
+            map(extract_url, dom.xpath(url_xpath)),
+            map(extract_text, dom.xpath(title_xpath)),
+            map(extract_text, dom.xpath(content_xpath))
+        ):
             results.append({'url': url, 'title': title, 'content': content})
 
     if not suggestion_xpath:
-- 
cgit v1.2.3


From 59eeeaab87951fd6fa3302ec240db98902a20b2c Mon Sep 17 00:00:00 2001
From: asciimoo <asciimoo@gmail.com>
Date: Thu, 23 Jan 2014 11:08:08 +0100
Subject: [fix] html tag removal

---
 searx/engines/xpath.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'searx/engines/xpath.py')

diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index a7d24e2a2..8960b5f21 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -2,6 +2,7 @@ from lxml import html
 from urllib import urlencode, unquote
 from urlparse import urlparse, urljoin
 from lxml.etree import _ElementStringResult
+from searx.utils import html_to_text
 
 search_url = None
 url_xpath = None
@@ -33,7 +34,7 @@ def extract_text(xpath_results):
         return ''.join(xpath_results)
     else:
         # it's a element
-        return xpath_results.text_content()
+        return html_to_text(xpath_results.text_content())
 
 
 def extract_url(xpath_results):
-- 
cgit v1.2.3