From 0ad272c5cb81a9c69008aa86a1f29cd642ddf4ff Mon Sep 17 00:00:00 2001
From: Adam Tauber <adam.tauber@balabit.com>
Date: Wed, 30 Sep 2015 16:42:03 +0200
Subject: [fix] content escaping - closes #441

TODO: check whether the other engines need the same content escaping
---
 searx/engines/google.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'searx/engines/google.py')

diff --git a/searx/engines/google.py b/searx/engines/google.py
index 0e78a9e2c..c8299d04b 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -9,6 +9,7 @@
 # @parse       url, title, content, suggestion
 
 import re
+from cgi import escape
 from urllib import urlencode
 from urlparse import urlparse, parse_qsl
 from lxml import html
@@ -167,7 +168,7 @@ def parse_url(url_string, google_hostname):
 def extract_text_from_dom(result, xpath):
     r = result.xpath(xpath)
     if len(r) > 0:
-        return extract_text(r[0])
+        return escape(extract_text(r[0]))
     return None
 
 
@@ -273,7 +274,7 @@ def response(resp):
     # parse suggestion
     for suggestion in dom.xpath(suggestion_xpath):
         # append suggestion
-        results.append({'suggestion': extract_text(suggestion)})
+        results.append({'suggestion': escape(extract_text(suggestion))})
 
     # return results
     return results
-- 
cgit v1.2.3


From 5d49c15f791c3b9297bb890b28643e6c50406f35 Mon Sep 17 00:00:00 2001
From: Adam Tauber <asciimoo@gmail.com>
Date: Thu, 29 Oct 2015 12:47:12 +0100
Subject: [fix] google engine - ignore new useless result type

---
 searx/engines/google.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'searx/engines/google.py')

diff --git a/searx/engines/google.py b/searx/engines/google.py
index c8299d04b..67e6ebb87 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -12,9 +12,12 @@ import re
 from cgi import escape
 from urllib import urlencode
 from urlparse import urlparse, parse_qsl
-from lxml import html
+from lxml import html, etree
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text, extract_url
+from searx.search import logger
+
+logger = logger.getChild('google engine')
 
 
 # engine dependent config
@@ -225,8 +228,8 @@ def response(resp):
 
     # parse results
     for result in dom.xpath(results_xpath):
-        title = extract_text(result.xpath(title_xpath)[0])
         try:
+            title = extract_text(result.xpath(title_xpath)[0])
             url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname)
             parsed_url = urlparse(url, google_hostname)
 
@@ -269,6 +272,7 @@ def response(resp):
                                 'content': content
                                 })
         except:
+            logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True))
             continue
 
     # parse suggestion
-- 
cgit v1.2.3