From 0ad272c5cb81a9c69008aa86a1f29cd642ddf4ff Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Wed, 30 Sep 2015 16:42:03 +0200 Subject: [fix] content escaping - closes #441 TODO check other engines too --- searx/engines/google.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'searx/engines/google.py') diff --git a/searx/engines/google.py b/searx/engines/google.py index 0e78a9e2c..c8299d04b 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -9,6 +9,7 @@ # @parse url, title, content, suggestion import re +from cgi import escape from urllib import urlencode from urlparse import urlparse, parse_qsl from lxml import html @@ -167,7 +168,7 @@ def parse_url(url_string, google_hostname): def extract_text_from_dom(result, xpath): r = result.xpath(xpath) if len(r) > 0: - return extract_text(r[0]) + return escape(extract_text(r[0])) return None @@ -273,7 +274,7 @@ def response(resp): # parse suggestion for suggestion in dom.xpath(suggestion_xpath): # append suggestion - results.append({'suggestion': extract_text(suggestion)}) + results.append({'suggestion': escape(extract_text(suggestion))}) # return results return results -- cgit v1.2.3 From 5d49c15f791c3b9297bb890b28643e6c50406f35 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Thu, 29 Oct 2015 12:47:12 +0100 Subject: [fix] google engine - ignore new useless result type --- searx/engines/google.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'searx/engines/google.py') diff --git a/searx/engines/google.py b/searx/engines/google.py index c8299d04b..67e6ebb87 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -12,9 +12,12 @@ import re from cgi import escape from urllib import urlencode from urlparse import urlparse, parse_qsl -from lxml import html +from lxml import html, etree from searx.poolrequests import get from searx.engines.xpath import extract_text, extract_url +from searx.search import logger + +logger = logger.getChild('google engine') # engine dependent config @@ -225,8 +228,8 @@ def response(resp): # parse results for result in dom.xpath(results_xpath): - title = extract_text(result.xpath(title_xpath)[0]) try: + title = extract_text(result.xpath(title_xpath)[0]) url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname) parsed_url = urlparse(url, google_hostname) @@ -269,6 +272,7 @@ def response(resp): 'content': content }) except: + logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True)) continue # parse suggestion -- cgit v1.2.3