summary | refs | log | tree | commit | diff
path: root/searx/engines/google.py
diff options
context:
space:
mode:
author: Kang-min Liu <gugod@gugod.org>, 2015-11-14 00:05:44 +0100
committer: Kang-min Liu <gugod@gugod.org>, 2015-11-14 00:05:44 +0100
commit: ac8759cd3ff99024864fd04d7c4bef5c3a00b971 (patch)
tree: 30c3f8b61504532df926bbffedcc8df80a8e926e /searx/engines/google.py
parent: c7c6c35ccd7373d2107b70b92badb9b70d31905f (diff)
parent: e98aef6fc4954681e58d774203d522f0ae478004 (diff)
Merge remote-tracking branch 'origin/master'
Diffstat (limited to 'searx/engines/google.py')
-rw-r--r-- searx/engines/google.py 13
1 files changed, 9 insertions, 4 deletions
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 0e78a9e2c..67e6ebb87 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -9,11 +9,15 @@
# @parse url, title, content, suggestion
import re
+from cgi import escape
from urllib import urlencode
from urlparse import urlparse, parse_qsl
-from lxml import html
+from lxml import html, etree
from searx.poolrequests import get
from searx.engines.xpath import extract_text, extract_url
+from searx.search import logger
+
+logger = logger.getChild('google engine')
# engine dependent config
@@ -167,7 +171,7 @@ def parse_url(url_string, google_hostname):
def extract_text_from_dom(result, xpath):
r = result.xpath(xpath)
if len(r) > 0:
- return extract_text(r[0])
+ return escape(extract_text(r[0]))
return None
@@ -224,8 +228,8 @@ def response(resp):
# parse results
for result in dom.xpath(results_xpath):
- title = extract_text(result.xpath(title_xpath)[0])
try:
+ title = extract_text(result.xpath(title_xpath)[0])
url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname)
parsed_url = urlparse(url, google_hostname)
@@ -268,12 +272,13 @@ def response(resp):
'content': content
})
except:
+ logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True))
continue
# parse suggestion
for suggestion in dom.xpath(suggestion_xpath):
# append suggestion
- results.append({'suggestion': extract_text(suggestion)})
+ results.append({'suggestion': escape(extract_text(suggestion))})
# return results
return results