diff options
| author | Kang-min Liu <gugod@gugod.org> | 2015-11-14 00:05:44 +0100 |
|---|---|---|
| committer | Kang-min Liu <gugod@gugod.org> | 2015-11-14 00:05:44 +0100 |
| commit | ac8759cd3ff99024864fd04d7c4bef5c3a00b971 (patch) | |
| tree | 30c3f8b61504532df926bbffedcc8df80a8e926e /searx/engines/google.py | |
| parent | c7c6c35ccd7373d2107b70b92badb9b70d31905f (diff) | |
| parent | e98aef6fc4954681e58d774203d522f0ae478004 (diff) | |
Merge remote-tracking branch 'origin/master'
Diffstat (limited to 'searx/engines/google.py')
| -rw-r--r-- | searx/engines/google.py | 13 |
1 file changed, 9 insertions, 4 deletions
```diff
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 0e78a9e2c..67e6ebb87 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -9,11 +9,15 @@
 # @parse url, title, content, suggestion
 
 import re
+from cgi import escape
 from urllib import urlencode
 from urlparse import urlparse, parse_qsl
-from lxml import html
+from lxml import html, etree
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text, extract_url
+from searx.search import logger
+
+logger = logger.getChild('google engine')
 
 
 # engine dependent config
@@ -167,7 +171,7 @@ def parse_url(url_string, google_hostname):
 def extract_text_from_dom(result, xpath):
     r = result.xpath(xpath)
     if len(r) > 0:
-        return extract_text(r[0])
+        return escape(extract_text(r[0]))
     return None
 
 
@@ -224,8 +228,8 @@ def response(resp):
 
     # parse results
     for result in dom.xpath(results_xpath):
-        title = extract_text(result.xpath(title_xpath)[0])
         try:
+            title = extract_text(result.xpath(title_xpath)[0])
             url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname)
             parsed_url = urlparse(url, google_hostname)
 
@@ -268,12 +272,13 @@ def response(resp):
                                 'content': content
                                 })
         except:
+            logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True))
             continue
 
     # parse suggestion
     for suggestion in dom.xpath(suggestion_xpath):
         # append suggestion
-        results.append({'suggestion': extract_text(suggestion)})
+        results.append({'suggestion': escape(extract_text(suggestion))})
 
     # return results
     return results
```