From f1c10f4fe45f34c12994b9bbc4aca133202fd7ca Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Fri, 6 Feb 2015 17:31:10 +0100 Subject: Startpage's unit test --- searx/engines/startpage.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'searx/engines') diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index d60ecd978..9d5b4befe 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -13,6 +13,7 @@ from lxml import html from cgi import escape import re +from searx.engines.xpath import extract_text # engine dependent config categories = ['general'] @@ -45,8 +46,7 @@ def request(query, params): # set language if specified if params['language'] != 'all': - params['data']['with_language'] = ('lang_' + - params['language'].split('_')[0]) + params['data']['with_language'] = ('lang_' + params['language'].split('_')[0]) return params @@ -64,18 +64,15 @@ def response(resp): continue link = links[0] url = link.attrib.get('href') - try: - title = escape(link.text_content()) - except UnicodeDecodeError: - continue # block google-ad url's if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url): continue + title = escape(extract_text(link)) + if result.xpath('./p[@class="desc"]'): - content = escape(result.xpath('./p[@class="desc"]')[0] - .text_content()) + content = escape(extract_text(result.xpath('./p[@class="desc"]'))) else: content = '' -- cgit v1.2.3