diff options
| author | Cqoicebordel <Cqoicebordel@users.noreply.github.com> | 2015-02-06 17:31:10 +0100 |
|---|---|---|
| committer | Cqoicebordel <Cqoicebordel@users.noreply.github.com> | 2015-02-06 17:31:10 +0100 |
| commit | f1c10f4fe45f34c12994b9bbc4aca133202fd7ca (patch) | |
| tree | 155243afe2b7ed1d98160d61f664101214ec47eb /searx/engines | |
| parent | 3a4d6045c1da950d13d1d14192247389c5932631 (diff) | |
Startpage's unit test
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/startpage.py | 13 |
1 files changed, 5 insertions, 8 deletions
```diff
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index d60ecd978..9d5b4befe 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -13,6 +13,7 @@
 from lxml import html
 from cgi import escape
 import re
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['general']
@@ -45,8 +46,7 @@ def request(query, params):
 
     # set language if specified
     if params['language'] != 'all':
-        params['data']['with_language'] = ('lang_' +
-                                           params['language'].split('_')[0])
+        params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
 
     return params
 
@@ -64,18 +64,15 @@ def response(resp):
             continue
         link = links[0]
         url = link.attrib.get('href')
-        try:
-            title = escape(link.text_content())
-        except UnicodeDecodeError:
-            continue
 
         # block google-ad url's
         if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
             continue
 
+        title = escape(extract_text(link))
+
         if result.xpath('./p[@class="desc"]'):
-            content = escape(result.xpath('./p[@class="desc"]')[0]
-                             .text_content())
+            content = escape(extract_text(result.xpath('./p[@class="desc"]')))
         else:
             content = ''
 
```