diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2015-02-12 10:52:55 +0100 |
|---|---|---|
| committer | Adam Tauber <asciimoo@gmail.com> | 2015-02-12 10:52:55 +0100 |
| commit | f6db77d81ea87d99462b4c3cc40a8a27e0264724 (patch) | |
| tree | b26fb71a62082aeec81c7bb1bb3d7447d006aed3 /searx/engines/startpage.py | |
| parent | 516105c570a920dadeb87b34ee5ee434ad5cb16f (diff) | |
| parent | f96154b7c454a3b02bf688f248b4471c2020c28f (diff) | |
Merge pull request #210 from Cqoicebordel/unit-tests
unit tests
Diffstat (limited to 'searx/engines/startpage.py')
| -rw-r--r-- | searx/engines/startpage.py | 13 |
1 files changed, 5 insertions, 8 deletions
```diff
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index d60ecd978..9d5b4befe 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -13,6 +13,7 @@
 from lxml import html
 from cgi import escape
 import re
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['general']
@@ -45,8 +46,7 @@ def request(query, params):
 
     # set language if specified
     if params['language'] != 'all':
-        params['data']['with_language'] = ('lang_' +
-                                           params['language'].split('_')[0])
+        params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
 
     return params
 
@@ -64,18 +64,15 @@ def response(resp):
             continue
         link = links[0]
         url = link.attrib.get('href')
-        try:
-            title = escape(link.text_content())
-        except UnicodeDecodeError:
-            continue
 
         # block google-ad url's
         if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
             continue
 
+        title = escape(extract_text(link))
+
         if result.xpath('./p[@class="desc"]'):
-            content = escape(result.xpath('./p[@class="desc"]')[0]
-                             .text_content())
+            content = escape(extract_text(result.xpath('./p[@class="desc"]')))
         else:
             content = ''
 
```