diff options
| author | pw3t <romain@berthor.fr> | 2014-01-23 22:11:36 +0100 |
|---|---|---|
| committer | pw3t <romain@berthor.fr> | 2014-01-23 22:11:36 +0100 |
| commit | 132681b3aaf5b330d9d19624038b51fe2ebfd8d5 (patch) | |
| tree | 393114f41b487eea4b71dd4073903726310a1257 /searx/engines/startpage.py | |
| parent | d6b017efb5b51623a02c85690c7335cfc6674092 (diff) | |
| parent | 59eeeaab87951fd6fa3302ec240db98902a20b2c (diff) | |
Merge branch 'master' of https://github.com/asciimoo/searx
Diffstat (limited to 'searx/engines/startpage.py')
| -rw-r--r-- | searx/engines/startpage.py | 6 |
1 file changed, 2 insertions, 4 deletions
```diff
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 87c091e2d..d6d7cf44d 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -1,11 +1,10 @@
 from urllib import urlencode
 from lxml import html
-from urlparse import urlparse
-from cgi import escape
 
 base_url = 'https://startpage.com/'
 search_url = base_url+'do/search'
 
+
 def request(query, params):
     global search_url
     query = urlencode({'q': query})[2:]
@@ -20,11 +19,10 @@ def response(resp):
     results = []
     dom = html.fromstring(resp.content)
     # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
-    # not ads : div[@class="result"] are the direct childs of div[@id="results"]
+    # not ads: div[@class="result"] are the direct childs of div[@id="results"]
     for result in dom.xpath('//div[@id="results"]/div[@class="result"]'):
         link = result.xpath('.//h3/a')[0]
         url = link.attrib.get('href')
-        parsed_url = urlparse(url)
         title = link.text_content()
         content = result.xpath('./p[@class="desc"]')[0].text_content()
         results.append({'url': url, 'title': title, 'content': content})
```