summaryrefslogtreecommitdiff
path: root/searx/engines/startpage.py
diff options
context:
space:
mode:
author Adam Tauber <asciimoo@gmail.com> 2014-01-05 05:49:39 -0800
committer Adam Tauber <asciimoo@gmail.com> 2014-01-05 05:49:39 -0800
commit 5c9f6d51746796ef63cd08d27c31e931272e083f (patch)
tree 1ff55038c8df267d8a3a1d4f33fbd4101546bf5d /searx/engines/startpage.py
parent 556b9dd2b0a17528a1c6f2f581e7a0173bc7d4cf (diff)
parent 3dc3fc77709d6f56cd42f748ae356e2915fa2286 (diff)
Merge pull request #16 from dalf/master
bug fixes
Diffstat (limited to 'searx/engines/startpage.py')
-rw-r--r-- searx/engines/startpage.py 11
1 files changed, 5 insertions, 6 deletions
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 061c8158d..87c091e2d 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -19,14 +19,13 @@ def response(resp):
global base_url
results = []
dom = html.fromstring(resp.content)
- for result in dom.xpath('//div[@class="result"]'):
+ # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
+ # not ads: div[@class="result"] elements are the direct children of div[@id="results"]
+ for result in dom.xpath('//div[@id="results"]/div[@class="result"]'):
link = result.xpath('.//h3/a')[0]
url = link.attrib.get('href')
parsed_url = urlparse(url)
- # TODO better google link detection
- if parsed_url.netloc.find('www.google.com') >= 0:
- continue
- title = ' '.join(link.xpath('.//text()'))
- content = escape(' '.join(result.xpath('.//p[@class="desc"]//text()')))
+ title = link.text_content()
+ content = result.xpath('./p[@class="desc"]')[0].text_content()
results.append({'url': url, 'title': title, 'content': content})
return results