summaryrefslogtreecommitdiff
path: root/searx/engines/startpage.py
diff options
context:
space:
mode:
Diffstat (limited to 'searx/engines/startpage.py')
-rw-r--r--searx/engines/startpage.py43
1 files changed, 26 insertions, 17 deletions
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 3e067597e..76567396f 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -15,6 +15,8 @@ from dateutil import parser
from datetime import datetime, timedelta
import re
from searx.engines.xpath import extract_text
+from searx.languages import language_codes
+from searx.utils import eval_xpath
# engine dependent config
categories = ['general']
@@ -22,7 +24,7 @@ categories = ['general']
# (probably the parameter qid), require
# storing of qid's between mulitble search-calls
-# paging = False
+paging = True
language_support = True
# search-url
@@ -32,21 +34,32 @@ search_url = base_url + 'do/search'
# specific xpath variables
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
# not ads: div[@class="result"] are the direct childs of div[@id="results"]
-results_xpath = '//div[@class="result"]'
-link_xpath = './/h3/a'
+results_xpath = '//div[@class="w-gl__result"]'
+link_xpath = './/a[@class="w-gl__result-title"]'
+content_xpath = './/p[@class="w-gl__description"]'
# do search-request
def request(query, params):
- offset = (params['pageno'] - 1) * 10
params['url'] = search_url
params['method'] = 'POST'
- params['data'] = {'query': query,
- 'startat': offset}
-
- # set language
- params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
+ params['data'] = {
+ 'query': query,
+ 'page': params['pageno'],
+ 'cat': 'web',
+ 'cmd': 'process_search',
+ 'engine0': 'v1all',
+ }
+
+ # set language if specified
+ if params['language'] != 'all':
+ language = 'english'
+ for lc, _, _, lang in language_codes:
+ if lc == params['language']:
+ language = lang
+ params['data']['language'] = language
+ params['data']['lui'] = language
return params
@@ -58,8 +71,8 @@ def response(resp):
dom = html.fromstring(resp.text)
# parse results
- for result in dom.xpath(results_xpath):
- links = result.xpath(link_xpath)
+ for result in eval_xpath(dom, results_xpath):
+ links = eval_xpath(result, link_xpath)
if not links:
continue
link = links[0]
@@ -73,14 +86,10 @@ def response(resp):
if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
continue
- # block ixquick search url's
- if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
- continue
-
title = extract_text(link)
- if result.xpath('./p[@class="desc clk"]'):
- content = extract_text(result.xpath('./p[@class="desc clk"]'))
+ if eval_xpath(result, content_xpath):
+ content = extract_text(eval_xpath(result, content_xpath))
else:
content = ''