diff options
Diffstat (limited to 'searx/engines/startpage.py')
| -rw-r--r-- | searx/engines/startpage.py | 43 |
1 files changed, 26 insertions, 17 deletions
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 3e067597e..76567396f 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -15,6 +15,8 @@ from dateutil import parser from datetime import datetime, timedelta import re from searx.engines.xpath import extract_text +from searx.languages import language_codes +from searx.utils import eval_xpath # engine dependent config categories = ['general'] @@ -22,7 +24,7 @@ categories = ['general'] # (probably the parameter qid), require # storing of qid's between mulitble search-calls -# paging = False +paging = True language_support = True # search-url @@ -32,21 +34,32 @@ search_url = base_url + 'do/search' # specific xpath variables # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] # not ads: div[@class="result"] are the direct childs of div[@id="results"] -results_xpath = '//div[@class="result"]' -link_xpath = './/h3/a' +results_xpath = '//div[@class="w-gl__result"]' +link_xpath = './/a[@class="w-gl__result-title"]' +content_xpath = './/p[@class="w-gl__description"]' # do search-request def request(query, params): - offset = (params['pageno'] - 1) * 10 params['url'] = search_url params['method'] = 'POST' - params['data'] = {'query': query, - 'startat': offset} - - # set language - params['data']['with_language'] = ('lang_' + params['language'].split('-')[0]) + params['data'] = { + 'query': query, + 'page': params['pageno'], + 'cat': 'web', + 'cmd': 'process_search', + 'engine0': 'v1all', + } + + # set language if specified + if params['language'] != 'all': + language = 'english' + for lc, _, _, lang in language_codes: + if lc == params['language']: + language = lang + params['data']['language'] = language + params['data']['lui'] = language return params @@ -58,8 +71,8 @@ def response(resp): dom = html.fromstring(resp.text) # parse results - for result in dom.xpath(results_xpath): - links = result.xpath(link_xpath) + for result in eval_xpath(dom, results_xpath): + links = eval_xpath(result, link_xpath) if not links: continue link = links[0] @@ -73,14 +86,10 @@ def response(resp): if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url): continue - # block ixquick search url's - if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url): - continue - title = extract_text(link) - if result.xpath('./p[@class="desc clk"]'): - content = extract_text(result.xpath('./p[@class="desc clk"]')) + if eval_xpath(result, content_xpath): + content = extract_text(eval_xpath(result, content_xpath)) else: content = '' |