diff options
| author | Noémi Ványi <kvch@users.noreply.github.com> | 2019-01-15 22:11:26 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-01-15 22:11:26 +0100 |
| commit | 25367cfba13de9bec580d7d495c4e70b8ea8f76d (patch) | |
| tree | bc4f87b06f658d13d8e4f6a553b5ab90ffe00ed8 /searx/engines/startpage.py | |
| parent | 4977ea5405954107d071235093f01d0246c31fb2 (diff) | |
| parent | 383e3cc554a3704cd8b076aa686f32b8ce78f681 (diff) | |
Merge branch 'master' into setup-no-tests
Diffstat (limited to 'searx/engines/startpage.py')
| -rw-r--r-- | searx/engines/startpage.py | 16 |
1 file changed, 7 insertions, 9 deletions
```diff
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 3e067597e..6638f3d83 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -32,8 +32,9 @@ search_url = base_url + 'do/search'
 # specific xpath variables
 # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
 # not ads: div[@class="result"] are the direct childs of div[@id="results"]
-results_xpath = '//div[@class="result"]'
+results_xpath = '//li[contains(@class, "search-result") and contains(@class, "search-item")]'
 link_xpath = './/h3/a'
+content_xpath = './p[@class="search-item__body"]'
 
 
 # do search-request
@@ -45,8 +46,9 @@ def request(query, params):
     params['data'] = {'query': query,
                       'startat': offset}
 
-    # set language
-    params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
+    # set language if specified
+    if params['language'] != 'all':
+        params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
 
     return params
 
@@ -73,14 +75,10 @@ def response(resp):
         if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
             continue
 
-        # block ixquick search url's
-        if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
-            continue
-
         title = extract_text(link)
 
-        if result.xpath('./p[@class="desc clk"]'):
-            content = extract_text(result.xpath('./p[@class="desc clk"]'))
+        if result.xpath(content_xpath):
+            content = extract_text(result.xpath(content_xpath))
         else:
             content = ''
 
```