diff options
| author | Markus Heiser <markus.heiser@darmarIT.de> | 2019-12-24 15:42:05 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-12-24 15:42:05 +0100 |
| commit | 38dad2e8e3b100711afe3ae942aaed5111841cd6 (patch) | |
| tree | 51f1a35121155010411aa5970ef06aff80adf741 /searx/engines | |
| parent | 0ae86cd1685d244c83a6080a7816365096ab06f8 (diff) | |
| parent | a395fb4a8d030d5b8fde496d2ae722bc034d3e32 (diff) | |
Merge branch 'master' into ne/fix-infinite_scroll-with-vim_bindings
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/gigablast.py | 41 |
| -rw-r--r-- | searx/engines/openstreetmap.py | 2 |
| -rw-r--r-- | searx/engines/qwant.py | 1 |
| -rw-r--r-- | searx/engines/wikipedia.py | 9 |
4 files changed, 37 insertions, 16 deletions
```diff
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index a84f3f69d..2bb29a9fe 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -14,6 +14,7 @@ import random
 from json import loads
 from time import time
 from lxml.html import fromstring
+from searx.poolrequests import get
 from searx.url_utils import urlencode
 from searx.utils import eval_xpath
 
@@ -31,13 +32,9 @@ search_string = 'search?{query}'\
     '&c=main'\
     '&s={offset}'\
     '&format=json'\
-    '&qh=0'\
-    '&qlang={lang}'\
+    '&langcountry={lang}'\
     '&ff={safesearch}'\
-    '&rxiec={rxieu}'\
-    '&ulse={ulse}'\
-    '&rand={rxikd}'\
-    '&dbez={dbez}'
+    '&rand={rxikd}'
 # specific xpath variables
 results_xpath = '//response//result'
 url_xpath = './/url'
@@ -46,9 +43,26 @@ content_xpath = './/sum'
 
 supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
 
+extra_param = ''  # gigablast requires a random extra parameter
+# which can be extracted from the source code of the search page
+
+
+def parse_extra_param(text):
+    global extra_param
+    param_lines = [x for x in text.splitlines() if x.startswith('var url=') or x.startswith('url=url+')]
+    extra_param = ''
+    for l in param_lines:
+        extra_param += l.split("'")[1]
+    extra_param = extra_param.split('&')[-1]
+
+
+def init(engine_settings=None):
+    parse_extra_param(get('http://gigablast.com/search?c=main&qlangcountry=en-us&q=south&s=10').text)
+
 
 # do search-request
 def request(query, params):
+    print("EXTRAPARAM:", extra_param)
     offset = (params['pageno'] - 1) * number_of_results
 
     if params['language'] == 'all':
@@ -67,14 +81,11 @@ def request(query, params):
     search_path = search_string.format(query=urlencode({'q': query}),
                                        offset=offset,
                                        number_of_results=number_of_results,
-                                       rxikd=int(time() * 1000),
-                                       rxieu=random.randint(1000000000, 9999999999),
-                                       ulse=random.randint(100000000, 999999999),
                                        lang=language,
-                                       safesearch=safesearch,
-                                       dbez=random.randint(100000000, 999999999))
+                                       rxikd=int(time() * 1000),
+                                       safesearch=safesearch)
 
-    params['url'] = base_url + search_path
+    params['url'] = base_url + search_path + '&' + extra_param
 
     return params
 
@@ -84,7 +95,11 @@ def response(resp):
     results = []
 
     # parse results
-    response_json = loads(resp.text)
+    try:
+        response_json = loads(resp.text)
+    except:
+        parse_extra_param(resp.text)
+        raise Exception('extra param expired, please reload')
 
     for result in response_json['results']:
         # append result
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index 733ba6203..cec10a3c7 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -24,7 +24,7 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
 
 # do search-request
 def request(query, params):
-    params['url'] = base_url + search_string.format(query=query)
+    params['url'] = base_url + search_string.format(query=query.decode('utf-8'))
 
     return params
 
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index de12955c6..54e9dafad 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -50,6 +50,7 @@ def request(query, params):
         language = match_language(params['language'], supported_languages, language_aliases)
         params['url'] += '&locale=' + language.replace('-', '_').lower()
 
+    params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0'
     return params
 
 
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index 4dae735d1..a216ba886 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -21,7 +21,8 @@ search_url = base_url + u'w/api.php?'\
     'action=query'\
     '&format=json'\
     '&{query}'\
-    '&prop=extracts|pageimages'\
+    '&prop=extracts|pageimages|pageprops'\
+    '&ppprop=disambiguation'\
     '&exintro'\
     '&explaintext'\
     '&pithumbsize=300'\
@@ -79,12 +80,15 @@ def response(resp):
 
     # wikipedia article's unique id
     # first valid id is assumed to be the requested article
+    if 'pages' not in search_result['query']:
+        return results
+
     for article_id in search_result['query']['pages']:
         page = search_result['query']['pages'][article_id]
         if int(article_id) > 0:
             break
 
-    if int(article_id) < 0:
+    if int(article_id) < 0 or 'disambiguation' in page.get('pageprops', {}):
         return []
 
     title = page.get('title')
@@ -96,6 +100,7 @@ def response(resp):
     extract = page.get('extract')
 
     summary = extract_first_paragraph(extract, title, image)
+    summary = summary.replace('() ', '')
 
     # link to wikipedia article
     wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \
```