From bbe4442a863b92767c71812acbe2ab72afaaeb54 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Wed, 16 Oct 2019 15:27:37 +0200 Subject: [fix] update gigablast engine --- searx/engines/gigablast.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'searx/engines/gigablast.py') diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index a6aa5d718..6b0402233 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -35,8 +35,8 @@ search_string = 'search?{query}'\ '&ff={safesearch}'\ '&rxiec={rxieu}'\ '&ulse={ulse}'\ - '&rand={rxikd}' # current unix timestamp - + '&rand={rxikd}'\ + '&dbez={dbez}' # specific xpath variables results_xpath = '//response//result' url_xpath = './/url' @@ -70,7 +70,8 @@ def request(query, params): rxieu=random.randint(1000000000, 9999999999), ulse=random.randint(100000000, 999999999), lang=language, - safesearch=safesearch) + safesearch=safesearch, + dbez=random.randint(100000000, 999999999)) params['url'] = base_url + search_path -- cgit v1.2.3 From 85b37233458c21b775bf98568c0a5c9260aa14fe Mon Sep 17 00:00:00 2001 From: Dalf Date: Fri, 15 Nov 2019 09:31:37 +0100 Subject: [mod] speed optimization compile XPath only once avoid redundant call to urlparse get_locale(webapp.py): avoid useless call to request.accept_languages.best_match --- searx/engines/gigablast.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'searx/engines/gigablast.py') diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 6b0402233..a84f3f69d 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -15,6 +15,7 @@ from json import loads from time import time from lxml.html import fromstring from searx.url_utils import urlencode +from searx.utils import eval_xpath # engine dependent config categories = ['general'] @@ -99,9 +100,9 @@ def response(resp): def _fetch_supported_languages(resp): supported_languages = [] dom = fromstring(resp.text) - links = dom.xpath('//span[@id="menu2"]/a') + links = eval_xpath(dom, '//span[@id="menu2"]/a') for link in links: - href = link.xpath('./@href')[0].split('lang%3A') + href = eval_xpath(link, './@href')[0].split('lang%3A') if len(href) == 2: code = href[1].split('_') if len(code) == 2: -- cgit v1.2.3 From e5305f886c0d7d5fb3f34d1fbd7f9a545c14c284 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sat, 21 Dec 2019 20:51:30 +0100 Subject: [fix] fetch extra search param of gigablast - fixes #1293 --- searx/engines/gigablast.py | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'searx/engines/gigablast.py') diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index a84f3f69d..2a5067bc3 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -14,6 +14,7 @@ import random from json import loads from time import time from lxml.html import fromstring +from searx.poolrequests import get from searx.url_utils import urlencode from searx.utils import eval_xpath @@ -31,13 +32,9 @@ search_string = 'search?{query}'\ '&c=main'\ '&s={offset}'\ '&format=json'\ - '&qh=0'\ - '&qlang={lang}'\ + '&langcountry={lang}'\ '&ff={safesearch}'\ - '&rxiec={rxieu}'\ - '&ulse={ulse}'\ - '&rand={rxikd}'\ - '&dbez={dbez}' + '&rand={rxikd}' # specific xpath variables results_xpath = '//response//result' url_xpath = './/url' @@ -46,9 +43,26 @@ content_xpath = './/sum' supported_languages_url = 'https://gigablast.com/search?&rxikd=1' +extra_param = '' # gigablast requires a random extra parameter +# which can be extracted from the source code of the search page + + +def parse_extra_param(text): + global extra_param + param_lines = [x for x in text.splitlines() if x.startswith('var url=') or x.startswith('url=url+')] + extra_param = '' + for l in param_lines: + extra_param += l.split("'")[1] + extra_param = extra_param.split('&')[-1] + + +def init(engine_settings=None): + parse_extra_param(get('http://gigablast.com/search?c=main&qlangcountry=en-us&q=south&s=10').text) + # do search-request def request(query, params): + print("EXTRAPARAM:", extra_param) offset = (params['pageno'] - 1) * number_of_results if params['language'] == 'all': @@ -67,14 +81,11 @@ def request(query, params): search_path = search_string.format(query=urlencode({'q': query}), offset=offset, number_of_results=number_of_results, - rxikd=int(time() * 1000), - rxieu=random.randint(1000000000, 9999999999), - ulse=random.randint(100000000, 999999999), lang=language, - safesearch=safesearch, - dbez=random.randint(100000000, 999999999)) + rxikd=int(time() * 1000), + safesearch=safesearch) - params['url'] = base_url + search_path + params['url'] = base_url + search_path + '&' + extra_param return params @@ -84,7 +95,11 @@ def response(resp): results = [] # parse results - response_json = loads(resp.text) + try: + response_json = loads(resp.text) + except: + parse_extra_param(resp.text) + return results for result in response_json['results']: # append result -- cgit v1.2.3 From fc457569f757dd10ff55393f472ea9ed49a42374 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sat, 21 Dec 2019 21:13:43 +0100 Subject: [fix] pep8 --- searx/engines/gigablast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'searx/engines/gigablast.py') diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 2a5067bc3..5af593e36 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -43,7 +43,7 @@ content_xpath = './/sum' supported_languages_url = 'https://gigablast.com/search?&rxikd=1' -extra_param = '' # gigablast requires a random extra parameter +extra_param = '' # gigablast requires a random extra parameter # which can be extracted from the source code of the search page -- cgit v1.2.3 From 34ad3d6b34017523a9502f86b92c17fe389918eb Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sat, 21 Dec 2019 21:25:50 +0100 Subject: [enh] display error message if gigablast extra param expired --- searx/engines/gigablast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'searx/engines/gigablast.py') diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 5af593e36..2bb29a9fe 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -99,7 +99,7 @@ def response(resp): response_json = loads(resp.text) except: parse_extra_param(resp.text) - return results + raise Exception('extra param expired, please reload') for result in response_json['results']: # append result -- cgit v1.2.3