summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2019-12-22 10:12:33 +0100
committerMarkus Heiser <markus.heiser@darmarit.de>2019-12-22 10:12:33 +0100
commitaea4667fbc0ab33cd1981bc66d1cf1c9a9768023 (patch)
treeed714471cd102b65c9a81418f9e458cd1e2c9bd8 /searx
parentd1154202bcd27a7cf3a1bed524ee6b24955df8af (diff)
parent34ad3d6b34017523a9502f86b92c17fe389918eb (diff)
Merge branch 'master' of https://github.com/asciimoo/searx into makefile-doc
Diffstat (limited to 'searx')
-rw-r--r--searx/engines/gigablast.py41
-rw-r--r--searx/engines/openstreetmap.py2
-rw-r--r--searx/engines/qwant.py1
-rw-r--r--searx/engines/wikipedia.py3
4 files changed, 33 insertions, 14 deletions
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index a84f3f69d..2bb29a9fe 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -14,6 +14,7 @@ import random
from json import loads
from time import time
from lxml.html import fromstring
+from searx.poolrequests import get
from searx.url_utils import urlencode
from searx.utils import eval_xpath
@@ -31,13 +32,9 @@ search_string = 'search?{query}'\
'&c=main'\
'&s={offset}'\
'&format=json'\
- '&qh=0'\
- '&qlang={lang}'\
+ '&langcountry={lang}'\
'&ff={safesearch}'\
- '&rxiec={rxieu}'\
- '&ulse={ulse}'\
- '&rand={rxikd}'\
- '&dbez={dbez}'
+ '&rand={rxikd}'
# specific xpath variables
results_xpath = '//response//result'
url_xpath = './/url'
@@ -46,9 +43,26 @@ content_xpath = './/sum'
supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
+extra_param = '' # gigablast requires a random extra parameter
+# which can be extracted from the source code of the search page
+
+
+def parse_extra_param(text):
+ global extra_param
+ param_lines = [x for x in text.splitlines() if x.startswith('var url=') or x.startswith('url=url+')]
+ extra_param = ''
+ for l in param_lines:
+ extra_param += l.split("'")[1]
+ extra_param = extra_param.split('&')[-1]
+
+
+def init(engine_settings=None):
+ parse_extra_param(get('http://gigablast.com/search?c=main&qlangcountry=en-us&q=south&s=10').text)
+
# do search-request
def request(query, params):
+ print("EXTRAPARAM:", extra_param)
offset = (params['pageno'] - 1) * number_of_results
if params['language'] == 'all':
@@ -67,14 +81,11 @@ def request(query, params):
search_path = search_string.format(query=urlencode({'q': query}),
offset=offset,
number_of_results=number_of_results,
- rxikd=int(time() * 1000),
- rxieu=random.randint(1000000000, 9999999999),
- ulse=random.randint(100000000, 999999999),
lang=language,
- safesearch=safesearch,
- dbez=random.randint(100000000, 999999999))
+ rxikd=int(time() * 1000),
+ safesearch=safesearch)
- params['url'] = base_url + search_path
+ params['url'] = base_url + search_path + '&' + extra_param
return params
@@ -84,7 +95,11 @@ def response(resp):
results = []
# parse results
- response_json = loads(resp.text)
+ try:
+ response_json = loads(resp.text)
+ except:
+ parse_extra_param(resp.text)
+ raise Exception('extra param expired, please reload')
for result in response_json['results']:
# append result
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index 733ba6203..cec10a3c7 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -24,7 +24,7 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
# do search-request
def request(query, params):
- params['url'] = base_url + search_string.format(query=query)
+ params['url'] = base_url + search_string.format(query=query.decode('utf-8'))
return params
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index de12955c6..54e9dafad 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -50,6 +50,7 @@ def request(query, params):
language = match_language(params['language'], supported_languages, language_aliases)
params['url'] += '&locale=' + language.replace('-', '_').lower()
+ params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0'
return params
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index 4dae735d1..690da72fe 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -79,6 +79,9 @@ def response(resp):
# wikipedia article's unique id
# first valid id is assumed to be the requested article
+ if 'pages' not in search_result['query']:
+ return results
+
for article_id in search_result['query']['pages']:
page = search_result['query']['pages'][article_id]
if int(article_id) > 0: