author     Alexandre Flament <alex@al-f.net>  2021-12-27 19:11:01 +0100
committer  GitHub <noreply@github.com>        2021-12-27 19:11:01 +0100
commit     c6922ae7c5e53f695d5f5f8704b10b4e2815cda4 (patch)
tree       9c0456ad1a1d9d375311ccd8c9bd3eafd0779114 /searx/search
parent     54bce130f9074c3d63009237b014c727a1443cc5 (diff)
parent     d84226bf63757b1d4245ab26e9c081daf42389aa (diff)
Merge pull request #619 from dalf/apply-black
Apply black
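
This merge reformats everything under searx/search with the black code
formatter; every hunk below is a pure re-style with no behavior change.
The recurring pattern is black's house style: backslash line continuations
become parenthesized expressions, long calls are exploded to one argument
per line, and a "magic" trailing comma is added so the exploded layout is
stable. The diff also keeps single-quoted strings, which suggests black
runs here with string normalization disabled. A minimal sketch of the
transformation, mirroring the _get_interval hunk in checker/background.py
below (illustrative only, not part of the commit):

# Hand-wrapped condition using backslash continuations, as the code
# looked before this commit:
def _valid_interval(every):
    return isinstance(every, (tuple, list)) \
        and len(every) == 2 \
        and isinstance(every[0], int) \
        and isinstance(every[1], int)

# The same condition as black rewrites it -- parentheses instead of
# backslashes, one clause per line:
def _valid_interval_black(every):
    return (
        isinstance(every, (tuple, list))
        and len(every) == 2
        and isinstance(every[0], int)
        and isinstance(every[1], int)
    )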
Diffstat (limited to 'searx/search')
-rw-r--r--  searx/search/__init__.py                       |  7
-rw-r--r--  searx/search/checker/__main__.py               | 27
-rw-r--r--  searx/search/checker/background.py             | 17
-rw-r--r--  searx/search/checker/impl.py                   | 77
-rw-r--r--  searx/search/models.py                         | 80
-rw-r--r--  searx/search/processors/__init__.py            |  1
-rw-r--r--  searx/search/processors/abstract.py            | 13
-rw-r--r--  searx/search/processors/offline.py             |  2
-rw-r--r--  searx/search/processors/online.py              | 47
-rw-r--r--  searx/search/processors/online_currency.py     |  4
-rw-r--r--  searx/search/processors/online_dictionary.py   |  6
11 files changed, 164 insertions, 117 deletions
diff --git a/searx/search/__init__.py b/searx/search/__init__.py
index 0a3c5b3ac..d66f3362d 100644
--- a/searx/search/__init__.py
+++ b/searx/search/__init__.py
@@ -123,8 +123,11 @@ class Search:
# Max & user query: From user query except if above max
actual_timeout = min(query_timeout, max_request_timeout)
- logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})"
- .format(actual_timeout, default_timeout, query_timeout, max_request_timeout))
+ logger.debug(
+ "actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})".format(
+ actual_timeout, default_timeout, query_timeout, max_request_timeout
+ )
+ )
return requests, actual_timeout
diff --git a/searx/search/checker/__main__.py b/searx/search/checker/__main__.py
index 4ce4ca76b..1311288f3 100644
--- a/searx/search/checker/__main__.py
+++ b/searx/search/checker/__main__.py
@@ -37,12 +37,12 @@ else:
stdout = io.TextIOWrapper(
# pylint: disable=consider-using-with
open(sys.stdout.fileno(), 'wb', 0),
- write_through=True
+ write_through=True,
)
stderr = io.TextIOWrapper(
# pylint: disable=consider-using-with
- open(sys.stderr.fileno(), 'wb', 0)
- , write_through=True
+ open(sys.stderr.fileno(), 'wb', 0),
+ write_through=True,
)
@@ -91,12 +91,21 @@ def run(engine_name_list, verbose):
# call by setup.py
def main():
parser = argparse.ArgumentParser(description='Check searx engines.')
- parser.add_argument('engine_name_list', metavar='engine name', type=str, nargs='*',
- help='engines name or shortcut list. Empty for all engines.')
- parser.add_argument('--verbose', '-v',
- action='store_true', dest='verbose',
- help='Display details about the test results',
- default=False)
+ parser.add_argument(
+ 'engine_name_list',
+ metavar='engine name',
+ type=str,
+ nargs='*',
+ help='engines name or shortcut list. Empty for all engines.',
+ )
+ parser.add_argument(
+ '--verbose',
+ '-v',
+ action='store_true',
+ dest='verbose',
+ help='Display details about the test results',
+ default=False,
+ )
args = parser.parse_args()
run(args.engine_name_list, args.verbose)
diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py
index d9f11a71c..ff005dd91 100644
--- a/searx/search/checker/background.py
+++ b/searx/search/checker/background.py
@@ -23,10 +23,12 @@ running = threading.Lock()
def _get_interval(every, error_msg):
if isinstance(every, int):
every = (every, every)
- if not isinstance(every, (tuple, list))\
- or len(every) != 2\
- or not isinstance(every[0], int)\
- or not isinstance(every[1], int):
+ if (
+ not isinstance(every, (tuple, list))
+ or len(every) != 2
+ or not isinstance(every[0], int)
+ or not isinstance(every[1], int)
+ ):
raise SearxSettingsException(error_msg, None)
return every
@@ -50,14 +52,11 @@ def _set_result(result, include_timestamp=True):
def run():
- if not running.acquire(blocking=False): # pylint: disable=consider-using-with
+ if not running.acquire(blocking=False): # pylint: disable=consider-using-with
return
try:
logger.info('Starting checker')
- result = {
- 'status': 'ok',
- 'engines': {}
- }
+ result = {'status': 'ok', 'engines': {}}
for name, processor in PROCESSORS.items():
logger.debug('Checking %s engine', name)
checker = Checker(processor)
diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py
index 626aa8ce0..c0dd966d0 100644
--- a/searx/search/checker/impl.py
+++ b/searx/search/checker/impl.py
@@ -25,6 +25,7 @@ from searx.metrics import counter_inc
logger = logger.getChild('searx.search.checker')
HTML_TAGS = [
+ # fmt: off
'embed', 'iframe', 'object', 'param', 'picture', 'source', 'svg', 'math', 'canvas', 'noscript', 'script',
'del', 'ins', 'area', 'audio', 'img', 'map', 'track', 'video', 'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite',
'code', 'data', 'dfn', 'em', 'i', 'kdb', 'mark', 'q', 'rb', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'small',
@@ -32,6 +33,7 @@ HTML_TAGS = [
'figcaption', 'figure', 'hr', 'li', 'ol', 'p', 'pre', 'ul', 'button', 'datalist', 'fieldset', 'form', 'input',
'label', 'legend', 'meter', 'optgroup', 'option', 'output', 'progress', 'select', 'textarea', 'applet',
'frame', 'frameset'
+ # fmt: on
]
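
The # fmt: off / # fmt: on pair added around HTML_TAGS above is black's
escape hatch: black leaves everything between the two comments untouched,
so this hand-packed list is not reflowed. A minimal sketch of the pragma
(illustrative, not from the commit):

# fmt: off
IDENTITY = [
    1, 0, 0,
    0, 1, 0,
    0, 0, 1,
]
# fmt: on
# Without the pragma, black would collapse IDENTITY onto a single line;
# with it, the hand-aligned 3x3 layout survives reformatting.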
@@ -72,17 +74,23 @@ def _download_and_check_if_image(image_url: str) -> bool:
try:
# use "image_proxy" (avoid HTTP/2)
network.set_context_network_name('image_proxy')
- stream = network.stream('GET', image_url, timeout=10.0, allow_redirects=True, headers={
- 'User-Agent': gen_useragent(),
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
- 'Accept-Language': 'en-US;q=0.5,en;q=0.3',
- 'Accept-Encoding': 'gzip, deflate, br',
- 'DNT': '1',
- 'Connection': 'keep-alive',
- 'Upgrade-Insecure-Requests': '1',
- 'Sec-GPC': '1',
- 'Cache-Control': 'max-age=0'
- })
+ stream = network.stream(
+ 'GET',
+ image_url,
+ timeout=10.0,
+ allow_redirects=True,
+ headers={
+ 'User-Agent': gen_useragent(),
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+ 'Accept-Language': 'en-US;q=0.5,en;q=0.3',
+ 'Accept-Encoding': 'gzip, deflate, br',
+ 'DNT': '1',
+ 'Connection': 'keep-alive',
+ 'Upgrade-Insecure-Requests': '1',
+ 'Sec-GPC': '1',
+ 'Cache-Control': 'max-age=0',
+ },
+ )
r = next(stream)
r.close()
if r.status_code == 200:
@@ -102,8 +110,7 @@ def _download_and_check_if_image(image_url: str) -> bool:
def _is_url_image(image_url) -> bool:
- """Normalize image_url
- """
+ """Normalize image_url"""
if not isinstance(image_url, str):
return False
@@ -129,8 +136,9 @@ def _search_query_to_dict(search_query: SearchQuery) -> typing.Dict[str, typing.
}
-def _search_query_diff(sq1: SearchQuery, sq2: SearchQuery)\
- -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]:
+def _search_query_diff(
+ sq1: SearchQuery, sq2: SearchQuery
+) -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]:
param1 = _search_query_to_dict(sq1)
param2 = _search_query_to_dict(sq2)
common = {}
@@ -180,11 +188,9 @@ class ResultContainerTests:
__slots__ = 'test_name', 'search_query', 'result_container', 'languages', 'stop_test', 'test_results'
- def __init__(self,
- test_results: TestResults,
- test_name: str,
- search_query: SearchQuery,
- result_container: ResultContainer):
+ def __init__(
+ self, test_results: TestResults, test_name: str, search_query: SearchQuery, result_container: ResultContainer
+ ):
self.test_name = test_name
self.search_query = search_query
self.result_container = result_container
@@ -324,10 +330,9 @@ class CheckerTests:
__slots__ = 'test_results', 'test_name', 'result_container_tests_list'
- def __init__(self,
- test_results: TestResults,
- test_name: str,
- result_container_tests_list: typing.List[ResultContainerTests]):
+ def __init__(
+ self, test_results: TestResults, test_name: str, result_container_tests_list: typing.List[ResultContainerTests]
+ ):
self.test_results = test_results
self.test_name = test_name
self.result_container_tests_list = result_container_tests_list
@@ -340,14 +345,17 @@ class CheckerTests:
for i, urls_i in enumerate(urls_list):
for j, urls_j in enumerate(urls_list):
if i < j and urls_i == urls_j:
- common, diff = _search_query_diff(self.result_container_tests_list[i].search_query,
- self.result_container_tests_list[j].search_query)
+ common, diff = _search_query_diff(
+ self.result_container_tests_list[i].search_query,
+ self.result_container_tests_list[j].search_query,
+ )
common_str = ' '.join(['{}={!r}'.format(k, v) for k, v in common.items()])
- diff1_str = ', ' .join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()])
- diff2_str = ', ' .join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()])
- self.test_results.add_error(self.test_name,
- 'results are identitical for {} and {} ({})'
- .format(diff1_str, diff2_str, common_str))
+ diff1_str = ', '.join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()])
+ diff2_str = ', '.join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()])
+ self.test_results.add_error(
+ self.test_name,
+ 'results are identitical for {} and {} ({})'.format(diff1_str, diff2_str, common_str),
+ )
class Checker:
@@ -393,9 +401,10 @@ class Checker:
elif isinstance(method, types.FunctionType):
method(*args)
else:
- self.test_results.add_error(obj.test_name,
- 'method {!r} ({}) not found for {}'
- .format(method, method.__class__.__name__, obj.__class__.__name__))
+ self.test_results.add_error(
+ obj.test_name,
+ 'method {!r} ({}) not found for {}'.format(method, method.__class__.__name__, obj.__class__.__name__),
+ )
def call_tests(self, obj, test_descriptions):
for test_description in test_descriptions:
diff --git a/searx/search/models.py b/searx/search/models.py
index e48cb3611..ff5897966 100644
--- a/searx/search/models.py
+++ b/searx/search/models.py
@@ -25,19 +25,30 @@ class EngineRef:
class SearchQuery:
"""container for all the search parameters (query, language, etc...)"""
- __slots__ = 'query', 'engineref_list', 'lang', 'safesearch', 'pageno', 'time_range',\
- 'timeout_limit', 'external_bang', 'engine_data'
-
- def __init__(self,
- query: str,
- engineref_list: typing.List[EngineRef],
- lang: str='all',
- safesearch: int=0,
- pageno: int=1,
- time_range: typing.Optional[str]=None,
- timeout_limit: typing.Optional[float]=None,
- external_bang: typing.Optional[str]=None,
- engine_data: typing.Optional[typing.Dict[str, str]]=None):
+ __slots__ = (
+ 'query',
+ 'engineref_list',
+ 'lang',
+ 'safesearch',
+ 'pageno',
+ 'time_range',
+ 'timeout_limit',
+ 'external_bang',
+ 'engine_data',
+ )
+
+ def __init__(
+ self,
+ query: str,
+ engineref_list: typing.List[EngineRef],
+ lang: str = 'all',
+ safesearch: int = 0,
+ pageno: int = 1,
+ time_range: typing.Optional[str] = None,
+ timeout_limit: typing.Optional[float] = None,
+ external_bang: typing.Optional[str] = None,
+ engine_data: typing.Optional[typing.Dict[str, str]] = None,
+ ):
self.query = query
self.engineref_list = engineref_list
self.lang = lang
@@ -53,20 +64,39 @@ class SearchQuery:
return list(set(map(lambda engineref: engineref.category, self.engineref_list)))
def __repr__(self):
- return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\
- format(self.query, self.engineref_list, self.lang, self.safesearch,
- self.pageno, self.time_range, self.timeout_limit, self.external_bang)
+ return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".format(
+ self.query,
+ self.engineref_list,
+ self.lang,
+ self.safesearch,
+ self.pageno,
+ self.time_range,
+ self.timeout_limit,
+ self.external_bang,
+ )
def __eq__(self, other):
- return self.query == other.query\
- and self.engineref_list == other.engineref_list\
- and self.lang == other.lang\
- and self.safesearch == other.safesearch\
- and self.pageno == other.pageno\
- and self.time_range == other.time_range\
- and self.timeout_limit == other.timeout_limit\
+ return (
+ self.query == other.query
+ and self.engineref_list == other.engineref_list
+ and self.lang == other.lang
+ and self.safesearch == other.safesearch
+ and self.pageno == other.pageno
+ and self.time_range == other.time_range
+ and self.timeout_limit == other.timeout_limit
and self.external_bang == other.external_bang
+ )
def __hash__(self):
- return hash((self.query, tuple(self.engineref_list), self.lang, self.safesearch, self.pageno, self.time_range,
- self.timeout_limit, self.external_bang))
+ return hash(
+ (
+ self.query,
+ tuple(self.engineref_list),
+ self.lang,
+ self.safesearch,
+ self.pageno,
+ self.time_range,
+ self.timeout_limit,
+ self.external_bang,
+ )
+ )
diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py
index 8108f8dfa..966b990ec 100644
--- a/searx/search/processors/__init__.py
+++ b/searx/search/processors/__init__.py
@@ -29,6 +29,7 @@ logger = logger.getChild('search.processors')
PROCESSORS = {}
"""Cache request processores, stored by *engine-name* (:py:func:`initialize`)"""
+
def get_processor_class(engine_type):
"""Return processor class according to the ``engine_type``"""
for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]:
diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py
index b5fa063fd..732b55d52 100644
--- a/searx/search/processors/abstract.py
+++ b/searx/search/processors/abstract.py
@@ -19,6 +19,7 @@ from searx.utils import get_engine_from_settings
logger = logger.getChild('searx.search.processor')
SUSPENDED_STATUS = {}
+
class SuspendedStatus:
"""Class to handle suspend state."""
@@ -39,8 +40,10 @@ class SuspendedStatus:
# update continuous_errors / suspend_end_time
self.continuous_errors += 1
if suspended_time is None:
- suspended_time = min(settings['search']['max_ban_time_on_fail'],
- self.continuous_errors * settings['search']['ban_time_on_fail'])
+ suspended_time = min(
+ settings['search']['max_ban_time_on_fail'],
+ self.continuous_errors * settings['search']['ban_time_on_fail'],
+ )
self.suspend_end_time = default_timer() + suspended_time
self.suspend_reason = suspend_reason
logger.debug('Suspend for %i seconds', suspended_time)
@@ -127,9 +130,9 @@ class EngineProcessor(ABC):
def extend_container_if_suspended(self, result_container):
if self.suspended_status.is_suspended:
- result_container.add_unresponsive_engine(self.engine_name,
- self.suspended_status.suspend_reason,
- suspended=True)
+ result_container.add_unresponsive_engine(
+ self.engine_name, self.suspended_status.suspend_reason, suspended=True
+ )
return True
return False
diff --git a/searx/search/processors/offline.py b/searx/search/processors/offline.py
index ec7a4a36e..13f077cb1 100644
--- a/searx/search/processors/offline.py
+++ b/searx/search/processors/offline.py
@@ -23,6 +23,6 @@ class OfflineProcessor(EngineProcessor):
except ValueError as e:
# do not record the error
self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e))
- except Exception as e: # pylint: disable=broad-except
+ except Exception as e: # pylint: disable=broad-except
self.handle_exception(result_container, e)
self.logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py
index c4ee58e11..8d8275df1 100644
--- a/searx/search/processors/online.py
+++ b/searx/search/processors/online.py
@@ -23,6 +23,7 @@ from .abstract import EngineProcessor
def default_request_params():
"""Default request parameters for ``online`` engines."""
return {
+ # fmt: off
'method': 'GET',
'headers': {},
'data': {},
@@ -30,6 +31,7 @@ def default_request_params():
'cookies': {},
'verify': True,
'auth': None
+ # fmt: on
}
@@ -64,10 +66,7 @@ class OnlineProcessor(EngineProcessor):
# create dictionary which contain all
# informations about the request
request_args = dict(
- headers=params['headers'],
- cookies=params['cookies'],
- verify=params['verify'],
- auth=params['auth']
+ headers=params['headers'], cookies=params['cookies'], verify=params['verify'], auth=params['auth']
)
# max_redirects
@@ -103,10 +102,12 @@ class OnlineProcessor(EngineProcessor):
status_code = str(response.status_code or '')
reason = response.reason_phrase or ''
hostname = response.url.host
- count_error(self.engine_name,
- '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
- (status_code, reason, hostname),
- secondary=True)
+ count_error(
+ self.engine_name,
+ '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
+ (status_code, reason, hostname),
+ secondary=True,
+ )
return response
@@ -145,22 +146,16 @@ class OnlineProcessor(EngineProcessor):
# requests timeout (connect or read)
self.handle_exception(result_container, e, suspend=True)
self.logger.error(
- "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}"
- .format(
- default_timer() - start_time,
- timeout_limit,
- e.__class__.__name__
+ "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}".format(
+ default_timer() - start_time, timeout_limit, e.__class__.__name__
)
)
except (httpx.HTTPError, httpx.StreamError) as e:
# other requests exception
self.handle_exception(result_container, e, suspend=True)
self.logger.exception(
- "requests exception (search duration : {0} s, timeout: {1} s) : {2}"
- .format(
- default_timer() - start_time,
- timeout_limit,
- e
+ "requests exception (search duration : {0} s, timeout: {1} s) : {2}".format(
+ default_timer() - start_time, timeout_limit, e
)
)
except SearxEngineCaptchaException as e:
@@ -186,10 +181,9 @@ class OnlineProcessor(EngineProcessor):
if getattr(self.engine, 'paging', False):
tests['paging'] = {
- 'matrix': {'query': 'time',
- 'pageno': (1, 2, 3)},
+ 'matrix': {'query': 'time', 'pageno': (1, 2, 3)},
'result_container': ['not_empty'],
- 'test': ['unique_results']
+ 'test': ['unique_results'],
}
if 'general' in self.engine.categories:
# avoid documentation about HTML tags (<time> and <input type="time">)
@@ -197,10 +191,9 @@ class OnlineProcessor(EngineProcessor):
if getattr(self.engine, 'time_range', False):
tests['time_range'] = {
- 'matrix': {'query': 'news',
- 'time_range': (None, 'day')},
+ 'matrix': {'query': 'news', 'time_range': (None, 'day')},
'result_container': ['not_empty'],
- 'test': ['unique_results']
+ 'test': ['unique_results'],
}
if getattr(self.engine, 'supported_languages', []):
@@ -214,10 +207,6 @@ class OnlineProcessor(EngineProcessor):
}
if getattr(self.engine, 'safesearch', False):
- tests['safesearch'] = {
- 'matrix': {'query': 'porn',
- 'safesearch': (0, 2)},
- 'test': ['unique_results']
- }
+ tests['safesearch'] = {'matrix': {'query': 'porn', 'safesearch': (0, 2)}, 'test': ['unique_results']}
return tests
diff --git a/searx/search/processors/online_currency.py b/searx/search/processors/online_currency.py
index 4e5c57264..6bd891b1d 100644
--- a/searx/search/processors/online_currency.py
+++ b/searx/search/processors/online_currency.py
@@ -12,11 +12,13 @@ from .online import OnlineProcessor
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
+
def normalize_name(name):
name = name.lower().replace('-', ' ').rstrip('s')
name = re.sub(' +', ' ', name)
return unicodedata.normalize('NFKD', name).lower()
+
def name_to_iso4217(name):
name = normalize_name(name)
currency = CURRENCIES['names'].get(name, [name])
@@ -24,9 +26,11 @@ def name_to_iso4217(name):
return currency
return currency[0]
+
def iso4217_to_name(iso4217, language):
return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217)
+
class OnlineCurrencyProcessor(OnlineProcessor):
"""Processor class used by ``online_currency`` engines."""
diff --git a/searx/search/processors/online_dictionary.py b/searx/search/processors/online_dictionary.py
index 72941d57a..3e7f6ed59 100644
--- a/searx/search/processors/online_dictionary.py
+++ b/searx/search/processors/online_dictionary.py
@@ -11,6 +11,7 @@ from .online import OnlineProcessor
parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
+
class OnlineDictionaryProcessor(OnlineProcessor):
"""Processor class used by ``online_dictionary`` engines."""
@@ -44,10 +45,9 @@ class OnlineDictionaryProcessor(OnlineProcessor):
if getattr(self.engine, 'paging', False):
tests['translation_paging'] = {
- 'matrix': {'query': 'en-es house',
- 'pageno': (1, 2, 3)},
+ 'matrix': {'query': 'en-es house', 'pageno': (1, 2, 3)},
'result_container': ['not_empty', ('one_title_contains', 'house')],
- 'test': ['unique_results']
+ 'test': ['unique_results'],
}
else:
tests['translation'] = {