diff options
| author | Alexandre Flament <alex@al-f.net> | 2021-12-27 19:11:01 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-12-27 19:11:01 +0100 |
| commit | c6922ae7c5e53f695d5f5f8704b10b4e2815cda4 (patch) | |
| tree | 9c0456ad1a1d9d375311ccd8c9bd3eafd0779114 /searx/search | |
| parent | 54bce130f9074c3d63009237b014c727a1443cc5 (diff) | |
| parent | d84226bf63757b1d4245ab26e9c081daf42389aa (diff) | |
Merge pull request #619 from dalf/apply-black
Apply black
Diffstat (limited to 'searx/search')
| -rw-r--r-- | searx/search/__init__.py | 7 | ||||
| -rw-r--r-- | searx/search/checker/__main__.py | 27 | ||||
| -rw-r--r-- | searx/search/checker/background.py | 17 | ||||
| -rw-r--r-- | searx/search/checker/impl.py | 77 | ||||
| -rw-r--r-- | searx/search/models.py | 80 | ||||
| -rw-r--r-- | searx/search/processors/__init__.py | 1 | ||||
| -rw-r--r-- | searx/search/processors/abstract.py | 13 | ||||
| -rw-r--r-- | searx/search/processors/offline.py | 2 | ||||
| -rw-r--r-- | searx/search/processors/online.py | 47 | ||||
| -rw-r--r-- | searx/search/processors/online_currency.py | 4 | ||||
| -rw-r--r-- | searx/search/processors/online_dictionary.py | 6 |
11 files changed, 164 insertions, 117 deletions
diff --git a/searx/search/__init__.py b/searx/search/__init__.py index 0a3c5b3ac..d66f3362d 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -123,8 +123,11 @@ class Search: # Max & user query: From user query except if above max actual_timeout = min(query_timeout, max_request_timeout) - logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})" - .format(actual_timeout, default_timeout, query_timeout, max_request_timeout)) + logger.debug( + "actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})".format( + actual_timeout, default_timeout, query_timeout, max_request_timeout + ) + ) return requests, actual_timeout diff --git a/searx/search/checker/__main__.py b/searx/search/checker/__main__.py index 4ce4ca76b..1311288f3 100644 --- a/searx/search/checker/__main__.py +++ b/searx/search/checker/__main__.py @@ -37,12 +37,12 @@ else: stdout = io.TextIOWrapper( # pylint: disable=consider-using-with open(sys.stdout.fileno(), 'wb', 0), - write_through=True + write_through=True, ) stderr = io.TextIOWrapper( # pylint: disable=consider-using-with - open(sys.stderr.fileno(), 'wb', 0) - , write_through=True + open(sys.stderr.fileno(), 'wb', 0), + write_through=True, ) @@ -91,12 +91,21 @@ def run(engine_name_list, verbose): # call by setup.py def main(): parser = argparse.ArgumentParser(description='Check searx engines.') - parser.add_argument('engine_name_list', metavar='engine name', type=str, nargs='*', - help='engines name or shortcut list. Empty for all engines.') - parser.add_argument('--verbose', '-v', - action='store_true', dest='verbose', - help='Display details about the test results', - default=False) + parser.add_argument( + 'engine_name_list', + metavar='engine name', + type=str, + nargs='*', + help='engines name or shortcut list. Empty for all engines.', + ) + parser.add_argument( + '--verbose', + '-v', + action='store_true', + dest='verbose', + help='Display details about the test results', + default=False, + ) args = parser.parse_args() run(args.engine_name_list, args.verbose) diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py index d9f11a71c..ff005dd91 100644 --- a/searx/search/checker/background.py +++ b/searx/search/checker/background.py @@ -23,10 +23,12 @@ running = threading.Lock() def _get_interval(every, error_msg): if isinstance(every, int): every = (every, every) - if not isinstance(every, (tuple, list))\ - or len(every) != 2\ - or not isinstance(every[0], int)\ - or not isinstance(every[1], int): + if ( + not isinstance(every, (tuple, list)) + or len(every) != 2 + or not isinstance(every[0], int) + or not isinstance(every[1], int) + ): raise SearxSettingsException(error_msg, None) return every @@ -50,14 +52,11 @@ def _set_result(result, include_timestamp=True): def run(): - if not running.acquire(blocking=False): # pylint: disable=consider-using-with + if not running.acquire(blocking=False): # pylint: disable=consider-using-with return try: logger.info('Starting checker') - result = { - 'status': 'ok', - 'engines': {} - } + result = {'status': 'ok', 'engines': {}} for name, processor in PROCESSORS.items(): logger.debug('Checking %s engine', name) checker = Checker(processor) diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py index 626aa8ce0..c0dd966d0 100644 --- a/searx/search/checker/impl.py +++ b/searx/search/checker/impl.py @@ -25,6 +25,7 @@ from searx.metrics import counter_inc logger = logger.getChild('searx.search.checker') HTML_TAGS = [ + # fmt: off 'embed', 'iframe', 'object', 'param', 'picture', 'source', 'svg', 'math', 'canvas', 'noscript', 'script', 'del', 'ins', 'area', 'audio', 'img', 'map', 'track', 'video', 'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn', 'em', 'i', 'kdb', 'mark', 'q', 'rb', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'small', @@ -32,6 +33,7 @@ HTML_TAGS = [ 'figcaption', 'figure', 'hr', 'li', 'ol', 'p', 'pre', 'ul', 'button', 'datalist', 'fieldset', 'form', 'input', 'label', 'legend', 'meter', 'optgroup', 'option', 'output', 'progress', 'select', 'textarea', 'applet', 'frame', 'frameset' + # fmt: on ] @@ -72,17 +74,23 @@ def _download_and_check_if_image(image_url: str) -> bool: try: # use "image_proxy" (avoid HTTP/2) network.set_context_network_name('image_proxy') - stream = network.stream('GET', image_url, timeout=10.0, allow_redirects=True, headers={ - 'User-Agent': gen_useragent(), - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US;q=0.5,en;q=0.3', - 'Accept-Encoding': 'gzip, deflate, br', - 'DNT': '1', - 'Connection': 'keep-alive', - 'Upgrade-Insecure-Requests': '1', - 'Sec-GPC': '1', - 'Cache-Control': 'max-age=0' - }) + stream = network.stream( + 'GET', + image_url, + timeout=10.0, + allow_redirects=True, + headers={ + 'User-Agent': gen_useragent(), + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'en-US;q=0.5,en;q=0.3', + 'Accept-Encoding': 'gzip, deflate, br', + 'DNT': '1', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + 'Sec-GPC': '1', + 'Cache-Control': 'max-age=0', + }, + ) r = next(stream) r.close() if r.status_code == 200: @@ -102,8 +110,7 @@ def _download_and_check_if_image(image_url: str) -> bool: def _is_url_image(image_url) -> bool: - """Normalize image_url - """ + """Normalize image_url""" if not isinstance(image_url, str): return False @@ -129,8 +136,9 @@ def _search_query_to_dict(search_query: SearchQuery) -> typing.Dict[str, typing. } -def _search_query_diff(sq1: SearchQuery, sq2: SearchQuery)\ - -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]: +def _search_query_diff( + sq1: SearchQuery, sq2: SearchQuery +) -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]: param1 = _search_query_to_dict(sq1) param2 = _search_query_to_dict(sq2) common = {} @@ -180,11 +188,9 @@ class ResultContainerTests: __slots__ = 'test_name', 'search_query', 'result_container', 'languages', 'stop_test', 'test_results' - def __init__(self, - test_results: TestResults, - test_name: str, - search_query: SearchQuery, - result_container: ResultContainer): + def __init__( + self, test_results: TestResults, test_name: str, search_query: SearchQuery, result_container: ResultContainer + ): self.test_name = test_name self.search_query = search_query self.result_container = result_container @@ -324,10 +330,9 @@ class CheckerTests: __slots__ = 'test_results', 'test_name', 'result_container_tests_list' - def __init__(self, - test_results: TestResults, - test_name: str, - result_container_tests_list: typing.List[ResultContainerTests]): + def __init__( + self, test_results: TestResults, test_name: str, result_container_tests_list: typing.List[ResultContainerTests] + ): self.test_results = test_results self.test_name = test_name self.result_container_tests_list = result_container_tests_list @@ -340,14 +345,17 @@ class CheckerTests: for i, urls_i in enumerate(urls_list): for j, urls_j in enumerate(urls_list): if i < j and urls_i == urls_j: - common, diff = _search_query_diff(self.result_container_tests_list[i].search_query, - self.result_container_tests_list[j].search_query) + common, diff = _search_query_diff( + self.result_container_tests_list[i].search_query, + self.result_container_tests_list[j].search_query, + ) common_str = ' '.join(['{}={!r}'.format(k, v) for k, v in common.items()]) - diff1_str = ', ' .join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()]) - diff2_str = ', ' .join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()]) - self.test_results.add_error(self.test_name, - 'results are identitical for {} and {} ({})' - .format(diff1_str, diff2_str, common_str)) + diff1_str = ', '.join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()]) + diff2_str = ', '.join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()]) + self.test_results.add_error( + self.test_name, + 'results are identitical for {} and {} ({})'.format(diff1_str, diff2_str, common_str), + ) class Checker: @@ -393,9 +401,10 @@ class Checker: elif isinstance(method, types.FunctionType): method(*args) else: - self.test_results.add_error(obj.test_name, - 'method {!r} ({}) not found for {}' - .format(method, method.__class__.__name__, obj.__class__.__name__)) + self.test_results.add_error( + obj.test_name, + 'method {!r} ({}) not found for {}'.format(method, method.__class__.__name__, obj.__class__.__name__), + ) def call_tests(self, obj, test_descriptions): for test_description in test_descriptions: diff --git a/searx/search/models.py b/searx/search/models.py index e48cb3611..ff5897966 100644 --- a/searx/search/models.py +++ b/searx/search/models.py @@ -25,19 +25,30 @@ class EngineRef: class SearchQuery: """container for all the search parameters (query, language, etc...)""" - __slots__ = 'query', 'engineref_list', 'lang', 'safesearch', 'pageno', 'time_range',\ - 'timeout_limit', 'external_bang', 'engine_data' - - def __init__(self, - query: str, - engineref_list: typing.List[EngineRef], - lang: str='all', - safesearch: int=0, - pageno: int=1, - time_range: typing.Optional[str]=None, - timeout_limit: typing.Optional[float]=None, - external_bang: typing.Optional[str]=None, - engine_data: typing.Optional[typing.Dict[str, str]]=None): + __slots__ = ( + 'query', + 'engineref_list', + 'lang', + 'safesearch', + 'pageno', + 'time_range', + 'timeout_limit', + 'external_bang', + 'engine_data', + ) + + def __init__( + self, + query: str, + engineref_list: typing.List[EngineRef], + lang: str = 'all', + safesearch: int = 0, + pageno: int = 1, + time_range: typing.Optional[str] = None, + timeout_limit: typing.Optional[float] = None, + external_bang: typing.Optional[str] = None, + engine_data: typing.Optional[typing.Dict[str, str]] = None, + ): self.query = query self.engineref_list = engineref_list self.lang = lang @@ -53,20 +64,39 @@ class SearchQuery: return list(set(map(lambda engineref: engineref.category, self.engineref_list))) def __repr__(self): - return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\ - format(self.query, self.engineref_list, self.lang, self.safesearch, - self.pageno, self.time_range, self.timeout_limit, self.external_bang) + return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".format( + self.query, + self.engineref_list, + self.lang, + self.safesearch, + self.pageno, + self.time_range, + self.timeout_limit, + self.external_bang, + ) def __eq__(self, other): - return self.query == other.query\ - and self.engineref_list == other.engineref_list\ - and self.lang == other.lang\ - and self.safesearch == other.safesearch\ - and self.pageno == other.pageno\ - and self.time_range == other.time_range\ - and self.timeout_limit == other.timeout_limit\ + return ( + self.query == other.query + and self.engineref_list == other.engineref_list + and self.lang == other.lang + and self.safesearch == other.safesearch + and self.pageno == other.pageno + and self.time_range == other.time_range + and self.timeout_limit == other.timeout_limit and self.external_bang == other.external_bang + ) def __hash__(self): - return hash((self.query, tuple(self.engineref_list), self.lang, self.safesearch, self.pageno, self.time_range, - self.timeout_limit, self.external_bang)) + return hash( + ( + self.query, + tuple(self.engineref_list), + self.lang, + self.safesearch, + self.pageno, + self.time_range, + self.timeout_limit, + self.external_bang, + ) + ) diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py index 8108f8dfa..966b990ec 100644 --- a/searx/search/processors/__init__.py +++ b/searx/search/processors/__init__.py @@ -29,6 +29,7 @@ logger = logger.getChild('search.processors') PROCESSORS = {} """Cache request processores, stored by *engine-name* (:py:func:`initialize`)""" + def get_processor_class(engine_type): """Return processor class according to the ``engine_type``""" for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]: diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index b5fa063fd..732b55d52 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -19,6 +19,7 @@ from searx.utils import get_engine_from_settings logger = logger.getChild('searx.search.processor') SUSPENDED_STATUS = {} + class SuspendedStatus: """Class to handle suspend state.""" @@ -39,8 +40,10 @@ class SuspendedStatus: # update continuous_errors / suspend_end_time self.continuous_errors += 1 if suspended_time is None: - suspended_time = min(settings['search']['max_ban_time_on_fail'], - self.continuous_errors * settings['search']['ban_time_on_fail']) + suspended_time = min( + settings['search']['max_ban_time_on_fail'], + self.continuous_errors * settings['search']['ban_time_on_fail'], + ) self.suspend_end_time = default_timer() + suspended_time self.suspend_reason = suspend_reason logger.debug('Suspend for %i seconds', suspended_time) @@ -127,9 +130,9 @@ class EngineProcessor(ABC): def extend_container_if_suspended(self, result_container): if self.suspended_status.is_suspended: - result_container.add_unresponsive_engine(self.engine_name, - self.suspended_status.suspend_reason, - suspended=True) + result_container.add_unresponsive_engine( + self.engine_name, self.suspended_status.suspend_reason, suspended=True + ) return True return False diff --git a/searx/search/processors/offline.py b/searx/search/processors/offline.py index ec7a4a36e..13f077cb1 100644 --- a/searx/search/processors/offline.py +++ b/searx/search/processors/offline.py @@ -23,6 +23,6 @@ class OfflineProcessor(EngineProcessor): except ValueError as e: # do not record the error self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e)) - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except self.handle_exception(result_container, e) self.logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index c4ee58e11..8d8275df1 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -23,6 +23,7 @@ from .abstract import EngineProcessor def default_request_params(): """Default request parameters for ``online`` engines.""" return { + # fmt: off 'method': 'GET', 'headers': {}, 'data': {}, @@ -30,6 +31,7 @@ def default_request_params(): 'cookies': {}, 'verify': True, 'auth': None + # fmt: on } @@ -64,10 +66,7 @@ class OnlineProcessor(EngineProcessor): # create dictionary which contain all # informations about the request request_args = dict( - headers=params['headers'], - cookies=params['cookies'], - verify=params['verify'], - auth=params['auth'] + headers=params['headers'], cookies=params['cookies'], verify=params['verify'], auth=params['auth'] ) # max_redirects @@ -103,10 +102,12 @@ class OnlineProcessor(EngineProcessor): status_code = str(response.status_code or '') reason = response.reason_phrase or '' hostname = response.url.host - count_error(self.engine_name, - '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), - (status_code, reason, hostname), - secondary=True) + count_error( + self.engine_name, + '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), + (status_code, reason, hostname), + secondary=True, + ) return response @@ -145,22 +146,16 @@ class OnlineProcessor(EngineProcessor): # requests timeout (connect or read) self.handle_exception(result_container, e, suspend=True) self.logger.error( - "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}" - .format( - default_timer() - start_time, - timeout_limit, - e.__class__.__name__ + "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}".format( + default_timer() - start_time, timeout_limit, e.__class__.__name__ ) ) except (httpx.HTTPError, httpx.StreamError) as e: # other requests exception self.handle_exception(result_container, e, suspend=True) self.logger.exception( - "requests exception (search duration : {0} s, timeout: {1} s) : {2}" - .format( - default_timer() - start_time, - timeout_limit, - e + "requests exception (search duration : {0} s, timeout: {1} s) : {2}".format( + default_timer() - start_time, timeout_limit, e ) ) except SearxEngineCaptchaException as e: @@ -186,10 +181,9 @@ class OnlineProcessor(EngineProcessor): if getattr(self.engine, 'paging', False): tests['paging'] = { - 'matrix': {'query': 'time', - 'pageno': (1, 2, 3)}, + 'matrix': {'query': 'time', 'pageno': (1, 2, 3)}, 'result_container': ['not_empty'], - 'test': ['unique_results'] + 'test': ['unique_results'], } if 'general' in self.engine.categories: # avoid documentation about HTML tags (<time> and <input type="time">) @@ -197,10 +191,9 @@ class OnlineProcessor(EngineProcessor): if getattr(self.engine, 'time_range', False): tests['time_range'] = { - 'matrix': {'query': 'news', - 'time_range': (None, 'day')}, + 'matrix': {'query': 'news', 'time_range': (None, 'day')}, 'result_container': ['not_empty'], - 'test': ['unique_results'] + 'test': ['unique_results'], } if getattr(self.engine, 'supported_languages', []): @@ -214,10 +207,6 @@ class OnlineProcessor(EngineProcessor): } if getattr(self.engine, 'safesearch', False): - tests['safesearch'] = { - 'matrix': {'query': 'porn', - 'safesearch': (0, 2)}, - 'test': ['unique_results'] - } + tests['safesearch'] = {'matrix': {'query': 'porn', 'safesearch': (0, 2)}, 'test': ['unique_results']} return tests diff --git a/searx/search/processors/online_currency.py b/searx/search/processors/online_currency.py index 4e5c57264..6bd891b1d 100644 --- a/searx/search/processors/online_currency.py +++ b/searx/search/processors/online_currency.py @@ -12,11 +12,13 @@ from .online import OnlineProcessor parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) + def normalize_name(name): name = name.lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() + def name_to_iso4217(name): name = normalize_name(name) currency = CURRENCIES['names'].get(name, [name]) @@ -24,9 +26,11 @@ def name_to_iso4217(name): return currency return currency[0] + def iso4217_to_name(iso4217, language): return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217) + class OnlineCurrencyProcessor(OnlineProcessor): """Processor class used by ``online_currency`` engines.""" diff --git a/searx/search/processors/online_dictionary.py b/searx/search/processors/online_dictionary.py index 72941d57a..3e7f6ed59 100644 --- a/searx/search/processors/online_dictionary.py +++ b/searx/search/processors/online_dictionary.py @@ -11,6 +11,7 @@ from .online import OnlineProcessor parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) + class OnlineDictionaryProcessor(OnlineProcessor): """Processor class used by ``online_dictionary`` engines.""" @@ -44,10 +45,9 @@ class OnlineDictionaryProcessor(OnlineProcessor): if getattr(self.engine, 'paging', False): tests['translation_paging'] = { - 'matrix': {'query': 'en-es house', - 'pageno': (1, 2, 3)}, + 'matrix': {'query': 'en-es house', 'pageno': (1, 2, 3)}, 'result_container': ['not_empty', ('one_title_contains', 'house')], - 'test': ['unique_results'] + 'test': ['unique_results'], } else: tests['translation'] = { |