diff options
| author | Alexandre Flament <alex@al-f.net> | 2021-05-05 13:08:54 +0200 |
|---|---|---|
| committer | Alexandre Flament <alex@al-f.net> | 2021-05-05 13:12:42 +0200 |
| commit | 8c1a65d32fb6a0859c0052d668d01f08325f11ad (patch) | |
| tree | 8837e952d67fb8a4755ce2c732ada76474da75c2 /searx/search/processors | |
| parent | d36adfa59f242a8775ad74245c696d62b7727a36 (diff) | |
[mod] multithreading only in searx.search.* packages
it prepares the new architecture change,
everything about multithreading is moved into the searx.search.* packages
previously the call to the "init" function of the engines was done in searx.engines:
* the network was not set (requests were not sent using the defined proxy)
* it required monkey patching the code to avoid HTTP requests during the tests
Diffstat (limited to 'searx/search/processors')
| -rw-r--r-- | searx/search/processors/__init__.py | 37 | ||||
| -rw-r--r-- | searx/search/processors/abstract.py | 17 | ||||
| -rw-r--r-- | searx/search/processors/online.py | 15 |
3 files changed, 55 insertions, 14 deletions
diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py index caac74e65..d5ebdb70c 100644 --- a/searx/search/processors/__init__.py +++ b/searx/search/processors/__init__.py @@ -11,9 +11,11 @@ __all__ = [ 'OnlineProcessor', 'OnlineDictionaryProcessor', 'OnlineCurrencyProcessor', - 'processors', + 'PROCESSORS', ] +import threading + from searx import logger import searx.engines as engines @@ -24,7 +26,7 @@ from .online_currency import OnlineCurrencyProcessor from .abstract import EngineProcessor logger = logger.getChild('search.processors') -processors = {} +PROCESSORS = {} """Cache request processores, stored by *engine-name* (:py:func:`initialize`)""" def get_processor_class(engine_type): @@ -34,6 +36,7 @@ def get_processor_class(engine_type): return c return None + def get_processor(engine, engine_name): """Return processor instance that fits to ``engine.engine.type``)""" engine_type = getattr(engine, 'engine_type', 'online') @@ -42,12 +45,26 @@ def get_processor(engine, engine_name): return processor_class(engine, engine_name) return None + +def initialize_processor(processor): + """Initialize one processor + + Call the init function of the engine + """ + if processor.has_initialize_function: + t = threading.Thread(target=processor.initialize, daemon=True) + t.start() + + def initialize(engine_list): - """Initialize all engines and store a processor for each engine in :py:obj:`processors`.""" - engines.initialize_engines(engine_list) - for engine_name, engine in engines.engines.items(): - processor = get_processor(engine, engine_name) - if processor is None: - logger.error('Error get processor for engine %s', engine_name) - else: - processors[engine_name] = processor + """Initialize all engines and store a processor for each engine in :py:obj:`PROCESSORS`.""" + for engine_data in engine_list: + engine_name = engine_data['name'] + engine = engines.engines.get(engine_name) + if engine: + processor = get_processor(engine, engine_name) 
+ initialize_processor(processor) + if processor is None: + logger.error('Error get processor for engine %s', engine_name) + else: + PROCESSORS[engine_name] = processor diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index 38811d87c..81724f052 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -13,7 +13,8 @@ from searx import logger from searx.engines import settings from searx.network import get_time_for_thread, get_network from searx.metrics import histogram_observe, counter_inc, count_exception, count_error -from searx.exceptions import SearxEngineAccessDeniedException +from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException +from searx.utils import get_engine_from_settings logger = logger.getChild('searx.search.processor') SUSPENDED_STATUS = {} @@ -66,6 +67,20 @@ class EngineProcessor(ABC): key = id(key) if key else self.engine_name self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus()) + def initialize(self): + try: + self.engine.init(get_engine_from_settings(self.engine_name)) + except SearxEngineResponseException as exc: + logger.warn('%s engine: Fail to initialize // %s', self.engine_name, exc) + except Exception: # pylint: disable=broad-except + logger.exception('%s engine: Fail to initialize', self.engine_name) + else: + logger.debug('%s engine: Initialized', self.engine_name) + + @property + def has_initialize_function(self): + return hasattr(self.engine, 'init') + def handle_exception(self, result_container, exception_or_message, suspend=False): # update result_container if isinstance(exception_or_message, BaseException): diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index 93a9c6cbf..48a514e8a 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -5,7 +5,7 @@ """ -from time import time +from timeit import default_timer import asyncio import 
httpx @@ -40,6 +40,15 @@ class OnlineProcessor(EngineProcessor): engine_type = 'online' + def initialize(self): + # set timeout for all HTTP requests + searx.network.set_timeout_for_thread(self.engine.timeout, start_time=default_timer()) + # reset the HTTP total time + searx.network.reset_time_for_thread() + # set the network + searx.network.set_context_network_name(self.engine_name) + super().initialize() + def get_params(self, search_query, engine_category): params = super().get_params(search_query, engine_category) if params is None: @@ -139,7 +148,7 @@ class OnlineProcessor(EngineProcessor): self.handle_exception(result_container, e, suspend=True) logger.error("engine {0} : HTTP requests timeout" "(search duration : {1} s, timeout: {2} s) : {3}" - .format(self.engine_name, time() - start_time, + .format(self.engine_name, default_timer() - start_time, timeout_limit, e.__class__.__name__)) except (httpx.HTTPError, httpx.StreamError) as e: @@ -147,7 +156,7 @@ class OnlineProcessor(EngineProcessor): self.handle_exception(result_container, e, suspend=True) logger.exception("engine {0} : requests exception" "(search duration : {1} s, timeout: {2} s) : {3}" - .format(self.engine_name, time() - start_time, + .format(self.engine_name, default_timer() - start_time, timeout_limit, e)) except SearxEngineCaptchaException as e: |