diff options
Diffstat (limited to 'searx')
| -rw-r--r-- | searx/engines/searx_engine.py | 2 | ||||
| -rw-r--r-- | searx/metrics/__init__.py | 12 | ||||
| -rw-r--r-- | searx/metrics/error_recorder.py | 6 | ||||
| -rw-r--r-- | searx/metrics/models.py | 17 | ||||
| -rw-r--r-- | searx/network/client.py | 105 | ||||
| -rw-r--r-- | searx/network/network.py | 45 | ||||
| -rw-r--r-- | searx/search/__init__.py | 4 | ||||
| -rw-r--r-- | searx/settings.yml | 7 | ||||
| -rw-r--r-- | searx/settings_defaults.py | 3 | ||||
| -rw-r--r-- | searx/webadapter.py | 2 | ||||
| -rwxr-xr-x | searx/webapp.py | 2 |
11 files changed, 80 insertions, 125 deletions
diff --git a/searx/engines/searx_engine.py b/searx/engines/searx_engine.py index 3e9035d6f..84a8e6449 100644 --- a/searx/engines/searx_engine.py +++ b/searx/engines/searx_engine.py @@ -10,7 +10,7 @@ from searx.engines import categories as searx_categories about = { "website": 'https://github.com/searxng/searxng', "wikidata_id": 'Q17639196', - "official_api_documentation": 'https://searxng.github.io/searxng/dev/search_api.html', + "official_api_documentation": 'https://docs.searxng.org/dev/search_api.html', "use_official_api": True, "require_api_key": False, "results": 'JSON', diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py index 37f0ba121..bc755b96b 100644 --- a/searx/metrics/__init__.py +++ b/searx/metrics/__init__.py @@ -9,7 +9,7 @@ from timeit import default_timer from operator import itemgetter from searx.engines import engines -from .models import HistogramStorage, CounterStorage +from .models import HistogramStorage, CounterStorage, VoidHistogram, VoidCounterStorage from .error_recorder import count_error, count_exception, errors_per_engines __all__ = [ @@ -69,14 +69,18 @@ def counter(*args): return counter_storage.get(*args) -def initialize(engine_names=None): +def initialize(engine_names=None, enabled=True): """ Initialize metrics """ global counter_storage, histogram_storage # pylint: disable=global-statement - counter_storage = CounterStorage() - histogram_storage = HistogramStorage() + if enabled: + counter_storage = CounterStorage() + histogram_storage = HistogramStorage() + else: + counter_storage = VoidCounterStorage() + histogram_storage = HistogramStorage(histogram_class=VoidHistogram) # max_timeout = max of all the engine.timeout max_timeout = 2 diff --git a/searx/metrics/error_recorder.py b/searx/metrics/error_recorder.py index 76d27f64f..1d0d6e7a3 100644 --- a/searx/metrics/error_recorder.py +++ b/searx/metrics/error_recorder.py @@ -9,7 +9,7 @@ from searx.exceptions import ( SearxEngineAPIException, SearxEngineAccessDeniedException, ) -from searx import searx_parent_dir +from searx import searx_parent_dir, settings from searx.engines import engines @@ -165,6 +165,8 @@ def get_error_context(framerecords, exception_classname, log_message, log_parame def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -> None: + if not settings['general']['enable_metrics']: + return framerecords = inspect.trace() try: exception_classname = get_exception_classname(exc) @@ -178,6 +180,8 @@ def count_exception(engine_name: str, exc: Exception, secondary: bool = False) - def count_error( engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, secondary: bool = False ) -> None: + if not settings['general']['enable_metrics']: + return framerecords = list(reversed(inspect.stack()[1:])) try: error_context = get_error_context(framerecords, None, log_message, log_parameters or (), secondary) diff --git a/searx/metrics/models.py b/searx/metrics/models.py index d42569b7f..900a7fa93 100644 --- a/searx/metrics/models.py +++ b/searx/metrics/models.py @@ -102,16 +102,17 @@ class Histogram: class HistogramStorage: - __slots__ = 'measures' + __slots__ = 'measures', 'histogram_class' - def __init__(self): + def __init__(self, histogram_class=Histogram): self.clear() + self.histogram_class = histogram_class def clear(self): self.measures = {} def configure(self, width, size, *args): - measure = Histogram(width, size) + measure = self.histogram_class(width, size) self.measures[args] = measure return measure @@ -154,3 +155,13 @@ class CounterStorage: logger.debug("Counters:") for k in ks: logger.debug("- %-60s %s", '|'.join(k), self.counters[k]) + + +class VoidHistogram(Histogram): + def observe(self, value): + pass + + +class VoidCounterStorage(CounterStorage): + def add(self, value, *args): + pass diff --git a/searx/network/client.py b/searx/network/client.py index cd1e41460..6858ac05b 100644 --- a/searx/network/client.py +++ b/searx/network/client.py @@ -6,8 +6,6 @@ import asyncio import logging import threading -import anyio -import httpcore import httpx from httpx_socks import AsyncProxyTransport from python_socks import parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, ProxyError @@ -27,31 +25,10 @@ logger = logger.getChild('searx.network.client') LOOP = None SSLCONTEXTS = {} TRANSPORT_KWARGS = { - # use anyio : - # * https://github.com/encode/httpcore/issues/344 - # * https://github.com/encode/httpx/discussions/1511 - 'backend': 'anyio', 'trust_env': False, } -# pylint: disable=protected-access -async def close_connections_for_url(connection_pool: httpcore.AsyncConnectionPool, url: httpcore._utils.URL): - - origin = httpcore._utils.url_to_origin(url) - logger.debug('Drop connections for %r', origin) - connections_to_close = connection_pool._connections_for_origin(origin) - for connection in connections_to_close: - await connection_pool._remove_from_pool(connection) - try: - await connection.aclose() - except httpx.NetworkError as e: - logger.warning('Error closing an existing connection', exc_info=e) - - -# pylint: enable=protected-access - - def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http2=False): key = (proxy_url, cert, verify, trust_env, http2) if key not in SSLCONTEXTS: @@ -62,75 +39,25 @@ def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport): """Block HTTP request""" - async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): + async def handle_async_request(self, request): raise httpx.UnsupportedProtocol('HTTP protocol is disabled') class AsyncProxyTransportFixed(AsyncProxyTransport): """Fix httpx_socks.AsyncProxyTransport - Map python_socks exceptions to httpx.ProxyError / httpx.ConnectError - - Map socket.gaierror to httpx.ConnectError - - Note: AsyncProxyTransport inherit from AsyncConnectionPool + Map python_socks exceptions to httpx.ProxyError exceptions """ - async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): - retry = 2 - while retry > 0: - retry -= 1 - try: - return await super().handle_async_request( - method, url, headers=headers, stream=stream, extensions=extensions - ) - except (ProxyConnectionError, ProxyTimeoutError, ProxyError) as e: - raise httpx.ProxyError from e - except OSError as e: - # socket.gaierror when DNS resolution fails - raise httpx.ConnectError from e - except httpx.NetworkError as e: - # httpx.WriteError on HTTP/2 connection leaves a new opened stream - # then each new request creates a new stream and raise the same WriteError - await close_connections_for_url(self, url) - raise e - except anyio.ClosedResourceError as e: - await close_connections_for_url(self, url) - raise httpx.CloseError from e - except httpx.RemoteProtocolError as e: - # in case of httpx.RemoteProtocolError: Server disconnected - await close_connections_for_url(self, url) - logger.warning('httpx.RemoteProtocolError: retry', exc_info=e) - # retry - - -class AsyncHTTPTransportFixed(httpx.AsyncHTTPTransport): - """Fix httpx.AsyncHTTPTransport""" - - async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): - retry = 2 - while retry > 0: - retry -= 1 - try: - return await super().handle_async_request( - method, url, headers=headers, stream=stream, extensions=extensions - ) - except OSError as e: - # socket.gaierror when DNS resolution fails - raise httpx.ConnectError from e - except httpx.NetworkError as e: - # httpx.WriteError on HTTP/2 connection leaves a new opened stream - # then each new request creates a new stream and raise the same WriteError - await close_connections_for_url(self._pool, url) - raise e - except anyio.ClosedResourceError as e: - await close_connections_for_url(self._pool, url) - raise httpx.CloseError from e - except httpx.RemoteProtocolError as e: - # in case of httpx.RemoteProtocolError: Server disconnected - await close_connections_for_url(self._pool, url) - logger.warning('httpx.RemoteProtocolError: retry', exc_info=e) - # retry + async def handle_async_request(self, request): + try: + return await super().handle_async_request(request) + except ProxyConnectionError as e: + raise httpx.ProxyError("ProxyConnectionError: " + e.strerror, request=request) from e + except ProxyTimeoutError as e: + raise httpx.ProxyError("ProxyTimeoutError: " + e.args[0], request=request) from e + except ProxyError as e: + raise httpx.ProxyError("ProxyError: " + e.args[0], request=request) from e def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries): @@ -157,9 +84,7 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit verify=verify, http2=http2, local_address=local_address, - max_connections=limit.max_connections, - max_keepalive_connections=limit.max_keepalive_connections, - keepalive_expiry=limit.keepalive_expiry, + limits=limit, retries=retries, **TRANSPORT_KWARGS, ) @@ -167,13 +92,13 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit def get_transport(verify, http2, local_address, proxy_url, limit, retries): verify = get_sslcontexts(None, None, True, False, http2) if verify is True else verify - return AsyncHTTPTransportFixed( + return httpx.AsyncHTTPTransport( # pylint: disable=protected-access verify=verify, http2=http2, - local_address=local_address, - proxy=httpx._config.Proxy(proxy_url) if proxy_url else None, limits=limit, + proxy=httpx._config.Proxy(proxy_url) if proxy_url else None, + local_address=local_address, retries=retries, **TRANSPORT_KWARGS, ) diff --git a/searx/network/network.py b/searx/network/network.py index 9e14e14bd..43140b44d 100644 --- a/searx/network/network.py +++ b/searx/network/network.py @@ -213,15 +213,18 @@ class Network: await asyncio.gather(*[close_client(client) for client in self._clients.values()], return_exceptions=False) @staticmethod - def get_kwargs_clients(kwargs): + def extract_kwargs_clients(kwargs): kwargs_clients = {} if 'verify' in kwargs: kwargs_clients['verify'] = kwargs.pop('verify') if 'max_redirects' in kwargs: kwargs_clients['max_redirects'] = kwargs.pop('max_redirects') + if 'allow_redirects' in kwargs: + # see https://github.com/encode/httpx/pull/1808 + kwargs['follow_redirects'] = kwargs.pop('allow_redirects') return kwargs_clients - def is_valid_respones(self, response): + def is_valid_response(self, response): # pylint: disable=too-many-boolean-expressions if ( (self.retry_on_http_error is True and 400 <= response.status_code <= 599) @@ -231,34 +234,40 @@ class Network: return False return True - async def request(self, method, url, **kwargs): + async def call_client(self, stream, method, url, **kwargs): retries = self.retries + was_disconnected = False + kwargs_clients = Network.extract_kwargs_clients(kwargs) while retries >= 0: # pragma: no cover - kwargs_clients = Network.get_kwargs_clients(kwargs) client = await self.get_client(**kwargs_clients) try: - response = await client.request(method, url, **kwargs) - if self.is_valid_respones(response) or retries <= 0: + if stream: + response = client.stream(method, url, **kwargs) + else: + response = await client.request(method, url, **kwargs) + if self.is_valid_response(response) or retries <= 0: return response - except (httpx.RequestError, httpx.HTTPStatusError) as e: + except httpx.RemoteProtocolError as e: + if not was_disconnected: + # the server has closed the connection: + # try again without decreasing the retries variable & with a new HTTP client + was_disconnected = True + await client.aclose() + self._logger.warning('httpx.RemoteProtocolError: the server has disconnected, retrying') + continue if retries <= 0: raise e - retries -= 1 - - async def stream(self, method, url, **kwargs): - retries = self.retries - while retries >= 0: # pragma: no cover - kwargs_clients = Network.get_kwargs_clients(kwargs) - client = await self.get_client(**kwargs_clients) - try: - response = client.stream(method, url, **kwargs) - if self.is_valid_respones(response) or retries <= 0: - return response except (httpx.RequestError, httpx.HTTPStatusError) as e: if retries <= 0: raise e retries -= 1 + async def request(self, method, url, **kwargs): + return await self.call_client(False, method, url, **kwargs) + + async def stream(self, method, url, **kwargs): + return await self.call_client(True, method, url, **kwargs) + @classmethod async def aclose_all(cls): await asyncio.gather(*[network.aclose() for network in NETWORKS.values()], return_exceptions=False) diff --git a/searx/search/__init__.py b/searx/search/__init__.py index d66f3362d..e790bd05e 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -24,13 +24,13 @@ from searx.search.checker import initialize as initialize_checker logger = logger.getChild('search') -def initialize(settings_engines=None, enable_checker=False, check_network=False): +def initialize(settings_engines=None, enable_checker=False, check_network=False, enable_metrics=True): settings_engines = settings_engines or settings['engines'] load_engines(settings_engines) initialize_network(settings_engines, settings['outgoing']) if check_network: check_network_configuration() - initialize_metrics([engine['name'] for engine in settings_engines]) + initialize_metrics([engine['name'] for engine in settings_engines], enable_metrics) initialize_processors(settings_engines) if enable_checker: initialize_checker() diff --git a/searx/settings.yml b/searx/settings.yml index 0aa7b99b0..f1d6fa1b1 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -2,10 +2,11 @@ general: debug: false # Debug mode, only for development instance_name: "SearXNG" # displayed name contact_url: false # mailto:contact@example.com + enable_metrics: true # record stats brand: new_issue_url: https://github.com/searxng/searxng/issues/new - docs_url: https://searxng.github.io/searxng + docs_url: https://docs.searxng.org/ public_instances: https://searx.space wiki_url: https://github.com/searxng/searxng/wiki issue_url: https://github.com/searxng/searxng/issues @@ -145,7 +146,7 @@ outgoing: # - fe80::/126 # External plugin configuration, for more details see -# https://searxng.github.io/searxng/dev/plugins.html +# https://docs.searxng.org/dev/plugins.html # # plugins: # - plugin1 @@ -625,7 +626,7 @@ engines: - name: google engine: google shortcut: go - # see https://searxng.github.io/searxng/src/searx.engines.google.html#module-searx.engines.google + # see https://docs.searxng.org/src/searx.engines.google.html#module-searx.engines.google use_mobile_ui: false # additional_tests: # android: *test_android diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py index c1154f7ff..e84b442fe 100644 --- a/searx/settings_defaults.py +++ b/searx/settings_defaults.py @@ -142,11 +142,12 @@ SCHEMA = { 'debug': SettingsValue(bool, False, 'SEARXNG_DEBUG'), 'instance_name': SettingsValue(str, 'SearXNG'), 'contact_url': SettingsValue((None, False, str), None), + 'enable_metrics': SettingsValue(bool, True), }, 'brand': { 'issue_url': SettingsValue(str, 'https://github.com/searxng/searxng/issues'), 'new_issue_url': SettingsValue(str, 'https://github.com/searxng/searxng/issues/new'), - 'docs_url': SettingsValue(str, 'https://searxng.github.io/searxng'), + 'docs_url': SettingsValue(str, 'https://docs.searxng.org'), 'public_instances': SettingsValue(str, 'https://searx.space'), 'wiki_url': SettingsValue(str, 'https://github.com/searxng/searxng/wiki'), }, diff --git a/searx/webadapter.py b/searx/webadapter.py index 4fd18cee9..185cb568e 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -236,7 +236,7 @@ def get_search_query_from_webapp( external_bang = raw_text_query.external_bang engine_data = parse_engine_data(form) - if not is_locked('categories') and raw_text_query.enginerefs and raw_text_query.specific: + if not is_locked('categories') and raw_text_query.specific: # if engines are calculated from query, # set categories by using that informations query_engineref_list = raw_text_query.enginerefs diff --git a/searx/webapp.py b/searx/webapp.py index f2957a944..46b838b77 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -1343,7 +1343,7 @@ werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__mai # initialize the engines except on the first run of the werkzeug server. if not werkzeug_reloader or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_MAIN") == "true"): plugin_initialize(app) - search_initialize(enable_checker=True, check_network=True) + search_initialize(enable_checker=True, check_network=True, enable_metrics=settings['general']['enable_metrics']) def run(): |