diff options
29 files changed, 285 insertions, 205 deletions
diff --git a/.config.sh b/.config.sh index 9a3a0bbcb..cc7663648 100644 --- a/.config.sh +++ b/.config.sh @@ -26,7 +26,7 @@ # SEARXNG_SETTINGS_PATH : /etc/searxng/settings.yml # SEARX_SRC : /usr/local/searx/searx-src # -# [1] https://searxng.github.io/searxng/admin/engines/settings.html +# [1] https://docs.searxng.org/admin/engines/settings.html # utils/filtron.sh # ---------------- diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c215f225e..95b4ad0e3 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -180,7 +180,7 @@ New settings.yml - ``ui.advanced_search`` - add preference for displaying advanced settings ( #2327 ) - ``server.method: "POST"`` - Make default query submission method configurable ( #2130 ) - ``server.default_http_headers`` - add default http headers ( #2295 ) -- ``engines.*.proxies`` - Using proxy only for specific engines ( #1827 #2319 ), see https://searxng.github.io/searxng/dev/engine_overview.html#settings-yml +- ``engines.*.proxies`` - Using proxy only for specific engines ( #1827 #2319 ), see https://docs.searxng.org/dev/engine_overview.html#settings-yml - ``enabled_plugins`` - Enabled plugins ( a05c660e3036ad8d02072fc6731af54c2ed6151c ) - ``preferences.lock`` - Let admins lock user preferences ( #2270 ) @@ -296,8 +296,8 @@ Special thanks to `NLNet <https://nlnet.nl>`__ for sponsoring multiple features - Wikivoyage - Rubygems - Engine fixes (google, google images, startpage, gigablast, yacy) - - Private engines introduced - more details: https://searxng.github.io/searxng/blog/private-engines.html - - Greatly improved documentation - check it at https://searxng.github.io/searxng + - Private engines introduced - more details: https://docs.searxng.org/blog/private-engines.html + - Greatly improved documentation - check it at https://docs.searxng.org - Added autofocus to all search inputs - CSP friendly oscar theme - Added option to hide engine errors with `display_error_messages` engine option (true/false values, default is true) @@ -595,7 +595,7 @@ News News ~~~~ -New documentation page is available: https://searxng.github.io/searxng +New documentation page is available: https://docs.searxng.org 0.8.0 2015.09.08 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 66adfb9fc..748a7b53c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,8 +2,8 @@ ## Resources in the documentation -* [Development quickstart](https://searxng.github.io/searxng/dev/contribution_guide.html) -* [Contribution guide](https://searxng.github.io/searxng/dev/contribution_guide.html) +* [Development quickstart](https://docs.searxng.org/dev/contribution_guide.html) +* [Contribution guide](https://docs.searxng.org/dev/contribution_guide.html) ## Submitting PRs diff --git a/README.rst b/README.rst index e3c384019..becf7b989 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ .. SPDX-License-Identifier: AGPL-3.0-or-later .. figure:: https://raw.githubusercontent.com/searxng/searxng/master/src/brand/searxng.svg - :target: https://searxng.github.io/searxng/ + :target: https://docs.searxng.org/ :alt: SearXNG :width: 100% :align: center @@ -23,21 +23,21 @@ Otherwise jump to the user_, admin_ and developer_ handbooks you will find on our homepage_. .. _searx.space: https://searx.space -.. _user: https://searxng.github.io/searxng/user -.. _admin: https://searxng.github.io/searxng/admin -.. _developer: https://searxng.github.io/searxng/dev -.. _homepage: https://searxng.github.io/searxng +.. _user: https://docs.searxng.org/user +.. _admin: https://docs.searxng.org/user/admin +.. _developer: https://docs.searxng.org/dev +.. _homepage: https://docs.searxng.org/ .. _metasearch engine: https://en.wikipedia.org/wiki/Metasearch_engine .. |SearXNG logo| image:: https://raw.githubusercontent.com/searxng/searxng/master/src/brand/searxng-wordmark.svg - :target: https://searxng.github.io/searxng + :target: https://docs.searxng.org/ :width: 5% .. |SearXNG install| image:: https://img.shields.io/badge/-install-blue - :target: https://searxng.github.io/searxng/admin/installation.html + :target: https://docs.searxng.org/admin/installation.html .. |SearXNG homepage| image:: https://img.shields.io/badge/-homepage-blue - :target: https://searxng.github.io/searxng + :target: https://docs.searxng.org/ .. |SearXNG wiki| image:: https://img.shields.io/badge/-wiki-blue :target: https://github.com/searxng/searxng/wiki diff --git a/docs/admin/arch_public.dot b/docs/admin/arch_public.dot index b838685e7..c4ee5f3c1 100644 --- a/docs/admin/arch_public.dot +++ b/docs/admin/arch_public.dot @@ -4,11 +4,11 @@ digraph G { edge [fontname="Sans"]; browser [label="Browser", shape=Mdiamond]; - rp [label="Reverse Proxy", href="https://searxng.github.io/searxng/utils/filtron.sh.html#public-reverse-proxy"]; - filtron [label="Filtron", href="https://searxng.github.io/searxng/utils/filtron.sh.html"]; - morty [label="Morty", href="https://searxng.github.io/searxng/utils/morty.sh.html"]; + rp [label="Reverse Proxy", href="https://docs.searxng.org/utils/filtron.sh.html#public-reverse-proxy"]; + filtron [label="Filtron", href="https://docs.searxng.org/utils/filtron.sh.html"]; + morty [label="Morty", href="https://docs.searxng.org/utils/morty.sh.html"]; static [label="Static files", href="url to configure static files"]; - uwsgi [label="uwsgi", href="https://searxng.github.io/searxng/utils/searx.sh.html"] + uwsgi [label="uwsgi", href="https://docs.searxng.org/utils/searx.sh.html"] searx1 [label="Searx #1"]; searx2 [label="Searx #2"]; searx3 [label="Searx #3"]; diff --git a/docs/admin/engines/settings.rst b/docs/admin/engines/settings.rst index 80fb183c0..bcf0c6bb3 100644 --- a/docs/admin/engines/settings.rst +++ b/docs/admin/engines/settings.rst @@ -46,7 +46,7 @@ Global Settings brand: issue_url: https://github.com/searxng/searxng/issues - docs_url: https://searxng/searxng.github.io/searxng + docs_url: https://docs.searxng.org public_instances: https://searx.space wiki_url: https://github.com/searxng/searxng/wiki diff --git a/docs/dev/reST.rst b/docs/dev/reST.rst index 4f17b1b2f..70d08adb5 100644 --- a/docs/dev/reST.rst +++ b/docs/dev/reST.rst @@ -320,7 +320,7 @@ To list all anchors of the inventory (e.g. ``python``) use: $ python -m sphinx.ext.intersphinx https://docs.python.org/3/objects.inv ... - $ python -m sphinx.ext.intersphinx https://searxng.github.io/searxng/objects.inv + $ python -m sphinx.ext.intersphinx https://docs.searxng.org/objects.inv ... Literal blocks diff --git a/docs/dev/searxng_extra/index.rst b/docs/dev/searxng_extra/index.rst index f38bb3154..c2b5c312b 100644 --- a/docs/dev/searxng_extra/index.rst +++ b/docs/dev/searxng_extra/index.rst @@ -1,14 +1,15 @@ .. _searxng_extra: -====================================================== -Tooling box ``searxng_extra`` for developers and users -====================================================== +============================= +Tooling box ``searxng_extra`` +============================= -In the folder :origin:`searxng_extra/` we maintain some tools useful for +In the folder :origin:`searxng_extra/` we maintain some tools useful for CI and developers. .. toctree:: :maxdepth: 2 :caption: Contents + update standalone_searx.py diff --git a/docs/dev/searxng_extra/update.rst b/docs/dev/searxng_extra/update.rst new file mode 100644 index 000000000..d05c81409 --- /dev/null +++ b/docs/dev/searxng_extra/update.rst @@ -0,0 +1,88 @@ +========================= +``searxng_extra/update/`` +========================= + +:origin:`[source] <searxng_extra/update/__init__.py>` + +Scripts to update static data in :origin:`searx/data/` + +.. _update_ahmia_blacklist.py: + +``update_ahmia_blacklist.py`` +============================= + +:origin:`[source] <searxng_extra/update/update_ahmia_blacklist.py>` + +.. automodule:: searxng_extra.update.update_ahmia_blacklist + :members: + + +``update_currencies.py`` +======================== + +:origin:`[source] <searxng_extra/update/update_currencies.py>` + +.. automodule:: searxng_extra.update.update_currencies + :members: + +``update_engine_descriptions.py`` +================================= + +:origin:`[source] <searxng_extra/update/update_engine_descriptions.py>` + +.. automodule:: searxng_extra.update.update_engine_descriptions + :members: + + +``update_external_bangs.py`` +============================ + +:origin:`[source] <searxng_extra/update/update_external_bangs.py>` + +.. automodule:: searxng_extra.update.update_external_bangs + :members: + + +``update_firefox_version.py`` +============================= + +:origin:`[source] <searxng_extra/update/update_firefox_version.py>` + +.. automodule:: searxng_extra.update.update_firefox_version + :members: + + +``update_languages.py`` +======================= + +:origin:`[source] <searxng_extra/update/update_languages.py>` + +.. automodule:: searxng_extra.update.update_languages + :members: + + +``update_osm_keys_tags.py`` +=========================== + +:origin:`[source] <searxng_extra/update/update_osm_keys_tags.py>` + +.. automodule:: searxng_extra.update.update_osm_keys_tags + :members: + + +``update_pygments.py`` +====================== + +:origin:`[source] <searxng_extra/update/update_pygments.py>` + +.. automodule:: searxng_extra.update.update_pygments + :members: + + +``update_wikidata_units.py`` +============================ + +:origin:`[source] <searxng_extra/update/update_wikidata_units.py>` + +.. automodule:: searxng_extra.update.update_wikidata_units + :members: diff --git a/requirements.txt b/requirements.txt index e42c1fb7f..f6308044b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,9 +7,9 @@ lxml==4.7.1 pygments==2.11.0 python-dateutil==2.8.2 pyyaml==6.0 -httpx[http2]==0.19.0 +httpx[http2]==0.21.2 Brotli==1.0.9 uvloop==0.16.0 -httpx-socks[asyncio]==0.4.1 +httpx-socks[asyncio]==0.7.2 langdetect==1.0.9 setproctitle==1.2.2 diff --git a/searx/engines/searx_engine.py b/searx/engines/searx_engine.py index 3e9035d6f..84a8e6449 100644 --- a/searx/engines/searx_engine.py +++ b/searx/engines/searx_engine.py @@ -10,7 +10,7 @@ from searx.engines import categories as searx_categories about = { "website": 'https://github.com/searxng/searxng', "wikidata_id": 'Q17639196', - "official_api_documentation": 'https://searxng.github.io/searxng/dev/search_api.html', + "official_api_documentation": 'https://docs.searxng.org/dev/search_api.html', "use_official_api": True, "require_api_key": False, "results": 'JSON', diff --git a/searx/network/client.py b/searx/network/client.py index cd1e41460..6858ac05b 100644 --- a/searx/network/client.py +++ b/searx/network/client.py @@ -6,8 +6,6 @@ import asyncio import logging import threading -import anyio -import httpcore import httpx from httpx_socks import AsyncProxyTransport from python_socks import parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, ProxyError @@ -27,31 +25,10 @@ logger = logger.getChild('searx.network.client') LOOP = None SSLCONTEXTS = {} TRANSPORT_KWARGS = { - # use anyio : - # * https://github.com/encode/httpcore/issues/344 - # * https://github.com/encode/httpx/discussions/1511 - 'backend': 'anyio', 'trust_env': False, } -# pylint: disable=protected-access -async def close_connections_for_url(connection_pool: httpcore.AsyncConnectionPool, url: httpcore._utils.URL): - - origin = httpcore._utils.url_to_origin(url) - logger.debug('Drop connections for %r', origin) - connections_to_close = connection_pool._connections_for_origin(origin) - for connection in connections_to_close: - await connection_pool._remove_from_pool(connection) - try: - await connection.aclose() - except httpx.NetworkError as e: - logger.warning('Error closing an existing connection', exc_info=e) - - -# pylint: enable=protected-access - - def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http2=False): key = (proxy_url, cert, verify, trust_env, http2) if key not in SSLCONTEXTS: @@ -62,75 +39,25 @@ def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport): """Block HTTP request""" - async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): + async def handle_async_request(self, request): raise httpx.UnsupportedProtocol('HTTP protocol is disabled') class AsyncProxyTransportFixed(AsyncProxyTransport): """Fix httpx_socks.AsyncProxyTransport - Map python_socks exceptions to httpx.ProxyError / httpx.ConnectError - - Map socket.gaierror to httpx.ConnectError - - Note: AsyncProxyTransport inherit from AsyncConnectionPool + Map python_socks exceptions to httpx.ProxyError exceptions """ - async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): - retry = 2 - while retry > 0: - retry -= 1 - try: - return await super().handle_async_request( - method, url, headers=headers, stream=stream, extensions=extensions - ) - except (ProxyConnectionError, ProxyTimeoutError, ProxyError) as e: - raise httpx.ProxyError from e - except OSError as e: - # socket.gaierror when DNS resolution fails - raise httpx.ConnectError from e - except httpx.NetworkError as e: - # httpx.WriteError on HTTP/2 connection leaves a new opened stream - # then each new request creates a new stream and raise the same WriteError - await close_connections_for_url(self, url) - raise e - except anyio.ClosedResourceError as e: - await close_connections_for_url(self, url) - raise httpx.CloseError from e - except httpx.RemoteProtocolError as e: - # in case of httpx.RemoteProtocolError: Server disconnected - await close_connections_for_url(self, url) - logger.warning('httpx.RemoteProtocolError: retry', exc_info=e) - # retry - - -class AsyncHTTPTransportFixed(httpx.AsyncHTTPTransport): - """Fix httpx.AsyncHTTPTransport""" - - async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): - retry = 2 - while retry > 0: - retry -= 1 - try: - return await super().handle_async_request( - method, url, headers=headers, stream=stream, extensions=extensions - ) - except OSError as e: - # socket.gaierror when DNS resolution fails - raise httpx.ConnectError from e - except httpx.NetworkError as e: - # httpx.WriteError on HTTP/2 connection leaves a new opened stream - # then each new request creates a new stream and raise the same WriteError - await close_connections_for_url(self._pool, url) - raise e - except anyio.ClosedResourceError as e: - await close_connections_for_url(self._pool, url) - raise httpx.CloseError from e - except httpx.RemoteProtocolError as e: - # in case of httpx.RemoteProtocolError: Server disconnected - await close_connections_for_url(self._pool, url) - logger.warning('httpx.RemoteProtocolError: retry', exc_info=e) - # retry + async def handle_async_request(self, request): + try: + return await super().handle_async_request(request) + except ProxyConnectionError as e: + raise httpx.ProxyError("ProxyConnectionError: " + e.strerror, request=request) from e + except ProxyTimeoutError as e: + raise httpx.ProxyError("ProxyTimeoutError: " + e.args[0], request=request) from e + except ProxyError as e: + raise httpx.ProxyError("ProxyError: " + e.args[0], request=request) from e def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries): @@ -157,9 +84,7 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit verify=verify, http2=http2, local_address=local_address, - max_connections=limit.max_connections, - max_keepalive_connections=limit.max_keepalive_connections, - keepalive_expiry=limit.keepalive_expiry, + limits=limit, retries=retries, **TRANSPORT_KWARGS, ) @@ -167,13 +92,13 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit def get_transport(verify, http2, local_address, proxy_url, limit, retries): verify = get_sslcontexts(None, None, True, False, http2) if verify is True else verify - return AsyncHTTPTransportFixed( + return httpx.AsyncHTTPTransport( # pylint: disable=protected-access verify=verify, http2=http2, - local_address=local_address, - proxy=httpx._config.Proxy(proxy_url) if proxy_url else None, limits=limit, + proxy=httpx._config.Proxy(proxy_url) if proxy_url else None, + local_address=local_address, retries=retries, **TRANSPORT_KWARGS, ) diff --git a/searx/network/network.py b/searx/network/network.py index 9e14e14bd..43140b44d 100644 --- a/searx/network/network.py +++ b/searx/network/network.py @@ -213,15 +213,18 @@ class Network: await asyncio.gather(*[close_client(client) for client in self._clients.values()], return_exceptions=False) @staticmethod - def get_kwargs_clients(kwargs): + def extract_kwargs_clients(kwargs): kwargs_clients = {} if 'verify' in kwargs: kwargs_clients['verify'] = kwargs.pop('verify') if 'max_redirects' in kwargs: kwargs_clients['max_redirects'] = kwargs.pop('max_redirects') + if 'allow_redirects' in kwargs: + # see https://github.com/encode/httpx/pull/1808 + kwargs['follow_redirects'] = kwargs.pop('allow_redirects') return kwargs_clients - def is_valid_respones(self, response): + def is_valid_response(self, response): # pylint: disable=too-many-boolean-expressions if ( (self.retry_on_http_error is True and 400 <= response.status_code <= 599) @@ -231,34 +234,40 @@ class Network: return False return True - async def request(self, method, url, **kwargs): + async def call_client(self, stream, method, url, **kwargs): retries = self.retries + was_disconnected = False + kwargs_clients = Network.extract_kwargs_clients(kwargs) while retries >= 0: # pragma: no cover - kwargs_clients = Network.get_kwargs_clients(kwargs) client = await self.get_client(**kwargs_clients) try: - response = await client.request(method, url, **kwargs) - if self.is_valid_respones(response) or retries <= 0: + if stream: + response = client.stream(method, url, **kwargs) + else: + response = await client.request(method, url, **kwargs) + if self.is_valid_response(response) or retries <= 0: return response - except (httpx.RequestError, httpx.HTTPStatusError) as e: + except httpx.RemoteProtocolError as e: + if not was_disconnected: + # the server has closed the connection: + # try again without decreasing the retries variable & with a new HTTP client + was_disconnected = True + await client.aclose() + self._logger.warning('httpx.RemoteProtocolError: the server has disconnected, retrying') + continue if retries <= 0: raise e - retries -= 1 - - async def stream(self, method, url, **kwargs): - retries = self.retries - while retries >= 0: # pragma: no cover - kwargs_clients = Network.get_kwargs_clients(kwargs) - client = await self.get_client(**kwargs_clients) - try: - response = client.stream(method, url, **kwargs) - if self.is_valid_respones(response) or retries <= 0: - return response except (httpx.RequestError, httpx.HTTPStatusError) as e: if retries <= 0: raise e retries -= 1 + async def request(self, method, url, **kwargs): + return await self.call_client(False, method, url, **kwargs) + + async def stream(self, method, url, **kwargs): + return await self.call_client(True, method, url, **kwargs) + @classmethod async def aclose_all(cls): await asyncio.gather(*[network.aclose() for network in NETWORKS.values()], return_exceptions=False) diff --git a/searx/settings.yml b/searx/settings.yml index 4ac9c8943..f4ecf6d10 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -6,7 +6,7 @@ general: brand: new_issue_url: https://github.com/searxng/searxng/issues/new - docs_url: https://searxng.github.io/searxng + docs_url: https://docs.searxng.org/ public_instances: https://searx.space wiki_url: https://github.com/searxng/searxng/wiki issue_url: https://github.com/searxng/searxng/issues @@ -152,7 +152,7 @@ outgoing: # - fe80::/126 # External plugin configuration, for more details see -# https://searxng.github.io/searxng/dev/plugins.html +# https://docs.searxng.org/dev/plugins.html # # plugins: # - plugin1 @@ -619,7 +619,7 @@ engines: - name: google engine: google shortcut: go - # see https://searxng.github.io/searxng/src/searx.engines.google.html#module-searx.engines.google + # see https://docs.searxng.org/src/searx.engines.google.html#module-searx.engines.google use_mobile_ui: false # additional_tests: # android: *test_android diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py index 95d482a0e..bb91ed855 100644 --- a/searx/settings_defaults.py +++ b/searx/settings_defaults.py @@ -147,7 +147,7 @@ SCHEMA = { 'brand': { 'issue_url': SettingsValue(str, 'https://github.com/searxng/searxng/issues'), 'new_issue_url': SettingsValue(str, 'https://github.com/searxng/searxng/issues/new'), - 'docs_url': SettingsValue(str, 'https://searxng.github.io/searxng'), + 'docs_url': SettingsValue(str, 'https://docs.searxng.org'), 'public_instances': SettingsValue(str, 'https://searx.space'), 'wiki_url': SettingsValue(str, 'https://github.com/searxng/searxng/wiki'), }, diff --git a/searxng_extra/update/update_ahmia_blacklist.py b/searxng_extra/update/update_ahmia_blacklist.py index f7695deae..26c485195 100755 --- a/searxng_extra/update/update_ahmia_blacklist.py +++ b/searxng_extra/update/update_ahmia_blacklist.py @@ -1,10 +1,15 @@ #!/usr/bin/env python +# lint: pylint # SPDX-License-Identifier: AGPL-3.0-or-later +"""This script saves `Ahmia's blacklist`_ for onion sites. -# This script saves Ahmia's blacklist for onion sites. -# More info in https://ahmia.fi/blacklist/ +Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data +... <.github/workflows/data-update.yml>`). + +.. _Ahmia's blacklist: https://ahmia.fi/blacklist/ + +""" -# set path from os.path import join import requests @@ -17,15 +22,14 @@ def fetch_ahmia_blacklist(): resp = requests.get(URL, timeout=3.0) if resp.status_code != 200: raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code) - else: - blacklist = resp.text.split() - return blacklist + return resp.text.split() def get_ahmia_blacklist_filename(): return join(join(searx_dir, "data"), "ahmia_blacklist.txt") -blacklist = fetch_ahmia_blacklist() -with open(get_ahmia_blacklist_filename(), "w") as f: - f.write('\n'.join(blacklist)) +if __name__ == '__main__': + blacklist = fetch_ahmia_blacklist() + with open(get_ahmia_blacklist_filename(), "w", encoding='utf-8') as f: + f.write('\n'.join(blacklist)) diff --git a/searxng_extra/update/update_currencies.py b/searxng_extra/update/update_currencies.py index 3373e2455..e51692e72 100755 --- a/searxng_extra/update/update_currencies.py +++ b/searxng_extra/update/update_currencies.py @@ -1,13 +1,22 @@ #!/usr/bin/env python +# lint: pylint # SPDX-License-Identifier: AGPL-3.0-or-later +"""Fetch currencies from :origin:`searx/engines/wikidata.py` engine. + +Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ... +<.github/workflows/data-update.yml>`). + +""" + +# pylint: disable=invalid-name + import re import unicodedata import json # set path -from sys import path -from os.path import realpath, dirname, join +from os.path import join from searx import searx_dir from searx.locales import LOCALE_NAMES diff --git a/searxng_extra/update/update_engine_descriptions.py b/searxng_extra/update/update_engine_descriptions.py index 51cfc7cc2..5b73fd396 100755 --- a/searxng_extra/update/update_engine_descriptions.py +++ b/searxng_extra/update/update_engine_descriptions.py @@ -1,6 +1,16 @@ #!/usr/bin/env python +# lint: pylint # SPDX-License-Identifier: AGPL-3.0-or-later +"""Fetch website description from websites and from +:origin:`searx/engines/wikidata.py` engine. + +Output file: :origin:`searx/data/engine_descriptions.json`. + +""" + +# pylint: disable=invalid-name, global-statement + import json from urllib.parse import urlparse from os.path import join @@ -102,7 +112,7 @@ def get_wikipedia_summary(lang, pageid): response.raise_for_status() api_result = json.loads(response.text) return api_result.get('extract') - except: + except Exception: # pylint: disable=broad-except return None @@ -134,7 +144,7 @@ def get_website_description(url, lang1, lang2=None): try: response = searx.network.get(url, headers=headers, timeout=10) response.raise_for_status() - except Exception: + except Exception: # pylint: disable=broad-except return (None, None) try: diff --git a/searxng_extra/update/update_external_bangs.py b/searxng_extra/update/update_external_bangs.py index d5c6b585a..be3aade0f 100755 --- a/searxng_extra/update/update_external_bangs.py +++ b/searxng_extra/update/update_external_bangs.py @@ -1,17 +1,20 @@ #!/usr/bin/env python # lint: pylint # SPDX-License-Identifier: AGPL-3.0-or-later -""" -Update searx/data/external_bangs.json using the duckduckgo bangs. +"""Update :origin:`searx/data/external_bangs.json` using the duckduckgo bangs +(:origin:`CI Update data ... <.github/workflows/data-update.yml>`). + +https://duckduckgo.com/newbang loads: -https://duckduckgo.com/newbang loads * a javascript which provides the bang version ( https://duckduckgo.com/bv1.js ) * a JSON file which contains the bangs ( https://duckduckgo.com/bang.v260.js for example ) This script loads the javascript, then the bangs. -The javascript URL may change in the future ( for example https://duckduckgo.com/bv2.js ), -but most probably it will requires to update RE_BANG_VERSION +The javascript URL may change in the future ( for example +https://duckduckgo.com/bv2.js ), but most probably it will requires to update +RE_BANG_VERSION + """ # pylint: disable=C0116 diff --git a/searxng_extra/update/update_firefox_version.py b/searxng_extra/update/update_firefox_version.py index 750e955fd..a447f9fd5 100755 --- a/searxng_extra/update/update_firefox_version.py +++ b/searxng_extra/update/update_firefox_version.py @@ -1,21 +1,30 @@ #!/usr/bin/env python +# lint: pylint # SPDX-License-Identifier: AGPL-3.0-or-later +"""Fetch firefox useragent signatures + +Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ... +<.github/workflows/data-update.yml>`). + +""" + import json -import requests import re -from os.path import dirname, join +from os.path import join from urllib.parse import urlparse, urljoin -from distutils.version import LooseVersion, StrictVersion +from distutils.version import LooseVersion + +import requests from lxml import html from searx import searx_dir URL = 'https://ftp.mozilla.org/pub/firefox/releases/' RELEASE_PATH = '/pub/firefox/releases/' -NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?$') -# BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$') -# ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$') +NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$') +# BETA_REGEX = re.compile(r'.*[0-9]b([0-9\-a-z]+)$') +# ESR_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?esr$') # useragents = { @@ -32,20 +41,19 @@ def fetch_firefox_versions(): resp = requests.get(URL, timeout=2.0) if resp.status_code != 200: raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code) - else: - dom = html.fromstring(resp.text) - versions = [] + dom = html.fromstring(resp.text) + versions = [] - for link in dom.xpath('//a/@href'): - url = urlparse(urljoin(URL, link)) - path = url.path - if path.startswith(RELEASE_PATH): - version = path[len(RELEASE_PATH) : -1] - if NORMAL_REGEX.match(version): - versions.append(LooseVersion(version)) + for link in dom.xpath('//a/@href'): + url = urlparse(urljoin(URL, link)) + path = url.path + if path.startswith(RELEASE_PATH): + version = path[len(RELEASE_PATH) : -1] + if NORMAL_REGEX.match(version): + versions.append(LooseVersion(version)) - list.sort(versions, reverse=True) - return versions + list.sort(versions, reverse=True) + return versions def fetch_firefox_last_versions(): @@ -66,6 +74,7 @@ def get_useragents_filename(): return join(join(searx_dir, "data"), "useragents.json") -useragents["versions"] = fetch_firefox_last_versions() -with open(get_useragents_filename(), "w") as f: - json.dump(useragents, f, indent=4, ensure_ascii=False) +if __name__ == '__main__': + useragents["versions"] = fetch_firefox_last_versions() + with open(get_useragents_filename(), "w", encoding='utf-8') as f: + json.dump(useragents, f, indent=4, ensure_ascii=False) diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py index f37345808..754180c47 100755 --- a/searxng_extra/update/update_languages.py +++ b/searxng_extra/update/update_languages.py @@ -1,9 +1,17 @@ #!/usr/bin/env python +# lint: pylint + # SPDX-License-Identifier: AGPL-3.0-or-later +"""This script generates languages.py from intersecting each engine's supported +languages. + +Output files: :origin:`searx/data/engines_languages.json` and +:origin:`searx/languages.py` (:origin:`CI Update data ... +<.github/workflows/data-update.yml>`). + +""" -# This script generates languages.py from intersecting each engine's supported languages. -# -# Output files: searx/data/engines_languages.json and searx/languages.py +# pylint: disable=invalid-name import json from pathlib import Path @@ -24,7 +32,7 @@ languages_file = Path(searx_dir) / 'languages.py' def fetch_supported_languages(): set_timeout_for_thread(10.0) - engines_languages = dict() + engines_languages = {} names = list(engines) names.sort() @@ -32,7 +40,7 @@ def fetch_supported_languages(): if hasattr(engines[engine_name], 'fetch_supported_languages'): engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name)) - if type(engines_languages[engine_name]) == list: + if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck engines_languages[engine_name] = sorted(engines_languages[engine_name]) print("fetched languages from %s engines" % len(engines_languages)) @@ -55,7 +63,7 @@ def get_locale(lang_code): # Join all language lists. def join_language_lists(engines_languages): - language_list = dict() + language_list = {} for engine_name in engines_languages: for lang_code in engines_languages[engine_name]: @@ -91,7 +99,7 @@ def join_language_lists(engines_languages): 'name': language_name, 'english_name': english_name, 'counter': set(), - 'countries': dict(), + 'countries': {}, } # add language with country if not in list @@ -119,6 +127,7 @@ def join_language_lists(engines_languages): def filter_language_list(all_languages): min_engines_per_lang = 13 min_engines_per_country = 7 + # pylint: disable=consider-using-dict-items, consider-iterating-dictionary main_engines = [ engine_name for engine_name in engines.keys() @@ -138,7 +147,7 @@ def filter_language_list(all_languages): } def _copy_lang_data(lang, country_name=None): - new_dict = dict() + new_dict = {} new_dict['name'] = all_languages[lang]['name'] new_dict['english_name'] = all_languages[lang]['english_name'] if country_name: @@ -146,10 +155,10 @@ def filter_language_list(all_languages): return new_dict # for each language get country codes supported by most engines or at least one country code - filtered_languages_with_countries = dict() + filtered_languages_with_countries = {} for lang, lang_data in filtered_languages.items(): countries = lang_data['countries'] - filtered_countries = dict() + filtered_countries = {} # get language's country codes with enough supported engines for lang_country, country_data in countries.items(): @@ -211,7 +220,7 @@ def write_languages_file(languages): language_codes = tuple(language_codes) - with open(languages_file, 'w') as new_file: + with open(languages_file, 'w', encoding='utf-8') as new_file: file_content = "{file_headers} {language_codes},\n)\n".format( # fmt: off file_headers = '\n'.join(file_headers), @@ -224,7 +233,7 @@ def write_languages_file(languages): if __name__ == "__main__": load_engines(settings['engines']) - engines_languages = fetch_supported_languages() - all_languages = join_language_lists(engines_languages) - filtered_languages = filter_language_list(all_languages) - write_languages_file(filtered_languages) + _engines_languages = fetch_supported_languages() + _all_languages = join_language_lists(_engines_languages) + _filtered_languages = filter_language_list(_all_languages) + write_languages_file(_filtered_languages) diff --git a/searxng_extra/update/update_osm_keys_tags.py b/searxng_extra/update/update_osm_keys_tags.py index 2916cbff1..1d691c194 100755 --- a/searxng_extra/update/update_osm_keys_tags.py +++ b/searxng_extra/update/update_osm_keys_tags.py @@ -5,7 +5,10 @@ To get the i18n names, the scripts uses `Wikidata Query Service`_ instead of for example `OSM tags API`_ (sidenote: the actual change log from -map.atownsend.org.uk_ might be useful to normalize OSM tags) +map.atownsend.org.uk_ might be useful to normalize OSM tags). + +Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ... +<.github/workflows/data-update.yml>`). .. _Wikidata Query Service: https://query.wikidata.org/ .. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc diff --git a/searxng_extra/update/update_wikidata_units.py b/searxng_extra/update/update_wikidata_units.py index 43a872b1b..e999b6cfd 100755 --- a/searxng_extra/update/update_wikidata_units.py +++ b/searxng_extra/update/update_wikidata_units.py @@ -3,6 +3,13 @@ # lint: pylint # pylint: disable=missing-module-docstring +"""Fetch units from :origin:`searx/engines/wikidata.py` engine. + +Output file: :origin:`searx/data/wikidata_units.json` (:origin:`CI Update data +... <.github/workflows/data-update.yml>`). + +""" + import json import collections @@ -54,5 +61,6 @@ def get_wikidata_units_filename(): return join(join(searx_dir, "data"), "wikidata_units.json") -with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f: - json.dump(get_data(), f, indent=4, ensure_ascii=False) +if __name__ == '__main__': + with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f: + json.dump(get_data(), f, indent=4, ensure_ascii=False) @@ -36,8 +36,8 @@ setup( 'License :: OSI Approved :: GNU Affero General Public License v3' ], keywords='metasearch searchengine search web http', - author='Adam Tauber', - author_email='asciimoo@gmail.com', + author='SearXNG dev team', + author_email='contact@searxng.org', license='GNU Affero General Public License', packages=find_packages(exclude=["tests*", "searxng_extra"]), zip_safe=False, diff --git a/tests/robot/settings_robot.yml b/tests/robot/settings_robot.yml index e0d01bb37..73e8b722d 100644 --- a/tests/robot/settings_robot.yml +++ b/tests/robot/settings_robot.yml @@ -7,7 +7,7 @@ brand: git_branch: master issue_url: https://github.com/searxng/searxng/issues new_issue_url: https://github.com/searxng/searxng/issues/new - docs_url: https://searxng.github.io/searxng + docs_url: https://docs.searxng.org public_instances: https://searx.space wiki_url: https://github.com/searxng/searxng/wiki diff --git a/tests/unit/network/test_network.py b/tests/unit/network/test_network.py index d25a0d77b..4253e69ac 100644 --- a/tests/unit/network/test_network.py +++ b/tests/unit/network/test_network.py @@ -76,13 +76,15 @@ class TestNetwork(SearxTestCase): 'verify': True, 'max_redirects': 5, 'timeout': 2, + 'allow_redirects': True, } - kwargs_client = Network.get_kwargs_clients(kwargs) + kwargs_client = Network.extract_kwargs_clients(kwargs) self.assertEqual(len(kwargs_client), 2) - self.assertEqual(len(kwargs), 1) + self.assertEqual(len(kwargs), 2) self.assertEqual(kwargs['timeout'], 2) + self.assertEqual(kwargs['follow_redirects'], True) self.assertTrue(kwargs_client['verify']) self.assertEqual(kwargs_client['max_redirects'], 5) diff --git a/utils/lxc.sh b/utils/lxc.sh index 9a79606ca..ab96a99ce 100755 --- a/utils/lxc.sh +++ b/utils/lxc.sh @@ -568,7 +568,7 @@ check_connectivity() { info_msg "Most often the connectivity is blocked by a docker installation:" info_msg "Whenever docker is started (reboot) it sets the iptables policy " info_msg "for the FORWARD chain to DROP, see:" - info_msg " https://searxng.github.io/searxng/utils/lxc.sh.html#internet-connectivity-docker" + info_msg " https://docs.searxng.org/utils/lxc.sh.html#internet-connectivity-docker" iptables-save | grep ":FORWARD" fi return $ret_val diff --git a/utils/morty.sh b/utils/morty.sh index 25263a3d4..c4e7bdf52 100755 --- a/utils/morty.sh +++ b/utils/morty.sh @@ -116,7 +116,7 @@ info_searx() { # shellcheck disable=SC1117 cat <<EOF To activate result and image proxy in SearXNG read: - https://searxng.github.io/searxng/admin/morty.html + https://docs.searxng.org/admin/morty.html Check settings in file ${SEARXNG_SETTINGS_PATH} ... result_proxy: url : ${PUBLIC_URL_MORTY} diff --git a/utils/templates/etc/searxng/settings.yml b/utils/templates/etc/searxng/settings.yml index dd2a29b3b..860f4f5e9 100644 --- a/utils/templates/etc/searxng/settings.yml +++ b/utils/templates/etc/searxng/settings.yml @@ -1,6 +1,6 @@ # SearXNG settings, before editing this file read: # -# https://searxng.github.io/searxng/admin/engines/settings.html +# https://docs.searxng.org/admin/engines/settings.html use_default_settings: true |