diff options
| author | Ivan Gabaldon <igabaldon@inetol.net> | 2025-08-09 23:03:30 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-09 23:03:30 +0200 |
| commit | ce8929cabe27c7cf0bfb21b47786c7442ffb3712 (patch) | |
| tree | 8b478698e812efee8e457d34cef8fbd43d2da024 /searx/botdetection/trusted_proxies.py | |
| parent | 341d718c7f8557e03e184102f63b2f4a4364b939 (diff) | |
[mod] limiter: trusted proxies (#4911)
Replaces the `x_for` functionality with `trusted_proxies`. This allows defining
which IPs / IP ranges are trusted when extracting the client IP address from the
X-Forwarded-For and X-Real-IP headers.
We don't know whether the proxy chain will give us the proper client
address (REMOTE_ADDR in the WSGI environment), so we rely on reading the headers
set by the proxy in front of SearXNG, hoping it has done the proper checks (if
there is such a proxy, it must be added to trusted_proxies). If a proxy in the
chain does not check the client address correctly, integrity is compromised;
this has to be fixed by whoever manages that proxy, not by us.
Closes:
- https://github.com/searxng/searxng/issues/4940
- https://github.com/searxng/searxng/issues/4939
- https://github.com/searxng/searxng/issues/4907
- https://github.com/searxng/searxng/issues/3632
- https://github.com/searxng/searxng/issues/3191
- https://github.com/searxng/searxng/issues/1237
Related:
- https://github.com/searxng/searxng-docker/issues/386
- https://github.com/inetol-infrastructure/searxng-container/issues/81
Diffstat (limited to 'searx/botdetection/trusted_proxies.py')
| -rw-r--r-- | searx/botdetection/trusted_proxies.py | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/searx/botdetection/trusted_proxies.py b/searx/botdetection/trusted_proxies.py new file mode 100644 index 000000000..7191f0eb2 --- /dev/null +++ b/searx/botdetection/trusted_proxies.py @@ -0,0 +1,175 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Implementation of a middleware to determine the real IP of an HTTP request +(:py:obj:`flask.request.remote_addr`) behind a proxy chain.""" +# pylint: disable=too-many-branches + +from __future__ import annotations +import typing as t + +from collections import abc +from ipaddress import IPv4Address, IPv6Address, ip_address, ip_network, IPv4Network, IPv6Network +from werkzeug.http import parse_list_header + +from . import config +from ._helpers import log_error_only_once, logger + +if t.TYPE_CHECKING: + from _typeshed.wsgi import StartResponse + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + + +class ProxyFix: + """A middleware like the ProxyFix_ class, where the `x_for` argument is + replaced by a method that determines the number of trusted proxies via + the `botdetection.trusted_proxies` setting. + + .. sidebar:: :py:obj:`flask.Request.remote_addr` + + SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``). + + The remote IP (py:obj:`flask.Request.remote_addr`) of the request is taken + from (first match): + + - X-Forwarded-For_: If the header is set, the first untrusted IP that comes + before the IPs that are still part of the ``botdetection.trusted_proxies`` + is used. + + - `X-Real-IP <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__: + If X-Forwarded-For_ is not set, `X-Real-IP` is used + (``botdetection.trusted_proxies`` is ignored). + + If none of the header is set, the REMOTE_ADDR_ from the WSGI layer is used. + If (for whatever reasons) none IP can be determined, an error message is + displayed and ``100::`` is used instead (:rfc:`6666`). + + .. 
_ProxyFix: + https://werkzeug.palletsprojects.com/middleware/proxy_fix/ + + .. _X-Forwarded-For: + https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For + + .. _REMOTE_ADDR: + https://wsgi.readthedocs.io/en/latest/proposals-2.0.html#making-some-keys-required + + """ + + def __init__(self, wsgi_app: WSGIApplication) -> None: + self.wsgi_app = wsgi_app + + def trusted_proxies(self) -> list[IPv4Network | IPv6Network]: + cfg = config.get_global_cfg() + proxy_list: list[str] = cfg.get("botdetection.trusted_proxies", default=[]) + return [ip_network(net, strict=False) for net in proxy_list] + + def trusted_remote_addr( + self, + x_forwarded_for: list[IPv4Address | IPv6Address], + trusted_proxies: list[IPv4Network | IPv6Network], + ) -> str: + # always rtl + for addr in reversed(x_forwarded_for): + trust: bool = False + + for net in trusted_proxies: + if addr.version == net.version and addr in net: + logger.debug("trust proxy %s (member of %s)", addr, net) + trust = True + break + + # client address + if not trust: + return addr.compressed + + # fallback to first address + return x_forwarded_for[0].compressed + + def __call__(self, environ: WSGIEnvironment, start_response: StartResponse) -> abc.Iterable[bytes]: + # pylint: disable=too-many-statements + + trusted_proxies = self.trusted_proxies() + + # We do not rely on the REMOTE_ADDR from the WSGI environment / the + # variable is first removed from the WSGI environment and explicitly set + # in this function! + + orig_remote_addr: str | None = environ.pop("REMOTE_ADDR") + + # Validate the IPs involved in this game and delete all invalid ones + # from the WSGI environment. 
+ + if orig_remote_addr: + try: + addr = ip_address(orig_remote_addr) + if addr.version == 6 and addr.ipv4_mapped: + addr = addr.ipv4_mapped + orig_remote_addr = addr.compressed + except ValueError as exc: + logger.error("REMOTE_ADDR: %s / discard REMOTE_ADDR from WSGI environment", exc) + orig_remote_addr = None + + x_real_ip: str | None = environ.get("HTTP_X_REAL_IP") + if x_real_ip: + try: + addr = ip_address(x_real_ip) + if addr.version == 6 and addr.ipv4_mapped: + addr = addr.ipv4_mapped + x_real_ip = addr.compressed + except ValueError as exc: + logger.error("X-Real-IP: %s / discard HTTP_X_REAL_IP from WSGI environment", exc) + environ.pop("HTTP_X_REAL_IP") + x_real_ip = None + + x_forwarded_for: list[IPv4Address | IPv6Address] = [] + if environ.get("HTTP_X_FORWARDED_FOR"): + for x_for_ip in parse_list_header(str(environ.get("HTTP_X_FORWARDED_FOR"))): + try: + addr = ip_address(x_for_ip) + except ValueError as exc: + logger.error("X-Forwarded-For: %s / discard HTTP_X_FORWARDED_FOR from WSGI environment", exc) + environ.pop("HTTP_X_FORWARDED_FOR") + x_forwarded_for = [] + break + + if addr.version == 6 and addr.ipv4_mapped: + addr = addr.ipv4_mapped + x_forwarded_for.append(addr) + + # log questionable WSGI environments + + if not x_forwarded_for and not x_real_ip: + log_error_only_once("X-Forwarded-For nor X-Real-IP header is set!") + + if x_forwarded_for and not trusted_proxies: + log_error_only_once("missing botdetection.trusted_proxies config") + # without trusted_proxies, this variable is useless for determining + # the real IP + x_forwarded_for = [] + + # securing the WSGI environment variables that are adjusted + + environ.update({"botdetection.trusted_proxies.orig": {"REMOTE_ADDR": orig_remote_addr}}) + + # determine *the real IP* + + if x_forwarded_for: + environ["REMOTE_ADDR"] = self.trusted_remote_addr(x_forwarded_for, trusted_proxies) + + elif x_real_ip: + environ["REMOTE_ADDR"] = x_real_ip + + elif orig_remote_addr: + environ["REMOTE_ADDR"] = 
orig_remote_addr + + else: + logger.error("No remote IP could be determined, use black-hole address: 100::") + environ["REMOTE_ADDR"] = "100::" + + try: + _ = ip_address(environ["REMOTE_ADDR"]) + except ValueError as exc: + logger.error("REMOTE_ADDR: %s, use black-hole address: 100::", exc) + environ["REMOTE_ADDR"] = "100::" + + logger.debug("final REMOTE_ADDR is: %s", environ["REMOTE_ADDR"]) + return self.wsgi_app(environ, start_response) |