summaryrefslogtreecommitdiff
path: root/searx/botdetection/trusted_proxies.py
diff options
context:
space:
mode:
authorIvan Gabaldon <igabaldon@inetol.net>2025-08-09 23:03:30 +0200
committerGitHub <noreply@github.com>2025-08-09 23:03:30 +0200
commitce8929cabe27c7cf0bfb21b47786c7442ffb3712 (patch)
tree8b478698e812efee8e457d34cef8fbd43d2da024 /searx/botdetection/trusted_proxies.py
parent341d718c7f8557e03e184102f63b2f4a4364b939 (diff)
[mod] limiter: trusted proxies (#4911)
Replaces `x_for` functionality with `trusted_proxies`. This allows defining which IP / ranges to trust extracting the client IP address from X-Forwarded-For and X-Real-IP headers. We don't know if the proxy chain will give us the proper client address (REMOTE_ADDR in the WSGI environment), so we rely on reading the headers of the proxy before SearXNG (if there is one, in that case it must be added to trusted_proxies) hoping it has done the proper checks. In case a proxy in the chain does not check the client address correctly, integrity is compromised and this should be fixed by whoever manages the proxy, not us. Closes: - https://github.com/searxng/searxng/issues/4940 - https://github.com/searxng/searxng/issues/4939 - https://github.com/searxng/searxng/issues/4907 - https://github.com/searxng/searxng/issues/3632 - https://github.com/searxng/searxng/issues/3191 - https://github.com/searxng/searxng/issues/1237 Related: - https://github.com/searxng/searxng-docker/issues/386 - https://github.com/inetol-infrastructure/searxng-container/issues/81
Diffstat (limited to 'searx/botdetection/trusted_proxies.py')
-rw-r--r--searx/botdetection/trusted_proxies.py175
1 files changed, 175 insertions, 0 deletions
diff --git a/searx/botdetection/trusted_proxies.py b/searx/botdetection/trusted_proxies.py
new file mode 100644
index 000000000..7191f0eb2
--- /dev/null
+++ b/searx/botdetection/trusted_proxies.py
@@ -0,0 +1,175 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Implementation of a middleware to determine the real IP of an HTTP request
+(:py:obj:`flask.request.remote_addr`) behind a proxy chain."""
+# pylint: disable=too-many-branches
+
+from __future__ import annotations
+import typing as t
+
+from collections import abc
+from ipaddress import IPv4Address, IPv6Address, ip_address, ip_network, IPv4Network, IPv6Network
+from werkzeug.http import parse_list_header
+
+from . import config
+from ._helpers import log_error_only_once, logger
+
+if t.TYPE_CHECKING:
+ from _typeshed.wsgi import StartResponse
+ from _typeshed.wsgi import WSGIApplication
+ from _typeshed.wsgi import WSGIEnvironment
+
+
+class ProxyFix:
+ """A middleware like the ProxyFix_ class, where the `x_for` argument is
+ replaced by a method that determines the number of trusted proxies via
+ the `botdetection.trusted_proxies` setting.
+
+ .. sidebar:: :py:obj:`flask.Request.remote_addr`
+
+ SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``).
+
+ The remote IP (py:obj:`flask.Request.remote_addr`) of the request is taken
+ from (first match):
+
+ - X-Forwarded-For_: If the header is set, the first untrusted IP that comes
+ before the IPs that are still part of the ``botdetection.trusted_proxies``
+ is used.
+
+ - `X-Real-IP <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__:
+ If X-Forwarded-For_ is not set, `X-Real-IP` is used
+ (``botdetection.trusted_proxies`` is ignored).
+
+ If none of the header is set, the REMOTE_ADDR_ from the WSGI layer is used.
+ If (for whatever reasons) none IP can be determined, an error message is
+ displayed and ``100::`` is used instead (:rfc:`6666`).
+
+ .. _ProxyFix:
+ https://werkzeug.palletsprojects.com/middleware/proxy_fix/
+
+ .. _X-Forwarded-For:
+ https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
+
+ .. _REMOTE_ADDR:
+ https://wsgi.readthedocs.io/en/latest/proposals-2.0.html#making-some-keys-required
+
+ """
+
+ def __init__(self, wsgi_app: WSGIApplication) -> None:
+ self.wsgi_app = wsgi_app
+
+ def trusted_proxies(self) -> list[IPv4Network | IPv6Network]:
+ cfg = config.get_global_cfg()
+ proxy_list: list[str] = cfg.get("botdetection.trusted_proxies", default=[])
+ return [ip_network(net, strict=False) for net in proxy_list]
+
+ def trusted_remote_addr(
+ self,
+ x_forwarded_for: list[IPv4Address | IPv6Address],
+ trusted_proxies: list[IPv4Network | IPv6Network],
+ ) -> str:
+ # always rtl
+ for addr in reversed(x_forwarded_for):
+ trust: bool = False
+
+ for net in trusted_proxies:
+ if addr.version == net.version and addr in net:
+ logger.debug("trust proxy %s (member of %s)", addr, net)
+ trust = True
+ break
+
+ # client address
+ if not trust:
+ return addr.compressed
+
+ # fallback to first address
+ return x_forwarded_for[0].compressed
+
+ def __call__(self, environ: WSGIEnvironment, start_response: StartResponse) -> abc.Iterable[bytes]:
+ # pylint: disable=too-many-statements
+
+ trusted_proxies = self.trusted_proxies()
+
+ # We do not rely on the REMOTE_ADDR from the WSGI environment / the
+ # variable is first removed from the WSGI environment and explicitly set
+ # in this function!
+
+ orig_remote_addr: str | None = environ.pop("REMOTE_ADDR")
+
+ # Validate the IPs involved in this game and delete all invalid ones
+ # from the WSGI environment.
+
+ if orig_remote_addr:
+ try:
+ addr = ip_address(orig_remote_addr)
+ if addr.version == 6 and addr.ipv4_mapped:
+ addr = addr.ipv4_mapped
+ orig_remote_addr = addr.compressed
+ except ValueError as exc:
+ logger.error("REMOTE_ADDR: %s / discard REMOTE_ADDR from WSGI environment", exc)
+ orig_remote_addr = None
+
+ x_real_ip: str | None = environ.get("HTTP_X_REAL_IP")
+ if x_real_ip:
+ try:
+ addr = ip_address(x_real_ip)
+ if addr.version == 6 and addr.ipv4_mapped:
+ addr = addr.ipv4_mapped
+ x_real_ip = addr.compressed
+ except ValueError as exc:
+ logger.error("X-Real-IP: %s / discard HTTP_X_REAL_IP from WSGI environment", exc)
+ environ.pop("HTTP_X_REAL_IP")
+ x_real_ip = None
+
+ x_forwarded_for: list[IPv4Address | IPv6Address] = []
+ if environ.get("HTTP_X_FORWARDED_FOR"):
+ for x_for_ip in parse_list_header(str(environ.get("HTTP_X_FORWARDED_FOR"))):
+ try:
+ addr = ip_address(x_for_ip)
+ except ValueError as exc:
+ logger.error("X-Forwarded-For: %s / discard HTTP_X_FORWARDED_FOR from WSGI environment", exc)
+ environ.pop("HTTP_X_FORWARDED_FOR")
+ x_forwarded_for = []
+ break
+
+ if addr.version == 6 and addr.ipv4_mapped:
+ addr = addr.ipv4_mapped
+ x_forwarded_for.append(addr)
+
+ # log questionable WSGI environments
+
+ if not x_forwarded_for and not x_real_ip:
+ log_error_only_once("X-Forwarded-For nor X-Real-IP header is set!")
+
+ if x_forwarded_for and not trusted_proxies:
+ log_error_only_once("missing botdetection.trusted_proxies config")
+ # without trusted_proxies, this variable is useless for determining
+ # the real IP
+ x_forwarded_for = []
+
+ # securing the WSGI environment variables that are adjusted
+
+ environ.update({"botdetection.trusted_proxies.orig": {"REMOTE_ADDR": orig_remote_addr}})
+
+ # determine *the real IP*
+
+ if x_forwarded_for:
+ environ["REMOTE_ADDR"] = self.trusted_remote_addr(x_forwarded_for, trusted_proxies)
+
+ elif x_real_ip:
+ environ["REMOTE_ADDR"] = x_real_ip
+
+ elif orig_remote_addr:
+ environ["REMOTE_ADDR"] = orig_remote_addr
+
+ else:
+ logger.error("No remote IP could be determined, use black-hole address: 100::")
+ environ["REMOTE_ADDR"] = "100::"
+
+ try:
+ _ = ip_address(environ["REMOTE_ADDR"])
+ except ValueError as exc:
+ logger.error("REMOTE_ADDR: %s, use black-hole address: 100::", exc)
+ environ["REMOTE_ADDR"] = "100::"
+
+ logger.debug("final REMOTE_ADDR is: %s", environ["REMOTE_ADDR"])
+ return self.wsgi_app(environ, start_response)