summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
Diffstat (limited to 'searx')
-rw-r--r--searx/botdetection/__init__.py23
-rw-r--r--searx/botdetection/_helpers.py99
-rw-r--r--searx/botdetection/config.py33
-rw-r--r--searx/botdetection/http_accept.py5
-rw-r--r--searx/botdetection/http_accept_encoding.py5
-rw-r--r--searx/botdetection/http_accept_language.py5
-rw-r--r--searx/botdetection/http_connection.py5
-rw-r--r--searx/botdetection/http_sec_fetch.py4
-rw-r--r--searx/botdetection/http_user_agent.py5
-rw-r--r--searx/botdetection/ip_limit.py7
-rw-r--r--searx/botdetection/ip_lists.py16
-rw-r--r--searx/botdetection/link_token.py42
-rw-r--r--searx/botdetection/trusted_proxies.py175
-rw-r--r--searx/botdetection/valkeydb.py22
-rw-r--r--searx/compat.py35
-rw-r--r--searx/flaskfix.py5
-rw-r--r--searx/limiter.py17
-rw-r--r--searx/limiter.toml21
-rw-r--r--searx/plugins/self_info.py9
-rw-r--r--searx/plugins/tor_check.py4
-rw-r--r--searx/valkeydb.py7
-rwxr-xr-xsearx/webapp.py3
22 files changed, 375 insertions, 172 deletions
diff --git a/searx/botdetection/__init__.py b/searx/botdetection/__init__.py
index 4079d97a9..e686e3de9 100644
--- a/searx/botdetection/__init__.py
+++ b/searx/botdetection/__init__.py
@@ -4,19 +4,22 @@
Implementations used for bot detection.
"""
+from __future__ import annotations
+
+__all__ = ["init", "dump_request", "get_network", "too_many_requests", "ProxyFix"]
+
+
+import valkey
from ._helpers import dump_request
-from ._helpers import get_real_ip
from ._helpers import get_network
from ._helpers import too_many_requests
-
-__all__ = ['dump_request', 'get_network', 'get_real_ip', 'too_many_requests']
-
-valkey_client = None
-cfg = None
+from . import config
+from . import valkeydb
+from .trusted_proxies import ProxyFix
-def init(_cfg, _valkey_client):
- global valkey_client, cfg # pylint: disable=global-statement
- valkey_client = _valkey_client
- cfg = _cfg
+def init(cfg: config.Config, valkey_client: valkey.Valkey | None):
+ config.set_global_cfg(cfg)
+ if valkey_client:
+ valkeydb.set_valkey_client(valkey_client)
diff --git a/searx/botdetection/_helpers.py b/searx/botdetection/_helpers.py
index 7b57ae694..72af693c1 100644
--- a/searx/botdetection/_helpers.py
+++ b/searx/botdetection/_helpers.py
@@ -1,6 +1,9 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, invalid-name
from __future__ import annotations
+import typing as t
+
+__all__ = ["log_error_only_once", "dump_request", "get_network", "logger", "too_many_requests"]
from ipaddress import (
IPv4Network,
@@ -8,20 +11,19 @@ from ipaddress import (
IPv4Address,
IPv6Address,
ip_network,
- ip_address,
)
import flask
import werkzeug
from searx import logger
-from searx.extended_types import SXNG_Request
-from . import config
+if t.TYPE_CHECKING:
+ from . import config
logger = logger.getChild('botdetection')
-def dump_request(request: SXNG_Request):
+def dump_request(request: flask.Request):
return (
request.path
+ " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
@@ -52,86 +54,33 @@ def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkz
def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
- """Returns the (client) network of whether the real_ip is part of."""
+ """Returns the (client) network that the ``real_ip`` is part of.
+
+ The ``ipv4_prefix`` and ``ipv6_prefix`` define the number of leading bits in
+ an address that are compared to determine whether or not an address is part
+ of a (client) network.
+
+ .. code:: toml
+
+ [botdetection]
+
+ ipv4_prefix = 32
+ ipv6_prefix = 48
+
+ """
+ prefix: int = cfg["botdetection.ipv4_prefix"]
if real_ip.version == 6:
- prefix = cfg['real_ip.ipv6_prefix']
- else:
- prefix = cfg['real_ip.ipv4_prefix']
+ prefix: int = cfg["botdetection.ipv6_prefix"]
network = ip_network(f"{real_ip}/{prefix}", strict=False)
# logger.debug("get_network(): %s", network.compressed)
return network
-_logged_errors = []
+_logged_errors: list[str] = []
-def _log_error_only_once(err_msg):
+def log_error_only_once(err_msg: str):
if err_msg not in _logged_errors:
logger.error(err_msg)
_logged_errors.append(err_msg)
-
-
-def get_real_ip(request: SXNG_Request) -> str:
- """Returns real IP of the request. Since not all proxies set all the HTTP
- headers and incoming headers can be faked it may happen that the IP cannot
- be determined correctly.
-
- .. sidebar:: :py:obj:`flask.Request.remote_addr`
-
- SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``).
-
- This function tries to get the remote IP in the order listed below,
- additional some tests are done and if inconsistencies or errors are
- detected, they are logged.
-
- The remote IP of the request is taken from (first match):
-
- - X-Forwarded-For_ header
- - `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
- - :py:obj:`flask.Request.remote_addr`
-
- .. _ProxyFix:
- https://werkzeug.palletsprojects.com/middleware/proxy_fix/
-
- .. _X-Forwarded-For:
- https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
-
- """
-
- forwarded_for = request.headers.get("X-Forwarded-For")
- real_ip = request.headers.get('X-Real-IP')
- remote_addr = request.remote_addr
- # logger.debug(
- # "X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr
- # )
-
- if not forwarded_for:
- _log_error_only_once("X-Forwarded-For header is not set!")
- else:
- from . import cfg # pylint: disable=import-outside-toplevel, cyclic-import
-
- forwarded_for = [x.strip() for x in forwarded_for.split(',')]
- x_for: int = cfg['real_ip.x_for'] # type: ignore
- forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
-
- if not real_ip:
- _log_error_only_once("X-Real-IP header is not set!")
-
- if forwarded_for and real_ip and forwarded_for != real_ip:
- logger.warning("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", real_ip, forwarded_for)
-
- if forwarded_for and remote_addr and forwarded_for != remote_addr:
- logger.warning(
- "IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", remote_addr, forwarded_for
- )
-
- if real_ip and remote_addr and real_ip != remote_addr:
- logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)
-
- request_ip = ip_address(forwarded_for or real_ip or remote_addr or '0.0.0.0')
- if request_ip.version == 6 and request_ip.ipv4_mapped:
- request_ip = request_ip.ipv4_mapped
-
- # logger.debug("get_real_ip() -> %s", request_ip)
- return str(request_ip)
diff --git a/searx/botdetection/config.py b/searx/botdetection/config.py
index 5b73afe1c..6b35df84f 100644
--- a/searx/botdetection/config.py
+++ b/searx/botdetection/config.py
@@ -7,19 +7,32 @@ structured dictionaries. The configuration schema is defined in a dictionary
structure and the configuration data is given in a dictionary structure.
"""
from __future__ import annotations
-from typing import Any
+import typing
import copy
-import typing
import logging
import pathlib
from ..compat import tomllib
-__all__ = ['Config', 'UNSET', 'SchemaIssue']
+__all__ = ['Config', 'UNSET', 'SchemaIssue', 'set_global_cfg', 'get_global_cfg']
log = logging.getLogger(__name__)
+CFG: Config | None = None
+"""Global config of the botdetection."""
+
+
+def set_global_cfg(cfg: Config):
+ global CFG # pylint: disable=global-statement
+ CFG = cfg
+
+
+def get_global_cfg() -> Config:
+ if CFG is None:
+ raise ValueError("Botdetection's config is not yet initialized.")
+ return CFG
+
class FALSE:
"""Class of ``False`` singleton"""
@@ -57,7 +70,7 @@ class Config:
UNSET = UNSET
@classmethod
- def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict) -> Config:
+ def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict[str, str]) -> Config:
# init schema
@@ -80,7 +93,7 @@ class Config:
cfg.update(upd_cfg)
return cfg
- def __init__(self, cfg_schema: typing.Dict, deprecated: typing.Dict[str, str]):
+ def __init__(self, cfg_schema: dict[str, typing.Any], deprecated: dict[str, str]):
"""Constructor of class Config.
:param cfg_schema: Schema of the configuration
@@ -93,10 +106,10 @@ class Config:
self.deprecated = deprecated
self.cfg = copy.deepcopy(cfg_schema)
- def __getitem__(self, key: str) -> Any:
+ def __getitem__(self, key: str) -> typing.Any:
return self.get(key)
- def validate(self, cfg: dict):
+ def validate(self, cfg: dict[str, typing.Any]):
"""Validation of dictionary ``cfg`` on :py:obj:`Config.SCHEMA`.
Validation is done by :py:obj:`validate`."""
@@ -111,7 +124,7 @@ class Config:
"""Returns default value of field ``name`` in ``self.cfg_schema``."""
return value(name, self.cfg_schema)
- def get(self, name: str, default: Any = UNSET, replace: bool = True) -> Any:
+ def get(self, name: str, default: typing.Any = UNSET, replace: bool = True) -> typing.Any:
"""Returns the value to which ``name`` points in the configuration.
If there is no such ``name`` in the config and the ``default`` is
@@ -214,8 +227,8 @@ def value(name: str, data_dict: dict):
def validate(
- schema_dict: typing.Dict, data_dict: typing.Dict, deprecated: typing.Dict[str, str]
-) -> typing.Tuple[bool, list]:
+ schema_dict: dict[str, typing.Any], data_dict: dict[str, typing.Any], deprecated: dict[str, str]
+) -> tuple[bool, list[str]]:
"""Deep validation of dictionary in ``data_dict`` against dictionary in
``schema_dict``. Argument deprecated is a dictionary that maps deprecated
configuration names to messages::
diff --git a/searx/botdetection/http_accept.py b/searx/botdetection/http_accept.py
index f64991d50..4543e7217 100644
--- a/searx/botdetection/http_accept.py
+++ b/searx/botdetection/http_accept.py
@@ -20,8 +20,7 @@ from ipaddress import (
)
import werkzeug
-
-from searx.extended_types import SXNG_Request
+import flask
from . import config
from ._helpers import too_many_requests
@@ -29,7 +28,7 @@ from ._helpers import too_many_requests
def filter_request(
network: IPv4Network | IPv6Network,
- request: SXNG_Request,
+ request: flask.Request,
cfg: config.Config, # pylint: disable=unused-argument
) -> werkzeug.Response | None:
diff --git a/searx/botdetection/http_accept_encoding.py b/searx/botdetection/http_accept_encoding.py
index 0975cc85e..3cc65ee17 100644
--- a/searx/botdetection/http_accept_encoding.py
+++ b/searx/botdetection/http_accept_encoding.py
@@ -21,8 +21,7 @@ from ipaddress import (
)
import werkzeug
-
-from searx.extended_types import SXNG_Request
+import flask
from . import config
from ._helpers import too_many_requests
@@ -30,7 +29,7 @@ from ._helpers import too_many_requests
def filter_request(
network: IPv4Network | IPv6Network,
- request: SXNG_Request,
+ request: flask.Request,
cfg: config.Config, # pylint: disable=unused-argument
) -> werkzeug.Response | None:
diff --git a/searx/botdetection/http_accept_language.py b/searx/botdetection/http_accept_language.py
index 1287e5453..6e7480728 100644
--- a/searx/botdetection/http_accept_language.py
+++ b/searx/botdetection/http_accept_language.py
@@ -18,8 +18,7 @@ from ipaddress import (
)
import werkzeug
-
-from searx.extended_types import SXNG_Request
+import flask
from . import config
from ._helpers import too_many_requests
@@ -27,7 +26,7 @@ from ._helpers import too_many_requests
def filter_request(
network: IPv4Network | IPv6Network,
- request: SXNG_Request,
+ request: flask.Request,
cfg: config.Config, # pylint: disable=unused-argument
) -> werkzeug.Response | None:
if request.headers.get('Accept-Language', '').strip() == '':
diff --git a/searx/botdetection/http_connection.py b/searx/botdetection/http_connection.py
index eed15f989..6adcd4b39 100644
--- a/searx/botdetection/http_connection.py
+++ b/searx/botdetection/http_connection.py
@@ -18,8 +18,7 @@ from ipaddress import (
)
import werkzeug
-
-from searx.extended_types import SXNG_Request
+import flask
from . import config
from ._helpers import too_many_requests
@@ -27,7 +26,7 @@ from ._helpers import too_many_requests
def filter_request(
network: IPv4Network | IPv6Network,
- request: SXNG_Request,
+ request: flask.Request,
cfg: config.Config, # pylint: disable=unused-argument
) -> werkzeug.Response | None:
diff --git a/searx/botdetection/http_sec_fetch.py b/searx/botdetection/http_sec_fetch.py
index f64ee4b2c..edead3bfa 100644
--- a/searx/botdetection/http_sec_fetch.py
+++ b/searx/botdetection/http_sec_fetch.py
@@ -32,8 +32,6 @@ import re
import flask
import werkzeug
-from searx.extended_types import SXNG_Request
-
from . import config
from ._helpers import logger
@@ -78,7 +76,7 @@ def is_browser_supported(user_agent: str) -> bool:
def filter_request(
network: IPv4Network | IPv6Network,
- request: SXNG_Request,
+ request: flask.Request,
cfg: config.Config,
) -> werkzeug.Response | None:
diff --git a/searx/botdetection/http_user_agent.py b/searx/botdetection/http_user_agent.py
index 57d5bfee7..9b28660fe 100644
--- a/searx/botdetection/http_user_agent.py
+++ b/searx/botdetection/http_user_agent.py
@@ -20,8 +20,7 @@ from ipaddress import (
)
import werkzeug
-
-from searx.extended_types import SXNG_Request
+import flask
from . import config
from ._helpers import too_many_requests
@@ -56,7 +55,7 @@ def regexp_user_agent():
def filter_request(
network: IPv4Network | IPv6Network,
- request: SXNG_Request,
+ request: flask.Request,
cfg: config.Config, # pylint: disable=unused-argument
) -> werkzeug.Response | None:
diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py
index 93af8b7c5..2b216baf7 100644
--- a/searx/botdetection/ip_limit.py
+++ b/searx/botdetection/ip_limit.py
@@ -45,12 +45,11 @@ from ipaddress import (
import flask
import werkzeug
-from searx.extended_types import SXNG_Request
-from searx import valkeydb
from searx.valkeylib import incr_sliding_window, drop_counter
from . import link_token
from . import config
+from . import valkeydb
from ._helpers import (
too_many_requests,
logger,
@@ -92,12 +91,12 @@ SUSPICIOUS_IP_MAX = 3
def filter_request(
network: IPv4Network | IPv6Network,
- request: SXNG_Request,
+ request: flask.Request,
cfg: config.Config,
) -> werkzeug.Response | None:
# pylint: disable=too-many-return-statements
- valkey_client = valkeydb.client()
+ valkey_client = valkeydb.get_valkey_client()
if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)
diff --git a/searx/botdetection/ip_lists.py b/searx/botdetection/ip_lists.py
index 2ad1c62d0..77628b577 100644
--- a/searx/botdetection/ip_lists.py
+++ b/searx/botdetection/ip_lists.py
@@ -4,21 +4,22 @@
Method ``ip_lists``
-------------------
-The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
-:py:obj:`pass-lists <pass_ip>`.
+The ``ip_lists`` method implements :py:obj:`block-list <block_ip>` and
+:py:obj:`pass-list <pass_ip>`.
.. code:: toml
[botdetection.ip_lists]
pass_ip = [
- '167.235.158.251', # IPv4 of check.searx.space
- '192.168.0.0/16', # IPv4 private network
- 'fe80::/10' # IPv6 linklocal
+ '167.235.158.251', # IPv4 of check.searx.space
+ '192.168.0.0/16', # IPv4 private network
+ 'fe80::/10', # IPv6 linklocal
]
+
block_ip = [
- '93.184.216.34', # IPv4 of example.org
- '257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
+ '93.184.216.34', # IPv4 of example.org
+ '257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
]
"""
@@ -72,7 +73,6 @@ def block_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bo
def ip_is_subnet_of_member_in_list(
real_ip: IPv4Address | IPv6Address, list_name: str, cfg: config.Config
) -> Tuple[bool, str]:
-
for net in cfg.get(list_name, default=[]):
try:
net = ip_network(net, strict=False)
diff --git a/searx/botdetection/link_token.py b/searx/botdetection/link_token.py
index 600796380..9e815e194 100644
--- a/searx/botdetection/link_token.py
+++ b/searx/botdetection/link_token.py
@@ -43,17 +43,18 @@ from ipaddress import (
import string
import random
+import flask
-from searx import logger
-from searx import valkeydb
from searx.valkeylib import secret_hash
-from searx.extended_types import SXNG_Request
from ._helpers import (
get_network,
- get_real_ip,
+ logger,
)
+from . import config
+from . import valkeydb
+
TOKEN_LIVE_TIME = 600
"""Lifetime (sec) of limiter's CSS token."""
@@ -69,17 +70,14 @@ TOKEN_KEY = 'SearXNG_limiter.token'
logger = logger.getChild('botdetection.link_token')
-def is_suspicious(network: IPv4Network | IPv6Network, request: SXNG_Request, renew: bool = False):
+def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False):
"""Checks whether a valid ping exists for this (client) network, if not
this request is rated as *suspicious*. If a valid ping exists and argument
``renew`` is ``True`` the expire time of this ping is reset to
:py:obj:`PING_LIVE_TIME`.
"""
- valkey_client = valkeydb.client()
- if not valkey_client:
- return False
-
+ valkey_client = valkeydb.get_valkey_client()
ping_key = get_ping_key(network, request)
if not valkey_client.get(ping_key):
logger.info("missing ping (IP: %s) / request: %s", network.compressed, ping_key)
@@ -92,28 +90,29 @@ def is_suspicious(network: IPv4Network | IPv6Network, request: SXNG_Request, ren
return False
-def ping(request: SXNG_Request, token: str):
+def ping(request: flask.Request, token: str):
"""This function is called by a request to URL ``/client<token>.css``. If
``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB.
The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
"""
- from . import valkey_client, cfg # pylint: disable=import-outside-toplevel, cyclic-import
+ valkey_client = valkeydb.get_valkey_client()
+ cfg = config.get_global_cfg()
- if not valkey_client:
- return
if not token_is_valid(token):
return
- real_ip = ip_address(get_real_ip(request))
+ real_ip = ip_address(request.remote_addr) # type: ignore
network = get_network(real_ip, cfg)
ping_key = get_ping_key(network, request)
- logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key)
+ logger.debug(
+ "store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip.compressed, ping_key
+ )
valkey_client.set(ping_key, 1, ex=PING_LIVE_TIME)
-def get_ping_key(network: IPv4Network | IPv6Network, request: SXNG_Request) -> str:
+def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str:
"""Generates a hashed key that fits (more or less) to a *WEB-browser
session* in a network."""
return (
@@ -134,20 +133,23 @@ def token_is_valid(token) -> bool:
def get_token() -> str:
"""Returns current token. If there is no currently active token a new token
- is generated randomly and stored in the valkey DB.
+ is generated randomly and stored in the Valkey DB. Without a
+ database connection, the string "12345678" is returned.
- :py:obj:`TOKEN_LIVE_TIME`
- :py:obj:`TOKEN_KEY`
"""
- valkey_client = valkeydb.client()
- if not valkey_client:
+ try:
+ valkey_client = valkeydb.get_valkey_client()
+ except ValueError:
# This function is also called when limiter is inactive / no valkey DB
# (see render function in webapp.py)
return '12345678'
+
token = valkey_client.get(TOKEN_KEY)
if token:
- token = token.decode('UTF-8')
+ token = token.decode('UTF-8') # type: ignore
else:
token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16))
valkey_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME)
diff --git a/searx/botdetection/trusted_proxies.py b/searx/botdetection/trusted_proxies.py
new file mode 100644
index 000000000..7191f0eb2
--- /dev/null
+++ b/searx/botdetection/trusted_proxies.py
@@ -0,0 +1,175 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Implementation of a middleware to determine the real IP of an HTTP request
+(:py:obj:`flask.request.remote_addr`) behind a proxy chain."""
+# pylint: disable=too-many-branches
+
+from __future__ import annotations
+import typing as t
+
+from collections import abc
+from ipaddress import IPv4Address, IPv6Address, ip_address, ip_network, IPv4Network, IPv6Network
+from werkzeug.http import parse_list_header
+
+from . import config
+from ._helpers import log_error_only_once, logger
+
+if t.TYPE_CHECKING:
+ from _typeshed.wsgi import StartResponse
+ from _typeshed.wsgi import WSGIApplication
+ from _typeshed.wsgi import WSGIEnvironment
+
+
+class ProxyFix:
+ """A middleware like the ProxyFix_ class, where the `x_for` argument is
+ replaced by a method that determines the number of trusted proxies via
+ the `botdetection.trusted_proxies` setting.
+
+ .. sidebar:: :py:obj:`flask.Request.remote_addr`
+
+ SearXNG uses Werkzeug's ProxyFix_ (with its default ``x_for=1``).
+
+ The remote IP (:py:obj:`flask.Request.remote_addr`) of the request is taken
+ from (first match):
+
+ - X-Forwarded-For_: If the header is set, the first untrusted IP that comes
+ before the IPs that are still part of the ``botdetection.trusted_proxies``
+ is used.
+
+ - `X-Real-IP <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__:
+ If X-Forwarded-For_ is not set, `X-Real-IP` is used
+ (``botdetection.trusted_proxies`` is ignored).
+
+ If none of the headers is set, the REMOTE_ADDR_ from the WSGI layer is used.
+ If (for whatever reason) no IP can be determined, an error message is
+ logged and ``100::`` is used instead (:rfc:`6666`).
+
+ .. _ProxyFix:
+ https://werkzeug.palletsprojects.com/middleware/proxy_fix/
+
+ .. _X-Forwarded-For:
+ https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
+
+ .. _REMOTE_ADDR:
+ https://wsgi.readthedocs.io/en/latest/proposals-2.0.html#making-some-keys-required
+
+ """
+
+ def __init__(self, wsgi_app: WSGIApplication) -> None:
+ self.wsgi_app = wsgi_app
+
+ def trusted_proxies(self) -> list[IPv4Network | IPv6Network]:
+ cfg = config.get_global_cfg()
+ proxy_list: list[str] = cfg.get("botdetection.trusted_proxies", default=[])
+ return [ip_network(net, strict=False) for net in proxy_list]
+
+ def trusted_remote_addr(
+ self,
+ x_forwarded_for: list[IPv4Address | IPv6Address],
+ trusted_proxies: list[IPv4Network | IPv6Network],
+ ) -> str:
+ # always iterate from right to left (rtl)
+ for addr in reversed(x_forwarded_for):
+ trust: bool = False
+
+ for net in trusted_proxies:
+ if addr.version == net.version and addr in net:
+ logger.debug("trust proxy %s (member of %s)", addr, net)
+ trust = True
+ break
+
+ # client address
+ if not trust:
+ return addr.compressed
+
+ # fallback to first address
+ return x_forwarded_for[0].compressed
+
+ def __call__(self, environ: WSGIEnvironment, start_response: StartResponse) -> abc.Iterable[bytes]:
+ # pylint: disable=too-many-statements
+
+ trusted_proxies = self.trusted_proxies()
+
+ # We do not rely on the REMOTE_ADDR from the WSGI environment / the
+ # variable is first removed from the WSGI environment and explicitly set
+ # in this function!
+
+ orig_remote_addr: str | None = environ.pop("REMOTE_ADDR")
+
+ # Validate the IPs involved in this game and delete all invalid ones
+ # from the WSGI environment.
+
+ if orig_remote_addr:
+ try:
+ addr = ip_address(orig_remote_addr)
+ if addr.version == 6 and addr.ipv4_mapped:
+ addr = addr.ipv4_mapped
+ orig_remote_addr = addr.compressed
+ except ValueError as exc:
+ logger.error("REMOTE_ADDR: %s / discard REMOTE_ADDR from WSGI environment", exc)
+ orig_remote_addr = None
+
+ x_real_ip: str | None = environ.get("HTTP_X_REAL_IP")
+ if x_real_ip:
+ try:
+ addr = ip_address(x_real_ip)
+ if addr.version == 6 and addr.ipv4_mapped:
+ addr = addr.ipv4_mapped
+ x_real_ip = addr.compressed
+ except ValueError as exc:
+ logger.error("X-Real-IP: %s / discard HTTP_X_REAL_IP from WSGI environment", exc)
+ environ.pop("HTTP_X_REAL_IP")
+ x_real_ip = None
+
+ x_forwarded_for: list[IPv4Address | IPv6Address] = []
+ if environ.get("HTTP_X_FORWARDED_FOR"):
+ for x_for_ip in parse_list_header(str(environ.get("HTTP_X_FORWARDED_FOR"))):
+ try:
+ addr = ip_address(x_for_ip)
+ except ValueError as exc:
+ logger.error("X-Forwarded-For: %s / discard HTTP_X_FORWARDED_FOR from WSGI environment", exc)
+ environ.pop("HTTP_X_FORWARDED_FOR")
+ x_forwarded_for = []
+ break
+
+ if addr.version == 6 and addr.ipv4_mapped:
+ addr = addr.ipv4_mapped
+ x_forwarded_for.append(addr)
+
+ # log questionable WSGI environments
+
+ if not x_forwarded_for and not x_real_ip:
+ log_error_only_once("X-Forwarded-For nor X-Real-IP header is set!")
+
+ if x_forwarded_for and not trusted_proxies:
+ log_error_only_once("missing botdetection.trusted_proxies config")
+ # without trusted_proxies, this variable is useless for determining
+ # the real IP
+ x_forwarded_for = []
+
+ # back up the original values of the WSGI environment variables that are adjusted
+
+ environ.update({"botdetection.trusted_proxies.orig": {"REMOTE_ADDR": orig_remote_addr}})
+
+ # determine *the real IP*
+
+ if x_forwarded_for:
+ environ["REMOTE_ADDR"] = self.trusted_remote_addr(x_forwarded_for, trusted_proxies)
+
+ elif x_real_ip:
+ environ["REMOTE_ADDR"] = x_real_ip
+
+ elif orig_remote_addr:
+ environ["REMOTE_ADDR"] = orig_remote_addr
+
+ else:
+ logger.error("No remote IP could be determined, use black-hole address: 100::")
+ environ["REMOTE_ADDR"] = "100::"
+
+ try:
+ _ = ip_address(environ["REMOTE_ADDR"])
+ except ValueError as exc:
+ logger.error("REMOTE_ADDR: %s, use black-hole address: 100::", exc)
+ environ["REMOTE_ADDR"] = "100::"
+
+ logger.debug("final REMOTE_ADDR is: %s", environ["REMOTE_ADDR"])
+ return self.wsgi_app(environ, start_response)
diff --git a/searx/botdetection/valkeydb.py b/searx/botdetection/valkeydb.py
new file mode 100644
index 000000000..3b8699786
--- /dev/null
+++ b/searx/botdetection/valkeydb.py
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Providing a Valkey database for the botdetection methods."""
+
+from __future__ import annotations
+
+import valkey
+
+__all__ = ["set_valkey_client", "get_valkey_client"]
+
+CLIENT: valkey.Valkey | None = None
+"""Global Valkey DB connection (Valkey client object)."""
+
+
+def set_valkey_client(valkey_client: valkey.Valkey):
+ global CLIENT # pylint: disable=global-statement
+ CLIENT = valkey_client
+
+
+def get_valkey_client() -> valkey.Valkey:
+ if CLIENT is None:
+ raise ValueError("No connection to the Valkey database has been established.")
+ return CLIENT
diff --git a/searx/compat.py b/searx/compat.py
index 035726469..2f45eb0e4 100644
--- a/searx/compat.py
+++ b/searx/compat.py
@@ -8,6 +8,8 @@ __all__ = [
]
import sys
+import warnings
+
# TOML (lib) compatibility
# ------------------------
@@ -16,3 +18,36 @@ if sys.version_info >= (3, 11):
import tomllib
else:
import tomli as tomllib
+
+
+# limiter backward compatibility
+# ------------------------------
+
+LIMITER_CFG_DEPRECATED = {
+ "real_ip": "limiter: config section 'real_ip' is deprecated",
+ "real_ip.x_for": "real_ip.x_for has been replaced by botdetection.trusted_proxies",
+ "real_ip.ipv4_prefix": "real_ip.ipv4_prefix has been replaced by botdetection.ipv4_prefix",
+ "real_ip.ipv6_prefix": "real_ip.ipv6_prefix has been replaced by botdetection.ipv6_prefix'",
+}
+
+
+def limiter_fix_cfg(cfg, cfg_file):
+
+ kwargs = {
+ "category": DeprecationWarning,
+ "filename": str(cfg_file),
+ "lineno": 0,
+ "module": "searx.limiter",
+ }
+
+ for opt, msg in LIMITER_CFG_DEPRECATED.items():
+ try:
+ val = cfg.get(opt)
+ except KeyError:
+ continue
+
+ warnings.warn_explicit(msg, **kwargs)
+ if opt == "real_ip.ipv4_prefix":
+ cfg.set("botdetection.ipv4_prefix", val)
+ if opt == "real_ip.ipv6_prefix":
+ cfg.set("botdetection.ipv6_prefix", val)
diff --git a/searx/flaskfix.py b/searx/flaskfix.py
index f2a54bdfc..4282824a3 100644
--- a/searx/flaskfix.py
+++ b/searx/flaskfix.py
@@ -3,7 +3,6 @@
from urllib.parse import urlparse
-from werkzeug.middleware.proxy_fix import ProxyFix
from werkzeug.serving import WSGIRequestHandler
from searx import settings
@@ -73,5 +72,5 @@ class ReverseProxyPathFix:
def patch_application(app):
# serve pages with HTTP/1.1
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server']['http_protocol_version'])
- # patch app to handle non root url-s behind proxy & wsgi
- app.wsgi_app = ReverseProxyPathFix(ProxyFix(app.wsgi_app))
+ # patch app to handle non root url-s behind proxy
+ app.wsgi_app = ReverseProxyPathFix(app.wsgi_app)
diff --git a/searx/limiter.py b/searx/limiter.py
index 99bc338d1..2b889157a 100644
--- a/searx/limiter.py
+++ b/searx/limiter.py
@@ -93,13 +93,14 @@ Implementation
"""
from __future__ import annotations
+from ipaddress import ip_address
import sys
from pathlib import Path
-from ipaddress import ip_address
import flask
import werkzeug
+import searx.compat
from searx import (
logger,
valkeydb,
@@ -116,7 +117,6 @@ from searx.botdetection import (
ip_limit,
ip_lists,
get_network,
- get_real_ip,
dump_request,
)
@@ -124,25 +124,24 @@ from searx.botdetection import (
# coherency, the logger is "limiter"
logger = logger.getChild('limiter')
-CFG: config.Config = None # type: ignore
+CFG: config.Config | None = None # type: ignore
_INSTALLED = False
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
"""Base configuration (schema) of the botdetection."""
-CFG_DEPRECATED = {
- # "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
-}
-
def get_cfg() -> config.Config:
+ """Returns SearXNG's global limiter configuration."""
global CFG # pylint: disable=global-statement
if CFG is None:
from . import settings_loader # pylint: disable=import-outside-toplevel
cfg_file = (settings_loader.get_user_cfg_folder() or Path("/etc/searxng")) / "limiter.toml"
- CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, cfg_file, CFG_DEPRECATED)
+ CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, cfg_file, searx.compat.LIMITER_CFG_DEPRECATED)
+ searx.compat.limiter_fix_cfg(CFG, cfg_file)
+
return CFG
@@ -150,7 +149,7 @@ def filter_request(request: SXNG_Request) -> werkzeug.Response | None:
# pylint: disable=too-many-return-statements
cfg = get_cfg()
- real_ip = ip_address(get_real_ip(request))
+ real_ip = ip_address(request.remote_addr)
network = get_network(real_ip, cfg)
if request.path == '/healthz':
diff --git a/searx/limiter.toml b/searx/limiter.toml
index b64a7bf28..0b40bf81f 100644
--- a/searx/limiter.toml
+++ b/searx/limiter.toml
@@ -1,8 +1,4 @@
-[real_ip]
-
-# Number of values to trust for X-Forwarded-For.
-
-x_for = 1
+[botdetection]
# The prefix defines the number of leading bits in an address that are compared
# to determine whether or not an address is part of a (client) network.
@@ -10,6 +6,19 @@ x_for = 1
ipv4_prefix = 32
ipv6_prefix = 48
+# If the request IP is in trusted_proxies list, the client IP address is
+# extracted from the X-Forwarded-For and X-Real-IP headers. This should be
+# used if SearXNG is behind a reverse proxy or load balancer.
+
+trusted_proxies = [
+ '127.0.0.0/8',
+ '::1',
+ # '192.168.0.0/16',
+ # '172.16.0.0/12',
+ # '10.0.0.0/8',
+ # 'fd00::/8',
+]
+
[botdetection.ip_limit]
# To get unlimited access in a local network, by default link-local addresses
@@ -37,4 +46,4 @@ pass_ip = [
# Activate passlist of (hardcoded) IPs from the SearXNG organization,
# e.g. `check.searx.space`.
-pass_searxng_org = true \ No newline at end of file
+pass_searxng_org = true
diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py
index ef035e683..1c51049a5 100644
--- a/searx/plugins/self_info.py
+++ b/searx/plugins/self_info.py
@@ -4,9 +4,10 @@ from __future__ import annotations
import typing
import re
+from ipaddress import ip_address
+
from flask_babel import gettext
-from searx.botdetection._helpers import get_real_ip
from searx.result_types import EngineResults
from . import Plugin, PluginInfo
@@ -48,8 +49,10 @@ class SXNGPlugin(Plugin):
if search.search_query.pageno > 1:
return results
- if self.ip_regex.search(search.search_query.query):
- results.add(results.types.Answer(answer=gettext("Your IP is: ") + get_real_ip(request)))
+ if self.ip_regex.search(search.search_query.query) and request.remote_addr:
+ results.add(
+ results.types.Answer(answer=gettext("Your IP is: ") + ip_address(request.remote_addr).compressed)
+ )
if self.ua_regex.match(search.search_query.query):
results.add(results.types.Answer(answer=gettext("Your user-agent is: ") + str(request.user_agent)))
diff --git a/searx/plugins/tor_check.py b/searx/plugins/tor_check.py
index 3338ff2ed..93506ff5a 100644
--- a/searx/plugins/tor_check.py
+++ b/searx/plugins/tor_check.py
@@ -5,6 +5,7 @@ user searches for ``tor-check``. It fetches the tor exit node list from
user's IP address is in it.
"""
from __future__ import annotations
+from ipaddress import ip_address
import typing
import re
@@ -14,7 +15,6 @@ from httpx import HTTPError
from searx.network import get
from searx.plugins import Plugin, PluginInfo
from searx.result_types import EngineResults
-from searx.botdetection import get_real_ip
if typing.TYPE_CHECKING:
from searx.search import SearchWithPlugins
@@ -66,7 +66,7 @@ class SXNGPlugin(Plugin):
results.add(results.types.Answer(answer=f"{msg} {url_exit_list}"))
return results
- real_ip = get_real_ip(request)
+ real_ip = ip_address(address=str(request.remote_addr)).compressed
if real_ip in node_list:
msg = gettext("You are using Tor and it looks like you have the external IP address")
diff --git a/searx/valkeydb.py b/searx/valkeydb.py
index 2817c6d0a..3a7be1fd9 100644
--- a/searx/valkeydb.py
+++ b/searx/valkeydb.py
@@ -17,6 +17,7 @@ A valkey DB connect can be tested by::
>>>
"""
+from __future__ import annotations
import os
import pwd
@@ -26,12 +27,12 @@ import warnings
import valkey
from searx import get_setting
-
-_CLIENT = None
+_CLIENT: valkey.Valkey | None = None
logger = logging.getLogger(__name__)
-def client() -> valkey.Valkey:
+def client() -> valkey.Valkey | None:
+ """Returns SearXNG's global Valkey DB connector (Valkey client object)."""
return _CLIENT
diff --git a/searx/webapp.py b/searx/webapp.py
index 906ec93e4..4179c32b0 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -57,7 +57,7 @@ from searx import (
from searx import infopage
from searx import limiter
-from searx.botdetection import link_token
+from searx.botdetection import link_token, ProxyFix
from searx.data import ENGINE_DESCRIPTIONS
from searx.result_types import Answer
@@ -1391,6 +1391,7 @@ def static_headers(headers: Headers, _path: str, _url: str) -> None:
headers[header] = str(value)
+app.wsgi_app = ProxyFix(app.wsgi_app)
app.wsgi_app = WhiteNoise(
app.wsgi_app,
root=settings['ui']['static_path'],