diff options
| author | Markus Heiser <markus.heiser@darmarit.de> | 2024-10-27 13:17:40 +0100 |
|---|---|---|
| committer | Markus Heiser <markus.heiser@darmarIT.de> | 2025-05-04 02:07:26 +0200 |
| commit | fe08bb1d909cb6cef57ce91211c2cbed63300c9e (patch) | |
| tree | 7521c70630055928c5de6dc4d832118148c2285c | |
| parent | 8ef5fbca4e90668c8ae1f9f60f4d5d43816a593c (diff) | |
[mod] botdetection: HTTP Fetch Metadata Request Headers
HTTP Fetch Metadata Request Headers [1][2] are used to detect bot requests. Bots
with invalid *Fetch Metadata* will be redirected to the intro (`index`) page.
[1] https://www.w3.org/TR/fetch-metadata/
[2] https://developer.mozilla.org/en-US/docs/Glossary/Fetch_metadata_request_header
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
| -rw-r--r-- | docs/src/searx.botdetection.rst | 3 | ||||
| -rw-r--r-- | searx/botdetection/_helpers.py | 3 | ||||
| -rw-r--r-- | searx/botdetection/http_sec_fetch.py | 61 | ||||
| -rw-r--r-- | searx/limiter.py | 9 |
4 files changed, 74 insertions, 2 deletions
diff --git a/docs/src/searx.botdetection.rst b/docs/src/searx.botdetection.rst index 04cb81dfd..1c3e12dad 100644 --- a/docs/src/searx.botdetection.rst +++ b/docs/src/searx.botdetection.rst @@ -53,6 +53,9 @@ Probe HTTP headers .. automodule:: searx.botdetection.http_user_agent :members: +.. automodule:: searx.botdetection.http_sec_fetch + :members: + .. _botdetection config: Config diff --git a/searx/botdetection/_helpers.py b/searx/botdetection/_helpers.py index 0f6bafb17..7b57ae694 100644 --- a/searx/botdetection/_helpers.py +++ b/searx/botdetection/_helpers.py @@ -34,6 +34,9 @@ def dump_request(request: SXNG_Request): + " || Content-Length: %s" % request.headers.get('Content-Length') + " || Connection: %s" % request.headers.get('Connection') + " || User-Agent: %s" % request.headers.get('User-Agent') + + " || Sec-Fetch-Site: %s" % request.headers.get('Sec-Fetch-Site') + + " || Sec-Fetch-Mode: %s" % request.headers.get('Sec-Fetch-Mode') + + " || Sec-Fetch-Dest: %s" % request.headers.get('Sec-Fetch-Dest') ) diff --git a/searx/botdetection/http_sec_fetch.py b/searx/botdetection/http_sec_fetch.py new file mode 100644 index 000000000..9791e74e1 --- /dev/null +++ b/searx/botdetection/http_sec_fetch.py @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +""" +Method ``http_sec_fetch`` +------------------------- + +The ``http_sec_fetch`` method protects resources from web attacks with `Fetch +Metadata`_. A request is filtered out in case of: + +- http header Sec-Fetch-Mode_ is invalid +- http header Sec-Fetch-Dest_ is invalid + +.. _Fetch Metadata: + https://developer.mozilla.org/en-US/docs/Glossary/Fetch_metadata_request_header + +.. _Sec-Fetch-Dest: + https://developer.mozilla.org/en-US/docs/Web/API/Request/destination + +.. 
_Sec-Fetch-Mode: + https://developer.mozilla.org/en-US/docs/Web/API/Request/mode + + +""" +# pylint: disable=unused-argument + +from __future__ import annotations +from ipaddress import ( + IPv4Network, + IPv6Network, +) + +import flask +import werkzeug + +from searx.extended_types import SXNG_Request + +from . import config +from ._helpers import logger + + +def filter_request( + network: IPv4Network | IPv6Network, + request: SXNG_Request, + cfg: config.Config, +) -> werkzeug.Response | None: + + val = request.headers.get("Sec-Fetch-Mode", "") + if val != "navigate": + logger.debug("invalid Sec-Fetch-Mode '%s'", val) + return flask.redirect(flask.url_for('index'), code=302) + + val = request.headers.get("Sec-Fetch-Site", "") + if val not in ('same-origin', 'same-site', 'none'): + logger.debug("invalid Sec-Fetch-Site '%s'", val) + return flask.redirect(flask.url_for('index'), code=302) + + val = request.headers.get("Sec-Fetch-Dest", "") + if val != "document": + logger.debug("invalid Sec-Fetch-Dest '%s'", val) + return flask.redirect(flask.url_for('index'), code=302) + + return None diff --git a/searx/limiter.py b/searx/limiter.py index 293416366..92b38c68f 100644 --- a/searx/limiter.py +++ b/searx/limiter.py @@ -112,6 +112,7 @@ from searx.botdetection import ( http_accept_encoding, http_accept_language, http_user_agent, + http_sec_fetch, ip_limit, ip_lists, get_network, @@ -179,16 +180,17 @@ def filter_request(request: SXNG_Request) -> werkzeug.Response | None: logger.error("BLOCK %s: matched BLOCKLIST - %s", network.compressed, msg) return flask.make_response(('IP is on BLOCKLIST - %s' % msg, 429)) - # methods applied on / + # methods applied on all requests for func in [ http_user_agent, ]: val = func.filter_request(network, request, cfg) if val is not None: + logger.debug(f"NOT OK ({func.__name__}): {network}: %s", dump_request(sxng_request)) return val - # methods applied on /search + # methods applied on /search requests if request.path == '/search': @@ -197,11 +199,14 @@ 
def filter_request(request: SXNG_Request) -> werkzeug.Response | None: http_accept_encoding, http_accept_language, http_user_agent, + http_sec_fetch, ip_limit, ]: val = func.filter_request(network, request, cfg) if val is not None: + logger.debug(f"NOT OK ({func.__name__}): {network}: %s", dump_request(sxng_request)) return val + logger.debug(f"OK {network}: %s", dump_request(sxng_request)) return None |