summaryrefslogtreecommitdiff
path: root/searx/botdetection
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2024-10-27 13:17:40 +0100
committerMarkus Heiser <markus.heiser@darmarIT.de>2025-05-04 02:07:26 +0200
commitfe08bb1d909cb6cef57ce91211c2cbed63300c9e (patch)
tree7521c70630055928c5de6dc4d832118148c2285c /searx/botdetection
parent8ef5fbca4e90668c8ae1f9f60f4d5d43816a593c (diff)
[mod] botdetection: HTTP Fetch Metadata Request Headers
HTTP Fetch Metadata Request Headers [1][2] are used to detect bot requests. Bots with invalid *Fetch Metadata* will be redirected to the intro (`index`) page. [1] https://www.w3.org/TR/fetch-metadata/ [2] https://developer.mozilla.org/en-US/docs/Glossary/Fetch_metadata_request_header Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/botdetection')
-rw-r--r--searx/botdetection/_helpers.py3
-rw-r--r--searx/botdetection/http_sec_fetch.py61
2 files changed, 64 insertions, 0 deletions
diff --git a/searx/botdetection/_helpers.py b/searx/botdetection/_helpers.py
index 0f6bafb17..7b57ae694 100644
--- a/searx/botdetection/_helpers.py
+++ b/searx/botdetection/_helpers.py
@@ -34,6 +34,9 @@ def dump_request(request: SXNG_Request):
+ " || Content-Length: %s" % request.headers.get('Content-Length')
+ " || Connection: %s" % request.headers.get('Connection')
+ " || User-Agent: %s" % request.headers.get('User-Agent')
+ + " || Sec-Fetch-Site: %s" % request.headers.get('Sec-Fetch-Site')
+ + " || Sec-Fetch-Mode: %s" % request.headers.get('Sec-Fetch-Mode')
+ + " || Sec-Fetch-Dest: %s" % request.headers.get('Sec-Fetch-Dest')
)
diff --git a/searx/botdetection/http_sec_fetch.py b/searx/botdetection/http_sec_fetch.py
new file mode 100644
index 000000000..9791e74e1
--- /dev/null
+++ b/searx/botdetection/http_sec_fetch.py
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Method ``http_sec_fetch``
+-------------------------
+
+The ``http_sec_fetch`` method protects resources from web attacks with `Fetch
+Metadata`_. A request is filtered out in case of:
+
+- http header Sec-Fetch-Mode_ is invalid
+- http header Sec-Fetch-Site_ is invalid
+- http header Sec-Fetch-Dest_ is invalid
+
+.. _Fetch Metadata:
+   https://developer.mozilla.org/en-US/docs/Glossary/Fetch_metadata_request_header
+
+.. _Sec-Fetch-Dest:
+   https://developer.mozilla.org/en-US/docs/Web/API/Request/destination
+
+.. _Sec-Fetch-Mode:
+   https://developer.mozilla.org/en-US/docs/Web/API/Request/mode
+
+.. _Sec-Fetch-Site:
+   https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Sec-Fetch-Site
+
+
+"""
+# pylint: disable=unused-argument
+
+from __future__ import annotations
+from ipaddress import (
+ IPv4Network,
+ IPv6Network,
+)
+
+import flask
+import werkzeug
+
+from searx.extended_types import SXNG_Request
+
+from . import config
+from ._helpers import logger
+
+
+def filter_request(
+    network: IPv4Network | IPv6Network,
+    request: SXNG_Request,
+    cfg: config.Config,
+) -> werkzeug.Response | None:
+    """Redirect requests whose *Fetch Metadata* headers are invalid.
+
+    The headers ``Sec-Fetch-Mode``, ``Sec-Fetch-Site`` and ``Sec-Fetch-Dest``
+    are checked in that order.  A missing header is read as an empty string
+    and therefore counts as invalid.
+
+    :param network: not used by this check.
+    :param request: the incoming request whose headers are inspected.
+    :param cfg: not used by this check.
+    :return: a ``302`` redirect to the ``index`` endpoint as soon as one of
+        the headers is invalid, otherwise ``None``.
+    """
+
+    val = request.headers.get("Sec-Fetch-Mode", "")
+    if val != "navigate":
+        logger.debug("invalid Sec-Fetch-Mode '%s'", val)
+        return flask.redirect(flask.url_for('index'), code=302)
+
+    val = request.headers.get("Sec-Fetch-Site", "")
+    if val not in ('same-origin', 'same-site', 'none'):
+        logger.debug("invalid Sec-Fetch-Site '%s'", val)
+        # The redirect must be returned; building the response without
+        # returning it lets the request pass unfiltered.
+        return flask.redirect(flask.url_for('index'), code=302)
+
+    val = request.headers.get("Sec-Fetch-Dest", "")
+    if val != "document":
+        logger.debug("invalid Sec-Fetch-Dest '%s'", val)
+        return flask.redirect(flask.url_for('index'), code=302)
+
+    return None