From 1ec325adccc427fe05cf08da9a2d9d63da7365f4 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 23 May 2023 18:16:37 +0200 Subject: [mod] limiter -> botdetection: modularization and documentation In order to be able to meet the outstanding requirements, the implementation is modularized and supplemented with documentation. This patch does not contain functional change, except it fixes issue #2455 ---- Activate limiter in the settings.yml and simulate a bot request by:: curl -H 'Accept-Language: de-DE,en-US;q=0.7,en;q=0.3' \ -H 'Accept: text/html' -H 'User-Agent: xyz' \ -H 'Accept-Encoding: gzip' \ 'http://127.0.0.1:8888/search?q=foo' In the LOG: DEBUG searx.botdetection.link_token : missing ping for this request: ..... Since ``BURST_MAX_SUSPICIOUS = 2`` you can repeat the query above two times before you get a "Too Many Requests" response. Closes: https://github.com/searxng/searxng/issues/2455 Signed-off-by: Markus Heiser --- searx/botdetection/__init__.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 searx/botdetection/__init__.py (limited to 'searx/botdetection/__init__.py') diff --git a/searx/botdetection/__init__.py b/searx/botdetection/__init__.py new file mode 100644 index 000000000..78a7d30f3 --- /dev/null +++ b/searx/botdetection/__init__.py @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +""".. _botdetection src: + +Bot detection methods +--------------------- + +The methods implemented in this python package are use by the :ref:`limiter src`. 
+ +""" + +import flask + + +def dump_request(request: flask.Request): + return ( + "%s: '%s'" % (request.headers.get('X-Forwarded-For'), request.path) + + " || form: %s" % request.form + + " || Accept: %s" % request.headers.get('Accept') + + " || Accept-Language: %s" % request.headers.get('Accept-Language') + + " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding') + + " || Content-Type: %s" % request.headers.get('Content-Type') + + " || Content-Length: %s" % request.headers.get('Content-Length') + + " || Connection: %s" % request.headers.get('Connection') + + " || User-Agent: %s" % request.headers.get('User-Agent') + ) -- cgit v1.2.3 From b8c7c2c9aa604fd1fb7be5559c9ad025ceb17aa4 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 28 May 2023 18:58:31 +0200 Subject: [mod] botdetection - improve ip_limit and link_token methods - counting requests in LONG_WINDOW and BURST_WINDOW is not needed when the request is validated by the link_token method [1] - renew a ping-key on validation [2], this is needed for infinite scrolling, where no new token (CSS) is loaded. 
/ this does not fix the BURST_MAX issue in the vanilla limiter - normalize the counter names of the ip_limit method to 'ip_limit.*' - just integrate the ip_limit method straight forward in the limiter plugin / non intermediate code --> ip_limit now returns None or a werkzeug.Response object that can be passed by the plugin to the flask application / non intermediate code that returns a tuple [1] https://github.com/searxng/searxng/pull/2357#issuecomment-1566113277 [2] https://github.com/searxng/searxng/pull/2357#discussion_r1208542206 [3] https://github.com/searxng/searxng/pull/2357#issuecomment-1566125979 Signed-off-by: Markus Heiser --- searx/botdetection/__init__.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'searx/botdetection/__init__.py') diff --git a/searx/botdetection/__init__.py b/searx/botdetection/__init__.py index 78a7d30f3..b4de0f9c8 100644 --- a/searx/botdetection/__init__.py +++ b/searx/botdetection/__init__.py @@ -9,18 +9,4 @@ The methods implemented in this python package are use by the :ref:`limiter src` """ -import flask - - -def dump_request(request: flask.Request): - return ( - "%s: '%s'" % (request.headers.get('X-Forwarded-For'), request.path) - + " || form: %s" % request.form - + " || Accept: %s" % request.headers.get('Accept') - + " || Accept-Language: %s" % request.headers.get('Accept-Language') - + " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding') - + " || Content-Type: %s" % request.headers.get('Content-Type') - + " || Content-Length: %s" % request.headers.get('Content-Length') - + " || Connection: %s" % request.headers.get('Connection') - + " || User-Agent: %s" % request.headers.get('User-Agent') - ) +from ._helpers import dump_request -- cgit v1.2.3 From 38431d2e142b7da6a9b48aad203f02a2eff7e6fd Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Mon, 29 May 2023 19:46:37 +0200 Subject: [fix] correct determination of the IP for the request For correct determination of the IP to the 
request the function botdetection.get_real_ip() is implemented. This function is used in the ip_limit and link_token method of the botdetection and it is used in the self_info plugin. A documentation about the X-Forwarded-For header has been added. [1] https://github.com/searxng/searxng/pull/2357#issuecomment-1566211059 Signed-off-by: Markus Heiser --- searx/botdetection/__init__.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'searx/botdetection/__init__.py') diff --git a/searx/botdetection/__init__.py b/searx/botdetection/__init__.py index b4de0f9c8..c903b0bb4 100644 --- a/searx/botdetection/__init__.py +++ b/searx/botdetection/__init__.py @@ -2,11 +2,25 @@ # lint: pylint """.. _botdetection src: -Bot detection methods ---------------------- +X-Forwarded-For +=============== -The methods implemented in this python package are use by the :ref:`limiter src`. +.. attention:: + + A correct setup of the HTTP request headers ``X-Forwarded-For`` and + ``X-Real-IP`` is essential to be able to assign a request to an IP correctly: + + - `NGINX RequestHeader`_ + - `Apache RequestHeader`_ + +.. _NGINX RequestHeader: + https://docs.searxng.org/admin/installation-nginx.html#nginx-s-searxng-site +.. _Apache RequestHeader: + https://docs.searxng.org/admin/installation-apache.html#apache-s-searxng-site + +.. 
autofunction:: searx.botdetection.get_real_ip """ from ._helpers import dump_request +from ._helpers import get_real_ip -- cgit v1.2.3 From 281e36f4b7848374535d5e953050ae73423191ca Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Thu, 1 Jun 2023 15:41:48 +0200 Subject: [fix] limiter: replace real_ip by IPv4/v6 network Closes: https://github.com/searxng/searxng/issues/2477 Signed-off-by: Markus Heiser --- searx/botdetection/__init__.py | 1 + 1 file changed, 1 insertion(+) (limited to 'searx/botdetection/__init__.py') diff --git a/searx/botdetection/__init__.py b/searx/botdetection/__init__.py index c903b0bb4..fcd8e5630 100644 --- a/searx/botdetection/__init__.py +++ b/searx/botdetection/__init__.py @@ -24,3 +24,4 @@ X-Forwarded-For from ._helpers import dump_request from ._helpers import get_real_ip +from ._helpers import too_many_requests -- cgit v1.2.3