From 1ec325adccc427fe05cf08da9a2d9d63da7365f4 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 23 May 2023 18:16:37 +0200 Subject: [mod] limiter -> botdetection: modularization and documentation In order to be able to meet the outstanding requirements, the implementation is modularized and supplemented with documentation. This patch does not contain functional changes, except it fixes issue #2455 ---- Activate limiter in the settings.yml and simulate a bot request by:: curl -H 'Accept-Language: de-DE,en-US;q=0.7,en;q=0.3' \ -H 'Accept: text/html' -H 'User-Agent: xyz' \ -H 'Accept-Encoding: gzip' \ 'http://127.0.0.1:8888/search?q=foo' In the LOG: DEBUG searx.botdetection.link_token : missing ping for this request: ..... Since ``BURST_MAX_SUSPICIOUS = 2`` you can repeat the query above two times before you get a "Too Many Requests" response. Closes: https://github.com/searxng/searxng/issues/2455 Signed-off-by: Markus Heiser --- searx/botdetection/ip_limit.py | 90 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 searx/botdetection/ip_limit.py (limited to 'searx/botdetection/ip_limit.py') diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py new file mode 100644 index 000000000..fce3f8b67 --- /dev/null +++ b/searx/botdetection/ip_limit.py @@ -0,0 +1,90 @@ +""" +Method ``ip_limit`` +------------------- + +The ``ip_limit`` method counts request from an IP in *sliding windows*. If +there are to many requests in a sliding window, the request is evaluated as a +bot request. This method requires a redis DB and needs a HTTP X-Forwarded-For_ +header. To take privacy only the hash value of an IP is stored in the redis DB +and at least for a maximum of 10 minutes. + +The :py:obj:`link_token` method is used to investigate whether a request is +*suspicious*. 
If the :py:obj:`link_token` method is activated and a request is +*suspicious* the request rates are reduced: + +- :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS` +- :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS` + +.. _X-Forwarded-For: + https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For + +""" + +from typing import Optional, Tuple +import flask + +from searx import redisdb +from searx import logger +from searx.redislib import incr_sliding_window + +from . import link_token + +logger = logger.getChild('botdetection.ip_limit') + +BURST_WINDOW = 20 +"""Time (sec) before sliding window for *burst* requests expires.""" + +BURST_MAX = 15 +"""Maximum requests from one IP in the :py:obj:`BURST_WINDOW`""" + +BURST_MAX_SUSPICIOUS = 2 +"""Maximum of suspicious requests from one IP in the :py:obj:`BURST_WINDOW`""" + +LONG_WINDOW = 600 +"""Time (sec) before the longer sliding window expires.""" + +LONG_MAX = 150 +"""Maximum requests from one IP in the :py:obj:`LONG_WINDOW`""" + +LONG_MAX_SUSPICIOUS = 10 +"""Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`""" + +API_WONDOW = 3600 +"""Time (sec) before sliding window for API requests (format != html) expires.""" + +API_MAX = 4 +"""Maximum requests from one IP in the :py:obj:`API_WONDOW`""" + + +def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]: + redis_client = redisdb.client() + + x_forwarded_for = request.headers.get('X-Forwarded-For', '') + if not x_forwarded_for: + logger.error("missing HTTP header X-Forwarded-For") + + if request.args.get('format', 'html') != 'html': + c = incr_sliding_window(redis_client, 'IP limit - API_WONDOW:' + x_forwarded_for, API_WONDOW) + if c > API_MAX: + return 429, "BLOCK %s: API limit exceeded" + + suspicious = link_token.is_suspicious(request) + + if suspicious: + c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW) + if c > BURST_MAX_SUSPICIOUS: + return 429, f"bot 
detected, too many request from {x_forwarded_for} in BURST_MAX_SUSPICIOUS" + + c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW) + if c > LONG_MAX_SUSPICIOUS: + return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX_SUSPICIOUS" + + else: + c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW) + if c > BURST_MAX: + return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX" + + c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW) + if c > LONG_MAX: + return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX" + return None -- cgit v1.2.3 From 66fdec0eb92bf11c0bc477d6fb1df3dc783e4dcb Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 26 May 2023 17:24:43 +0200 Subject: [mod] limiter: add config file /etc/searxng/limiter.toml Signed-off-by: Markus Heiser --- searx/botdetection/ip_limit.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'searx/botdetection/ip_limit.py') diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py index fce3f8b67..2646920c2 100644 --- a/searx/botdetection/ip_limit.py +++ b/searx/botdetection/ip_limit.py @@ -1,4 +1,5 @@ -""" +""".. 
_botdetection.ip_limit: + Method ``ip_limit`` ------------------- @@ -22,6 +23,8 @@ The :py:obj:`link_token` method is used to investigate whether a request is from typing import Optional, Tuple import flask +from searx.tools import config + from searx import redisdb from searx import logger @@ -56,7 +59,7 @@ API_MAX = 4 """Maximum requests from one IP in the :py:obj:`API_WONDOW`""" -def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]: +def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: redis_client = redisdb.client() x_forwarded_for = request.headers.get('X-Forwarded-For', '') @@ -68,7 +71,9 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]: if c > API_MAX: return 429, "BLOCK %s: API limit exceeded" - suspicious = link_token.is_suspicious(request) + suspicious = False + if cfg['botdetection.ip_limit.link_token']: + suspicious = link_token.is_suspicious(request) if suspicious: c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW) -- cgit v1.2.3 From 9d7456fd6c49fbd96f03f6a5dedd6ba05e924d0a Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sat, 27 May 2023 18:58:06 +0200 Subject: [fix] limiter.toml: botdetection.ip_limit turn off link_token by default To activate the ``link_token`` method in the ``ip_limit`` method add the following to your ``/etc/searxng/limiter.toml``:: [botdetection.ip_limit] link_token = true Related: https://github.com/searxng/searxng/pull/2357#issuecomment-1554116941 Signed-off-by: Markus Heiser --- searx/botdetection/ip_limit.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'searx/botdetection/ip_limit.py') diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py index 2646920c2..e72015190 100644 --- a/searx/botdetection/ip_limit.py +++ b/searx/botdetection/ip_limit.py @@ -9,9 +9,18 @@ bot request. 
This method requires a redis DB and needs a HTTP X-Forwarded-For_ header. To take privacy only the hash value of an IP is stored in the redis DB and at least for a maximum of 10 minutes. -The :py:obj:`link_token` method is used to investigate whether a request is -*suspicious*. If the :py:obj:`link_token` method is activated and a request is -*suspicious* the request rates are reduced: +The :py:obj:`.link_token` method can be used to investigate whether a request is +*suspicious*. To activate the :py:obj:`.link_token` method in the +:py:obj:`.ip_limit` method add the following to your +``/etc/searxng/limiter.toml``: + +.. code:: toml + + [botdetection.ip_limit] + link_token = true + +If the :py:obj:`.link_token` method is activated and a request is *suspicious* +the request rates are reduced: - :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS` - :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS` -- cgit v1.2.3 From 52f1452c09ab2ec74aa5898d9ea749f33a71a814 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sat, 27 May 2023 21:36:34 +0200 Subject: [mod] limiter: ip_limt - monitore suspicious IPs To intercept bots that get their IPs from a range of IPs, there is a ``SUSPICIOUS_IP_WINDOW``. In this window the suspicious IPs are stored for a longer time. IPs stored in this sliding window have a maximum of ``SUSPICIOUS_IP_MAX`` accesses before they are blocked. As soon as the IP makes a request that is not suspicious, the sliding window for this IP is droped. 
Signed-off-by: Markus Heiser --- searx/botdetection/ip_limit.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'searx/botdetection/ip_limit.py') diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py index e72015190..9cffff7f0 100644 --- a/searx/botdetection/ip_limit.py +++ b/searx/botdetection/ip_limit.py @@ -25,6 +25,13 @@ the request rates are reduced: - :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS` - :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS` +To intercept bots that get their IPs from a range of IPs, there is a +:py:obj:`SUSPICIOUS_IP_WINDOW`. In this window the suspicious IPs are stored +for a longer time. IPs stored in this sliding window have a maximum of +:py:obj:`SUSPICIOUS_IP_MAX` accesses before they are blocked. As soon as the IP +makes a request that is not suspicious, the sliding window for this IP is +droped. + .. _X-Forwarded-For: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For @@ -37,7 +44,7 @@ from searx.tools import config from searx import redisdb from searx import logger -from searx.redislib import incr_sliding_window +from searx.redislib import incr_sliding_window, drop_counter from . 
import link_token @@ -67,6 +74,12 @@ API_WONDOW = 3600 API_MAX = 4 """Maximum requests from one IP in the :py:obj:`API_WONDOW`""" +SUSPICIOUS_IP_WINDOW = 3600 * 24 +"""Time (sec) before sliding window for one suspicious IP expires.""" + +SUSPICIOUS_IP_MAX = 3 +"""Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`.""" + def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: redis_client = redisdb.client() @@ -81,10 +94,18 @@ def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple return 429, "BLOCK %s: API limit exceeded" suspicious = False + suspicious_ip_counter = 'IP limit - SUSPICIOUS_IP_WINDOW:' + x_forwarded_for + if cfg['botdetection.ip_limit.link_token']: suspicious = link_token.is_suspicious(request) if suspicious: + + # this IP is suspicious: count requests from this IP + c = incr_sliding_window(redis_client, suspicious_ip_counter, SUSPICIOUS_IP_WINDOW) + if c > SUSPICIOUS_IP_MAX: + return 429, f"bot detected, too many request from {x_forwarded_for} in SUSPICIOUS_IP_WINDOW" + c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW) if c > BURST_MAX_SUSPICIOUS: return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX_SUSPICIOUS" @@ -94,6 +115,11 @@ def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX_SUSPICIOUS" else: + + if cfg['botdetection.ip_limit.link_token']: + # this IP is no longer suspicious: release ip again / delete the counter of this IP + drop_counter(redis_client, suspicious_ip_counter) + c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW) if c > BURST_MAX: return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX" -- cgit v1.2.3 From b8c7c2c9aa604fd1fb7be5559c9ad025ceb17aa4 Mon Sep 17 00:00:00 2001 From: Markus 
Heiser Date: Sun, 28 May 2023 18:58:31 +0200 Subject: [mod] botdetection - improve ip_limit and link_token methods - counting requests in LONG_WINDOW and BURST_WINDOW is not needed when the request is validated by the link_token method [1] - renew a ping-key on validation [2], this is needed for infinite scrolling, where no new token (CSS) is loaded. / this does not fix the BURST_MAX issue in the vanilla limiter - normalize the counter names of the ip_limit method to 'ip_limit.*' - just integrate the ip_limit method straight forward in the limiter plugin / non intermediate code --> ip_limit now returns None or a werkzeug.Response object that can be passed by the plugin to the flask application / non intermediate code that returns a tuple [1] https://github.com/searxng/searxng/pull/2357#issuecomment-1566113277 [2] https://github.com/searxng/searxng/pull/2357#discussion_r1208542206 [3] https://github.com/searxng/searxng/pull/2357#issuecomment-1566125979 Signed-off-by: Markus Heiser --- searx/botdetection/ip_limit.py | 61 +++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 28 deletions(-) (limited to 'searx/botdetection/ip_limit.py') diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py index 9cffff7f0..e7fa57187 100644 --- a/searx/botdetection/ip_limit.py +++ b/searx/botdetection/ip_limit.py @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint """.. _botdetection.ip_limit: Method ``ip_limit`` @@ -37,16 +39,18 @@ droped. """ -from typing import Optional, Tuple +from typing import Optional import flask +import werkzeug from searx.tools import config - from searx import redisdb from searx import logger from searx.redislib import incr_sliding_window, drop_counter from . 
import link_token +from ._helpers import too_many_requests + logger = logger.getChild('botdetection.ip_limit') @@ -81,50 +85,51 @@ SUSPICIOUS_IP_MAX = 3 """Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`.""" -def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: +def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]: + # pylint: disable=too-many-return-statements redis_client = redisdb.client() - x_forwarded_for = request.headers.get('X-Forwarded-For', '') - if not x_forwarded_for: + client_ip = request.headers.get('X-Forwarded-For', '') + if not client_ip: logger.error("missing HTTP header X-Forwarded-For") if request.args.get('format', 'html') != 'html': - c = incr_sliding_window(redis_client, 'IP limit - API_WONDOW:' + x_forwarded_for, API_WONDOW) + c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + client_ip, API_WONDOW) if c > API_MAX: - return 429, "BLOCK %s: API limit exceeded" - - suspicious = False - suspicious_ip_counter = 'IP limit - SUSPICIOUS_IP_WINDOW:' + x_forwarded_for + return too_many_requests(request, "too many request in API_WINDOW") if cfg['botdetection.ip_limit.link_token']: - suspicious = link_token.is_suspicious(request) - if suspicious: + suspicious = link_token.is_suspicious(request, True) + + if not suspicious: + # this IP is no longer suspicious: release ip again / delete the counter of this IP + drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip) + return None # this IP is suspicious: count requests from this IP - c = incr_sliding_window(redis_client, suspicious_ip_counter, SUSPICIOUS_IP_WINDOW) + c = incr_sliding_window(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip, SUSPICIOUS_IP_WINDOW) if c > SUSPICIOUS_IP_MAX: - return 429, f"bot detected, too many request from {x_forwarded_for} in SUSPICIOUS_IP_WINDOW" + logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW 
(redirect to /)", client_ip) + return flask.redirect(flask.url_for('index'), code=302) - c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW) + c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW) if c > BURST_MAX_SUSPICIOUS: - return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX_SUSPICIOUS" + return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)") - c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW) + c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW) if c > LONG_MAX_SUSPICIOUS: - return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX_SUSPICIOUS" + return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)") - else: + return None - if cfg['botdetection.ip_limit.link_token']: - # this IP is no longer suspicious: release ip again / delete the counter of this IP - drop_counter(redis_client, suspicious_ip_counter) + # vanilla limiter without extensions counts BURST_MAX and LONG_MAX + c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW) + if c > BURST_MAX: + return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX)") - c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW) - if c > BURST_MAX: - return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX" + c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW) + if c > LONG_MAX: + return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX)") - c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW) - if c > LONG_MAX: - return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX" return None -- cgit v1.2.3 From 
38431d2e142b7da6a9b48aad203f02a2eff7e6fd Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Mon, 29 May 2023 19:46:37 +0200 Subject: [fix] correct determination of the IP for the request For correct determination of the IP to the request the function botdetection.get_real_ip() is implemented. This function is used in the ip_limit and link_token method of the botdetection and it is used in the self_info plugin. A documentation about the X-Forwarded-For header has been added. [1] https://github.com/searxng/searxng/pull/2357#issuecomment-1566211059 Signed-off-by: Markus Heiser --- searx/botdetection/ip_limit.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'searx/botdetection/ip_limit.py') diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py index e7fa57187..268285dd9 100644 --- a/searx/botdetection/ip_limit.py +++ b/searx/botdetection/ip_limit.py @@ -49,7 +49,7 @@ from searx import logger from searx.redislib import incr_sliding_window, drop_counter from . 
import link_token -from ._helpers import too_many_requests +from ._helpers import too_many_requests, get_real_ip logger = logger.getChild('botdetection.ip_limit') @@ -89,9 +89,7 @@ def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkz # pylint: disable=too-many-return-statements redis_client = redisdb.client() - client_ip = request.headers.get('X-Forwarded-For', '') - if not client_ip: - logger.error("missing HTTP header X-Forwarded-For") + client_ip = get_real_ip(request) if request.args.get('format', 'html') != 'html': c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + client_ip, API_WONDOW) -- cgit v1.2.3 From 281e36f4b7848374535d5e953050ae73423191ca Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Thu, 1 Jun 2023 15:41:48 +0200 Subject: [fix] limiter: replace real_ip by IPv4/v6 network Closes: https://github.com/searxng/searxng/issues/2477 Signed-off-by: Markus Heiser --- searx/botdetection/ip_limit.py | 49 ++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 18 deletions(-) (limited to 'searx/botdetection/ip_limit.py') diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py index 268285dd9..46e026371 100644 --- a/searx/botdetection/ip_limit.py +++ b/searx/botdetection/ip_limit.py @@ -38,8 +38,12 @@ droped. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For """ +from __future__ import annotations +from ipaddress import ( + IPv4Network, + IPv6Network, +) -from typing import Optional import flask import werkzeug from searx.tools import config @@ -49,7 +53,7 @@ from searx import logger from searx.redislib import incr_sliding_window, drop_counter from . 
import link_token -from ._helpers import too_many_requests, get_real_ip +from ._helpers import too_many_requests logger = logger.getChild('botdetection.ip_limit') @@ -85,49 +89,58 @@ SUSPICIOUS_IP_MAX = 3 """Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`.""" -def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]: +def filter_request( + network: IPv4Network | IPv6Network, + request: flask.Request, + cfg: config.Config, +) -> werkzeug.Response | None: + # pylint: disable=too-many-return-statements redis_client = redisdb.client() - client_ip = get_real_ip(request) + if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']: + logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed) + return None if request.args.get('format', 'html') != 'html': - c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + client_ip, API_WONDOW) + c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + network.compressed, API_WONDOW) if c > API_MAX: - return too_many_requests(request, "too many request in API_WINDOW") + return too_many_requests(network, "too many request in API_WINDOW") if cfg['botdetection.ip_limit.link_token']: - suspicious = link_token.is_suspicious(request, True) + suspicious = link_token.is_suspicious(network, request, True) if not suspicious: # this IP is no longer suspicious: release ip again / delete the counter of this IP - drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip) + drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed) return None # this IP is suspicious: count requests from this IP - c = incr_sliding_window(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip, SUSPICIOUS_IP_WINDOW) + c = incr_sliding_window( + redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed, SUSPICIOUS_IP_WINDOW + ) if c > SUSPICIOUS_IP_MAX: - logger.error("BLOCK: too many 
request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", client_ip) + logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", network) return flask.redirect(flask.url_for('index'), code=302) - c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW) + c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW) if c > BURST_MAX_SUSPICIOUS: - return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)") + return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)") - c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW) + c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW) if c > LONG_MAX_SUSPICIOUS: - return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)") + return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)") return None # vanilla limiter without extensions counts BURST_MAX and LONG_MAX - c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW) + c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW) if c > BURST_MAX: - return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX)") + return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX)") - c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW) + c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW) if c > LONG_MAX: - return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX)") + return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX)") return None -- cgit v1.2.3 From 80af38d37b21dc6e5edbf27bd22310db42a6f923 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Thu, 1 Jun 2023 
16:00:49 +0200 Subject: [mod] increase SUSPICIOUS_IP_WINDOW from one day to 30 days In my tests I see bots rotating IPs (with endless IP lists). If such a bot has 100 IPs and has three attempts (SUSPICIOUS_IP_MAX = 3) then it can successfully send up to 300 requests in one day while rotating the IP. To block the bots for a longer period of time the SUSPICIOUS_IP_WINDOW, as the time period in which an IP is observed, must be increased. For normal WEB-browsers this is no problem, because the SUSPICIOUS_IP_WINDOW is deleted as soon as the CSS with the token is loaded. SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30 Time (sec) before sliding window for one suspicious IP expires. SUSPICIOUS_IP_MAX = 3 Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`.""" Signed-off-by: Markus Heiser --- searx/botdetection/ip_limit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'searx/botdetection/ip_limit.py') diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py index 46e026371..bb4229f0e 100644 --- a/searx/botdetection/ip_limit.py +++ b/searx/botdetection/ip_limit.py @@ -82,7 +82,7 @@ API_WONDOW = 3600 API_MAX = 4 """Maximum requests from one IP in the :py:obj:`API_WONDOW`""" -SUSPICIOUS_IP_WINDOW = 3600 * 24 +SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30 """Time (sec) before sliding window for one suspicious IP expires.""" SUSPICIOUS_IP_MAX = 3 -- cgit v1.2.3