From 1ec325adccc427fe05cf08da9a2d9d63da7365f4 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 23 May 2023 18:16:37 +0200 Subject: [mod] limiter -> botdetection: modularization and documentation In order to be able to meet the outstanding requirements, the implementation is modularized and supplemented with documentation. This patch does not contain any functional change, except it fixes issue #2455 ---- Activate the limiter in the settings.yml and simulate a bot request by:: curl -H 'Accept-Language: de-DE,en-US;q=0.7,en;q=0.3' \ -H 'Accept: text/html' -H 'User-Agent: xyz' \ -H 'Accept-Encoding: gzip' \ 'http://127.0.0.1:8888/search?q=foo' In the LOG: DEBUG searx.botdetection.link_token : missing ping for this request: ..... Since ``BURST_MAX_SUSPICIOUS = 2`` you can repeat the query above two times before you get a "Too Many Requests" response. Closes: https://github.com/searxng/searxng/issues/2455 Signed-off-by: Markus Heiser --- searx/botdetection/link_token.py | 126 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 searx/botdetection/link_token.py (limited to 'searx/botdetection/link_token.py') diff --git a/searx/botdetection/link_token.py b/searx/botdetection/link_token.py new file mode 100644 index 000000000..8ef215f6c --- /dev/null +++ b/searx/botdetection/link_token.py @@ -0,0 +1,126 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +""" +Method ``link_token`` +--------------------- + +The ``link_token`` method evaluates a request as :py:obj:`suspicious +` if the URL ``/client.css`` is not requested by the +client. By adding a random component (the token) in the URL a bot can not send +a ping by request a static URL. + +.. note:: + + This method requires a redis DB and needs a HTTP X-Forwarded-For_ header. + +To get in use of this method a flask URL route needs to be added: + +.. 
code:: python + + @app.route('/client.css', methods=['GET', 'POST']) + def client_token(token=None): + link_token.ping(request, token) + return Response('', mimetype='text/css') + +And in the HTML template from flask a stylesheet link is needed (the value of +``link_token`` comes from :py:obj:`get_token`): + +.. code:: html + + + +.. _X-Forwarded-For: + https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For + +""" + +import string +import random +import flask + +from searx import logger +from searx import redisdb +from searx.redislib import secret_hash + +TOKEN_LIVE_TIME = 600 +"""Livetime (sec) of limiter's CSS token.""" + +PING_KEY = 'SearXNG_limiter.ping' +TOKEN_KEY = 'SearXNG_limiter.token' + +logger = logger.getChild('botdetection.link_token') + + +def is_suspicious(request: flask.Request): + """Checks if there is a valid ping for this request, if not this request is + rated as *suspicious*""" + redis_client = redisdb.client() + if not redis_client: + return False + + ping_key = get_ping_key(request) + if not redis_client.get(ping_key): + logger.warning( + "missing ping (IP: %s) / request: %s", + request.headers.get('X-Forwarded-For', ''), + ping_key, + ) + return True + + logger.debug("found ping for this request: %s", ping_key) + return False + + +def ping(request: flask.Request, token: str): + """This function is called by a request to URL ``/client.css``""" + redis_client = redisdb.client() + if not redis_client: + return + if not token_is_valid(token): + return + ping_key = get_ping_key(request) + logger.debug("store ping for: %s", ping_key) + redis_client.set(ping_key, 1, ex=TOKEN_LIVE_TIME) + + +def get_ping_key(request: flask.Request): + """Generates a hashed key that fits (more or less) to a request. At least + X-Forwarded-For_ is needed to be able to assign the request to an IP. 
+ + """ + return secret_hash( + PING_KEY + + request.headers.get('X-Forwarded-For', '') + + request.headers.get('Accept-Language', '') + + request.headers.get('User-Agent', '') + ) + + +def token_is_valid(token) -> bool: + valid = token == get_token() + logger.debug("token is valid --> %s", valid) + return valid + + +def get_token() -> str: + """Returns current token. If there is no currently active token a new token + is generated randomly and stored in the redis DB. + + - :py:obj:`TOKEN_LIVE_TIME` + - :py:obj:`TOKEN_KEY` + + """ + redis_client = redisdb.client() + if not redis_client: + # This function is also called when limiter is inactive / no redis DB + # (see render function in webapp.py) + return '12345678' + token = redis_client.get(TOKEN_KEY) + if token: + token = token.decode('UTF-8') + else: + token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16)) + redis_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME) + return token -- cgit v1.2.3 From b8c7c2c9aa604fd1fb7be5559c9ad025ceb17aa4 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 28 May 2023 18:58:31 +0200 Subject: [mod] botdetection - improve ip_limit and link_token methods - counting requests in LONG_WINDOW and BURST_WINDOW is not needed when the request is validated by the link_token method [1] - renew a ping-key on validation [2], this is needed for infinite scrolling, where no new token (CSS) is loaded. 
/ this does not fix the BURST_MAX issue in the vanilla limiter - normalize the counter names of the ip_limit method to 'ip_limit.*' - just integrate the ip_limit method straight forward in the limiter plugin / non intermediate code --> ip_limit now returns None or a werkzeug.Response object that can be passed by the plugin to the flask application / non intermediate code that returns a tuple [1] https://github.com/searxng/searxng/pull/2357#issuecomment-1566113277 [2] https://github.com/searxng/searxng/pull/2357#discussion_r1208542206 [3] https://github.com/searxng/searxng/pull/2357#issuecomment-1566125979 Signed-off-by: Markus Heiser --- searx/botdetection/link_token.py | 43 ++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 11 deletions(-) (limited to 'searx/botdetection/link_token.py') diff --git a/searx/botdetection/link_token.py b/searx/botdetection/link_token.py index 8ef215f6c..376d06d61 100644 --- a/searx/botdetection/link_token.py +++ b/searx/botdetection/link_token.py @@ -47,15 +47,24 @@ from searx.redislib import secret_hash TOKEN_LIVE_TIME = 600 """Livetime (sec) of limiter's CSS token.""" +PING_LIVE_TIME = 3600 +"""Livetime (sec) of the ping-key from a client (request)""" + PING_KEY = 'SearXNG_limiter.ping' +"""Prefix of all ping-keys generated by :py:obj:`get_ping_key`""" + TOKEN_KEY = 'SearXNG_limiter.token' +"""Key for which the current token is stored in the DB""" logger = logger.getChild('botdetection.link_token') -def is_suspicious(request: flask.Request): +def is_suspicious(request: flask.Request, renew: bool = False): """Checks if there is a valid ping for this request, if not this request is - rated as *suspicious*""" + rated as *suspicious*. If a valid ping exists and argument ``renew`` is + ``True`` the expire time of this ping is reset to :py:obj:`PING_LIVE_TIME`. 
+ + """ redis_client = redisdb.client() if not redis_client: return False @@ -69,12 +78,19 @@ def is_suspicious(request: flask.Request): ) return True - logger.debug("found ping for this request: %s", ping_key) + if renew: + redis_client.set(ping_key, 1, ex=PING_LIVE_TIME) + + logger.debug("found ping for client request: %s", ping_key) return False def ping(request: flask.Request, token: str): - """This function is called by a request to URL ``/client.css``""" + """This function is called by a request to URL ``/client.css``. If + ``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB. + The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`. + + """ redis_client = redisdb.client() if not redis_client: return @@ -82,19 +98,24 @@ def ping(request: flask.Request, token: str): return ping_key = get_ping_key(request) logger.debug("store ping for: %s", ping_key) - redis_client.set(ping_key, 1, ex=TOKEN_LIVE_TIME) + redis_client.set(ping_key, 1, ex=PING_LIVE_TIME) def get_ping_key(request: flask.Request): - """Generates a hashed key that fits (more or less) to a request. At least - X-Forwarded-For_ is needed to be able to assign the request to an IP. + """Generates a hashed key that fits (more or less) to a client (request). + At least X-Forwarded-For_ is needed to be able to assign the request to an + IP. 
 """ - return secret_hash( + return ( PING_KEY - + request.headers.get('X-Forwarded-For', '') - + request.headers.get('Accept-Language', '') - + request.headers.get('User-Agent', '') + + "[" + + secret_hash( + request.headers.get('X-Forwarded-For', '') + + request.headers.get('Accept-Language', '') + + request.headers.get('User-Agent', '') + ) + + "]" ) -- cgit v1.2.3 From 38431d2e142b7da6a9b48aad203f02a2eff7e6fd Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Mon, 29 May 2023 19:46:37 +0200 Subject: [fix] correct determination of the IP for the request For correct determination of the IP of the request the function botdetection.get_real_ip() is implemented. This function is used in the ip_limit and link_token methods of the botdetection and it is used in the self_info plugin. Documentation about the X-Forwarded-For header has been added. [1] https://github.com/searxng/searxng/pull/2357#issuecomment-1566211059 Signed-off-by: Markus Heiser --- searx/botdetection/link_token.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'searx/botdetection/link_token.py') diff --git a/searx/botdetection/link_token.py b/searx/botdetection/link_token.py index 376d06d61..a83214a33 100644 --- a/searx/botdetection/link_token.py +++ b/searx/botdetection/link_token.py @@ -43,6 +43,7 @@ import flask from searx import logger from searx import redisdb from searx.redislib import secret_hash +from ._helpers import get_real_ip TOKEN_LIVE_TIME = 600 """Livetime (sec) of limiter's CSS token.""" @@ -73,7 +74,7 @@ def is_suspicious(request: flask.Request, renew: bool = False): if not redis_client.get(ping_key): logger.warning( "missing ping (IP: %s) / request: %s", - request.headers.get('X-Forwarded-For', ''), + get_real_ip(request), ping_key, ) return True @@ -111,9 +112,7 @@ def get_ping_key(request: flask.Request): PING_KEY + "[" + secret_hash( - request.headers.get('X-Forwarded-For', '') - + request.headers.get('Accept-Language', '') - + 
request.headers.get('User-Agent', '') + get_real_ip(request) + request.headers.get('Accept-Language', '') + request.headers.get('User-Agent', '') ) + "]" ) -- cgit v1.2.3 From 281e36f4b7848374535d5e953050ae73423191ca Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Thu, 1 Jun 2023 15:41:48 +0200 Subject: [fix] limiter: replace real_ip by IPv4/v6 network Closes: https://github.com/searxng/searxng/issues/2477 Signed-off-by: Markus Heiser --- searx/botdetection/link_token.py | 54 ++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 22 deletions(-) (limited to 'searx/botdetection/link_token.py') diff --git a/searx/botdetection/link_token.py b/searx/botdetection/link_token.py index a83214a33..11a6a56b5 100644 --- a/searx/botdetection/link_token.py +++ b/searx/botdetection/link_token.py @@ -6,7 +6,7 @@ Method ``link_token`` The ``link_token`` method evaluates a request as :py:obj:`suspicious ` if the URL ``/client.css`` is not requested by the -client. By adding a random component (the token) in the URL a bot can not send +client. By adding a random component (the token) in the URL, a bot can not send a ping by request a static URL. .. 
note:: @@ -35,6 +35,11 @@ And in the HTML template from flask a stylesheet link is needed (the value of https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For """ +from __future__ import annotations +from ipaddress import ( + IPv4Network, + IPv6Network, +) import string import random @@ -43,7 +48,11 @@ import flask from searx import logger from searx import redisdb from searx.redislib import secret_hash -from ._helpers import get_real_ip + +from ._helpers import ( + get_network, + get_real_ip, +) TOKEN_LIVE_TIME = 600 """Livetime (sec) of limiter's CSS token.""" @@ -60,29 +69,26 @@ TOKEN_KEY = 'SearXNG_limiter.token' logger = logger.getChild('botdetection.link_token') -def is_suspicious(request: flask.Request, renew: bool = False): - """Checks if there is a valid ping for this request, if not this request is - rated as *suspicious*. If a valid ping exists and argument ``renew`` is - ``True`` the expire time of this ping is reset to :py:obj:`PING_LIVE_TIME`. +def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False): + """Checks whether a valid ping exists for this (client) network, if not + this request is rated as *suspicious*. If a valid ping exists and argument + ``renew`` is ``True`` the expire time of this ping is reset to + :py:obj:`PING_LIVE_TIME`. 
""" redis_client = redisdb.client() if not redis_client: return False - ping_key = get_ping_key(request) + ping_key = get_ping_key(network, request) if not redis_client.get(ping_key): - logger.warning( - "missing ping (IP: %s) / request: %s", - get_real_ip(request), - ping_key, - ) + logger.warning("missing ping (IP: %s) / request: %s", network.compressed, ping_key) return True if renew: redis_client.set(ping_key, 1, ex=PING_LIVE_TIME) - logger.debug("found ping for client request: %s", ping_key) + logger.debug("found ping for (client) network %s -> %s", network.compressed, ping_key) return False @@ -92,27 +98,31 @@ def ping(request: flask.Request, token: str): The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`. """ + from . import limiter # pylint: disable=import-outside-toplevel, cyclic-import + redis_client = redisdb.client() if not redis_client: return if not token_is_valid(token): return - ping_key = get_ping_key(request) - logger.debug("store ping for: %s", ping_key) - redis_client.set(ping_key, 1, ex=PING_LIVE_TIME) + cfg = limiter.get_cfg() + real_ip = get_real_ip(request) + network = get_network(real_ip, cfg) -def get_ping_key(request: flask.Request): - """Generates a hashed key that fits (more or less) to a client (request). - At least X-Forwarded-For_ is needed to be able to assign the request to an - IP. 
+ ping_key = get_ping_key(network, request) + logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key) + redis_client.set(ping_key, 1, ex=PING_LIVE_TIME) - """ + +def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str: + """Generates a hashed key that fits (more or less) to a *WEB-browser + session* in a network.""" return ( PING_KEY + "[" + secret_hash( - get_real_ip(request) + request.headers.get('Accept-Language', '') + request.headers.get('User-Agent', '') + network.compressed + request.headers.get('Accept-Language', '') + request.headers.get('User-Agent', '') ) + "]" ) -- cgit v1.2.3