From f798ddd4922d793d5e6ccb7c4111810d549ff4f4 Mon Sep 17 00:00:00 2001 From: Gaspard d'Hautefeuille Date: Wed, 9 Jul 2025 07:55:37 +0200 Subject: [mod] migrate from Redis to Valkey (#4795) This patch migrates from `redis==5.2.1` [1] to `valkey==6.1.0` [2]. The migration to valkey is necessary because the company behind Redis has decided to abandon the open source license. After experiencing a drop in user numbers, they now want to run it under a dual license again. But this move demonstrates once again how unreliable the company is and how it treats open source developers. To review first, read the docs:: $ make docs.live Follow the instructions to remove redis: - http://0.0.0.0:8000/admin/settings/settings_redis.html Config and install a local valkey DB: - http://0.0.0.0:8000/admin/settings/settings_valkey.html [1] https://pypi.org/project/redis/ [2] https://pypi.org/project/valkey/ Co-authored-by: HLFH Co-authored-by: Markus Heiser --- searx/botdetection/__init__.py | 8 +- searx/botdetection/ip_limit.py | 24 ++-- searx/botdetection/link_token.py | 32 ++--- searx/engines/redis_server.py | 99 --------------- searx/engines/valkey_server.py | 99 +++++++++++++++ searx/limiter.py | 22 ++-- searx/redisdb.py | 69 ----------- searx/redislib.py | 240 ------------------------------------- searx/search/checker/background.py | 34 +++--- searx/search/checker/scheduler.lua | 12 +- searx/search/checker/scheduler.py | 20 ++-- searx/settings.yml | 13 +- searx/settings_defaults.py | 4 + searx/valkeydb.py | 65 ++++++++++ searx/valkeylib.py | 240 +++++++++++++++++++++++++++++++++++++ searx/webapp.py | 4 +- 16 files changed, 493 insertions(+), 492 deletions(-) delete mode 100644 searx/engines/redis_server.py create mode 100644 searx/engines/valkey_server.py delete mode 100644 searx/redisdb.py delete mode 100644 searx/redislib.py create mode 100644 searx/valkeydb.py create mode 100644 searx/valkeylib.py (limited to 'searx') diff --git a/searx/botdetection/__init__.py 
b/searx/botdetection/__init__.py index 51f437cf0..4079d97a9 100644 --- a/searx/botdetection/__init__.py +++ b/searx/botdetection/__init__.py @@ -12,11 +12,11 @@ from ._helpers import too_many_requests __all__ = ['dump_request', 'get_network', 'get_real_ip', 'too_many_requests'] -redis_client = None +valkey_client = None cfg = None -def init(_cfg, _redis_client): - global redis_client, cfg # pylint: disable=global-statement - redis_client = _redis_client +def init(_cfg, _valkey_client): + global valkey_client, cfg # pylint: disable=global-statement + valkey_client = _valkey_client cfg = _cfg diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py index 161a9826e..93af8b7c5 100644 --- a/searx/botdetection/ip_limit.py +++ b/searx/botdetection/ip_limit.py @@ -6,8 +6,8 @@ Method ``ip_limit`` The ``ip_limit`` method counts request from an IP in *sliding windows*. If there are to many requests in a sliding window, the request is evaluated as a -bot request. This method requires a redis DB and needs a HTTP X-Forwarded-For_ -header. To take privacy only the hash value of an IP is stored in the redis DB +bot request. This method requires a valkey DB and needs a HTTP X-Forwarded-For_ +header. To take privacy only the hash value of an IP is stored in the valkey DB and at least for a maximum of 10 minutes. The :py:obj:`.link_token` method can be used to investigate whether a request is @@ -46,8 +46,8 @@ import flask import werkzeug from searx.extended_types import SXNG_Request -from searx import redisdb -from searx.redislib import incr_sliding_window, drop_counter +from searx import valkeydb +from searx.valkeylib import incr_sliding_window, drop_counter from . import link_token from . 
import config @@ -97,14 +97,14 @@ def filter_request( ) -> werkzeug.Response | None: # pylint: disable=too-many-return-statements - redis_client = redisdb.client() + valkey_client = valkeydb.client() if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']: logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed) return None if request.args.get('format', 'html') != 'html': - c = incr_sliding_window(redis_client, 'ip_limit.API_WINDOW:' + network.compressed, API_WINDOW) + c = incr_sliding_window(valkey_client, 'ip_limit.API_WINDOW:' + network.compressed, API_WINDOW) if c > API_MAX: return too_many_requests(network, "too many request in API_WINDOW") @@ -114,12 +114,12 @@ def filter_request( if not suspicious: # this IP is no longer suspicious: release ip again / delete the counter of this IP - drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed) + drop_counter(valkey_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed) return None # this IP is suspicious: count requests from this IP c = incr_sliding_window( - redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed, SUSPICIOUS_IP_WINDOW + valkey_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed, SUSPICIOUS_IP_WINDOW ) if c > SUSPICIOUS_IP_MAX: logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", network) @@ -127,22 +127,22 @@ def filter_request( response.headers["Cache-Control"] = "no-store, max-age=0" return response - c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW) + c = incr_sliding_window(valkey_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW) if c > BURST_MAX_SUSPICIOUS: return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)") - c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW) + c = 
incr_sliding_window(valkey_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW) if c > LONG_MAX_SUSPICIOUS: return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)") return None # vanilla limiter without extensions counts BURST_MAX and LONG_MAX - c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW) + c = incr_sliding_window(valkey_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW) if c > BURST_MAX: return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX)") - c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW) + c = incr_sliding_window(valkey_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW) if c > LONG_MAX: return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX)") diff --git a/searx/botdetection/link_token.py b/searx/botdetection/link_token.py index c255790cb..600796380 100644 --- a/searx/botdetection/link_token.py +++ b/searx/botdetection/link_token.py @@ -10,7 +10,7 @@ a ping by request a static URL. .. note:: - This method requires a redis DB and needs a HTTP X-Forwarded-For_ header. + This method requires a valkey DB and needs a HTTP X-Forwarded-For_ header. To get in use of this method a flask URL route needs to be added: @@ -45,8 +45,8 @@ import string import random from searx import logger -from searx import redisdb -from searx.redislib import secret_hash +from searx import valkeydb +from searx.valkeylib import secret_hash from searx.extended_types import SXNG_Request from ._helpers import ( @@ -76,17 +76,17 @@ def is_suspicious(network: IPv4Network | IPv6Network, request: SXNG_Request, ren :py:obj:`PING_LIVE_TIME`. 
""" - redis_client = redisdb.client() - if not redis_client: + valkey_client = valkeydb.client() + if not valkey_client: return False ping_key = get_ping_key(network, request) - if not redis_client.get(ping_key): + if not valkey_client.get(ping_key): logger.info("missing ping (IP: %s) / request: %s", network.compressed, ping_key) return True if renew: - redis_client.set(ping_key, 1, ex=PING_LIVE_TIME) + valkey_client.set(ping_key, 1, ex=PING_LIVE_TIME) logger.debug("found ping for (client) network %s -> %s", network.compressed, ping_key) return False @@ -98,9 +98,9 @@ def ping(request: SXNG_Request, token: str): The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`. """ - from . import redis_client, cfg # pylint: disable=import-outside-toplevel, cyclic-import + from . import valkey_client, cfg # pylint: disable=import-outside-toplevel, cyclic-import - if not redis_client: + if not valkey_client: return if not token_is_valid(token): return @@ -110,7 +110,7 @@ def ping(request: SXNG_Request, token: str): ping_key = get_ping_key(network, request) logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key) - redis_client.set(ping_key, 1, ex=PING_LIVE_TIME) + valkey_client.set(ping_key, 1, ex=PING_LIVE_TIME) def get_ping_key(network: IPv4Network | IPv6Network, request: SXNG_Request) -> str: @@ -134,21 +134,21 @@ def token_is_valid(token) -> bool: def get_token() -> str: """Returns current token. If there is no currently active token a new token - is generated randomly and stored in the redis DB. + is generated randomly and stored in the valkey DB. 
- :py:obj:`TOKEN_LIVE_TIME` - :py:obj:`TOKEN_KEY` """ - redis_client = redisdb.client() - if not redis_client: - # This function is also called when limiter is inactive / no redis DB + valkey_client = valkeydb.client() + if not valkey_client: + # This function is also called when limiter is inactive / no valkey DB # (see render function in webapp.py) return '12345678' - token = redis_client.get(TOKEN_KEY) + token = valkey_client.get(TOKEN_KEY) if token: token = token.decode('UTF-8') else: token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16)) - redis_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME) + valkey_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME) return token diff --git a/searx/engines/redis_server.py b/searx/engines/redis_server.py deleted file mode 100644 index eebb5809b..000000000 --- a/searx/engines/redis_server.py +++ /dev/null @@ -1,99 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -"""Redis is an open source (BSD licensed), in-memory data structure (key value -based) store. Before configuring the ``redis_server`` engine, you must install -the dependency redis_. - -Configuration -============= - -Select a database to search in and set its index in the option ``db``. You can -either look for exact matches or use partial keywords to find what you are -looking for by configuring ``exact_match_only``. - -Example -======= - -Below is an example configuration: - -.. 
code:: yaml - - # Required dependency: redis - - - name: myredis - shortcut : rds - engine: redis_server - exact_match_only: false - host: '127.0.0.1' - port: 6379 - enable_http: true - password: '' - db: 0 - -Implementations -=============== - -""" - -import redis # pylint: disable=import-error - -from searx.result_types import EngineResults - -engine_type = 'offline' - -# redis connection variables -host = '127.0.0.1' -port = 6379 -password = '' -db = 0 - -# engine specific variables -paging = False -exact_match_only = True - -_redis_client = None - - -def init(_engine_settings): - global _redis_client # pylint: disable=global-statement - _redis_client = redis.StrictRedis( - host=host, - port=port, - db=db, - password=password or None, - decode_responses=True, - ) - - -def search(query, _params) -> EngineResults: - res = EngineResults() - - if not exact_match_only: - for kvmap in search_keys(query): - res.add(res.types.KeyValue(kvmap=kvmap)) - return res - - kvmap: dict[str, str] = _redis_client.hgetall(query) - if kvmap: - res.add(res.types.KeyValue(kvmap=kvmap)) - elif " " in query: - qset, rest = query.split(" ", 1) - for row in _redis_client.hscan_iter(qset, match='*{}*'.format(rest)): - res.add(res.types.KeyValue(kvmap={row[0]: row[1]})) - return res - - -def search_keys(query) -> list[dict]: - ret = [] - for key in _redis_client.scan_iter(match='*{}*'.format(query)): - key_type = _redis_client.type(key) - res = None - - if key_type == 'hash': - res = _redis_client.hgetall(key) - elif key_type == 'list': - res = dict(enumerate(_redis_client.lrange(key, 0, -1))) - - if res: - res['redis_key'] = key - ret.append(res) - return ret diff --git a/searx/engines/valkey_server.py b/searx/engines/valkey_server.py new file mode 100644 index 000000000..b2d3dd26f --- /dev/null +++ b/searx/engines/valkey_server.py @@ -0,0 +1,99 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Valkey is an open source (BSD licensed), in-memory data structure (key value +based) store. 
Before configuring the ``valkey_server`` engine, you must install +the dependency valkey_. + +Configuration +============= + +Select a database to search in and set its index in the option ``db``. You can +either look for exact matches or use partial keywords to find what you are +looking for by configuring ``exact_match_only``. + +Example +======= + +Below is an example configuration: + +.. code:: yaml + + # Required dependency: valkey + + - name: myvalkey + shortcut : rds + engine: valkey_server + exact_match_only: false + host: '127.0.0.1' + port: 6379 + enable_http: true + password: '' + db: 0 + +Implementations +=============== + +""" + +import valkey # pylint: disable=import-error + +from searx.result_types import EngineResults + +engine_type = 'offline' + +# valkey connection variables +host = '127.0.0.1' +port = 6379 +password = '' +db = 0 + +# engine specific variables +paging = False +exact_match_only = True + +_valkey_client = None + + +def init(_engine_settings): + global _valkey_client # pylint: disable=global-statement + _valkey_client = valkey.StrictValkey( + host=host, + port=port, + db=db, + password=password or None, + decode_responses=True, + ) + + +def search(query, _params) -> EngineResults: + res = EngineResults() + + if not exact_match_only: + for kvmap in search_keys(query): + res.add(res.types.KeyValue(kvmap=kvmap)) + return res + + kvmap: dict[str, str] = _valkey_client.hgetall(query) + if kvmap: + res.add(res.types.KeyValue(kvmap=kvmap)) + elif " " in query: + qset, rest = query.split(" ", 1) + for row in _valkey_client.hscan_iter(qset, match='*{}*'.format(rest)): + res.add(res.types.KeyValue(kvmap={row[0]: row[1]})) + return res + + +def search_keys(query) -> list[dict]: + ret = [] + for key in _valkey_client.scan_iter(match='*{}*'.format(query)): + key_type = _valkey_client.type(key) + res = None + + if key_type == 'hash': + res = _valkey_client.hgetall(key) + elif key_type == 'list': + res = dict(enumerate(_valkey_client.lrange(key, 0, 
-1))) + + if res: + res['valkey_key'] = key + ret.append(res) + return ret diff --git a/searx/limiter.py b/searx/limiter.py index 92b38c68f..99bc338d1 100644 --- a/searx/limiter.py +++ b/searx/limiter.py @@ -17,7 +17,7 @@ from the :ref:`botdetection`: the time. - Detection & dynamically :ref:`botdetection rate limit` of bots based on the - behavior of the requests. For dynamically changeable IP lists a Redis + behavior of the requests. For dynamically changeable IP lists a Valkey database is needed. The prerequisite for IP based methods is the correct determination of the IP of @@ -50,13 +50,13 @@ To enable the limiter activate: ... limiter: true # rate limit the number of request on the instance, block some bots -and set the redis-url connection. Check the value, it depends on your redis DB -(see :ref:`settings redis`), by example: +and set the valkey-url connection. Check the value, it depends on your valkey DB +(see :ref:`settings valkey`), by example: .. code:: yaml - redis: - url: unix:///usr/local/searxng-redis/run/redis.sock?db=0 + valkey: + url: valkey://localhost:6379/0 Configure Limiter @@ -102,7 +102,7 @@ import werkzeug from searx import ( logger, - redisdb, + valkeydb, ) from searx import botdetection from searx.extended_types import SXNG_Request, sxng_request @@ -217,7 +217,7 @@ def pre_request(): def is_installed(): - """Returns ``True`` if limiter is active and a redis DB is available.""" + """Returns ``True`` if limiter is active and a valkey DB is available.""" return _INSTALLED @@ -229,15 +229,15 @@ def initialize(app: flask.Flask, settings): # (e.g. 
the self_info plugin uses the botdetection to get client IP) cfg = get_cfg() - redis_client = redisdb.client() - botdetection.init(cfg, redis_client) + valkey_client = valkeydb.client() + botdetection.init(cfg, valkey_client) if not (settings['server']['limiter'] or settings['server']['public_instance']): return - if not redis_client: + if not valkey_client: logger.error( - "The limiter requires Redis, please consult the documentation: " + "The limiter requires Valkey, please consult the documentation: " "https://docs.searxng.org/admin/searx.limiter.html" ) if settings['server']['public_instance']: diff --git a/searx/redisdb.py b/searx/redisdb.py deleted file mode 100644 index bed0c347b..000000000 --- a/searx/redisdb.py +++ /dev/null @@ -1,69 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -"""Implementation of the redis client (redis-py_). - -.. _redis-py: https://github.com/redis/redis-py - -This implementation uses the :ref:`settings redis` setup from ``settings.yml``. -A redis DB connect can be tested by:: - - >>> from searx import redisdb - >>> redisdb.initialize() - True - >>> db = redisdb.client() - >>> db.set("foo", "bar") - True - >>> db.get("foo") - b'bar' - >>> - -""" - -import os -import pwd -import logging -import redis -from searx import get_setting - - -OLD_REDIS_URL_DEFAULT_URL = 'unix:///usr/local/searxng-redis/run/redis.sock?db=0' -"""This was the default Redis URL in settings.yml.""" - -_CLIENT = None -logger = logging.getLogger(__name__) - - -def client() -> redis.Redis: - return _CLIENT - - -def initialize(): - global _CLIENT # pylint: disable=global-statement - redis_url = get_setting('redis.url') - if not redis_url: - return False - try: - # create a client, but no connection is done - _CLIENT = redis.Redis.from_url(redis_url) - - # log the parameters as seen by the redis lib, without the password - kwargs = _CLIENT.get_connection_kwargs().copy() - kwargs.pop('password', None) - kwargs = ' '.join([f'{k}={v!r}' for k, v in 
kwargs.items()]) - logger.info("connecting to Redis %s", kwargs) - - # check the connection - _CLIENT.ping() - - # no error: the redis connection is working - logger.info("connected to Redis") - return True - except redis.exceptions.RedisError as e: - _CLIENT = None - _pw = pwd.getpwuid(os.getuid()) - logger.exception("[%s (%s)] can't connect redis DB ...", _pw.pw_name, _pw.pw_uid) - if redis_url == OLD_REDIS_URL_DEFAULT_URL and isinstance(e, redis.exceptions.ConnectionError): - logger.info( - "You can safely ignore the above Redis error if you don't use Redis. " - "You can remove this error by setting redis.url to false in your settings.yml." - ) - return False diff --git a/searx/redislib.py b/searx/redislib.py deleted file mode 100644 index 5fa8f2dae..000000000 --- a/searx/redislib.py +++ /dev/null @@ -1,240 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -"""A collection of convenient functions and redis/lua scripts. - -This code was partial inspired by the `Bullet-Proofing Lua Scripts in RedisPy`_ -article. - -.. _Bullet-Proofing Lua Scripts in RedisPy: - https://redis.com/blog/bullet-proofing-lua-scripts-in-redispy/ - -""" - -import hmac - -from searx import get_setting - -LUA_SCRIPT_STORAGE = {} -"""A global dictionary to cache client's ``Script`` objects, used by -:py:obj:`lua_script_storage`""" - - -def lua_script_storage(client, script): - """Returns a redis :py:obj:`Script - ` instance. - - Due to performance reason the ``Script`` object is instantiated only once - for a client (``client.register_script(..)``) and is cached in - :py:obj:`LUA_SCRIPT_STORAGE`. 
- - """ - - # redis connection can be closed, lets use the id() of the redis connector - # as key in the script-storage: - client_id = id(client) - - if LUA_SCRIPT_STORAGE.get(client_id) is None: - LUA_SCRIPT_STORAGE[client_id] = {} - - if LUA_SCRIPT_STORAGE[client_id].get(script) is None: - LUA_SCRIPT_STORAGE[client_id][script] = client.register_script(script) - - return LUA_SCRIPT_STORAGE[client_id][script] - - -PURGE_BY_PREFIX = """ -local prefix = tostring(ARGV[1]) -for i, name in ipairs(redis.call('KEYS', prefix .. '*')) do - redis.call('EXPIRE', name, 0) -end -""" - - -def purge_by_prefix(client, prefix: str = "SearXNG_"): - """Purge all keys with ``prefix`` from database. - - Queries all keys in the database by the given prefix and set expire time to - zero. The default prefix will drop all keys which has been set by SearXNG - (drops SearXNG schema entirely from database). - - The implementation is the lua script from string :py:obj:`PURGE_BY_PREFIX`. - The lua script uses EXPIRE_ instead of DEL_: if there are a lot keys to - delete and/or their values are big, `DEL` could take more time and blocks - the command loop while `EXPIRE` turns back immediate. - - :param prefix: prefix of the key to delete (default: ``SearXNG_``) - :type name: str - - .. _EXPIRE: https://redis.io/commands/expire/ - .. _DEL: https://redis.io/commands/del/ - - """ - script = lua_script_storage(client, PURGE_BY_PREFIX) - script(args=[prefix]) - - -def secret_hash(name: str): - """Creates a hash of the ``name``. - - Combines argument ``name`` with the ``secret_key`` from :ref:`settings - server`. This function can be used to get a more anonymized name of a Redis - KEY. 
- - :param name: the name to create a secret hash for - :type name: str - """ - m = hmac.new(bytes(name, encoding='utf-8'), digestmod='sha256') - m.update(bytes(get_setting('server.secret_key'), encoding='utf-8')) - return m.hexdigest() - - -INCR_COUNTER = """ -local limit = tonumber(ARGV[1]) -local expire = tonumber(ARGV[2]) -local c_name = KEYS[1] - -local c = redis.call('GET', c_name) - -if not c then - c = redis.call('INCR', c_name) - if expire > 0 then - redis.call('EXPIRE', c_name, expire) - end -else - c = tonumber(c) - if limit == 0 or c < limit then - c = redis.call('INCR', c_name) - end -end -return c -""" - - -def incr_counter(client, name: str, limit: int = 0, expire: int = 0): - """Increment a counter and return the new value. - - If counter with redis key ``SearXNG_counter_`` does not exists it is - created with initial value 1 returned. The replacement ```` is a - *secret hash* of the value from argument ``name`` (see - :py:func:`secret_hash`). - - The implementation of the redis counter is the lua script from string - :py:obj:`INCR_COUNTER`. - - :param name: name of the counter - :type name: str - - :param expire: live-time of the counter in seconds (default ``None`` means - infinite). - :type expire: int / see EXPIRE_ - - :param limit: limit where the counter stops to increment (default ``None``) - :type limit: int / limit is 2^64 see INCR_ - - :return: value of the incremented counter - :type return: int - - .. _EXPIRE: https://redis.io/commands/expire/ - .. _INCR: https://redis.io/commands/incr/ - - A simple demo of a counter with expire time and limit:: - - >>> for i in range(6): - ... i, incr_counter(client, "foo", 3, 5) # max 3, duration 5 sec - ... time.sleep(1) # from the third call on max has been reached - ... 
- (0, 1) - (1, 2) - (2, 3) - (3, 3) - (4, 3) - (5, 1) - - """ - script = lua_script_storage(client, INCR_COUNTER) - name = "SearXNG_counter_" + secret_hash(name) - c = script(args=[limit, expire], keys=[name]) - return c - - -def drop_counter(client, name): - """Drop counter with redis key ``SearXNG_counter_`` - - The replacement ```` is a *secret hash* of the value from argument - ``name`` (see :py:func:`incr_counter` and :py:func:`incr_sliding_window`). - """ - name = "SearXNG_counter_" + secret_hash(name) - client.delete(name) - - -INCR_SLIDING_WINDOW = """ -local expire = tonumber(ARGV[1]) -local name = KEYS[1] -local current_time = redis.call('TIME') - -redis.call('ZREMRANGEBYSCORE', name, 0, current_time[1] - expire) -redis.call('ZADD', name, current_time[1], current_time[1] .. current_time[2]) -local result = redis.call('ZCOUNT', name, 0, current_time[1] + 1) -redis.call('EXPIRE', name, expire) -return result -""" - - -def incr_sliding_window(client, name: str, duration: int): - """Increment a sliding-window counter and return the new value. - - If counter with redis key ``SearXNG_counter_`` does not exists it is - created with initial value 1 returned. The replacement ```` is a - *secret hash* of the value from argument ``name`` (see - :py:func:`secret_hash`). - - :param name: name of the counter - :type name: str - - :param duration: live-time of the sliding window in seconds - :typeduration: int - - :return: value of the incremented counter - :type return: int - - The implementation of the redis counter is the lua script from string - :py:obj:`INCR_SLIDING_WINDOW`. The lua script uses `sorted sets in Redis`_ - to implement a sliding window for the redis key ``SearXNG_counter_`` - (ZADD_). The current TIME_ is used to score the items in the sorted set and - the time window is moved by removing items with a score lower current time - minus *duration* time (ZREMRANGEBYSCORE_). 
- - The EXPIRE_ time (the duration of the sliding window) is refreshed on each - call (increment) and if there is no call in this duration, the sorted - set expires from the redis DB. - - The return value is the amount of items in the sorted set (ZCOUNT_), what - means the number of calls in the sliding window. - - .. _Sorted sets in Redis: - https://redis.com/ebook/part-1-getting-started/chapter-1-getting-to-know-redis/1-2-what-redis-data-structures-look-like/1-2-5-sorted-sets-in-redis/ - .. _TIME: https://redis.io/commands/time/ - .. _ZADD: https://redis.io/commands/zadd/ - .. _EXPIRE: https://redis.io/commands/expire/ - .. _ZREMRANGEBYSCORE: https://redis.io/commands/zremrangebyscore/ - .. _ZCOUNT: https://redis.io/commands/zcount/ - - A simple demo of the sliding window:: - - >>> for i in range(5): - ... incr_sliding_window(client, "foo", 3) # duration 3 sec - ... time.sleep(1) # from the third call (second) on the window is moved - ... - 1 - 2 - 3 - 3 - 3 - >>> time.sleep(3) # wait until expire - >>> incr_sliding_window(client, "foo", 3) - 1 - - """ - script = lua_script_storage(client, INCR_SLIDING_WINDOW) - name = "SearXNG_counter_" + secret_hash(name) - c = script(args=[duration], keys=[name]) - return c diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py index 7333e6ad0..1890c77d5 100644 --- a/searx/search/checker/background.py +++ b/searx/search/checker/background.py @@ -8,18 +8,18 @@ import os import signal from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union -import redis.exceptions +import valkey.exceptions from searx import logger, settings, sxng_debug -from searx.redisdb import client as get_redis_client +from searx.valkeydb import client as get_valkey_client from searx.exceptions import SearxSettingsException from searx.search.processors import PROCESSORS from searx.search.checker import Checker from searx.search.checker.scheduler import scheduler_function -REDIS_RESULT_KEY = 
'SearXNG_checker_result' -REDIS_LOCK_KEY = 'SearXNG_checker_lock' +VALKEY_RESULT_KEY = 'SearXNG_checker_result' +VALKEY_LOCK_KEY = 'SearXNG_checker_lock' CheckerResult = Union['CheckerOk', 'CheckerErr', 'CheckerOther'] @@ -77,23 +77,23 @@ def _get_interval(every: Any, error_msg: str) -> Tuple[int, int]: def get_result() -> CheckerResult: - client = get_redis_client() + client = get_valkey_client() if client is None: - # without Redis, the checker is disabled + # without Valkey, the checker is disabled return {'status': 'disabled'} - serialized_result: Optional[bytes] = client.get(REDIS_RESULT_KEY) + serialized_result: Optional[bytes] = client.get(VALKEY_RESULT_KEY) if serialized_result is None: - # the Redis key does not exist + # the Valkey key does not exist return {'status': 'unknown'} return json.loads(serialized_result) def _set_result(result: CheckerResult): - client = get_redis_client() + client = get_valkey_client() if client is None: - # without Redis, the function does nothing + # without Valkey, the function does nothing return - client.set(REDIS_RESULT_KEY, json.dumps(result)) + client.set(VALKEY_RESULT_KEY, json.dumps(result)) def _timestamp(): @@ -102,9 +102,9 @@ def _timestamp(): def run(): try: - # use a Redis lock to make sure there is no checker running at the same time + # use a Valkey lock to make sure there is no checker running at the same time # (this should not happen, this is a safety measure) - with get_redis_client().lock(REDIS_LOCK_KEY, blocking_timeout=60, timeout=3600): + with get_valkey_client().lock(VALKEY_LOCK_KEY, blocking_timeout=60, timeout=3600): logger.info('Starting checker') result: CheckerOk = {'status': 'ok', 'engines': {}, 'timestamp': _timestamp()} for name, processor in PROCESSORS.items(): @@ -118,7 +118,7 @@ def run(): _set_result(result) logger.info('Check done') - except redis.exceptions.LockError: + except valkey.exceptions.LockError: _set_result({'status': 'error', 'timestamp': _timestamp()}) logger.exception('Error 
while running the checker') except Exception: # pylint: disable=broad-except @@ -149,9 +149,9 @@ def initialize(): logger.info('Checker scheduler is disabled') return - # make sure there is a Redis connection - if get_redis_client() is None: - logger.error('The checker requires Redis') + # make sure there is a Valkey connection + if get_valkey_client() is None: + logger.error('The checker requires Valkey') return # start the background scheduler diff --git a/searx/search/checker/scheduler.lua b/searx/search/checker/scheduler.lua index 0de9b404d..ec318ddd6 100644 --- a/searx/search/checker/scheduler.lua +++ b/searx/search/checker/scheduler.lua @@ -2,9 +2,9 @@ -- -- This script is not a string in scheduler.py, so editors can provide syntax highlighting. --- The Redis KEY is defined here and not in Python on purpose: +-- The Valkey KEY is defined here and not in Python on purpose: -- only this LUA script can read and update this key to avoid lock and concurrency issues. -local redis_key = 'SearXNG_checker_next_call_ts' +local valkey_key = 'SearXNG_checker_next_call_ts' local now = redis.call('TIME')[1] local start_after_from = ARGV[1] @@ -12,14 +12,14 @@ local start_after_to = ARGV[2] local every_from = ARGV[3] local every_to = ARGV[4] -local next_call_ts = redis.call('GET', redis_key) +local next_call_ts = redis.call('GET', valkey_key) if (next_call_ts == false or next_call_ts == nil) then - -- the scheduler has never run on this Redis instance, so: + -- the scheduler has never run on this Valkey instance, so: -- 1/ the scheduler does not run now -- 2/ the next call is a random time between start_after_from and start_after_to local initial_delay = math.random(start_after_from, start_after_to) - redis.call('SET', redis_key, now + initial_delay) + redis.call('SET', valkey_key, now + initial_delay) return { false, initial_delay } end @@ -31,6 +31,6 @@ if call_now then -- the checker runs now, define the timestamp of the next call: -- this is a random delay between 
every_from and every_to local periodic_delay = math.random(every_from, every_to) - next_call_ts = redis.call('INCRBY', redis_key, periodic_delay) + next_call_ts = redis.call('INCRBY', valkey_key, periodic_delay) end return { call_now, next_call_ts - now } diff --git a/searx/search/checker/scheduler.py b/searx/search/checker/scheduler.py index c0d3f799a..b093a9ab7 100644 --- a/searx/search/checker/scheduler.py +++ b/searx/search/checker/scheduler.py @@ -1,11 +1,11 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=missing-module-docstring -"""Lame scheduler which use Redis as a source of truth: -* the Redis key SearXNG_checker_next_call_ts contains the next time the embedded checker should run. -* to avoid lock, a unique Redis script reads and updates the Redis key SearXNG_checker_next_call_ts. -* this Redis script returns a list of two elements: +"""Lame scheduler which use Valkey as a source of truth: +* the Valkey key SearXNG_checker_next_call_ts contains the next time the embedded checker should run. +* to avoid lock, a unique Valkey script reads and updates the Valkey key SearXNG_checker_next_call_ts. +* this Valkey script returns a list of two elements: * the first one is a boolean. If True, the embedded checker must run now in this worker. - * the second element is the delay in second to wait before the next call to the Redis script. + * the second element is the delay in second to wait before the next call to the Valkey script. 
This scheduler is not generic on purpose: if more feature are required, a dedicate scheduler must be used (= a better scheduler should not use the web workers) @@ -16,8 +16,8 @@ import time from pathlib import Path from typing import Callable -from searx.redisdb import client as get_redis_client -from searx.redislib import lua_script_storage +from searx.valkeydb import client as get_valkey_client +from searx.valkeylib import lua_script_storage logger = logging.getLogger('searx.search.checker') @@ -29,7 +29,7 @@ def scheduler_function(start_after_from: int, start_after_to: int, every_from: i """Run the checker periodically. The function never returns. Parameters: - * start_after_from and start_after_to: when to call "callback" for the first on the Redis instance + * start_after_from and start_after_to: when to call "callback" for the first on the Valkey instance * every_from and every_to: after the first call, how often to call "callback" There is no issue: @@ -38,11 +38,11 @@ def scheduler_function(start_after_from: int, start_after_to: int, every_from: i """ scheduler_now_script = SCHEDULER_LUA.open().read() while True: - # ask the Redis script what to do + # ask the Valkey script what to do # the script says # * if the checker must run now. # * how to long to way before calling the script again (it can be call earlier, but not later). - script = lua_script_storage(get_redis_client(), scheduler_now_script) + script = lua_script_storage(get_valkey_client(), scheduler_now_script) call_now, wait_time = script(args=[start_after_from, start_after_to, every_from, every_to]) # does the worker run the checker now? diff --git a/searx/settings.yml b/searx/settings.yml index cb7504efe..fb85f0ff4 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -110,9 +110,10 @@ server: X-Robots-Tag: noindex, nofollow Referrer-Policy: no-referrer -redis: - # URL to connect redis database. Is overwritten by ${SEARXNG_REDIS_URL}. 
- # https://docs.searxng.org/admin/settings/settings_redis.html#settings-redis +valkey: + # URL to connect valkey database. Is overwritten by ${SEARXNG_VALKEY_URL}. + # https://docs.searxng.org/admin/settings/settings_valkey.html#settings-valkey + # url: valkey://localhost:6379/0 url: false ui: @@ -1809,10 +1810,10 @@ engines: shortcut: rt disabled: true - # Required dependency: redis - # - name: myredis + # Required dependency: valkey + # - name: myvalkey # shortcut : rds - # engine: redis_server + # engine: valkey_server # exact_match_only: false # host: '127.0.0.1' # port: 6379 diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py index 4cee7e345..7e785e4d2 100644 --- a/searx/settings_defaults.py +++ b/searx/settings_defaults.py @@ -185,9 +185,13 @@ SCHEMA = { 'method': SettingsValue(('POST', 'GET'), 'POST', 'SEARXNG_METHOD'), 'default_http_headers': SettingsValue(dict, {}), }, + # redis is deprecated .. 'redis': { 'url': SettingsValue((None, False, str), False, 'SEARXNG_REDIS_URL'), }, + 'valkey': { + 'url': SettingsValue((None, False, str), False, 'SEARXNG_VALKEY_URL'), + }, 'ui': { 'static_path': SettingsDirectoryValue(str, os.path.join(searx_dir, 'static')), 'static_use_hash': SettingsValue(bool, False, 'SEARXNG_STATIC_USE_HASH'), diff --git a/searx/valkeydb.py b/searx/valkeydb.py new file mode 100644 index 000000000..2817c6d0a --- /dev/null +++ b/searx/valkeydb.py @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Implementation of the valkey client (valkey-py_). + +.. _valkey-py: https://github.com/valkey-io/valkey-py + +This implementation uses the :ref:`settings valkey` setup from ``settings.yml``. 
+A valkey DB connection can be tested by:: + + >>> from searx import valkeydb + >>> valkeydb.initialize() + True + >>> db = valkeydb.client() + >>> db.set("foo", "bar") + True + >>> db.get("foo") + b'bar' + >>> + +""" + +import os +import pwd +import logging +import warnings + +import valkey +from searx import get_setting + + +_CLIENT = None +logger = logging.getLogger(__name__) + + +def client() -> valkey.Valkey: + return _CLIENT + + +def initialize(): + global _CLIENT # pylint: disable=global-statement + if get_setting('redis.url'): + warnings.warn("setting redis.url is deprecated, use valkey.url", DeprecationWarning) + valkey_url = get_setting('valkey.url') or get_setting('redis.url') + if not valkey_url: + return False + try: + # create a client, but no connection is done + _CLIENT = valkey.Valkey.from_url(valkey_url) + + # log the parameters as seen by the valkey lib, without the password + kwargs = _CLIENT.get_connection_kwargs().copy() + kwargs.pop('password', None) + kwargs = ' '.join([f'{k}={v!r}' for k, v in kwargs.items()]) + logger.info("connecting to Valkey %s", kwargs) + + # check the connection + _CLIENT.ping() + + # no error: the valkey connection is working + logger.info("connected to Valkey") + return True + except valkey.exceptions.ValkeyError: + _CLIENT = None + _pw = pwd.getpwuid(os.getuid()) + logger.exception("[%s (%s)] can't connect valkey DB ...", _pw.pw_name, _pw.pw_uid) + return False diff --git a/searx/valkeylib.py b/searx/valkeylib.py new file mode 100644 index 000000000..733988eb1 --- /dev/null +++ b/searx/valkeylib.py @@ -0,0 +1,240 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""A collection of convenient functions and valkey/lua scripts. + +This code was partially inspired by the `Bullet-Proofing Lua Scripts in ValkeyPy`_ +article. + +.. 
_Bullet-Proofing Lua Scripts in ValkeyPy: + https://redis.com/blog/bullet-proofing-lua-scripts-in-redispy/ + +""" + +import hmac + +from searx import get_setting + +LUA_SCRIPT_STORAGE = {} +"""A global dictionary to cache client's ``Script`` objects, used by +:py:obj:`lua_script_storage`""" + + +def lua_script_storage(client, script): + """Returns a valkey :py:obj:`Script + <valkey.commands.core.CoreCommands.register_script>` instance. + + For performance reasons the ``Script`` object is instantiated only once + for a client (``client.register_script(..)``) and is cached in + :py:obj:`LUA_SCRIPT_STORAGE`. + + """ + + # valkey connection can be closed, let's use the id() of the valkey connector + # as key in the script-storage: + client_id = id(client) + + if LUA_SCRIPT_STORAGE.get(client_id) is None: + LUA_SCRIPT_STORAGE[client_id] = {} + + if LUA_SCRIPT_STORAGE[client_id].get(script) is None: + LUA_SCRIPT_STORAGE[client_id][script] = client.register_script(script) + + return LUA_SCRIPT_STORAGE[client_id][script] + + +PURGE_BY_PREFIX = """ +local prefix = tostring(ARGV[1]) +for i, name in ipairs(redis.call('KEYS', prefix .. '*')) do + redis.call('EXPIRE', name, 0) +end +""" + + +def purge_by_prefix(client, prefix: str = "SearXNG_"): + """Purge all keys with ``prefix`` from database. + + Queries all keys in the database by the given prefix and sets their expire time to + zero. The default prefix will drop all keys which have been set by SearXNG + (drops SearXNG schema entirely from database). + + The implementation is the lua script from string :py:obj:`PURGE_BY_PREFIX`. + The lua script uses EXPIRE_ instead of DEL_: if there are a lot of keys to + delete and/or their values are big, `DEL` could take more time and block + the command loop while `EXPIRE` returns immediately. + + :param prefix: prefix of the key to delete (default: ``SearXNG_``) + :type prefix: str + + .. _EXPIRE: https://valkey.io/commands/expire/ + .. 
_DEL: https://valkey.io/commands/del/ + + """ + script = lua_script_storage(client, PURGE_BY_PREFIX) + script(args=[prefix]) + + +def secret_hash(name: str): + """Creates a hash of the ``name``. + + Combines argument ``name`` with the ``secret_key`` from :ref:`settings + server`. This function can be used to get a more anonymized name of a Valkey + KEY. + + :param name: the name to create a secret hash for + :type name: str + """ + m = hmac.new(bytes(name, encoding='utf-8'), digestmod='sha256') + m.update(bytes(get_setting('server.secret_key'), encoding='utf-8')) + return m.hexdigest() + + +INCR_COUNTER = """ +local limit = tonumber(ARGV[1]) +local expire = tonumber(ARGV[2]) +local c_name = KEYS[1] + +local c = redis.call('GET', c_name) + +if not c then + c = redis.call('INCR', c_name) + if expire > 0 then + redis.call('EXPIRE', c_name, expire) + end +else + c = tonumber(c) + if limit == 0 or c < limit then + c = redis.call('INCR', c_name) + end +end +return c +""" + + +def incr_counter(client, name: str, limit: int = 0, expire: int = 0): + """Increment a counter and return the new value. + + If counter with valkey key ``SearXNG_counter_<name>`` does not exist, it is + created with initial value 1 returned. The replacement ``<name>`` is a + *secret hash* of the value from argument ``name`` (see + :py:func:`secret_hash`). + + The implementation of the valkey counter is the lua script from string + :py:obj:`INCR_COUNTER`. + + :param name: name of the counter + :type name: str + + :param expire: live-time of the counter in seconds (default ``0`` means + infinite). + :type expire: int / see EXPIRE_ + + :param limit: limit where the counter stops to increment (default ``0`` means no limit) + :type limit: int / limit is 2^64 see INCR_ + + :return: value of the incremented counter + :type return: int + + .. _EXPIRE: https://valkey.io/commands/expire/ + .. _INCR: https://valkey.io/commands/incr/ + + A simple demo of a counter with expire time and limit:: + + >>> for i in range(6): + ... 
i, incr_counter(client, "foo", 3, 5) # max 3, duration 5 sec + ... time.sleep(1) # from the third call on max has been reached + ... + (0, 1) + (1, 2) + (2, 3) + (3, 3) + (4, 3) + (5, 1) + + """ + script = lua_script_storage(client, INCR_COUNTER) + name = "SearXNG_counter_" + secret_hash(name) + c = script(args=[limit, expire], keys=[name]) + return c + + +def drop_counter(client, name): + """Drop counter with valkey key ``SearXNG_counter_<name>`` + + The replacement ``<name>`` is a *secret hash* of the value from argument + ``name`` (see :py:func:`incr_counter` and :py:func:`incr_sliding_window`). + """ + name = "SearXNG_counter_" + secret_hash(name) + client.delete(name) + + +INCR_SLIDING_WINDOW = """ +local expire = tonumber(ARGV[1]) +local name = KEYS[1] +local current_time = redis.call('TIME') + +redis.call('ZREMRANGEBYSCORE', name, 0, current_time[1] - expire) +redis.call('ZADD', name, current_time[1], current_time[1] .. current_time[2]) +local result = redis.call('ZCOUNT', name, 0, current_time[1] + 1) +redis.call('EXPIRE', name, expire) +return result +""" + + +def incr_sliding_window(client, name: str, duration: int): + """Increment a sliding-window counter and return the new value. + + If counter with valkey key ``SearXNG_counter_<name>`` does not exist, it is + created with initial value 1 returned. The replacement ``<name>`` is a + *secret hash* of the value from argument ``name`` (see + :py:func:`secret_hash`). + + :param name: name of the counter + :type name: str + + :param duration: live-time of the sliding window in seconds + :type duration: int + + :return: value of the incremented counter + :type return: int + + The implementation of the valkey counter is the lua script from string + :py:obj:`INCR_SLIDING_WINDOW`. The lua script uses `sorted sets in Valkey`_ + to implement a sliding window for the valkey key ``SearXNG_counter_<name>`` + (ZADD_). 
The current TIME_ is used to score the items in the sorted set and + the time window is moved by removing items with a score lower than the current time + minus *duration* time (ZREMRANGEBYSCORE_). + + The EXPIRE_ time (the duration of the sliding window) is refreshed on each + call (increment) and if there is no call in this duration, the sorted + set expires from the valkey DB. + + The return value is the number of items in the sorted set (ZCOUNT_), which + means the number of calls in the sliding window. + + .. _Sorted sets in Valkey: + https://valkey.io/topics/sorted-sets/ + .. _TIME: https://valkey.io/commands/time/ + .. _ZADD: https://valkey.io/commands/zadd/ + .. _EXPIRE: https://valkey.io/commands/expire/ + .. _ZREMRANGEBYSCORE: https://valkey.io/commands/zremrangebyscore/ + .. _ZCOUNT: https://valkey.io/commands/zcount/ + + A simple demo of the sliding window:: + + >>> for i in range(5): + ... incr_sliding_window(client, "foo", 3) # duration 3 sec + ... time.sleep(1) # from the third call (second) on the window is moved + ... 
+ 1 + 2 + 3 + 3 + 3 + >>> time.sleep(3) # wait until expire + >>> incr_sliding_window(client, "foo", 3) + 1 + + """ + script = lua_script_storage(client, INCR_SLIDING_WINDOW) + name = "SearXNG_counter_" + secret_hash(name) + c = script(args=[duration], keys=[name]) + return c diff --git a/searx/webapp.py b/searx/webapp.py index 868d95e3e..15f79f151 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -118,7 +118,7 @@ from searx.locales import ( from searx.autocomplete import search_autocomplete, backends as autocomplete_backends from searx import favicons -from searx.redisdb import initialize as redis_initialize +from searx.valkeydb import initialize as valkey_initialize from searx.sxng_locales import sxng_locales import searx.search from searx.network import stream as http_stream, set_context_network_name @@ -1397,7 +1397,7 @@ def init(): return locales_initialize() - redis_initialize() + valkey_initialize() searx.plugins.initialize(app) metrics: bool = get_setting("general.enable_metrics") # type: ignore -- cgit v1.2.3