summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--searx/cache.py95
-rw-r--r--searx/data/__main__.py20
-rw-r--r--searx/data/currencies.py19
-rw-r--r--searx/data/tracker_patterns.py37
4 files changed, 131 insertions, 40 deletions
diff --git a/searx/cache.py b/searx/cache.py
index 0de482363..ea58c9328 100644
--- a/searx/cache.py
+++ b/searx/cache.py
@@ -29,6 +29,8 @@ from searx import get_setting
log = logger.getChild("cache")
+CacheRowType: typing.TypeAlias = tuple[str, typing.Any, int | None]
+
class ExpireCacheCfg(msgspec.Struct): # pylint: disable=too-few-public-methods
"""Configuration of a :py:obj:`ExpireCache` cache."""
@@ -81,7 +83,7 @@ class ExpireCacheCfg(msgspec.Struct): # pylint: disable=too-few-public-methods
class ExpireCacheStats:
"""Dataclass which provides information on the status of the cache."""
- cached_items: dict[str, list[tuple[str, typing.Any, int]]]
+ cached_items: dict[str, list[CacheRowType]]
"""Values in the cache mapped by context name.
.. code: python
@@ -108,7 +110,9 @@ class ExpireCacheStats:
continue
for key, value, expire in kv_list:
- valid_until = datetime.datetime.fromtimestamp(expire).strftime("%Y-%m-%d %H:%M:%S")
+ valid_until = ""
+ if expire:
+ valid_until = datetime.datetime.fromtimestamp(expire).strftime("%Y-%m-%d %H:%M:%S")
c_kv += 1
lines.append(f"[{ctx_name:20s}] {valid_until} {key:12}" f" --> ({type(value).__name__}) {value} ")
@@ -339,38 +343,97 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
exists, it will be created (on demand) by :py:obj:`self.create_table
<ExpireCacheSQLite.create_table>`.
"""
- table = ctx
- self.maintenance()
+ c, err_msg_list = self._setmany([(key, value, expire)], ctx=ctx)
+ if c:
+ log.debug("%s -- %s: key '%s' updated or inserted (%s errors)", self.cfg.name, ctx, key, len(err_msg_list))
+ else:
+ for msg in err_msg_list:
+ log.error("%s -- %s: %s", self.cfg.name, ctx, msg)
+ return bool(c)
+
+ def setmany(
+ self,
+ opt_list: list[CacheRowType],
+ ctx: str | None = None,
+ ) -> int:
+        """Efficient bulk load of the cache from a list of options. The list
+        contains tuples with the arguments described in
+        :py:obj:`ExpireCacheSQLite.set`."""
+ _start = time.time()
+ c, err_msg_list = self._setmany(opt_list=opt_list, ctx=ctx)
+ _end = time.time()
+ for msg in err_msg_list:
+ log.error("%s -- %s: %s", self.cfg.name, ctx, msg)
+
+ log.debug(
+ "%s -- %s: %s/%s key/value pairs updated or inserted in %s sec (%s errors)",
+ self.cfg.name,
+ ctx,
+ c,
+ len(opt_list),
+ _end - _start,
+ len(err_msg_list),
+ )
+ return c
- value = self.serialize(value=value)
- if len(value) > self.cfg.MAX_VALUE_LEN:
- log.warning("ExpireCache.set(): %s.key='%s' - value too big to cache (len: %s) ", table, value, len(value))
- return False
+ def _setmany(
+ self,
+ opt_list: list[CacheRowType],
+ ctx: str | None = None,
+ ) -> tuple[int, list[str]]:
- if not expire:
- expire = self.cfg.MAXHOLD_TIME
- expire = int(time.time()) + expire
+ table = ctx
+ self.maintenance()
table_name = table
if not table_name:
table_name = self.normalize_name(self.cfg.name)
self.create_table(table_name)
- sql = (
+ sql_str = (
f"INSERT INTO {table_name} (key, value, expire) VALUES (?, ?, ?)"
f" ON CONFLICT DO "
f"UPDATE SET value=?, expire=?"
)
+ sql_rows: list[
+ tuple[
+ str, # key
+ typing.Any, # value
+ int | None, # expire
+ typing.Any, # value
+ int | None, # expire
+ ]
+ ] = []
+
+ err_msg_list: list[str] = []
+ for key, _val, expire in opt_list:
+
+ value: bytes = self.serialize(value=_val)
+ if len(value) > self.cfg.MAX_VALUE_LEN:
+ err_msg_list.append(f"{table}.key='{key}' - serialized value too big to cache (len: {len(value)}) ")
+ continue
+
+ if not expire:
+ expire = self.cfg.MAXHOLD_TIME
+ expire = int(time.time()) + expire
+
+ # positional arguments of the INSERT INTO statement
+ sql_args = (key, value, expire, value, expire)
+ sql_rows.append(sql_args)
+
+ if not sql_rows:
+ return 0, err_msg_list
+
if table:
with self.DB:
- self.DB.execute(sql, (key, value, expire, value, expire))
+ self.DB.executemany(sql_str, sql_rows)
else:
with self.connect() as conn:
- conn.execute(sql, (key, value, expire, value, expire))
+ conn.executemany(sql_str, sql_rows)
conn.close()
- return True
+ return len(sql_rows), err_msg_list
def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
"""Get value of ``key`` from table given by argument ``ctx``. If
@@ -410,7 +473,7 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
yield row[0], self.deserialize(row[1])
def state(self) -> ExpireCacheStats:
- cached_items: dict[str, list[tuple[str, typing.Any, int]]] = {}
+ cached_items: dict[str, list[CacheRowType]] = {}
for table in self.table_names:
cached_items[table] = []
for row in self.DB.execute(f"SELECT key, value, expire FROM {table}"):
diff --git a/searx/data/__main__.py b/searx/data/__main__.py
new file mode 100644
index 000000000..8e7852751
--- /dev/null
+++ b/searx/data/__main__.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Command line implementation"""
+
+import typer
+
+from .core import get_cache
+
+app = typer.Typer()
+
+
+@app.command()
+def state():
+    """Show the state of the cache."""
+ cache = get_cache()
+ for table in cache.table_names:
+ for row in cache.DB.execute(f"SELECT count(*) FROM {table}"):
+ print(f"cache table {table} holds {row[0]} key/value pairs")
+
+
+app()
diff --git a/searx/data/currencies.py b/searx/data/currencies.py
index 33aa9530c..538900762 100644
--- a/searx/data/currencies.py
+++ b/searx/data/currencies.py
@@ -6,10 +6,12 @@ __all__ = ["CurrenciesDB"]
import typing as t
import json
import pathlib
-import time
from .core import get_cache, log
+if t.TYPE_CHECKING:
+ from searx.cache import CacheRowType
+
@t.final
class CurrenciesDB:
@@ -33,19 +35,14 @@ class CurrenciesDB:
# in /tmp and will be rebuild during the reboot anyway
def load(self):
- _start = time.time()
log.debug("init searx.data.CURRENCIES")
with open(self.json_file, encoding="utf-8") as f:
data_dict: dict[str, dict[str, str]] = json.load(f)
- for key, value in data_dict["names"].items():
- self.cache.set(key=key, value=value, ctx=self.ctx_names, expire=None)
- for key, value in data_dict["iso4217"].items():
- self.cache.set(key=key, value=value, ctx=self.ctx_iso4217, expire=None)
- log.debug(
- "init searx.data.CURRENCIES added %s items in %s sec.",
- len(data_dict["names"]) + len(data_dict["iso4217"]),
- time.time() - _start,
- )
+
+ rows: "list[CacheRowType]" = [(k, v, None) for k, v in data_dict["names"].items()]
+ self.cache.setmany(rows, ctx=self.ctx_names)
+ rows = [(k, v, None) for k, v in data_dict["iso4217"].items()]
+ self.cache.setmany(rows, ctx=self.ctx_iso4217)
def name_to_iso4217(self, name: str) -> str | None:
self.init()
diff --git a/searx/data/tracker_patterns.py b/searx/data/tracker_patterns.py
index 2966c0f31..fd4746e5c 100644
--- a/searx/data/tracker_patterns.py
+++ b/searx/data/tracker_patterns.py
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Simple implementation to store TrackerPatterns data in a SQL database."""
-import typing
+import typing as t
__all__ = ["TrackerPatternsDB"]
@@ -14,9 +14,14 @@ from httpx import HTTPError
from searx.data.core import get_cache, log
from searx.network import get as http_get
+if t.TYPE_CHECKING:
+ from searx.cache import CacheRowType
+
+
RuleType = tuple[str, list[str], list[str]]
+@t.final
class TrackerPatternsDB:
# pylint: disable=missing-class-docstring
@@ -31,9 +36,9 @@ class TrackerPatternsDB:
class Fields:
# pylint: disable=too-few-public-methods, invalid-name
- url_regexp: typing.Final = 0 # URL (regular expression) match condition of the link
- url_ignore: typing.Final = 1 # URL (regular expression) to ignore
- del_args: typing.Final = 2 # list of URL arguments (regular expression) to delete
+ url_regexp: t.Final = 0 # URL (regular expression) match condition of the link
+ url_ignore: t.Final = 1 # URL (regular expression) to ignore
+ del_args: t.Final = 2 # list of URL arguments (regular expression) to delete
def __init__(self):
self.cache = get_cache()
@@ -49,19 +54,25 @@ class TrackerPatternsDB:
def load(self):
log.debug("init searx.data.TRACKER_PATTERNS")
- for rule in self.iter_clear_list():
- self.add(rule)
+ rows: "list[CacheRowType]" = []
- def add(self, rule: RuleType):
- self.cache.set(
- key=rule[self.Fields.url_regexp],
- value=(
+ for rule in self.iter_clear_list():
+ key = rule[self.Fields.url_regexp]
+ value = (
rule[self.Fields.url_ignore],
rule[self.Fields.del_args],
- ),
- ctx=self.ctx_name,
- expire=None,
+ )
+ rows.append((key, value, None))
+
+ self.cache.setmany(rows, ctx=self.ctx_name)
+
+ def add(self, rule: RuleType):
+ key = rule[self.Fields.url_regexp]
+ value = (
+ rule[self.Fields.url_ignore],
+ rule[self.Fields.del_args],
)
+ self.cache.set(key=key, value=value, ctx=self.ctx_name, expire=None)
def rules(self) -> Iterator[RuleType]:
self.init()