summaryrefslogtreecommitdiff
path: root/searx/data
diff options
context:
space:
mode:
author	Markus Heiser <markus.heiser@darmarit.de>	2025-09-24 19:34:03 +0200
committer	Markus Heiser <markus.heiser@darmarIT.de>	2025-09-28 07:32:41 +0200
commit18a58943ccaa324b39232fa503462eb39f407399 (patch)
tree04b355ad60c205b28ea1083ea97abdfabea857ff /searx/data
parent4f4de3fc8743b3732834c5ffaa4a3e264d200e6c (diff)
[mod] ExpireCacheSQLite - implement .setmany() for bulk loading
[1] https://github.com/searxng/searxng/issues/5223#issuecomment-3328597147

Suggested-by: Ivan G <igabaldon@inetol.net> [1]
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/data')
-rw-r--r--searx/data/__main__.py20
-rw-r--r--searx/data/currencies.py19
-rw-r--r--searx/data/tracker_patterns.py37
3 files changed, 52 insertions, 24 deletions
diff --git a/searx/data/__main__.py b/searx/data/__main__.py
new file mode 100644
index 000000000..8e7852751
--- /dev/null
+++ b/searx/data/__main__.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Command line implementation"""
+
+import typer
+
+from .core import get_cache
+
+app = typer.Typer()
+
+
+@app.command()
+def state():
+ """show state of the cache"""
+ cache = get_cache()
+ for table in cache.table_names:
+ for row in cache.DB.execute(f"SELECT count(*) FROM {table}"):
+ print(f"cache table {table} holds {row[0]} key/value pairs")
+
+
+app()
diff --git a/searx/data/currencies.py b/searx/data/currencies.py
index 33aa9530c..538900762 100644
--- a/searx/data/currencies.py
+++ b/searx/data/currencies.py
@@ -6,10 +6,12 @@ __all__ = ["CurrenciesDB"]
import typing as t
import json
import pathlib
-import time
from .core import get_cache, log
+if t.TYPE_CHECKING:
+ from searx.cache import CacheRowType
+
@t.final
class CurrenciesDB:
@@ -33,19 +35,14 @@ class CurrenciesDB:
# in /tmp and will be rebuild during the reboot anyway
def load(self):
- _start = time.time()
log.debug("init searx.data.CURRENCIES")
with open(self.json_file, encoding="utf-8") as f:
data_dict: dict[str, dict[str, str]] = json.load(f)
- for key, value in data_dict["names"].items():
- self.cache.set(key=key, value=value, ctx=self.ctx_names, expire=None)
- for key, value in data_dict["iso4217"].items():
- self.cache.set(key=key, value=value, ctx=self.ctx_iso4217, expire=None)
- log.debug(
- "init searx.data.CURRENCIES added %s items in %s sec.",
- len(data_dict["names"]) + len(data_dict["iso4217"]),
- time.time() - _start,
- )
+
+ rows: "list[CacheRowType]" = [(k, v, None) for k, v in data_dict["names"].items()]
+ self.cache.setmany(rows, ctx=self.ctx_names)
+ rows = [(k, v, None) for k, v in data_dict["iso4217"].items()]
+ self.cache.setmany(rows, ctx=self.ctx_iso4217)
def name_to_iso4217(self, name: str) -> str | None:
self.init()
diff --git a/searx/data/tracker_patterns.py b/searx/data/tracker_patterns.py
index 2966c0f31..fd4746e5c 100644
--- a/searx/data/tracker_patterns.py
+++ b/searx/data/tracker_patterns.py
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Simple implementation to store TrackerPatterns data in a SQL database."""
-import typing
+import typing as t
__all__ = ["TrackerPatternsDB"]
@@ -14,9 +14,14 @@ from httpx import HTTPError
from searx.data.core import get_cache, log
from searx.network import get as http_get
+if t.TYPE_CHECKING:
+ from searx.cache import CacheRowType
+
+
RuleType = tuple[str, list[str], list[str]]
+@t.final
class TrackerPatternsDB:
# pylint: disable=missing-class-docstring
@@ -31,9 +36,9 @@ class TrackerPatternsDB:
class Fields:
# pylint: disable=too-few-public-methods, invalid-name
- url_regexp: typing.Final = 0 # URL (regular expression) match condition of the link
- url_ignore: typing.Final = 1 # URL (regular expression) to ignore
- del_args: typing.Final = 2 # list of URL arguments (regular expression) to delete
+ url_regexp: t.Final = 0 # URL (regular expression) match condition of the link
+ url_ignore: t.Final = 1 # URL (regular expression) to ignore
+ del_args: t.Final = 2 # list of URL arguments (regular expression) to delete
def __init__(self):
self.cache = get_cache()
@@ -49,19 +54,25 @@ class TrackerPatternsDB:
def load(self):
log.debug("init searx.data.TRACKER_PATTERNS")
- for rule in self.iter_clear_list():
- self.add(rule)
+ rows: "list[CacheRowType]" = []
- def add(self, rule: RuleType):
- self.cache.set(
- key=rule[self.Fields.url_regexp],
- value=(
+ for rule in self.iter_clear_list():
+ key = rule[self.Fields.url_regexp]
+ value = (
rule[self.Fields.url_ignore],
rule[self.Fields.del_args],
- ),
- ctx=self.ctx_name,
- expire=None,
+ )
+ rows.append((key, value, None))
+
+ self.cache.setmany(rows, ctx=self.ctx_name)
+
+ def add(self, rule: RuleType):
+ key = rule[self.Fields.url_regexp]
+ value = (
+ rule[self.Fields.url_ignore],
+ rule[self.Fields.del_args],
)
+ self.cache.set(key=key, value=value, ctx=self.ctx_name, expire=None)
def rules(self) -> Iterator[RuleType]:
self.init()