summaryrefslogtreecommitdiff
path: root/searx/engines/startpage.py
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2025-04-21 14:17:49 +0200
committerMarkus Heiser <markus.heiser@darmarIT.de>2025-05-03 08:39:12 +0200
commitbdfe1c2a158ea4d9f52391e2527078045eca9cdd (patch)
treea47a4157b7570b515f60d439e5264aefe4704689 /searx/engines/startpage.py
parent4cbfba9d7b930edd8f5727ba091cd800d8a02eab (diff)
[mod] engines: migration of the individual cache solutions to EngineCache
The EngineCache class replaces all of the previous individual cache solutions in the context of the engines. - demo_offline.py - duckduckgo.py - radio_browser.py - soundcloud.py - startpage.py - wolframalpha_api.py - wolframalpha_noapi.py Search terms to test most of the modified engines:: !ddg !rb !sc !sp !wa test !ddg !rb !sc !sp !wa foo For introspection of the DB, jump into the developer environment and run the command to show the cache state:: $ ./manage pyenv.cmd bash --norc --noprofile (py3) python -m searx.enginelib cache state cache tables and key/values =========================== [demo_offline ] 2025-04-22 11:32:50 count --> (int) 4 [startpage ] 2025-04-22 12:32:30 SC_CODE --> (str) fSOBnhEMlDfE20 [duckduckgo ] 2025-04-22 12:32:31 4dff493e.... --> (str) 4-128634958369380006627592672385352473325 [duckduckgo ] 2025-04-22 12:40:06 3e2583e2.... --> (str) 4-263126175288871260472289814259666848451 [radio_browser ] 2025-04-23 11:33:08 servers --> (list) ['https://de2.api.radio-browser.info', ...] [soundcloud ] 2025-04-29 11:40:06 guest_client_id --> (str) EjkRJG0BLNEZquRiPZYdNtJdyGtTuHdp [wolframalpha ] 2025-04-22 12:40:06 code --> (str) 5aa79f86205ad26188e0e26e28fb7ae7 number of tables: 6 number of key/value pairs: 7 In the "cache tables and key/values" section, the table name (engine name) is in the first position, the calculated expiry date is in the second position, and the key and value are shown in the third and fourth positions. About duckduckgo: The *vqd code* of ddg depends on the query term, and therefore the key is a hash value of the query term (so that the raw query term is not stored). 
In the "properties of ENGINES_CACHE" section all properties of the SQLiteAppl / ExpireCache and their last modification date are shown:: properties of ENGINES_CACHE =========================== [last modified: 2025-04-22 11:32:27] DB_SCHEMA : 1 [last modified: 2025-04-22 11:32:27] LAST_MAINTENANCE : [last modified: 2025-04-22 11:32:27] crypt_hash : ca612e3566fdfd7cf7efe2b1c9349f461158d07cb78a3750e5c5be686aa8ebdc [last modified: 2025-04-22 11:32:30] CACHE-TABLE--demo_offline: demo_offline [last modified: 2025-04-22 11:32:30] CACHE-TABLE--startpage: startpage [last modified: 2025-04-22 11:32:31] CACHE-TABLE--duckduckgo: duckduckgo [last modified: 2025-04-22 11:33:08] CACHE-TABLE--radio_browser: radio_browser [last modified: 2025-04-22 11:40:06] CACHE-TABLE--soundcloud: soundcloud [last modified: 2025-04-22 11:40:06] CACHE-TABLE--wolframalpha: wolframalpha These properties provide information about the state of the ExpireCache and control the behavior. For example, the maintenance intervals are controlled by the last modification date of the LAST_MAINTENANCE property and the hash value of the password can be used to detect whether the password has been changed (in this case the DB entries can no longer be decrypted and the entire cache must be discarded). Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/startpage.py')
-rw-r--r--searx/engines/startpage.py34
1 files changed, 21 insertions, 13 deletions
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 54e05604b..6c77e37c8 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -84,7 +84,6 @@ from typing import TYPE_CHECKING, Any
from collections import OrderedDict
import re
from unicodedata import normalize, combining
-from time import time
from datetime import datetime, timedelta
from json import loads
@@ -97,6 +96,7 @@ from searx.network import get # see https://github.com/searxng/searxng/issues/7
from searx.exceptions import SearxEngineCaptchaException
from searx.locales import region_tag
from searx.enginelib.traits import EngineTraits
+from searx.enginelib import EngineCache
if TYPE_CHECKING:
import logging
@@ -159,10 +159,21 @@ search_form_xpath = '//form[@id="search"]'
</form>
"""
-# timestamp of the last fetch of 'sc' code
-sc_code_ts = 0
-sc_code = ''
-sc_code_cache_sec = 30
+
+CACHE: EngineCache
+"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
+seconds."""
+
+
+def init(_):
+ global CACHE # pylint: disable=global-statement
+
+ # hint: all three startpage engines (WEB, Images & News) can/should use the
+ # same sc_code ..
+ CACHE = EngineCache("startpage") # type:ignore
+
+
+sc_code_cache_sec = 3600
"""Time in seconds the sc-code is cached in memory :py:obj:`get_sc_code`."""
@@ -176,14 +187,10 @@ def get_sc_code(searxng_locale, params):
Startpage's search form generates a new sc-code on each request. This
function scrap a new sc-code from Startpage's home page every
- :py:obj:`sc_code_cache_sec` seconds.
-
- """
-
- global sc_code_ts, sc_code # pylint: disable=global-statement
+ :py:obj:`sc_code_cache_sec` seconds."""
- if sc_code and (time() < (sc_code_ts + sc_code_cache_sec)):
- logger.debug("get_sc_code: reuse '%s'", sc_code)
+ sc_code = CACHE.get("SC_CODE", "")
+ if sc_code:
return sc_code
headers = {**params['headers']}
@@ -233,8 +240,9 @@ def get_sc_code(searxng_locale, params):
message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url, # type: ignore
) from exc
- sc_code_ts = time()
+ sc_code = str(sc_code)
logger.debug("get_sc_code: new value is: %s", sc_code)
+ CACHE.set(key="SC_CODE", value=sc_code, expire=sc_code_cache_sec)
return sc_code