diff options
| author | mggh0139 <223163273+mggh0139@users.noreply.github.com> | 2025-07-28 01:03:01 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-07-28 07:03:01 +0200 |
| commit | 54a2b553f46df6dc98a4b8410788e35165e0fa3e (patch) | |
| tree | 89d4d5630b328b676a4c97310a16b9f1172d24ce /searx | |
| parent | f04c273732350ab9f3ffe35ffe5e670bdf7d5cb3 (diff) | |
[fix] tracker pattern: let startup continue if url fetch fails (#5055)
Catch the HTTP exception so that startup does not crash when fetching the
ClearURL rule list fails. Also add some log messages.
Closes: https://github.com/searxng/searxng/issues/5054
Diffstat (limited to 'searx')
| -rw-r--r-- | searx/data/tracker_patterns.py | 19 |
1 files changed, 15 insertions, 4 deletions
diff --git a/searx/data/tracker_patterns.py b/searx/data/tracker_patterns.py index 31b0af9b1..544031f4e 100644 --- a/searx/data/tracker_patterns.py +++ b/searx/data/tracker_patterns.py @@ -10,6 +10,8 @@ import re from collections.abc import Iterator from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode +from httpx import HTTPError + from searx.data.core import get_cache, log from searx.network import get as http_get @@ -70,10 +72,19 @@ class TrackerPatternsDB: def iter_clear_list(self) -> Iterator[RuleType]: resp = None for url in self.CLEAR_LIST_URL: - resp = http_get(url, timeout=3) - if resp.status_code == 200: - break - log.warning(f"TRACKER_PATTERNS: ClearURL ignore HTTP {resp.status_code} {url}") + log.debug("TRACKER_PATTERNS: Trying to fetch %s...", url) + try: + resp = http_get(url, timeout=3) + + except HTTPError as exc: + log.warning("TRACKER_PATTERNS: HTTPError (%s) occured while fetching %s", url, exc) + continue + + if resp.status_code != 200: + log.warning(f"TRACKER_PATTERNS: ClearURL ignore HTTP {resp.status_code} {url}") + continue + + break if resp is None: log.error("TRACKER_PATTERNS: failed fetching ClearURL rule lists") |