diff options
| author | Markus Heiser <markus.heiser@darmarit.de> | 2025-05-24 17:53:57 +0200 |
|---|---|---|
| committer | Bnyro <bnyro@tutanota.com> | 2025-06-23 22:12:18 +0200 |
| commit | 2dd4f7b9721b201dc51cb2fb06d32cb1cb833458 (patch) | |
| tree | fe74795a1a6fa06bf5761083a1e9c57428be1b3c /searxng_extra/update | |
| parent | 58c10f758b09affda1a15c105e7ce86f3a3bdd3a (diff) | |
[mod] data: implement a simple tracker URL (SQL) database
On demand, the tracker data is loaded directly into the cache, so that the
maintenance of this data via PRs is no longer necessary.
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searxng_extra/update')
| -rw-r--r-- | searxng_extra/update/update_tracker_patterns.py | 36 |
1 file changed, 0 insertions, 36 deletions
```diff
diff --git a/searxng_extra/update/update_tracker_patterns.py b/searxng_extra/update/update_tracker_patterns.py
deleted file mode 100644
index f8928d354..000000000
--- a/searxng_extra/update/update_tracker_patterns.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
-"""Fetch trackers"""
-
-import json
-import httpx
-
-from searx.data import data_dir
-
-DATA_FILE = data_dir / "tracker_patterns.json"
-CLEAR_LIST_URL = "https://raw.githubusercontent.com/ClearURLs/Rules/refs/heads/master/data.min.json"
-
-
-def fetch_clear_url_filters():
-    resp = httpx.get(CLEAR_LIST_URL)
-    if resp.status_code != 200:
-        # pylint: disable=broad-exception-raised
-        raise Exception(f"Error fetching ClearURL filter lists, HTTP code {resp.status_code}")
-
-    providers = resp.json()["providers"]
-    rules = []
-    for rule in providers.values():
-        rules.append(
-            {
-                "urlPattern": rule["urlPattern"].replace("\\\\", "\\"),  # fix javascript regex syntax
-                "exceptions": [exc.replace("\\\\", "\\") for exc in rule["exceptions"]],
-                "trackerParams": rule["rules"],
-            }
-        )
-
-    return rules
-
-
-if __name__ == '__main__':
-    filter_list = fetch_clear_url_filters()
-    with DATA_FILE.open("w", encoding='utf-8') as f:
-        json.dump(filter_list, f, indent=4, sort_keys=True, ensure_ascii=False)
```