summaryrefslogtreecommitdiff
path: root/searxng_extra/update
diff options
context:
space:
mode:
authorBnyro <bnyro@tutanota.com>2025-04-28 18:06:59 +0200
committerBnyro <bnyro@tutanota.com>2025-06-23 22:12:18 +0200
commit8f7eee24733727d15f21f29a8038a85eb2b05e4d (patch)
tree6a01d5866f5e21be485a164566aeedba79659833 /searxng_extra/update
parent58df3e8e9776341279eb58c6766fe7d189a3ee60 (diff)
[feat] tracker url plugin: use ClearURL tracking param list
Diffstat (limited to 'searxng_extra/update')
-rw-r--r--searxng_extra/update/update_tracker_patterns.py36
1 files changed, 36 insertions, 0 deletions
diff --git a/searxng_extra/update/update_tracker_patterns.py b/searxng_extra/update/update_tracker_patterns.py
new file mode 100644
index 000000000..f8928d354
--- /dev/null
+++ b/searxng_extra/update/update_tracker_patterns.py
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Fetch the ClearURLs tracking-parameter rules and store them in ``tracker_patterns.json``."""
+
+import json
+import httpx
+
+from searx.data import data_dir
+
+DATA_FILE = data_dir / "tracker_patterns.json"
+CLEAR_LIST_URL = "https://raw.githubusercontent.com/ClearURLs/Rules/refs/heads/master/data.min.json"
+
+
+def fetch_clear_url_filters():
+    """Download the ClearURLs rule list and convert it into tracker patterns.
+
+    Every entry of the ``providers`` object served at ``CLEAR_LIST_URL`` is
+    turned into a dict with the keys ``urlPattern``, ``exceptions`` and
+    ``trackerParams`` (taken from the provider's ``rules``).  Only
+    ``urlPattern`` is required; a provider without ``exceptions`` or
+    ``rules`` gets an empty list instead of aborting the whole update with a
+    ``KeyError``.
+
+    :return: list with one dict per provider, in the order of the response
+    :raises Exception: if the response status code is not 200
+    """
+    resp = httpx.get(CLEAR_LIST_URL)
+    if resp.status_code != 200:
+        # pylint: disable=broad-exception-raised
+        raise Exception(f"Error fetching ClearURL filter lists, HTTP code {resp.status_code}")
+
+    def unescape(pattern):
+        # fix javascript regex syntax: collapse each doubled backslash into one
+        return pattern.replace("\\\\", "\\")
+
+    return [
+        {
+            "urlPattern": unescape(rule["urlPattern"]),
+            "exceptions": [unescape(exc) for exc in rule.get("exceptions", [])],
+            "trackerParams": rule.get("rules", []),
+        }
+        for rule in resp.json()["providers"].values()
+    ]
+
+
+if __name__ == '__main__':
+    # Fetch before opening the data file: the file is only opened (and thereby
+    # truncated) after the download and conversion have succeeded.
+    tracker_rules = fetch_clear_url_filters()
+    dump_options = {"indent": 4, "sort_keys": True, "ensure_ascii": False}
+    with DATA_FILE.open("w", encoding='utf-8') as out_file:
+        json.dump(tracker_rules, out_file, **dump_options)