summaryrefslogtreecommitdiff
path: root/searxng_extra/update/update_gsa_useragents.py
diff options
context:
space:
mode:
Diffstat (limited to 'searxng_extra/update/update_gsa_useragents.py')
-rw-r--r--searxng_extra/update/update_gsa_useragents.py40
1 files changed, 40 insertions, 0 deletions
diff --git a/searxng_extra/update/update_gsa_useragents.py b/searxng_extra/update/update_gsa_useragents.py
new file mode 100644
index 000000000..e5e90e046
--- /dev/null
+++ b/searxng_extra/update/update_gsa_useragents.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""This script fetches user agents suitable for Google.
+
+Output file: :origin:`searx/data/gsa_useragents.txt` (:origin:`CI Update data
+... <.github/workflows/data-update.yml>`).
+
+.. Source for user agents: https://github.com/intoli/user-agents/
+
+"""
+# pylint: disable=use-dict-literal
+
+from json import loads
+from gzip import decompress
+
+from searx.data import data_dir
+from searx.utils import searxng_useragent
+from searx.network import get as http_get
+
+DATA_FILE = data_dir / 'gsa_useragents.txt'
+URL = 'https://raw.githubusercontent.com/intoli/user-agents/main/src/user-agents.json.gz'
+
+
+def fetch_gsa_useragents() -> list[str]:
+ response = http_get(URL, timeout=3.0, headers={"User-Agent": searxng_useragent()})
+ response.raise_for_status()
+
+ uas = []
+ for ua in loads(decompress(response.content)):
+ if ua["platform"] == "iPhone" and "GSA" in ua["userAgent"]:
+ uas.append(ua["userAgent"])
+
+ uas.sort()
+ return uas
+
+
+if __name__ == '__main__':
+ useragents = fetch_gsa_useragents()
+ with DATA_FILE.open("w", encoding='utf-8') as f:
+ f.write('\n'.join(useragents))