summaryrefslogtreecommitdiff
path: root/searx_extra/update/update_wikidata_units.py
diff options
context:
space:
mode:
author: Alexandre Flament <alex@al-f.net> 2021-03-05 09:43:39 +0100
committer: GitHub <noreply@github.com> 2021-03-05 09:43:39 +0100
commit: aaae9a209e4d6518965064e8b515e411fb8dd0d6 (patch)
tree: 922a8875536ac058d501bb207c04219744eee538 /searx_extra/update/update_wikidata_units.py
parent: 1d10ae175c0929d383d268f56bfadb304365ccf2 (diff)
parent: b8cd3264644208d7afa1a239f829222d45226334 (diff)
Merge pull request #2600 from dalf/searx-extra
Add searx_extra package
Diffstat (limited to 'searx_extra/update/update_wikidata_units.py')
-rwxr-xr-x searx_extra/update/update_wikidata_units.py 54
1 files changed, 54 insertions, 0 deletions
diff --git a/searx_extra/update/update_wikidata_units.py b/searx_extra/update/update_wikidata_units.py
new file mode 100755
index 000000000..1e6b8b9ca
--- /dev/null
+++ b/searx_extra/update/update_wikidata_units.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+import json
+import collections
+
+# set path
+from os.path import join
+
+from searx import searx_dir
+from searx.engines.wikidata import send_wikidata_query
+
+
+# The response contains duplicate ?item entries, each with a different ?symbol.
+# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
+# even if an ?item has several ?symbol values of the same rank.
+# A deterministic result keeps the symbol chosen by get_data() stable.
+# see:
+# * https://www.wikidata.org/wiki/Help:Ranking
+# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
+# * https://w.wiki/32BT
+# For an example, see the result for https://www.wikidata.org/wiki/Q11582:
+# there are multiple symbols with the same rank.
+SARQL_REQUEST = """
+SELECT DISTINCT ?item ?symbol
+WHERE
+{
+ ?item wdt:P31/wdt:P279 wd:Q47574 .
+ ?item p:P5061 ?symbolP .
+ ?symbolP ps:P5061 ?symbol ;
+ wikibase:rank ?rank .
+ FILTER(LANG(?symbol) = "en").
+}
+ORDER BY ?item DESC(?rank) ?symbol
+"""
+
+
+def get_data():
+    """Map each Wikidata entity id returned by SARQL_REQUEST to one unit symbol."""
+    symbols = collections.OrderedDict()
+    bindings = send_wikidata_query(SARQL_REQUEST)['results']['bindings']
+    for binding in bindings:
+        entity_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
+        # Rows arrive ordered per item (see ORDER BY in SARQL_REQUEST), so
+        # keeping only the first symbol seen for an entity is deterministic.
+        symbols.setdefault(entity_id, binding['symbol']['value'])
+    return symbols
+
+
+def get_wikidata_units_filename():  # path of the JSON file this script writes
+    return join(searx_dir, "data", "wikidata_units.json")
+
+
+with open(get_wikidata_units_filename(), 'w', encoding='utf-8') as f:  # ensure_ascii=False emits raw non-ASCII
+    json.dump(get_data(), f, indent=4, ensure_ascii=False)