diff options
| author | Alexandre Flament <alex@al-f.net> | 2021-10-02 17:30:39 +0200 |
|---|---|---|
| committer | Alexandre Flament <alex@al-f.net> | 2021-10-02 17:30:39 +0200 |
| commit | 1bb82a6b54e53d683c3041a1576be64ae234abee (patch) | |
| tree | a0d30ba74780168169e82643335d2bf3aeb0c748 /searx_extra/update/update_wikidata_units.py | |
| parent | e39a03cc61e9792afb34084fb4d9973a61deecea (diff) | |
SearXNG: searxng_extra
Diffstat (limited to 'searx_extra/update/update_wikidata_units.py')
| -rwxr-xr-x | searx_extra/update/update_wikidata_units.py | 55 |
1 files changed, 0 insertions, 55 deletions
#!/usr/bin/env python
"""Fetch unit symbols from Wikidata and store them as JSON.

The module sends ``SARQL_REQUEST`` through the searx ``wikidata`` engine,
keeps one symbol per Wikidata item and writes the resulting mapping to
``<searx_dir>/data/wikidata_units.json``.

The file is written at module level, i.e. as soon as the module is executed
(or imported).
"""

import json
import collections

# set path
from os.path import join

from searx import searx_dir
from searx.engines import wikidata, set_loggers

set_loggers(wikidata, 'wikidata')

# The response contains duplicate ?item rows, one per distinct ?symbol.
# "ORDER BY ?item DESC(?rank) ?symbol" makes the row order deterministic even
# when an ?item has several ?symbol values of the same rank, so keeping the
# first row of each ?item (see get_data) always selects the same symbol.
# see:
# * https://www.wikidata.org/wiki/Help:Ranking
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
# * https://w.wiki/32BT
# see the result for https://www.wikidata.org/wiki/Q11582
# there are multiple symbols with the same rank
SARQL_REQUEST = """
SELECT DISTINCT ?item ?symbol
WHERE
{
  ?item wdt:P31/wdt:P279 wd:Q47574 .
  ?item p:P5061 ?symbolP .
  ?symbolP ps:P5061 ?symbol ;
           wikibase:rank ?rank .
  FILTER(LANG(?symbol) = "en").
}
ORDER BY ?item DESC(?rank) ?symbol
"""

# Prefix stripped from the ?item URI to obtain the bare entity identifier.
WIKIDATA_ENTITY_PREFIX = 'http://www.wikidata.org/entity/'


def get_data():
    """Return an ordered mapping of Wikidata entity ID to unit symbol.

    The keys are the ``?item`` URIs with the ``http://www.wikidata.org/entity/``
    prefix removed; the values are the matching ``?symbol`` strings.  When the
    response lists an item several times, only the first row is kept, so the
    outcome follows the ordering requested in ``SARQL_REQUEST``.
    """
    results = collections.OrderedDict()
    response = wikidata.send_wikidata_query(SARQL_REQUEST)
    for binding in response['results']['bindings']:
        name = binding['item']['value'].replace(WIKIDATA_ENTITY_PREFIX, '')
        symbol = binding['symbol']['value']
        # ignore duplicate: always use the first one
        results.setdefault(name, symbol)
    return results


def get_wikidata_units_filename():
    """Return the path of ``wikidata_units.json`` inside searx's ``data`` directory."""
    return join(searx_dir, "data", "wikidata_units.json")


# ensure_ascii=False emits non-ASCII symbols verbatim, so the encoding is
# pinned to UTF-8 instead of depending on the locale's default encoding.
with open(get_wikidata_units_filename(), 'w', encoding='utf-8') as f:
    json.dump(get_data(), f, indent=4, ensure_ascii=False)