diff options
| author | Alexandre Flament <alex@al-f.net> | 2021-03-05 09:43:39 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-03-05 09:43:39 +0100 |
| commit | aaae9a209e4d6518965064e8b515e411fb8dd0d6 (patch) | |
| tree | 922a8875536ac058d501bb207c04219744eee538 /utils/fetch_wikidata_units.py | |
| parent | 1d10ae175c0929d383d268f56bfadb304365ccf2 (diff) | |
| parent | b8cd3264644208d7afa1a239f829222d45226334 (diff) | |
Merge pull request #2600 from dalf/searx-extra
Add searx_extra package
Diffstat (limited to 'utils/fetch_wikidata_units.py')
| -rw-r--r-- | utils/fetch_wikidata_units.py | 56 |
1 files changed, 0 insertions, 56 deletions
#!/usr/bin/env python
# utils/fetch_wikidata_units.py -- deleted by this commit
# (previous blob 69ae8ab27, file mode 100644).
"""Fetch the English symbols of Wikidata units and dump them to a JSON file.

The script sends one SPARQL query through ``send_wikidata_query`` and writes
a mapping ``{entity id: symbol}`` to ``<searx_dir>/data/wikidata_units.json``.
The query and the file write run at module level, as soon as the file is
executed.
"""

import json
import collections

# set path
from sys import path
from os.path import realpath, dirname, join
path.append(realpath(dirname(realpath(__file__)) + '/../'))

from searx import searx_dir
from searx.engines.wikidata import send_wikidata_query


# The response contains the same ?item several times, once per ?symbol.
# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
# even if an ?item has different ?symbol values of the same rank.
# see:
# * https://www.wikidata.org/wiki/Help:Ranking
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
# * https://w.wiki/32BT
# see the result for https://www.wikidata.org/wiki/Q11582
# there are multiple symbols with the same rank
SPARQL_REQUEST = """
SELECT DISTINCT ?item ?symbol
WHERE
{
  ?item wdt:P31/wdt:P279 wd:Q47574 .
  ?item p:P5061 ?symbolP .
  ?symbolP ps:P5061 ?symbol ;
           wikibase:rank ?rank .
  FILTER(LANG(?symbol) = "en").
}
ORDER BY ?item DESC(?rank) ?symbol
"""

# Backward-compatible alias: the constant was originally published under
# this misspelled name.
SARQL_REQUEST = SPARQL_REQUEST


def get_data():
    """Return an ordered mapping of Wikidata entity id to unit symbol.

    The entity id is the ``?item`` value with the
    ``http://www.wikidata.org/entity/`` prefix removed.  When an item occurs
    more than once in the response, only its first symbol (in response order)
    is kept.

    Returns:
        collections.OrderedDict: ``{entity id: symbol}`` in response order.
    """
    results = collections.OrderedDict()
    # NOTE(review): assumes send_wikidata_query returns the decoded SPARQL
    # JSON response as a dict -- confirm against searx.engines.wikidata.
    response = send_wikidata_query(SPARQL_REQUEST)
    for binding in response['results']['bindings']:
        name = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
        symbol = binding['symbol']['value']
        # ignore duplicate: always use the first one
        results.setdefault(name, symbol)
    return results


def get_wikidata_units_filename():
    """Return the path of ``wikidata_units.json`` inside ``<searx_dir>/data``."""
    return join(searx_dir, "data", "wikidata_units.json")


# ensure_ascii=False writes the symbols as raw (possibly non-ASCII) characters,
# so the encoding is set explicitly instead of depending on the locale.
with open(get_wikidata_units_filename(), 'w', encoding='utf-8') as f:
    json.dump(get_data(), f, indent=4, ensure_ascii=False)