diff options
| author | Alexandre Flament <alex@al-f.net> | 2020-10-26 19:19:18 +0100 |
|---|---|---|
| committer | Alexandre Flament <alex@al-f.net> | 2020-10-28 08:09:25 +0100 |
| commit | ed6696e6bf13d2a1f7536fe05d043d81f57f4081 (patch) | |
| tree | 4046768ddacb8f5a0309a22d37a0aec488477e72 /utils/fetch_wikidata_units.py | |
| parent | d3d50eff665f03c16adcb26a774b25b4fd5ade08 (diff) | |
[mod] add external_urls.json and wikidata_units.json
Diffstat (limited to 'utils/fetch_wikidata_units.py')
| -rw-r--r-- | utils/fetch_wikidata_units.py | 47 |
1 files changed, 47 insertions, 0 deletions
#!/usr/bin/env python
"""Fetch unit symbols from Wikidata and dump them to a JSON file.

The script sends ``SARQL_REQUEST`` through ``send_wikidata_query`` and writes
a mapping of Wikidata entity id to English unit symbol into
``<searx_dir>/data/wikidata_units.json``.
"""

import json
import collections

# set path
from sys import path
from os.path import realpath, dirname, join
path.append(realpath(dirname(realpath(__file__)) + '/../'))

from searx import searx_dir
from searx.engines.wikidata import send_wikidata_query


# NOTE: the name keeps its historical spelling ("SARQL") because it is a
# module-level name that other code may reference.
# Only ?item and ?symbol are read by get_data().
SARQL_REQUEST = """
SELECT DISTINCT ?item ?symbol ?P2370 ?P2370Unit ?P2442 ?P2442Unit
WHERE
{
?item wdt:P31/wdt:P279 wd:Q47574.
?item wdt:P5061 ?symbol.
FILTER(LANG(?symbol) = "en").
}
ORDER BY ?item
"""


def get_data():
    """Query Wikidata and return the units as an ordered mapping.

    Returns:
        A ``collections.OrderedDict`` mapping the entity id (the item URI
        with the ``http://www.wikidata.org/entity/`` prefix removed) to the
        unit symbol, ordered by entity id. ``None`` when
        ``send_wikidata_query`` returns ``None``.
    """
    def get_key(unit):
        return unit['item']['value'].replace('http://www.wikidata.org/entity/', '')

    def get_value(unit):
        return unit['symbol']['value']

    result = send_wikidata_query(SARQL_REQUEST)
    if result is None:
        return None

    # Sort the units by entity name so that different fetches keep the file
    # unchanged. sorted() is required here: calling .sort() on a temporary
    # list(...) copy would leave the bindings that are iterated below unsorted.
    bindings = sorted(result['results']['bindings'], key=get_key)

    # When several bindings share the same entity id, the later one wins.
    return collections.OrderedDict((get_key(unit), get_value(unit)) for unit in bindings)


def get_wikidata_units_filename():
    """Return the path of the generated ``wikidata_units.json`` file."""
    return join(searx_dir, "data", "wikidata_units.json")


# Fetch before opening the output file: opening with 'w' truncates it, so a
# failed query must not be allowed to wipe the previously generated data.
units = get_data()
if units is None:
    raise SystemExit('no result from the wikidata query: wikidata_units.json left untouched')

# ensure_ascii=False writes non-ASCII symbols as-is, so the encoding is set
# explicitly instead of relying on the locale default.
with open(get_wikidata_units_filename(), 'w', encoding='utf-8') as f:
    json.dump(units, f, indent=4, ensure_ascii=False)