From c75425655fdadf9554b97ae0309a6181acd34ce3 Mon Sep 17 00:00:00 2001
From: Alexandre Flament
Date: Fri, 4 Jun 2021 09:35:26 +0200
Subject: [enh] openstreetmap / map template: improve results

implements ideas described in #69
* update the engine
* use wikidata
* update map.html template
---
 searx_extra/update/update_osm_keys_tags.py | 204 +++++++++++++++++++++++++++++
 1 file changed, 204 insertions(+)
 create mode 100755 searx_extra/update/update_osm_keys_tags.py

(limited to 'searx_extra')

diff --git a/searx_extra/update/update_osm_keys_tags.py b/searx_extra/update/update_osm_keys_tags.py
new file mode 100755
index 000000000..98c7617fb
--- /dev/null
+++ b/searx_extra/update/update_osm_keys_tags.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python
+"""
+Fetch OSM keys and tags
+
+to get the i18n names, the scripts uses query.wikidata.org
+instead of for example https://taginfo.openstreetmap.org/taginfo/apidoc
+
+https://map.atownsend.org.uk/maps/map/changelog.html (the actual change log)
+might be useful to normalize OSM tags
+"""
+
+import json
+import collections
+from pathlib import Path
+
+from searx import searx_dir
+from searx.network import set_timeout_for_thread
+from searx.engines.wikidata import send_wikidata_query
+from searx.languages import language_codes
+from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
+
+
+# nominatim return type category and type
+# the tag is "Tag:{category}={type}"
+# Example:
+# * https://taginfo.openstreetmap.org/tags/building=house#overview
+# * https://wiki.openstreetmap.org/wiki/Tag:building%3Dhouse
+#   at the bottom of the infobox (right side), there is a link to wikidata:
+#   https://www.wikidata.org/wiki/Q3947
+#   see property "OpenStreetMap tag or key" (P1282)
+# * https://wiki.openstreetmap.org/wiki/Tag%3Abuilding%3Dbungalow
+#   https://www.wikidata.org/wiki/Q850107
+SARQL_TAGS_REQUEST = """
+SELECT ?tag ?item ?itemLabel WHERE {
+  ?item wdt:P1282 ?tag .
+  ?item rdfs:label ?itemLabel .
+  FILTER(STRSTARTS(?tag, 'Tag'))
+}
+GROUP BY ?tag ?item ?itemLabel
+ORDER BY ?tag ?item ?itemLabel
+"""
+
+# keys
+# Example with "payment"":
+# * https://wiki.openstreetmap.org/wiki/Key%3Apayment
+#   at the bottom of infobox (right side), there is a link to wikidata:
+#   https://www.wikidata.org/wiki/Q1148747
+#   link made using the "OpenStreetMap tag or key" property (P1282)
+#   to be confirm: there is a one wiki page per key ?
+# * https://taginfo.openstreetmap.org/keys/payment#values
+# * https://taginfo.openstreetmap.org/keys/payment:cash#values
+#
+# rdfs:label get all the labels without language selection
+# (as opposed to SERVICE wikibase:label)
+SARQL_KEYS_REQUEST = """
+SELECT ?key ?item ?itemLabel WHERE {
+  ?item wdt:P1282 ?key .
+  ?item rdfs:label ?itemLabel .
+  FILTER(STRSTARTS(?key, 'Key'))
+}
+GROUP BY ?key ?item ?itemLabel
+ORDER BY ?key ?item ?itemLabel
+"""
+
+LANGUAGES = [l[0].lower() for l in language_codes]
+PRESET_KEYS = {
+    ('wikidata',): {'en': 'Wikidata'},
+    ('wikipedia',): {'en': 'Wikipedia'},
+    ('email',): {'en': 'Email'},
+    ('facebook',): {'en': 'Facebook'},
+    ('fax',): {'en': 'Fax'},
+    ('internet_access', 'ssid'): {'en': 'Wi-Fi'},
+}
+INCLUDED_KEYS = {
+    ('addr', )
+}
+
+
+def get_preset_keys():
+    results = collections.OrderedDict()
+    for keys, value in PRESET_KEYS.items():
+        r = results
+        for k in keys:
+            r = r.setdefault(k, {})
+        r.setdefault('*', value)
+    return results
+
+
+def get_keys():
+    results = get_preset_keys()
+    response = send_wikidata_query(SARQL_KEYS_REQUEST)
+
+    for key in response['results']['bindings']:
+        keys = key['key']['value'].split(':')[1:]
+        if keys[0] == 'currency' and len(keys) > 1:
+            # special case in openstreetmap.py
+            continue
+        if keys[0] == 'contact' and len(keys) > 1:
+            # label for the key "contact.email" is "Email"
+            # whatever the language
+            r = results.setdefault('contact', {})
+            r[keys[1]] = {
+                '*': {
+                    'en': keys[1]
+                }
+            }
+            continue
+        if tuple(keys) in PRESET_KEYS:
+            # skip presets (already set above)
+            continue
+        if get_key_rank(':'.join(keys)) is None\
+           and ':'.join(keys) not in VALUE_TO_LINK\
+           and tuple(keys) not in INCLUDED_KEYS:
+            # keep only keys that will be displayed by openstreetmap.py
+            continue
+        label = key['itemLabel']['value'].lower()
+        lang = key['itemLabel']['xml:lang']
+        r = results
+        for k in keys:
+            r = r.setdefault(k, {})
+        r = r.setdefault('*', {})
+        if lang in LANGUAGES:
+            r.setdefault(lang, label)
+
+    # special cases
+    results['delivery']['covid19']['*'].clear()
+    for k, v in results['delivery']['*'].items():
+        results['delivery']['covid19']['*'][k] = v + ' (COVID19)'
+
+    results['opening_hours']['covid19']['*'].clear()
+    for k, v in results['opening_hours']['*'].items():
+        results['opening_hours']['covid19']['*'][k] = v + ' (COVID19)'
+
+    return results
+
+
+def get_tags():
+    results = collections.OrderedDict()
+    response = send_wikidata_query(SARQL_TAGS_REQUEST)
+    for tag in response['results']['bindings']:
+        tag_names = tag['tag']['value'].split(':')[1].split('=')
+        if len(tag_names) == 2:
+            tag_category, tag_type = tag_names
+        else:
+            tag_category, tag_type = tag_names[0], ''
+        label = tag['itemLabel']['value'].lower()
+        lang = tag['itemLabel']['xml:lang']
+        if lang in LANGUAGES:
+            results.setdefault(tag_category, {}).setdefault(tag_type, {}).setdefault(lang, label)
+    return results
+
+
+def optimize_data_lang(translations):
+    language_to_delete = []
+    # remove "zh-hk" entry if the value is the same as "zh"
+    # same for "en-ca" / "en" etc...
+    for language in translations:
+        if '-' in language:
+            base_language = language.split('-')[0]
+            if translations.get(base_language) == translations.get(language):
+                language_to_delete.append(language)
+
+    for language in language_to_delete:
+        del translations[language]
+    language_to_delete = []
+
+    # remove entries that have the same value than the "en" entry
+    value_en = translations.get('en')
+    if value_en:
+        for language, value in translations.items():
+            if language != 'en' and value == value_en:
+                language_to_delete.append(language)
+
+    for language in language_to_delete:
+        del translations[language]
+
+
+def optimize_tags(data):
+    for v in data.values():
+        for translations in v.values():
+            optimize_data_lang(translations)
+    return data
+
+
+def optimize_keys(data):
+    for k, v in data.items():
+        if k == '*':
+            optimize_data_lang(v)
+        elif isinstance(v, dict):
+            optimize_keys(v)
+    return data
+
+
+def get_osm_tags_filename():
+    return Path(searx_dir) / "data" / "osm_keys_tags.json"
+
+
+set_timeout_for_thread(60)
+result = {
+    'keys': optimize_keys(get_keys()),
+    'tags': optimize_tags(get_tags()),
+}
+with open(get_osm_tags_filename(), 'w') as f:
+    json.dump(result, f, indent=4, ensure_ascii=False)
-- 
cgit v1.2.3


From 8871e39122c951437f407a7f0ebbe1e6a0b7161d Mon Sep 17 00:00:00 2001
From: Markus Heiser
Date: Sun, 6 Jun 2021 12:58:53 +0200
Subject: [pylint] searx_extra/update/update_osm_keys_tags.py

BTW: move some comments into script's doc-string

Signed-off-by: Markus Heiser
---
 searx_extra/update/update_osm_keys_tags.py | 103 +++++++++++++++--------------
 1 file changed, 54 insertions(+), 49 deletions(-)

(limited to 'searx_extra')

diff --git a/searx_extra/update/update_osm_keys_tags.py b/searx_extra/update/update_osm_keys_tags.py
index 98c7617fb..f803d0c33 100755
--- a/searx_extra/update/update_osm_keys_tags.py
+++ b/searx_extra/update/update_osm_keys_tags.py
@@ -1,12 +1,43 @@
 #!/usr/bin/env python
-"""
-Fetch OSM keys and tags
-
-to get the i18n names, the scripts uses query.wikidata.org
-instead of for example https://taginfo.openstreetmap.org/taginfo/apidoc
+# lint: pylint
+# pylint: disable=missing-function-docstring
+"""Fetch OSM keys and tags.
+
+To get the i18n names, the scripts uses `Wikidata Query Service`_ instead of for
+example `OSM tags API`_ (sidenote: the actual change log from
+map.atownsend.org.uk_ might be useful to normalize OSM tags)
+
+.. _Wikidata Query Service: https://query.wikidata.org/
+.. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc
+.. _map.atownsend.org.uk: https://map.atownsend.org.uk/maps/map/changelog.html
+
+:py:obj:`SPARQL_TAGS_REQUEST` :
+    Wikidata SPARQL query that returns *type-categories* and *types*. The
+    returned tag is ``Tag:{category}={type}`` (see :py:func:`get_tags`).
+    Example:
+
+    - https://taginfo.openstreetmap.org/tags/building=house#overview
+    - https://wiki.openstreetmap.org/wiki/Tag:building%3Dhouse
+      at the bottom of the infobox (right side), there is a link to wikidata:
+      https://www.wikidata.org/wiki/Q3947
+      see property "OpenStreetMap tag or key" (P1282)
+    - https://wiki.openstreetmap.org/wiki/Tag%3Abuilding%3Dbungalow
+      https://www.wikidata.org/wiki/Q850107
+
+:py:obj:`SPARQL_KEYS_REQUEST` :
+    Wikidata SPARQL query that returns *keys*. Example with "payment":
+
+    - https://wiki.openstreetmap.org/wiki/Key%3Apayment
+      at the bottom of infobox (right side), there is a link to wikidata:
+      https://www.wikidata.org/wiki/Q1148747
+      link made using the "OpenStreetMap tag or key" property (P1282)
+      to be confirm: there is a one wiki page per key ?
+    - https://taginfo.openstreetmap.org/keys/payment#values
+    - https://taginfo.openstreetmap.org/keys/payment:cash#values
+
+    ``rdfs:label`` get all the labels without language selection
+    (as opposed to SERVICE ``wikibase:label``).
 
-https://map.atownsend.org.uk/maps/map/changelog.html (the actual change log)
-might be useful to normalize OSM tags
 """
 
 import json
@@ -19,50 +50,28 @@ from searx.engines.wikidata import send_wikidata_query
 from searx.languages import language_codes
 from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
 
-
-# nominatim return type category and type
-# the tag is "Tag:{category}={type}"
-# Example:
-# * https://taginfo.openstreetmap.org/tags/building=house#overview
-# * https://wiki.openstreetmap.org/wiki/Tag:building%3Dhouse
-#   at the bottom of the infobox (right side), there is a link to wikidata:
-#   https://www.wikidata.org/wiki/Q3947
-#   see property "OpenStreetMap tag or key" (P1282)
-# * https://wiki.openstreetmap.org/wiki/Tag%3Abuilding%3Dbungalow
-#   https://www.wikidata.org/wiki/Q850107
-SARQL_TAGS_REQUEST = """
+SPARQL_TAGS_REQUEST = """
 SELECT ?tag ?item ?itemLabel WHERE {
   ?item wdt:P1282 ?tag .
   ?item rdfs:label ?itemLabel .
   FILTER(STRSTARTS(?tag, 'Tag'))
 }
-GROUP BY ?tag ?item ?itemLabel
+GROUP BY ?tag ?item ?itemLabel
 ORDER BY ?tag ?item ?itemLabel
 """
 
-# keys
-# Example with "payment"":
-# * https://wiki.openstreetmap.org/wiki/Key%3Apayment
-#   at the bottom of infobox (right side), there is a link to wikidata:
-#   https://www.wikidata.org/wiki/Q1148747
-#   link made using the "OpenStreetMap tag or key" property (P1282)
-#   to be confirm: there is a one wiki page per key ?
-# * https://taginfo.openstreetmap.org/keys/payment#values
-# * https://taginfo.openstreetmap.org/keys/payment:cash#values
-#
-# rdfs:label get all the labels without language selection
-# (as opposed to SERVICE wikibase:label)
-SARQL_KEYS_REQUEST = """
+SPARQL_KEYS_REQUEST = """
 SELECT ?key ?item ?itemLabel WHERE {
   ?item wdt:P1282 ?key .
   ?item rdfs:label ?itemLabel .
   FILTER(STRSTARTS(?key, 'Key'))
 }
-GROUP BY ?key ?item ?itemLabel
+GROUP BY ?key ?item ?itemLabel
 ORDER BY ?key ?item ?itemLabel
 """
 
 LANGUAGES = [l[0].lower() for l in language_codes]
+
 PRESET_KEYS = {
     ('wikidata',): {'en': 'Wikidata'},
     ('wikipedia',): {'en': 'Wikipedia'},
@@ -71,11 +80,11 @@ PRESET_KEYS = {
     ('fax',): {'en': 'Fax'},
     ('internet_access', 'ssid'): {'en': 'Wi-Fi'},
 }
+
 INCLUDED_KEYS = {
     ('addr', )
 }
 
-
 def get_preset_keys():
     results = collections.OrderedDict()
     for keys, value in PRESET_KEYS.items():
@@ -85,10 +94,9 @@ def get_preset_keys():
         r.setdefault('*', value)
     return results
 
-
 def get_keys():
     results = get_preset_keys()
-    response = send_wikidata_query(SARQL_KEYS_REQUEST)
+    response = send_wikidata_query(SPARQL_KEYS_REQUEST)
 
     for key in response['results']['bindings']:
         keys = key['key']['value'].split(':')[1:]
@@ -136,7 +144,7 @@ def get_keys():
 
 def get_tags():
     results = collections.OrderedDict()
-    response = send_wikidata_query(SARQL_TAGS_REQUEST)
+    response = send_wikidata_query(SPARQL_TAGS_REQUEST)
     for tag in response['results']['bindings']:
         tag_names = tag['tag']['value'].split(':')[1].split('=')
         if len(tag_names) == 2:
@@ -149,7 +157,6 @@ def get_tags():
             results.setdefault(tag_category, {}).setdefault(tag_type, {}).setdefault(lang, label)
     return results
 
-
 def optimize_data_lang(translations):
     language_to_delete = []
     # remove "zh-hk" entry if the value is the same as "zh"
@@ -174,14 +181,12 @@ def optimize_data_lang(translations):
     for language in language_to_delete:
         del translations[language]
 
-
 def optimize_tags(data):
     for v in data.values():
         for translations in v.values():
             optimize_data_lang(translations)
     return data
 
-
 def optimize_keys(data):
     for k, v in data.items():
         if k == '*':
@@ -190,15 +195,15 @@ def optimize_keys(data):
             optimize_keys(v)
     return data
 
-
 def get_osm_tags_filename():
     return Path(searx_dir) / "data" / "osm_keys_tags.json"
 
+if __name__ == '__main__':
 
-set_timeout_for_thread(60)
-result = {
-    'keys': optimize_keys(get_keys()),
-    'tags': optimize_tags(get_tags()),
-}
-with open(get_osm_tags_filename(), 'w') as f:
-    json.dump(result, f, indent=4, ensure_ascii=False)
+    set_timeout_for_thread(60)
+    result = {
+        'keys': optimize_keys(get_keys()),
+        'tags': optimize_tags(get_tags()),
+    }
+    with open(get_osm_tags_filename(), 'w') as f:
+        json.dump(result, f, indent=4, ensure_ascii=False)
-- 
cgit v1.2.3