summaryrefslogtreecommitdiff
path: root/searx/engines/wikidata.py
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2022-10-28 19:12:59 +0200
committerMarkus Heiser <markus.heiser@darmarit.de>2023-03-24 10:37:42 +0100
commit858aa3e6043a5102aec1b05e94ef1d65059f8898 (patch)
tree18513c6ddb9a4dd7262023747e85c417325c03ba /searx/engines/wikidata.py
parente0a6ca96cc071c7b02fb024d34a8902e636d0653 (diff)
[mod] wikipedia & wikidata: upgrade to data_type: traits_v1
BTW this fix an issue in wikipedia: SearXNG's locales zh-TW and zh-HK are now using language `zh-classical` from wikipedia (and not `zh`). Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/wikidata.py')
-rw-r--r--searx/engines/wikidata.py51
1 files changed, 36 insertions, 15 deletions
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index a38600978..6ea77f092 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -1,9 +1,12 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Wikidata
+"""This module implements the Wikidata engine. Some implementations are shared
+from :ref:`wikipedia engine`.
+
"""
# pylint: disable=missing-class-docstring
+from typing import TYPE_CHECKING
from hashlib import md5
from urllib.parse import urlencode, unquote
from json import loads
@@ -13,13 +16,17 @@ from babel.dates import format_datetime, format_date, format_time, get_datetime_
from searx.data import WIKIDATA_UNITS
from searx.network import post, get
-from searx.utils import match_language, searx_useragent, get_string_replaces_function
+from searx.utils import searx_useragent, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
-from searx.engines.wikipedia import ( # pylint: disable=unused-import
- fetch_traits,
- _fetch_supported_languages,
- supported_languages_url,
-)
+from searx.engines.wikipedia import fetch_traits as _fetch_traits
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+traits: EngineTraits
# about
about = {
@@ -155,33 +162,35 @@ def send_wikidata_query(query, method='GET'):
def request(query, params):
- language = params['language'].split('-')[0]
- if language == 'all':
- language = 'en'
- else:
- language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
+
+ # wikidata does not support zh-classical (zh_Hans) / zh-TW, zh-HK and zh-CN
+ # mapped to zh
+ sxng_lang = params['searxng_locale'].split('-')[0]
+ language = traits.get_language(sxng_lang, 'en')
query, attributes = get_query(query, language)
+ logger.debug("request --> language %s // len(attributes): %s", language, len(attributes))
params['method'] = 'POST'
params['url'] = SPARQL_ENDPOINT_URL
params['data'] = {'query': query}
params['headers'] = get_headers()
-
params['language'] = language
params['attributes'] = attributes
+
return params
def response(resp):
+
results = []
jsonresponse = loads(resp.content.decode())
- language = resp.search_params['language'].lower()
+ language = resp.search_params['language']
attributes = resp.search_params['attributes']
+ logger.debug("request --> language %s // len(attributes): %s", language, len(attributes))
seen_entities = set()
-
for result in jsonresponse.get('results', {}).get('bindings', []):
attribute_result = {key: value['value'] for key, value in result.items()}
entity_url = attribute_result['item']
@@ -757,3 +766,15 @@ def init(engine_settings=None): # pylint: disable=unused-argument
lang = result['name']['xml:lang']
entity_id = result['item']['value'].replace('http://www.wikidata.org/entity/', '')
WIKIDATA_PROPERTIES[(entity_id, lang)] = name.capitalize()
+
+
+def fetch_traits(engine_traits: EngineTraits):
+ """Use languages evaluated from :py:obj:`wikipedia.fetch_traits
+ <searx.engines.wikipedia.fetch_traits>` except zh-classical (zh_Hans) what
+ is not supported by wikidata."""
+
+ _fetch_traits(engine_traits)
+ # wikidata does not support zh-classical (zh_Hans)
+ engine_traits.languages.pop('zh_Hans')
+ # wikidata does not have net-locations for the languages
+ engine_traits.custom['wiki_netloc'] = {}