summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2025-05-04 20:37:06 +0200
committerMarkus Heiser <markus.heiser@darmarIT.de>2025-05-31 20:34:59 +0200
commita800dd04735c98a293edff00493a5fee3dfeaed7 (patch)
tree7ea69e8d6dd525ca6c22c40593e91c3a446f34f0 /searx
parentcf59ee2efcf5bbebf2899b1f57e892f25d23864d (diff)
[mod] implement searx.wikidata_units for unit converters
Diffstat (limited to 'searx')
-rw-r--r--searx/plugins/unit_converter.py128
-rw-r--r--searx/wikidata_units.py231
2 files changed, 232 insertions, 127 deletions
diff --git a/searx/plugins/unit_converter.py b/searx/plugins/unit_converter.py
index 2bab598f2..0072afe55 100644
--- a/searx/plugins/unit_converter.py
+++ b/searx/plugins/unit_converter.py
@@ -15,7 +15,7 @@ import babel.numbers
from flask_babel import gettext, get_locale
-from searx import data
+from searx.wikidata_units import symbol_to_si
from searx.plugins import Plugin, PluginInfo
from searx.result_types import EngineResults
@@ -86,132 +86,6 @@ RE_MEASURE = r'''
'''
-ADDITIONAL_UNITS = [
- {
- "si_name": "Q11579",
- "symbol": "°C",
- "to_si": lambda val: val + 273.15,
- "from_si": lambda val: val - 273.15,
- },
- {
- "si_name": "Q11579",
- "symbol": "°F",
- "to_si": lambda val: (val + 459.67) * 5 / 9,
- "from_si": lambda val: (val * 9 / 5) - 459.67,
- },
-]
-"""Additional items to convert from a measure unit to a SI unit (vice versa).
-
-.. code:: python
-
- {
- "si_name": "Q11579", # Wikidata item ID of the SI unit (Kelvin)
- "symbol": "°C", # symbol of the measure unit
- "to_si": lambda val: val + 273.15, # convert measure value (val) to SI unit
- "from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
- },
- {
- "si_name": "Q11573",
- "symbol": "mi",
- "to_si": 1609.344, # convert measure value (val) to SI unit
- "from_si": 1 / 1609.344 # convert SI value (val) measure unit
- },
-
-The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
-or a callable_ (val in / converted value returned).
-
-.. _callable: https://docs.python.org/3/glossary.html#term-callable
-"""
-
-
-ALIAS_SYMBOLS = {
- '°C': ('C',),
- '°F': ('F',),
- 'mi': ('L',),
-}
-"""Alias symbols for known unit of measure symbols / by example::
-
- '°C': ('C', ...), # list of alias symbols for °C (Q69362731)
- '°F': ('F', ...), # list of alias symbols for °F (Q99490479)
- 'mi': ('L',), # list of alias symbols for mi (Q253276)
-"""
-
-
-SYMBOL_TO_SI = []
-
-
-def symbol_to_si():
- """Generates a list of tuples, each tuple is a measure unit and the fields
- in the tuple are:
-
- 0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)
-
- 1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')
-
- 2. Factor to get SI value from measure unit (e.g. 1mi is equal to SI 1m
- multiplied by 1609.344)
-
- 3. Factor to get measure value from from SI value (e.g. SI 100m is equal to
- 100mi divided by 1609.344)
-
- The returned list is sorted, the first items are created from
- ``WIKIDATA_UNITS``, the second group of items is build from
- :py:obj:`ADDITIONAL_UNITS` and items created from :py:obj:`ALIAS_SYMBOLS`.
-
- If you search this list for a symbol, then a match with a symbol from
- Wikidata has the highest weighting (first hit in the list), followed by the
- symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
- given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.
-
- """
-
- global SYMBOL_TO_SI # pylint: disable=global-statement
- if SYMBOL_TO_SI:
- return SYMBOL_TO_SI
-
- # filter out units which can't be normalized to a SI unit and filter out
- # units without a symbol / arcsecond does not have a symbol
- # https://www.wikidata.org/wiki/Q829073
-
- for item in data.WIKIDATA_UNITS.values():
- if item['to_si_factor'] and item['symbol']:
- SYMBOL_TO_SI.append(
- (
- item['symbol'],
- item['si_name'],
- 1 / item['to_si_factor'], # from_si
- item['to_si_factor'], # to_si
- item['symbol'],
- )
- )
-
- for item in ADDITIONAL_UNITS:
- SYMBOL_TO_SI.append(
- (
- item['symbol'],
- item['si_name'],
- item['from_si'],
- item['to_si'],
- item['symbol'],
- )
- )
-
- alias_items = []
- for item in SYMBOL_TO_SI:
- for alias in ALIAS_SYMBOLS.get(item[0], ()):
- alias_items.append(
- (
- alias,
- item[1],
- item[2], # from_si
- item[3], # to_si
- item[0], # origin unit
- )
- )
- SYMBOL_TO_SI = SYMBOL_TO_SI + alias_items
- return SYMBOL_TO_SI
-
-
def _parse_text_and_convert(from_query, to_query) -> str | None:
# pylint: disable=too-many-branches, too-many-locals
diff --git a/searx/wikidata_units.py b/searx/wikidata_units.py
new file mode 100644
index 000000000..9fc94585f
--- /dev/null
+++ b/searx/wikidata_units.py
@@ -0,0 +1,231 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Unit conversion on the basis of `SPARQL/WIKIDATA Precision, Units and
+Coordinates`_
+
+.. _SPARQL/WIKIDATA Precision, Units and Coordinates:
+ https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
+"""
+
+__all__ = ["convert_from_si", "convert_to_si", "symbol_to_si"]
+
+import collections
+
+from searx import data
+from searx.engines import wikidata
+
+# Units appended by symbol_to_si() after the ones from data.WIKIDATA_UNITS.
+# Both entries below need an offset in addition to a scale, which a plain
+# multiplier cannot express, so ``to_si`` / ``from_si`` are callables here.
+ADDITIONAL_UNITS = [
+ {
+ "si_name": "Q11579",
+ "symbol": "°C",
+ "to_si": lambda val: val + 273.15,
+ "from_si": lambda val: val - 273.15,
+ },
+ {
+ "si_name": "Q11579",
+ "symbol": "°F",
+ "to_si": lambda val: (val + 459.67) * 5 / 9,
+ "from_si": lambda val: (val * 9 / 5) - 459.67,
+ },
+]
+"""Additional items to convert from a measure unit to a SI unit (vice versa).
+
+.. code:: python
+
+ {
+ "si_name": "Q11579", # Wikidata item ID of the SI unit (Kelvin)
+ "symbol": "°C", # symbol of the measure unit
+ "to_si": lambda val: val + 273.15, # convert measure value (val) to SI unit
+ "from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
+ },
+ {
+ "si_name": "Q11573",
+ "symbol": "mi",
+ "to_si": 1609.344, # convert measure value (val) to SI unit
+ "from_si": 1 / 1609.344 # convert SI value (val) measure unit
+ },
+
+The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
+or a callable_ (val in / converted value returned).
+
+.. _callable: https://docs.python.org/3/glossary.html#term-callable
+"""
+
+
+# Maps a known unit symbol to a tuple of alternative symbols.  symbol_to_si()
+# emits one extra entry per alias, reusing the conversion of the known symbol
+# and remembering that symbol as the alias' origin.
+ALIAS_SYMBOLS = {
+ '°C': ('C',),
+ '°F': ('F',),
+ 'mi': ('L',),
+}
+"""Alias symbols for known unit of measure symbols / by example::
+
+ '°C': ('C', ...), # list of alias symbols for °C (Q69362731)
+ '°F': ('F', ...), # list of alias symbols for °F (Q99490479)
+ 'mi': ('L',), # list of alias symbols for mi (Q253276)
+"""
+
+
+# Cache of symbol_to_si(): stays empty until the first call builds the list.
+SYMBOL_TO_SI = []
+# Cache of units_by_si_name(): None until the first call builds the mapping.
+UNITS_BY_SI_NAME: dict | None = None
+
+
+# Positions of the conversion fields in the tuples built by symbol_to_si():
+# (symbol, si_name, from_si, to_si, origin symbol)
+_POS_FROM_SI = 2
+_POS_TO_SI = 3
+
+
+def _apply_conversion(conversion, value: float | int) -> float:
+    """Apply ``conversion`` (a multiplier or a callable) to ``value``."""
+    if isinstance(conversion, (float, int)):
+        return float(value) * conversion
+    return conversion(float(value))
+
+
+def convert_from_si(si_name: str, symbol: str, value: float | int) -> float:
+    """Convert ``value`` from the SI unit ``si_name`` to the measure unit
+    ``symbol``.
+
+    :param si_name: Wikidata item ID of the SI unit (e.g. ``Q11573``).
+    :param symbol: symbol of the target measure unit (e.g. ``mi``).
+    :param value: quantity expressed in the SI unit.
+    :raises KeyError: if ``si_name`` or ``symbol`` is unknown.
+    """
+    from_si = units_by_si_name(si_name)[symbol][_POS_FROM_SI]
+    return _apply_conversion(from_si, value)
+
+
+def convert_to_si(si_name: str, symbol: str, value: float | int) -> float:
+    """Convert ``value`` from the measure unit ``symbol`` to the SI unit
+    ``si_name``.
+
+    :param si_name: Wikidata item ID of the SI unit (e.g. ``Q11573``).
+    :param symbol: symbol of the source measure unit (e.g. ``mi``).
+    :param value: quantity expressed in the measure unit.
+    :raises KeyError: if ``si_name`` or ``symbol`` is unknown.
+    """
+    to_si = units_by_si_name(si_name)[symbol][_POS_TO_SI]
+    return _apply_conversion(to_si, value)
+
+
+def units_by_si_name(si_name):
+    """Return the units which normalize to the SI unit ``si_name`` as a dict
+    that maps a unit symbol to its tuple from :py:obj:`symbol_to_si`.
+
+    The mapping for all SI units is built on the first call and cached in
+    :py:obj:`UNITS_BY_SI_NAME`.
+
+    :raises KeyError: if no unit normalizes to ``si_name``.
+    """
+
+    global UNITS_BY_SI_NAME  # pylint: disable=global-statement
+    if UNITS_BY_SI_NAME is None:
+        # Build into a local dict and publish it only when complete, so a
+        # failure while building never leaves a half-filled cache behind.
+        grouped: dict = {}
+        for item in symbol_to_si():
+            symbol, item_si_name = item[0], item[1]
+            # symbol_to_si() lists the highest weighted entry of a symbol
+            # first, therefore the first hit wins.
+            grouped.setdefault(item_si_name, {}).setdefault(symbol, item)
+        UNITS_BY_SI_NAME = grouped
+    return UNITS_BY_SI_NAME[si_name]
+
+
+def symbol_to_si():
+    """Generates a list of tuples, each tuple is a measure unit and the fields
+    in the tuple are:
+
+    0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)
+
+    1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')
+
+    2. ``from_si``: multiplier or callable to get the measure value from the
+       SI value (e.g. SI 1609.344m multiplied by 1 / 1609.344 is equal to 1mi)
+
+    3. ``to_si``: multiplier or callable to get the SI value from the measure
+       value (e.g. 1mi multiplied by 1609.344 is equal to SI 1609.344m)
+
+    4. Origin symbol: for an alias this is the symbol the alias stands for,
+       for all other items it is identical to field 0.
+
+    The returned list is ordered, the first items are created from
+    ``WIKIDATA_UNITS``, the second group of items is built from
+    :py:obj:`ADDITIONAL_UNITS` and the last group are the items created from
+    :py:obj:`ALIAS_SYMBOLS`.
+
+    If you search this list for a symbol, then a match with a symbol from
+    Wikidata has the highest weighting (first hit in the list), followed by the
+    symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
+    given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.
+
+    The list is built on the first call and cached in :py:obj:`SYMBOL_TO_SI`.
+    """
+
+    global SYMBOL_TO_SI  # pylint: disable=global-statement
+    if SYMBOL_TO_SI:
+        return SYMBOL_TO_SI
+
+    # filter out units which can't be normalized to a SI unit and filter out
+    # units without a symbol / arcsecond does not have a symbol
+    # https://www.wikidata.org/wiki/Q829073
+
+    # The list is assembled locally and published at the end: an exception
+    # while building must not leave a partial list in the cache.
+    units = [
+        (
+            item['symbol'],
+            item['si_name'],
+            1 / item['to_si_factor'],  # from_si
+            item['to_si_factor'],  # to_si
+            item['symbol'],
+        )
+        for item in data.WIKIDATA_UNITS.values()
+        if item['to_si_factor'] and item['symbol']
+    ]
+
+    units.extend(
+        (
+            item['symbol'],
+            item['si_name'],
+            item['from_si'],
+            item['to_si'],
+            item['symbol'],
+        )
+        for item in ADDITIONAL_UNITS
+    )
+
+    alias_items = []
+    for item in units:
+        for alias in ALIAS_SYMBOLS.get(item[0], ()):
+            alias_items.append(
+                (
+                    alias,
+                    item[1],
+                    item[2],  # from_si
+                    item[3],  # to_si
+                    item[0],  # origin unit
+                )
+            )
+
+    SYMBOL_TO_SI = units + alias_items
+    return SYMBOL_TO_SI
+
+
+# The response contains the same ?item several times when the item has more
+# than one ?symbol.  "ORDER BY ?item DESC(?rank) ?symbol" provides a
+# deterministic result even if an ?item has several ?symbol of the same rank.
+# see:
+# * https://www.wikidata.org/wiki/Help:Ranking
+# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
+# * https://w.wiki/32BT
+# * https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
+# For an example see the result for https://www.wikidata.org/wiki/Q11582,
+# there are multiple symbols of the same rank.
+
+SARQL_REQUEST = """
+SELECT DISTINCT ?item ?symbol ?tosi ?tosiUnit
+WHERE
+{
+ ?item wdt:P31/wdt:P279 wd:Q47574 .
+ ?item p:P5061 ?symbolP .
+ ?symbolP ps:P5061 ?symbol ;
+ wikibase:rank ?rank .
+ OPTIONAL {
+ ?item p:P2370 ?tosistmt .
+ ?tosistmt psv:P2370 ?tosinode .
+ ?tosinode wikibase:quantityAmount ?tosi .
+ ?tosinode wikibase:quantityUnit ?tosiUnit .
+ }
+ FILTER(LANG(?symbol) = "en").
+}
+ORDER BY ?item DESC(?rank) ?symbol
+"""
+
+
+def fetch_units():
+    """Fetch units from Wikidata.  Function is used to update persistence of
+    :py:obj:`searx.data.WIKIDATA_UNITS`.
+
+    Returns an ordered dict that maps the Wikidata item ID of a unit to a dict
+    with the keys ``symbol``, ``si_name`` and ``to_si_factor``; the latter two
+    are ``None`` when the response row carries no SI conversion.
+    """
+
+    units = collections.OrderedDict()
+    bindings = wikidata.send_wikidata_query(SARQL_REQUEST)['results']['bindings']
+
+    for row in bindings:
+        symbol = row['symbol']['value']
+        item_id = row['item']['value'].rsplit('/', 1)[1]
+
+        # the SI unit is an entity URI, only its last path segment is kept
+        si_uri = row.get('tosiUnit', {}).get('value', '')
+        si_name = si_uri.rsplit('/', 1)[1] if si_uri else None
+
+        factor = row.get('tosi', {}).get('value', '')
+
+        # ignore duplicate: always use the first one
+        if item_id in units:
+            continue
+
+        units[item_id] = {
+            'symbol': symbol,
+            'si_name': si_name,
+            'to_si_factor': float(factor) if factor else None,
+        }
+    return units