summaryrefslogtreecommitdiff
path: root/searx/wikidata_units.py
blob: 9fc94585ff56267dc20a369617dcd0e6783457ef (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Unit conversion on the basis of `SPARQL/WIKIDATA Precision, Units and
Coordinates`_

.. _SPARQL/WIKIDATA Precision, Units and Coordinates:
   https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
"""

__all__ = ["convert_from_si", "convert_to_si", "symbol_to_si"]

import collections

from searx import data
from searx.engines import wikidata

# Units that are not described by a plain multiplier and therefore carry
# callables for both conversion directions.  Both entries convert to / from
# the SI unit Q11579 (Kelvin).
ADDITIONAL_UNITS = [
    {
        # degree Celsius <-> Kelvin: constant offset of 273.15
        "si_name": "Q11579",
        "symbol": "°C",
        "to_si": lambda val: val + 273.15,
        "from_si": lambda val: val - 273.15,
    },
    {
        # degree Fahrenheit <-> Kelvin: offset of 459.67 and a scale of 5/9
        "si_name": "Q11579",
        "symbol": "°F",
        "to_si": lambda val: (val + 459.67) * 5 / 9,
        "from_si": lambda val: (val * 9 / 5) - 459.67,
    },
]
"""Additional items to convert from a measure unit to a SI unit (and vice versa).

.. code:: python

    {
        "si_name": "Q11579",                 # Wikidata item ID of the SI unit (Kelvin)
        "symbol": "°C",                      # symbol of the measure unit
        "to_si": lambda val: val + 273.15,   # convert measure value (val) to SI unit
        "from_si": lambda val: val - 273.15, # convert SI value (val) to measure unit
    },
    {
        "si_name": "Q11573",
        "symbol": "mi",
        "to_si": 1609.344,                   # convert measure value (val) to SI unit
        "from_si": 1 / 1609.344              # convert SI value (val) to measure unit
    },

The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
or a callable_ (val in / converted value returned).

.. _callable: https://docs.python.org/3/glossary.html#term-callable
"""


# Maps the symbol of a known unit of measure to a tuple of alternative
# symbols; symbol_to_si() emits one additional entry per alias, using the
# conversion of the original unit.
ALIAS_SYMBOLS = {
    '°C': ('C',),
    '°F': ('F',),
    # NOTE(review): 'L' as an alias of 'mi' looks unusual -- confirm it is intended.
    'mi': ('L',),
}
"""Alias symbols for known unit of measure symbols, for example::

    '°C': ('C', ...),  # list of alias symbols for °C (Q69362731)
    '°F': ('F', ...),  # list of alias symbols for °F (Q99490479)
    'mi': ('L',),      # list of alias symbols for mi (Q253276)
"""


# Module-level cache of the unit tuples; empty until symbol_to_si() has been
# called for the first time.
SYMBOL_TO_SI = []
# Module-level cache of the lookup table built by units_by_si_name(); stays
# None until that function has been called for the first time.
UNITS_BY_SI_NAME: dict | None = None


# Positions of the fields in the tuples generated by symbol_to_si():
# (symbol, si_name, from_si, to_si, origin symbol)
_POS_SYMBOL = 0
_POS_SI_NAME = 1
_POS_FROM_SI = 2
_POS_TO_SI = 3


def _apply_conversion(conversion, value: float | int) -> float:
    """Apply ``conversion`` to ``value``; ``conversion`` is either a numeric
    multiplier or a callable (val in / converted value returned)."""
    if isinstance(conversion, (float, int)):
        return float(value) * conversion
    return conversion(float(value))


def convert_from_si(si_name: str, symbol: str, value: float | int) -> float:
    """Convert ``value``, given in the SI unit ``si_name``, into the unit of
    measure identified by ``symbol``.

    :param si_name: Wikidata item ID of the SI unit (e.g. ``Q11579``).
    :param symbol: symbol of the target unit of measure (e.g. ``°C``).
    :param value: quantity expressed in the SI unit.
    :raises KeyError: if ``si_name`` is unknown or has no unit ``symbol``.
    """
    from_si = units_by_si_name(si_name)[symbol][_POS_FROM_SI]
    return _apply_conversion(from_si, value)


def convert_to_si(si_name: str, symbol: str, value: float | int) -> float:
    """Convert ``value``, given in the unit of measure identified by
    ``symbol``, into the SI unit ``si_name``.

    :param si_name: Wikidata item ID of the SI unit (e.g. ``Q11579``).
    :param symbol: symbol of the source unit of measure (e.g. ``°C``).
    :param value: quantity expressed in the unit ``symbol``.
    :raises KeyError: if ``si_name`` is unknown or has no unit ``symbol``.
    """
    to_si = units_by_si_name(si_name)[symbol][_POS_TO_SI]
    return _apply_conversion(to_si, value)


def units_by_si_name(si_name):
    """Return the units that can be converted to the SI unit ``si_name`` as a
    dictionary that maps the unit symbol to the tuple from :py:obj:`symbol_to_si`.

    The lookup table for all SI units is built on the first call and cached in
    ``UNITS_BY_SI_NAME``.

    :raises KeyError: if no unit is known for ``si_name``.
    """

    global UNITS_BY_SI_NAME  # pylint: disable=global-statement
    if UNITS_BY_SI_NAME is None:
        # Build into a local dict and publish it only when complete, so an
        # exception while building never leaves a partial table in the cache.
        table = {}
        for item in symbol_to_si():
            by_symbol = table.setdefault(item[_POS_SI_NAME], {})
            # symbol_to_si() lists the entries with the highest weighting
            # first: keep the first hit, do not overwrite it by later ones.
            by_symbol.setdefault(item[_POS_SYMBOL], item)
        UNITS_BY_SI_NAME = table
    return UNITS_BY_SI_NAME[si_name]


def symbol_to_si():
    """Generates a list of tuples, each tuple is a measure unit and the fields
    in the tuple are:

    0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)

    1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')

    2. ``from_si``: factor (or callable) to get the measure value from the SI
       value (e.g. SI 1609.344m multiplied by 1 / 1609.344 is equal to 1mi)

    3. ``to_si``: factor (or callable) to get the SI value from the measure
       value (e.g. 1mi multiplied by 1609.344 is equal to SI 1609.344m)

    4. Symbol of the origin measure unit: identical to field 0, except for
       items created from :py:obj:`ALIAS_SYMBOLS` where it is the symbol the
       alias stands for.

    The returned list is ordered by groups, the first items are created from
    ``WIKIDATA_UNITS``, the second group of items is built from
    :py:obj:`ADDITIONAL_UNITS` and the last group are the items created from
    :py:obj:`ALIAS_SYMBOLS`.

    If you search this list for a symbol, then a match with a symbol from
    Wikidata has the highest weighting (first hit in the list), followed by the
    symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
    given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.

    The list is built on the first call and cached in ``SYMBOL_TO_SI``.
    """

    global SYMBOL_TO_SI  # pylint: disable=global-statement
    if SYMBOL_TO_SI:
        return SYMBOL_TO_SI

    # The list is assembled in a local variable and published only when it is
    # complete: an exception while building must not leave a truncated (but
    # truthy) list in the module cache.

    # filter out units which can't be normalized to a SI unit and filter out
    # units without a symbol / arcsecond does not have a symbol
    # https://www.wikidata.org/wiki/Q829073

    units = [
        (
            item['symbol'],
            item['si_name'],
            1 / item['to_si_factor'],  # from_si
            item['to_si_factor'],  # to_si
            item['symbol'],
        )
        for item in data.WIKIDATA_UNITS.values()
        if item['to_si_factor'] and item['symbol']
    ]

    units.extend(
        (
            item['symbol'],
            item['si_name'],
            item['from_si'],
            item['to_si'],
            item['symbol'],
        )
        for item in ADDITIONAL_UNITS
    )

    # an alias shares the SI name and the conversions of its origin unit
    alias_items = [
        (
            alias,
            item[1],
            item[2],  # from_si
            item[3],  # to_si
            item[0],  # origin unit
        )
        for item in units
        for alias in ALIAS_SYMBOLS.get(item[0], ())
    ]

    SYMBOL_TO_SI = units + alias_items
    return SYMBOL_TO_SI


# SPARQL query used by fetch_units() to request the units of measure.
#
# The response contains duplicates: the same ?item can show up several times
# with a different ?symbol.  "ORDER BY ?item DESC(?rank) ?symbol" provides a
# deterministic result even if an ?item has several ?symbol values of the
# same rank.
#
# see:
# * https://www.wikidata.org/wiki/Help:Ranking
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
# * https://w.wiki/32BT
# * https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
#   see the result for https://www.wikidata.org/wiki/Q11582
#   there are multiple symbols of the same rank

SARQL_REQUEST = """
SELECT DISTINCT ?item ?symbol ?tosi ?tosiUnit
WHERE
{
  ?item wdt:P31/wdt:P279 wd:Q47574 .
  ?item p:P5061 ?symbolP .
  ?symbolP ps:P5061 ?symbol ;
           wikibase:rank ?rank .
  OPTIONAL {
    ?item p:P2370 ?tosistmt .
    ?tosistmt psv:P2370 ?tosinode .
    ?tosinode wikibase:quantityAmount ?tosi .
    ?tosinode wikibase:quantityUnit ?tosiUnit .
  }
  FILTER(LANG(?symbol) = "en").
}
ORDER BY ?item DESC(?rank) ?symbol
"""


def fetch_units():
    """Query Wikidata for the units of measure and return them as an ordered
    mapping.  The function is used to update the persistence of
    :py:obj:`searx.data.WIKIDATA_UNITS`.

    The key of the mapping is the last path segment of the ``item`` URI, the
    value is a dictionary with the fields ``symbol``, ``si_name`` and
    ``to_si_factor``; the latter two are ``None`` when the response has no
    value for them.
    """

    units = collections.OrderedDict()
    reply = wikidata.send_wikidata_query(SARQL_REQUEST)

    for binding in reply['results']['bindings']:
        symbol = binding['symbol']['value']
        item_id = binding['item']['value'].rsplit('/', 1)[1]

        # the SI unit and the factor are optional in the response
        si_uri = binding.get('tosiUnit', {}).get('value', '')
        si_name = si_uri.rsplit('/', 1)[1] if si_uri else si_uri
        factor = binding.get('tosi', {}).get('value', '')

        if item_id in units:
            # ignore duplicate: always use the first one
            continue

        units[item_id] = {
            'symbol': symbol,
            'si_name': si_name or None,
            'to_si_factor': float(factor) if factor else None,
        }

    return units