From 6e5f22e5583cfc2a413e0afac66d3c5ea9f628b1 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Thu, 29 Sep 2022 20:54:46 +0200 Subject: [mod] replace engines_languages.json by engines_traits.json Implementations of the *traits* of the engines. Engine's traits are fetched from the origin engine and stored in a JSON file in the *data folder*. Most often traits are languages and region codes and their mapping from SearXNG's representation to the representation in the origin search engine. To load traits from the persistence:: searx.enginelib.traits.EngineTraitsMap.from_data() For new traits new properties can be added to the class:: searx.enginelib.traits.EngineTraits .. hint:: Implementation is downward compatible to the deprecated *supported_languages method* from the vintage implementation. The vintage code is tagged as *deprecated* an can be removed when all engines has been ported to the *traits method*. Signed-off-by: Markus Heiser --- searx/locales.py | 82 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 11 deletions(-) (limited to 'searx/locales.py') diff --git a/searx/locales.py b/searx/locales.py index 9e06bf39d..a4560aab7 100644 --- a/searx/locales.py +++ b/searx/locales.py @@ -8,7 +8,7 @@ from typing import Set import os import pathlib -from babel import Locale +import babel from babel.support import Translations import babel.languages import babel.core @@ -134,7 +134,7 @@ def locales_initialize(directory=None): flask_babel.get_translations = get_translations for tag, descr in ADDITIONAL_TRANSLATIONS.items(): - locale = Locale.parse(LOCALE_BEST_MATCH[tag], sep='-') + locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-') LOCALE_NAMES[tag] = descr if locale.text_direction == 'rtl': RTL_LOCALES.add(tag) @@ -142,7 +142,7 @@ def locales_initialize(directory=None): for tag in LOCALE_BEST_MATCH: descr = LOCALE_NAMES.get(tag) if not descr: - locale = Locale.parse(tag, sep='-') + locale = babel.Locale.parse(tag, 
sep='-') LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_')) if locale.text_direction == 'rtl': RTL_LOCALES.add(tag) @@ -154,12 +154,66 @@ def locales_initialize(directory=None): tag = dirname.replace('_', '-') descr = LOCALE_NAMES.get(tag) if not descr: - locale = Locale.parse(dirname) + locale = babel.Locale.parse(dirname) LOCALE_NAMES[tag] = get_locale_descr(locale, dirname) if locale.text_direction == 'rtl': RTL_LOCALES.add(tag) +def region_tag(locale: babel.Locale) -> str: + """Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US).""" + if not locale.territory: + raise ValueError('%s missed a territory') + return locale.language + '-' + locale.territory + + +def language_tag(locale: babel.Locale) -> str: + """Returns SearXNG's language tag from the locale and if exits, the tag + includes the script name (e.g. en, zh_Hant). + """ + sxng_lang = locale.language + if locale.script: + sxng_lang += '_' + locale.script + return sxng_lang + + +def get_offical_locales( + territory: str, languages=None, regional: bool = False, de_facto: bool = True +) -> Set[babel.Locale]: + """Returns a list of :py:obj:`babel.Locale` with languages from + :py:obj:`babel.languages.get_official_languages`. + + :param territory: The territory (country or region) code. + + :param languages: A list of language codes the languages from + :py:obj:`babel.languages.get_official_languages` should be in + (intersection). If this argument is ``None``, all official languages in + this territory are used. + + :param regional: If the regional flag is set, then languages which are + regionally official are also returned. + + :param de_facto: If the de_facto flag is set to `False`, then languages + which are “de facto” official are not returned. 
+ + """ + ret_val = set() + o_languages = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto) + + if languages: + languages = [l.lower() for l in languages] + o_languages = set(l for l in o_languages if l.lower() in languages) + + for lang in o_languages: + try: + locale = babel.Locale.parse(lang + '_' + territory) + ret_val.add(locale) + except babel.UnknownLocaleError: + continue + + return ret_val + + def get_engine_locale(searxng_locale, engine_locales, default=None): """Return engine's language (aka locale) string that best fits to argument ``searxng_locale``. @@ -177,6 +231,10 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): ... 'pl-PL' : 'pl_PL', 'pt-PT' : 'pt_PT' + .. + 'zh' : 'zh' + 'zh_Hans' : 'zh' + 'zh_Hant' : 'zh-classical' } .. hint:: @@ -210,13 +268,13 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): engine. """ - # pylint: disable=too-many-branches + # pylint: disable=too-many-branches, too-many-return-statements engine_locale = engine_locales.get(searxng_locale) if engine_locale is not None: - # There was a 1:1 mapping (e.g. "fr-BE --> fr_BE" or "fr --> fr_FR"), no - # need to narrow language nor territory. + # There was a 1:1 mapping (e.g. a region "fr-BE --> fr_BE" or a language + # "zh --> zh"), no need to narrow language-script nor territory. return engine_locale try: @@ -227,6 +285,12 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): except babel.core.UnknownLocaleError: return default + searxng_lang = language_tag(locale) + engine_locale = engine_locales.get(searxng_lang) + if engine_locale is not None: + # There was a 1:1 mapping (e.g. "zh-HK --> zh_Hant" or "zh-CN --> zh_Hans") + return engine_locale + # SearXNG's selected locale is not supported by the engine .. 
if locale.territory: @@ -247,10 +311,6 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): if locale.language: - searxng_lang = locale.language - if locale.script: - searxng_lang += '_' + locale.script - terr_lang_dict = {} for territory, langs in babel.core.get_global("territory_languages").items(): if not langs.get(searxng_lang, {}).get('official_status'): -- cgit v1.2.3 From 16f0db44939c23d2980d6fd2e5dfada13d8f5ee9 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 7 Feb 2023 14:11:58 +0100 Subject: [mod] replace utils.match_language by locales.match_locale This patch replaces the *full of magic* ``utils.match_language`` function by a ``locales.match_locale``. The ``locales.match_locale`` function is based on the ``locales.build_engine_locales`` introduced in 9ae409a0 [1]. In the past SearXNG only supported a search by language but not by region. This has been changed a long time ago and regions have been added to SearXNG core but not to the engines. The ``utils.match_language`` was the function to handle the different aspects of language/regions in SearXNG core and the supported *languages* in the engine. The ``utils.match_language`` did it with some magic and works well for most use cases but fails in some edge cases. To replace the concurrence of languages and regions in the SearXNG core the ``locales.build_engine_locales`` was introduced in 9ae409a0 [1]. With the last patches all engines have been migrated to a ``fetch_traits`` and a language/region concept that is based on ``locales.build_engine_locales``. To summarize: there is no longer a need for the ``utils.match_language``.
[1] https://github.com/searxng/searxng/pull/1652 Signed-off-by: Markus Heiser --- searx/locales.py | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 1 deletion(-) (limited to 'searx/locales.py') diff --git a/searx/locales.py b/searx/locales.py index a4560aab7..ffa5e731c 100644 --- a/searx/locales.py +++ b/searx/locales.py @@ -4,7 +4,7 @@ """Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`. """ -from typing import Set +from typing import Set, Optional, List import os import pathlib @@ -177,6 +177,17 @@ def language_tag(locale: babel.Locale) -> str: return sxng_lang +def get_locale(locale_tag: str) -> Optional[babel.Locale]: + """Returns a :py:obj:`babel.Locale` object parsed from argument + ``locale_tag``""" + try: + locale = babel.Locale.parse(locale_tag, sep='-') + return locale + + except babel.core.UnknownLocaleError: + return None + + def get_offical_locales( territory: str, languages=None, regional: bool = False, de_facto: bool = True ) -> Set[babel.Locale]: @@ -363,3 +374,98 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): engine_locale = default return default + + +def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]: + """Return tag from ``locale_tag_list`` that best fits to ``searxng_locale``. + + :param str searxng_locale: SearXNG's internal representation of locale (de, + de-DE, fr-BE, zh, zh-CN, zh-TW ..). + + :param list locale_tag_list: The list of locale tags to select from + + :param str fallback: fallback locale tag (if unset --> ``None``) + + The rules to find a match are implemented in :py:obj:`get_engine_locale`, + the ``engine_locales`` is build up by :py:obj:`build_engine_locales`. + + .. hint:: + + The *SearXNG locale* string and the members of ``locale_tag_list`` has to + be known by babel! The :py:obj:`ADDITIONAL_TRANSLATIONS` are used in the + UI and are not known by babel --> will be ignored. 
+ """ + + # searxng_locale = 'es' + # locale_tag_list = ['es-AR', 'es-ES', 'es-MX'] + + if not searxng_locale: + return fallback + + locale = get_locale(searxng_locale) + if locale is None: + return fallback + + # normalize to a SearXNG locale that can be passed to get_engine_locale + + searxng_locale = language_tag(locale) + if locale.territory: + searxng_locale = region_tag(locale) + + # clean up locale_tag_list + + tag_list = [] + for tag in locale_tag_list: + if tag in ('all', 'auto') or tag in ADDITIONAL_TRANSLATIONS: + continue + tag_list.append(tag) + + # emulate fetch_traits + engine_locales = build_engine_locales(tag_list) + return get_engine_locale(searxng_locale, engine_locales, default=fallback) + + +def build_engine_locales(tag_list: List[str]): + """From a list of locale tags a dictionary is build that can be passed by + argument ``engine_locales`` to :py:obj:`get_engine_locale`. This function + is mainly used by :py:obj:`match_locale` and is similar to what the + ``fetch_traits(..)`` function of engines do. + + If there are territory codes in the ``tag_list`` that have a *script code* + additional keys are added to the returned dictionary. + + .. code:: python + + >>> import locales + >>> engine_locales = locales.build_engine_locales(['en', 'en-US', 'zh', 'zh-CN', 'zh-TW']) + >>> engine_locales + { + 'en': 'en', 'en-US': 'en-US', + 'zh': 'zh', 'zh-CN': 'zh-CN', 'zh_Hans': 'zh-CN', + 'zh-TW': 'zh-TW', 'zh_Hant': 'zh-TW' + } + >>> get_engine_locale('zh-Hans', engine_locales) + 'zh-CN' + + This function is a good example to understand the language/region model + of SearXNG: + + SearXNG only distinguishes between **search languages** and **search + regions**, by adding the *script-tags*, languages with *script-tags* can + be assigned to the **regions** that SearXNG supports. 
+ + """ + engine_locales = {} + + for tag in tag_list: + locale = get_locale(tag) + if locale is None: + logger.warn("build_engine_locales: skip locale tag %s / unknown by babel", tag) + continue + if locale.territory: + engine_locales[region_tag(locale)] = tag + if locale.script: + engine_locales[language_tag(locale)] = tag + else: + engine_locales[language_tag(locale)] = tag + return engine_locales -- cgit v1.2.3