summary | refs | log | tree | commit | diff
path: root/searx/locales.py
diff options
context:
space:
mode:
author Markus Heiser <markus.heiser@darmarIT.de> 2023-03-29 09:47:21 +0200
committer GitHub <noreply@github.com> 2023-03-29 09:47:21 +0200
commit f950119ca87363aec81591dc4985f11371aa2b3e (patch)
tree ab893ff1f60d8c969ff0f5c2fad0cff49148aa3c /searx/locales.py
parent 64fea2f9cb079bd0055c6a23360097d285204515 (diff)
parent 6f9e678346e5978a09ee453a62fa133cdc0ee0bd (diff)
Merge pull request #2269 from return42/locale-revision
Revision of the locale- and language- handling in SearXNG
Diffstat (limited to 'searx/locales.py')
-rw-r--r-- searx/locales.py 190
1 files changed, 178 insertions, 12 deletions
diff --git a/searx/locales.py b/searx/locales.py
index 9e06bf39d..ffa5e731c 100644
--- a/searx/locales.py
+++ b/searx/locales.py
@@ -4,11 +4,11 @@
"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
"""
-from typing import Set
+from typing import Set, Optional, List
import os
import pathlib
-from babel import Locale
+import babel
from babel.support import Translations
import babel.languages
import babel.core
@@ -134,7 +134,7 @@ def locales_initialize(directory=None):
flask_babel.get_translations = get_translations
for tag, descr in ADDITIONAL_TRANSLATIONS.items():
- locale = Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
+ locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
LOCALE_NAMES[tag] = descr
if locale.text_direction == 'rtl':
RTL_LOCALES.add(tag)
@@ -142,7 +142,7 @@ def locales_initialize(directory=None):
for tag in LOCALE_BEST_MATCH:
descr = LOCALE_NAMES.get(tag)
if not descr:
- locale = Locale.parse(tag, sep='-')
+ locale = babel.Locale.parse(tag, sep='-')
LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
if locale.text_direction == 'rtl':
RTL_LOCALES.add(tag)
@@ -154,12 +154,77 @@ def locales_initialize(directory=None):
tag = dirname.replace('_', '-')
descr = LOCALE_NAMES.get(tag)
if not descr:
- locale = Locale.parse(dirname)
+ locale = babel.Locale.parse(dirname)
LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
if locale.text_direction == 'rtl':
RTL_LOCALES.add(tag)
+def region_tag(locale: babel.Locale) -> str:
+    """Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US).
+
+    :param locale: The locale the tag is built from (``<language>-<territory>``).
+
+    :raises ValueError: If the locale has no territory.
+    """
+    if not locale.territory:
+        # name the offending locale in the message
+        raise ValueError('%s missed a territory' % (locale,))
+    return locale.language + '-' + locale.territory
+
+
+def language_tag(locale: babel.Locale) -> str:
+    """Returns SearXNG's language tag from the locale.  If the locale has a
+    script, the script name is appended to the language, separated by an
+    underscore (e.g. en, zh_Hant).
+    """
+    if not locale.script:
+        return locale.language
+    return locale.language + '_' + locale.script
+
+
+def get_locale(locale_tag: str) -> Optional[babel.Locale]:
+    """Parses ``locale_tag`` (subtags separated by ``-``) into a
+    :py:obj:`babel.Locale` object.  ``None`` is returned if babel raises
+    :py:obj:`babel.core.UnknownLocaleError` for the tag."""
+    try:
+        parsed = babel.Locale.parse(locale_tag, sep='-')
+    except babel.core.UnknownLocaleError:
+        parsed = None
+    return parsed
+
+
+def get_offical_locales(
+    territory: str, languages=None, regional: bool = False, de_facto: bool = True
+) -> Set[babel.Locale]:
+    """Returns a set of :py:obj:`babel.Locale` objects, one for each language
+    from :py:obj:`babel.languages.get_official_languages` that babel can
+    combine with the ``territory`` into a known locale.
+
+    :param territory: The territory (country or region) code.
+
+    :param languages: Language codes used to filter (case-insensitive) the
+      result of :py:obj:`babel.languages.get_official_languages`
+      (intersection).  If this argument is ``None`` or empty, all official
+      languages of the territory are used.
+
+    :param regional: Passed to babel; if set, languages which are regionally
+      official are also returned.
+
+    :param de_facto: Passed to babel; if set to `False`, languages which are
+      “de facto” official are not returned.
+
+    """
+    official = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto)
+
+    if languages:
+        wanted = [code.lower() for code in languages]
+        official = {code for code in official if code.lower() in wanted}
+
+    locales = set()
+    for code in official:
+        try:
+            locales.add(babel.Locale.parse(code + '_' + territory))
+        except babel.UnknownLocaleError:
+            # babel has no locale data for this language/territory pair
+            pass
+
+    return locales
+
+
def get_engine_locale(searxng_locale, engine_locales, default=None):
"""Return engine's language (aka locale) string that best fits to argument
``searxng_locale``.
@@ -177,6 +242,10 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
...
'pl-PL' : 'pl_PL',
'pt-PT' : 'pt_PT'
+ ..
+ 'zh' : 'zh'
+ 'zh_Hans' : 'zh'
+ 'zh_Hant' : 'zh-classical'
}
.. hint::
@@ -210,13 +279,13 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
engine.
"""
- # pylint: disable=too-many-branches
+ # pylint: disable=too-many-branches, too-many-return-statements
engine_locale = engine_locales.get(searxng_locale)
if engine_locale is not None:
- # There was a 1:1 mapping (e.g. "fr-BE --> fr_BE" or "fr --> fr_FR"), no
- # need to narrow language nor territory.
+ # There was a 1:1 mapping (e.g. a region "fr-BE --> fr_BE" or a language
+ # "zh --> zh"), no need to narrow language-script nor territory.
return engine_locale
try:
@@ -227,6 +296,12 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
except babel.core.UnknownLocaleError:
return default
+ searxng_lang = language_tag(locale)
+ engine_locale = engine_locales.get(searxng_lang)
+ if engine_locale is not None:
+ # There was a 1:1 mapping (e.g. "zh-HK --> zh_Hant" or "zh-CN --> zh_Hans")
+ return engine_locale
+
# SearXNG's selected locale is not supported by the engine ..
if locale.territory:
@@ -247,10 +322,6 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
if locale.language:
- searxng_lang = locale.language
- if locale.script:
- searxng_lang += '_' + locale.script
-
terr_lang_dict = {}
for territory, langs in babel.core.get_global("territory_languages").items():
if not langs.get(searxng_lang, {}).get('official_status'):
@@ -303,3 +374,98 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
engine_locale = default
return default
+
+
+def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]:
+    """Select the tag from ``locale_tag_list`` that fits best to ``searxng_locale``.
+
+    :param str searxng_locale: SearXNG's internal representation of a locale
+      (de, de-DE, fr-BE, zh, zh-CN, zh-TW ..).
+
+    :param list locale_tag_list: The locale tags to choose from.
+
+    :param str fallback: Fallback locale tag (default ``None``).  It is
+      returned when ``searxng_locale`` is empty or unknown to babel, and it is
+      passed as ``default`` to :py:obj:`get_engine_locale`.
+
+    The matching itself is delegated to :py:obj:`get_engine_locale`; the
+    ``engine_locales`` it needs are built by :py:obj:`build_engine_locales`.
+
+    .. hint::
+
+       The *SearXNG locale* string and the members of ``locale_tag_list`` have
+       to be known by babel!  The :py:obj:`ADDITIONAL_TRANSLATIONS` are used in
+       the UI and are not known by babel --> they are ignored.
+    """
+
+    # e.g. searxng_locale = 'es' and locale_tag_list = ['es-AR', 'es-ES', 'es-MX']
+
+    if not searxng_locale:
+        return fallback
+
+    parsed = get_locale(searxng_locale)
+    if parsed is None:
+        return fallback
+
+    # normalize to a SearXNG locale that can be passed to get_engine_locale:
+    # the region tag if there is a territory, the language tag otherwise
+    normalized = region_tag(parsed) if parsed.territory else language_tag(parsed)
+
+    # clean up locale_tag_list: drop pseudo tags and UI-only translations
+    candidates = [
+        tag for tag in locale_tag_list if tag not in ('all', 'auto') and tag not in ADDITIONAL_TRANSLATIONS
+    ]
+
+    # emulate fetch_traits
+    return get_engine_locale(normalized, build_engine_locales(candidates), default=fallback)
+
+
+def build_engine_locales(tag_list: List[str]):
+    """From a list of locale tags a dictionary is built that can be passed by
+    argument ``engine_locales`` to :py:obj:`get_engine_locale`.  This function
+    is mainly used by :py:obj:`match_locale` and is similar to what the
+    ``fetch_traits(..)`` function of engines do.
+
+    If there are territory codes in the ``tag_list`` that have a *script code*
+    additional keys are added to the returned dictionary.
+
+    .. code:: python
+
+       >>> import locales
+       >>> engine_locales = locales.build_engine_locales(['en', 'en-US', 'zh', 'zh-CN', 'zh-TW'])
+       >>> engine_locales
+       {
+           'en': 'en', 'en-US': 'en-US',
+           'zh': 'zh', 'zh-CN': 'zh-CN', 'zh_Hans': 'zh-CN',
+           'zh-TW': 'zh-TW', 'zh_Hant': 'zh-TW'
+       }
+       >>> get_engine_locale('zh-Hans', engine_locales)
+       'zh-CN'
+
+    This function is a good example to understand the language/region model
+    of SearXNG:
+
+      SearXNG only distinguishes between **search languages** and **search
+      regions**, by adding the *script-tags*, languages with *script-tags* can
+      be assigned to the **regions** that SearXNG supports.
+
+    :param tag_list: Locale tags; tags for which :py:obj:`get_locale` returns
+      ``None`` are skipped with a warning in the log.
+
+    :return: Dictionary that maps SearXNG region tags and language tags to the
+      tag from ``tag_list`` they were derived from.
+    """
+    engine_locales = {}
+
+    for tag in tag_list:
+        locale = get_locale(tag)
+        if locale is None:
+            # Logger.warn is a deprecated alias, use Logger.warning
+            logger.warning("build_engine_locales: skip locale tag %s / unknown by babel", tag)
+            continue
+        if locale.territory:
+            engine_locales[region_tag(locale)] = tag
+            # a region adds a language key only if it carries a script (e.g.
+            # zh_Hans); a plain language key like 'en' is not touched by 'en-US'
+            if locale.script:
+                engine_locales[language_tag(locale)] = tag
+        else:
+            engine_locales[language_tag(locale)] = tag
+    return engine_locales