diff options
Diffstat (limited to 'searx')
| -rw-r--r-- | searx/locales.py | 108 | ||||
| -rw-r--r-- | searx/utils.py | 88 | ||||
| -rwxr-xr-x | searx/webapp.py | 30 |
3 files changed, 125 insertions, 101 deletions
diff --git a/searx/locales.py b/searx/locales.py index a4560aab7..ffa5e731c 100644 --- a/searx/locales.py +++ b/searx/locales.py @@ -4,7 +4,7 @@ """Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`. """ -from typing import Set +from typing import Set, Optional, List import os import pathlib @@ -177,6 +177,17 @@ def language_tag(locale: babel.Locale) -> str: return sxng_lang +def get_locale(locale_tag: str) -> Optional[babel.Locale]: + """Returns a :py:obj:`babel.Locale` object parsed from argument + ``locale_tag``""" + try: + locale = babel.Locale.parse(locale_tag, sep='-') + return locale + + except babel.core.UnknownLocaleError: + return None + + def get_offical_locales( territory: str, languages=None, regional: bool = False, de_facto: bool = True ) -> Set[babel.Locale]: @@ -363,3 +374,98 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): engine_locale = default return default + + +def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]: + """Return tag from ``locale_tag_list`` that best fits to ``searxng_locale``. + + :param str searxng_locale: SearXNG's internal representation of locale (de, + de-DE, fr-BE, zh, zh-CN, zh-TW ..). + + :param list locale_tag_list: The list of locale tags to select from + + :param str fallback: fallback locale tag (if unset --> ``None``) + + The rules to find a match are implemented in :py:obj:`get_engine_locale`, + the ``engine_locales`` is build up by :py:obj:`build_engine_locales`. + + .. hint:: + + The *SearXNG locale* string and the members of ``locale_tag_list`` has to + be known by babel! The :py:obj:`ADDITIONAL_TRANSLATIONS` are used in the + UI and are not known by babel --> will be ignored. + """ + + # searxng_locale = 'es' + # locale_tag_list = ['es-AR', 'es-ES', 'es-MX'] + + if not searxng_locale: + return fallback + + locale = get_locale(searxng_locale) + if locale is None: + return fallback + + # normalize to a SearXNG locale that can be passed to get_engine_locale + + searxng_locale = language_tag(locale) + if locale.territory: + searxng_locale = region_tag(locale) + + # clean up locale_tag_list + + tag_list = [] + for tag in locale_tag_list: + if tag in ('all', 'auto') or tag in ADDITIONAL_TRANSLATIONS: + continue + tag_list.append(tag) + + # emulate fetch_traits + engine_locales = build_engine_locales(tag_list) + return get_engine_locale(searxng_locale, engine_locales, default=fallback) + + +def build_engine_locales(tag_list: List[str]): + """From a list of locale tags a dictionary is build that can be passed by + argument ``engine_locales`` to :py:obj:`get_engine_locale`. This function + is mainly used by :py:obj:`match_locale` and is similar to what the + ``fetch_traits(..)`` function of engines do. + + If there are territory codes in the ``tag_list`` that have a *script code* + additional keys are added to the returned dictionary. + + .. code:: python + + >>> import locales + >>> engine_locales = locales.build_engine_locales(['en', 'en-US', 'zh', 'zh-CN', 'zh-TW']) + >>> engine_locales + { + 'en': 'en', 'en-US': 'en-US', + 'zh': 'zh', 'zh-CN': 'zh-CN', 'zh_Hans': 'zh-CN', + 'zh-TW': 'zh-TW', 'zh_Hant': 'zh-TW' + } + >>> get_engine_locale('zh-Hans', engine_locales) + 'zh-CN' + + This function is a good example to understand the language/region model + of SearXNG: + + SearXNG only distinguishes between **search languages** and **search + regions**, by adding the *script-tags*, languages with *script-tags* can + be assigned to the **regions** that SearXNG supports. + + """ + engine_locales = {} + + for tag in tag_list: + locale = get_locale(tag) + if locale is None: + logger.warn("build_engine_locales: skip locale tag %s / unknown by babel", tag) + continue + if locale.territory: + engine_locales[region_tag(locale)] = tag + if locale.script: + engine_locales[language_tag(locale)] = tag + else: + engine_locales[language_tag(locale)] = tag + return engine_locales diff --git a/searx/utils.py b/searx/utils.py index f7a71b649..161983011 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -18,8 +18,6 @@ from urllib.parse import urljoin, urlparse from lxml import html from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult -from babel.core import get_global - from searx import settings from searx.data import USER_AGENTS, data_dir @@ -365,92 +363,6 @@ def is_valid_lang(lang) -> Optional[Tuple[bool, str, str]]: return None -def _get_lang_to_lc_dict(lang_list: List[str]) -> Dict[str, str]: - key = str(lang_list) - value = _LANG_TO_LC_CACHE.get(key, None) - if value is None: - value = {} - for lang in lang_list: - value.setdefault(lang.split('-')[0], lang) - _LANG_TO_LC_CACHE[key] = value - return value - - -# babel's get_global contains all sorts of miscellaneous locale and territory related data -# see get_global in: https://github.com/python-babel/babel/blob/master/babel/core.py -def _get_from_babel(lang_code: str, key): - match = get_global(key).get(lang_code.replace('-', '_')) - # for some keys, such as territory_aliases, match may be a list - if isinstance(match, str): - return match.replace('_', '-') - return match - - -def _match_language(lang_code: str, lang_list=[], custom_aliases={}) -> Optional[str]: # pylint: disable=W0102 - """auxiliary function to match lang_code in lang_list""" - # replace language code with a custom alias if necessary - if lang_code in custom_aliases: - lang_code = custom_aliases[lang_code] - - if lang_code in lang_list: - return lang_code - - # try to get the most likely country for this language - subtags = _get_from_babel(lang_code, 'likely_subtags') - if subtags: - if subtags in lang_list: - return subtags - subtag_parts = subtags.split('-') - new_code = subtag_parts[0] + '-' + subtag_parts[-1] - if new_code in custom_aliases: - new_code = custom_aliases[new_code] - if new_code in lang_list: - return new_code - - # try to get the any supported country for this language - return _get_lang_to_lc_dict(lang_list).get(lang_code) - - -def match_language( # pylint: disable=W0102 - locale_code, lang_list=[], custom_aliases={}, fallback: Optional[str] = 'en-US' -) -> Optional[str]: - """get the language code from lang_list that best matches locale_code""" - # try to get language from given locale_code - language = _match_language(locale_code, lang_list, custom_aliases) - if language: - return language - - locale_parts = locale_code.split('-') - lang_code = locale_parts[0] - - # if locale_code has script, try matching without it - if len(locale_parts) > 2: - language = _match_language(lang_code + '-' + locale_parts[-1], lang_list, custom_aliases) - if language: - return language - - # try to get language using an equivalent country code - if len(locale_parts) > 1: - country_alias = _get_from_babel(locale_parts[-1], 'territory_aliases') - if country_alias: - language = _match_language(lang_code + '-' + country_alias[0], lang_list, custom_aliases) - if language: - return language - - # try to get language using an equivalent language code - alias = _get_from_babel(lang_code, 'language_aliases') - if alias: - language = _match_language(alias, lang_list, custom_aliases) - if language: - return language - - if lang_code != locale_code: - # try to get language from given language without giving the country - language = _match_language(lang_code, lang_list, custom_aliases) - - return language or fallback - - def load_module(filename: str, module_dir: str) -> types.ModuleType: modname = splitext(filename)[0] modpath = join(module_dir, filename) diff --git a/searx/webapp.py b/searx/webapp.py index bc2a50784..4ed6c2eb7 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -89,7 +89,6 @@ from searx.utils import ( html_to_text, gen_useragent, dict_subset, - match_language, ) from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH from searx.query import RawTextQuery @@ -117,6 +116,7 @@ from searx.locales import ( RTL_LOCALES, localeselector, locales_initialize, + match_locale, ) # renaming names from searx imports ... @@ -227,7 +227,7 @@ def _get_browser_language(req, lang_list): if '-' in lang: lang_parts = lang.split('-') lang = "{}-{}".format(lang_parts[0], lang_parts[-1].upper()) - locale = match_language(lang, lang_list, fallback=None) + locale = match_locale(lang, lang_list, fallback=None) if locale is not None: return locale return 'en' @@ -407,7 +407,7 @@ def get_client_settings(): def render(template_name: str, **kwargs): - + # pylint: disable=too-many-statements kwargs['client_settings'] = str( base64.b64encode( bytes( @@ -445,10 +445,13 @@ def render(template_name: str, **kwargs): if locale in RTL_LOCALES and 'rtl' not in kwargs: kwargs['rtl'] = True + if 'current_language' not in kwargs: - kwargs['current_language'] = match_language( - request.preferences.get_value('language'), settings['search']['languages'] - ) + _locale = request.preferences.get_value('language') + if _locale in ('auto', 'all'): + kwargs['current_language'] = _locale + else: + kwargs['current_language'] = match_locale(_locale, settings['search']['languages']) # values from settings kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html'] @@ -810,6 +813,13 @@ def search(): ) ) + if search_query.lang in ('auto', 'all'): + current_language = search_query.lang + else: + current_language = match_locale( + search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language") + ) + # search_query.lang contains the user choice (all, auto, en, ...) # when the user choice is "auto", search.search_query.lang contains the detected language # otherwise it is equals to search_query.lang @@ -832,12 +842,8 @@ def search(): result_container.unresponsive_engines ), current_locale = request.preferences.get_value("locale"), - current_language = match_language( - search_query.lang, - settings['search']['languages'], - fallback=request.preferences.get_value("language") - ), - search_language = match_language( + current_language = current_language, + search_language = match_locale( search.search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language") |