summary | refs | log | tree | commit | diff
path: root/searx/utils.py
diff options
context:
space:
mode:
author: Markus Heiser <markus.heiser@darmarIT.de> 2023-03-29 09:47:21 +0200
committer: GitHub <noreply@github.com> 2023-03-29 09:47:21 +0200
commit: f950119ca87363aec81591dc4985f11371aa2b3e (patch)
tree: ab893ff1f60d8c969ff0f5c2fad0cff49148aa3c /searx/utils.py
parent: 64fea2f9cb079bd0055c6a23360097d285204515 (diff)
parent: 6f9e678346e5978a09ee453a62fa133cdc0ee0bd (diff)
Merge pull request #2269 from return42/locale-revision
Revision of the locale- and language- handling in SearXNG
Diffstat (limited to 'searx/utils.py')
-rw-r--r-- searx/utils.py 98
1 files changed, 5 insertions, 93 deletions
diff --git a/searx/utils.py b/searx/utils.py
index e6180906b..161983011 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -18,13 +18,11 @@ from urllib.parse import urljoin, urlparse
from lxml import html
from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
-from babel.core import get_global
-
from searx import settings
from searx.data import USER_AGENTS, data_dir
from searx.version import VERSION_TAG
-from searx.languages import language_codes
+from searx.sxng_locales import sxng_locales
from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException
from searx import logger
@@ -53,8 +51,8 @@ _LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {}
_FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None
"""fasttext model to predict laguage of a search term"""
-SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in language_codes])
-"""Languages supported by most searxng engines (:py:obj:`searx.languages.language_codes`)."""
+SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in sxng_locales])
+"""Languages supported by most searxng engines (:py:obj:`searx.sxng_locales.sxng_locales`)."""
class _NotSetClass: # pylint: disable=too-few-public-methods
@@ -355,102 +353,16 @@ def is_valid_lang(lang) -> Optional[Tuple[bool, str, str]]:
is_abbr = len(lang) == 2
lang = lang.lower()
if is_abbr:
- for l in language_codes:
+ for l in sxng_locales:
if l[0][:2] == lang:
return (True, l[0][:2], l[3].lower())
return None
- for l in language_codes:
+ for l in sxng_locales:
if l[1].lower() == lang or l[3].lower() == lang:
return (True, l[0][:2], l[3].lower())
return None
-def _get_lang_to_lc_dict(lang_list: List[str]) -> Dict[str, str]:
- key = str(lang_list)
- value = _LANG_TO_LC_CACHE.get(key, None)
- if value is None:
- value = {}
- for lang in lang_list:
- value.setdefault(lang.split('-')[0], lang)
- _LANG_TO_LC_CACHE[key] = value
- return value
-
-
-# babel's get_global contains all sorts of miscellaneous locale and territory related data
-# see get_global in: https://github.com/python-babel/babel/blob/master/babel/core.py
-def _get_from_babel(lang_code: str, key):
- match = get_global(key).get(lang_code.replace('-', '_'))
- # for some keys, such as territory_aliases, match may be a list
- if isinstance(match, str):
- return match.replace('_', '-')
- return match
-
-
-def _match_language(lang_code: str, lang_list=[], custom_aliases={}) -> Optional[str]: # pylint: disable=W0102
- """auxiliary function to match lang_code in lang_list"""
- # replace language code with a custom alias if necessary
- if lang_code in custom_aliases:
- lang_code = custom_aliases[lang_code]
-
- if lang_code in lang_list:
- return lang_code
-
- # try to get the most likely country for this language
- subtags = _get_from_babel(lang_code, 'likely_subtags')
- if subtags:
- if subtags in lang_list:
- return subtags
- subtag_parts = subtags.split('-')
- new_code = subtag_parts[0] + '-' + subtag_parts[-1]
- if new_code in custom_aliases:
- new_code = custom_aliases[new_code]
- if new_code in lang_list:
- return new_code
-
- # try to get the any supported country for this language
- return _get_lang_to_lc_dict(lang_list).get(lang_code)
-
-
-def match_language( # pylint: disable=W0102
- locale_code, lang_list=[], custom_aliases={}, fallback: Optional[str] = 'en-US'
-) -> Optional[str]:
- """get the language code from lang_list that best matches locale_code"""
- # try to get language from given locale_code
- language = _match_language(locale_code, lang_list, custom_aliases)
- if language:
- return language
-
- locale_parts = locale_code.split('-')
- lang_code = locale_parts[0]
-
- # if locale_code has script, try matching without it
- if len(locale_parts) > 2:
- language = _match_language(lang_code + '-' + locale_parts[-1], lang_list, custom_aliases)
- if language:
- return language
-
- # try to get language using an equivalent country code
- if len(locale_parts) > 1:
- country_alias = _get_from_babel(locale_parts[-1], 'territory_aliases')
- if country_alias:
- language = _match_language(lang_code + '-' + country_alias[0], lang_list, custom_aliases)
- if language:
- return language
-
- # try to get language using an equivalent language code
- alias = _get_from_babel(lang_code, 'language_aliases')
- if alias:
- language = _match_language(alias, lang_list, custom_aliases)
- if language:
- return language
-
- if lang_code != locale_code:
- # try to get language from given language without giving the country
- language = _match_language(lang_code, lang_list, custom_aliases)
-
- return language or fallback
-
-
def load_module(filename: str, module_dir: str) -> types.ModuleType:
modname = splitext(filename)[0]
modpath = join(module_dir, filename)