summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
authorAlexandre Flament <alex@al-f.net>2024-02-16 20:46:18 +0000
committerMarkus Heiser <markus.heiser@darmarIT.de>2024-02-20 10:43:20 +0100
commited66ed758dbd1c926296aa227da8a82fff9166e1 (patch)
tree1e7f818ee5dab0d7c09bad09d3586f0f87231584 /searx
parent76845ea42c2c3484e30c118f07671b13ade07a29 (diff)
[mod] reduce memory footprint by not calling babel.Locale.parse at runtime
babel.Locale.parse loads more than 60MB in RAM. The only purpose is to get: LOCALE_NAMES - searx.data.LOCALES["LOCALE_NAMES"] RTL_LOCALES - searx.data.LOCALES["RTL_LOCALES"] This commit calls babel.Locale.parse when the translations are update from weblate and stored in:: searx/data/locales.json This file can be build by:: ./manage data.locales By store these variables in searx.data when the translations are updated we save round about 65MB (usually 4 worker = 260MB of RAM saved. Suggested-by: https://github.com/searxng/searxng/discussions/2633#discussioncomment-8490494 Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx')
-rw-r--r--searx/data/__init__.py2
-rw-r--r--searx/data/locales.json69
-rw-r--r--searx/locales.py125
-rw-r--r--searx/sxng_locales.py6
4 files changed, 133 insertions, 69 deletions
diff --git a/searx/data/__init__.py b/searx/data/__init__.py
index 0822f4ac8..c79d1042f 100644
--- a/searx/data/__init__.py
+++ b/searx/data/__init__.py
@@ -15,6 +15,7 @@ __all__ = [
'EXTERNAL_BANGS',
'OSM_KEYS_TAGS',
'ENGINE_DESCRIPTIONS',
+ 'LOCALES',
'ahmia_blacklist_loader',
]
@@ -50,3 +51,4 @@ EXTERNAL_BANGS = _load('external_bangs.json')
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
ENGINE_TRAITS = _load('engine_traits.json')
+LOCALES = _load('locales.json')
diff --git a/searx/data/locales.json b/searx/data/locales.json
new file mode 100644
index 000000000..cb45b1601
--- /dev/null
+++ b/searx/data/locales.json
@@ -0,0 +1,69 @@
+{
+ "LOCALE_NAMES": {
+ "af": "Afrikaans",
+ "ar": "العربية (Arabic)",
+ "bg": "Български (Bulgarian)",
+ "bn": "বাংলা (Bangla)",
+ "bo": "བོད་སྐད་ (Tibetan)",
+ "ca": "Català (Catalan)",
+ "cs": "Čeština (Czech)",
+ "cy": "Cymraeg (Welsh)",
+ "da": "Dansk (Danish)",
+ "de": "Deutsch (German)",
+ "dv": "ދިވެހި (Dhivehi)",
+ "el-GR": "Ελληνικά, Ελλάδα (Greek, Greece)",
+ "en": "English",
+ "eo": "Esperanto",
+ "es": "Español (Spanish)",
+ "et": "Eesti (Estonian)",
+ "eu": "Euskara (Basque)",
+ "fa-IR": "فارسی, ایران (Persian, Iran)",
+ "fi": "Suomi (Finnish)",
+ "fil": "Filipino",
+ "fr": "Français (French)",
+ "gl": "Galego (Galician)",
+ "he": "עברית (Hebrew)",
+ "hr": "Hrvatski (Croatian)",
+ "hu": "Magyar (Hungarian)",
+ "ia": "Interlingua",
+ "id": "Indonesia (Indonesian)",
+ "it": "Italiano (Italian)",
+ "ja": "日本語 (Japanese)",
+ "ko": "한국어 (Korean)",
+ "lt": "Lietuvių (Lithuanian)",
+ "lv": "Latviešu (Latvian)",
+ "ml": "മലയാളം (Malayalam)",
+ "ms": "Melayu (Malay)",
+ "nb-NO": "Norsk bokmål, Norge (Norwegian bokmål, Norway)",
+ "nl": "Nederlands (Dutch)",
+ "nl-BE": "Nederlands, België (Dutch, Belgium)",
+ "oc": "Occitan",
+ "pa": "ਪੰਜਾਬੀ (Punjabi)",
+ "pap": "Papiamento",
+ "pl": "Polski (Polish)",
+ "pt": "Português (Portuguese)",
+ "pt-BR": "Português, Brasil (Portuguese, Brazil)",
+ "ro": "Română (Romanian)",
+ "ru": "Русский (Russian)",
+ "si": "සිංහල (Sinhala)",
+ "sk": "Slovenčina (Slovak)",
+ "sl": "Slovenščina (Slovenian)",
+ "sr": "Српски (Serbian)",
+ "sv": "Svenska (Swedish)",
+ "szl": "Ślōnski (Silesian)",
+ "ta": "தமிழ் (Tamil)",
+ "te": "తెలుగు (Telugu)",
+ "th": "ไทย (Thai)",
+ "tr": "Türkçe (Turkish)",
+ "uk": "Українська (Ukrainian)",
+ "vi": "Tiếng việt (Vietnamese)",
+ "zh-HK": "中文, 中國香港特別行政區 (Chinese, Hong Kong SAR China)",
+ "zh-Hans-CN": "中文, 中国 (Chinese, China)",
+ "zh-Hant-TW": "中文, 台灣 (Chinese, Taiwan)"
+ },
+ "RTL_LOCALES": [
+ "fa-IR",
+ "ar",
+ "he"
+ ]
+} \ No newline at end of file
diff --git a/searx/locales.py b/searx/locales.py
index 655f365ab..c2fa030b1 100644
--- a/searx/locales.py
+++ b/searx/locales.py
@@ -1,12 +1,36 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
"""
+SearXNG’s locale data
+=====================
-from typing import Set, Optional, List
-import os
-import pathlib
+The variables :py:obj:`RTL_LOCALES` and :py:obj:`LOCALE_NAMES` are loaded from
+:origin:`searx/data/locales.json` / see :py:obj:`locales_initialize` and
+:ref:`update_locales.py`.
+
+.. hint::
+
+ Whenever the value of :py:obj:`ADDITIONAL_TRANSLATIONS` or
+ :py:obj:`LOCALE_BEST_MATCH` is modified, the
+ :origin:`searx/data/locales.json` needs to be rebuild::
+
+ ./manage data.locales
+
+SearXNG's locale codes
+======================
+
+.. automodule:: searx.sxng_locales
+ :members:
+
+
+SearXNG’s locale implementations
+================================
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
import babel
from babel.support import Translations
@@ -15,7 +39,11 @@ import babel.core
import flask_babel
import flask
from flask.ctx import has_request_context
-from searx import logger
+from searx import (
+ data,
+ logger,
+ searx_dir,
+)
logger = logger.getChild('locales')
@@ -30,7 +58,7 @@ LOCALE_NAMES = {}
:meta hide-value:
"""
-RTL_LOCALES: Set[str] = set()
+RTL_LOCALES: set[str] = set()
"""List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (see
:py:obj:`locales_initialize`)."""
@@ -52,7 +80,7 @@ LOCALE_BEST_MATCH = {
"pap": "pt-BR",
}
"""Map a locale we do not have a translations for to a locale we have a
-translation for. By example: use Taiwan version of the translation for Hong
+translation for. By example: use Taiwan version of the translation for Hong
Kong."""
@@ -90,74 +118,37 @@ def get_translations():
return _flask_babel_get_translations()
-def get_locale_descr(locale, locale_name):
- """Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR'
+_TR_LOCALES: list[str] = []
- :param locale: instance of :py:class:`Locale`
- :param locale_name: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*)
- """
-
- native_language, native_territory = _get_locale_descr(locale, locale_name)
- english_language, english_territory = _get_locale_descr(locale, 'en')
-
- if native_territory == english_territory:
- english_territory = None
- if not native_territory and not english_territory:
- if native_language == english_language:
- return native_language
- return native_language + ' (' + english_language + ')'
+def get_translation_locales() -> list[str]:
+ """Returns the list of transaltion locales (*underscore*). The list is
+ generated from the translation folders in :origin:`searx/translations`"""
- result = native_language + ', ' + native_territory + ' (' + english_language
- if english_territory:
- return result + ', ' + english_territory + ')'
- return result + ')'
+ global _TR_LOCALES # pylint:disable=global-statement
+ if _TR_LOCALES:
+ return _TR_LOCALES
-
-def _get_locale_descr(locale, language_code):
- language_name = locale.get_language_name(language_code).capitalize()
- if language_name and ('a' <= language_name[0] <= 'z'):
- language_name = language_name.capitalize()
- territory_name = locale.get_territory_name(language_code)
- return language_name, territory_name
+ tr_locales = []
+ for folder in (Path(searx_dir) / 'translations').iterdir():
+ if not folder.is_dir():
+ continue
+ if not (folder / 'LC_MESSAGES').is_dir():
+ continue
+ tr_locales.append(folder.name)
+ _TR_LOCALES = sorted(tr_locales)
+ return _TR_LOCALES
-def locales_initialize(directory=None):
+def locales_initialize():
"""Initialize locales environment of the SearXNG session.
- monkey patch :py:obj:`flask_babel.get_translations` by :py:obj:`get_translations`
- init global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`
"""
-
- directory = directory or pathlib.Path(__file__).parent / 'translations'
- logger.debug("locales_initialize: %s", directory)
flask_babel.get_translations = get_translations
-
- for tag, descr in ADDITIONAL_TRANSLATIONS.items():
- locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
- LOCALE_NAMES[tag] = descr
- if locale.text_direction == 'rtl':
- RTL_LOCALES.add(tag)
-
- for tag in LOCALE_BEST_MATCH:
- descr = LOCALE_NAMES.get(tag)
- if not descr:
- locale = babel.Locale.parse(tag, sep='-')
- LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
- if locale.text_direction == 'rtl':
- RTL_LOCALES.add(tag)
-
- for dirname in sorted(os.listdir(directory)):
- # Based on https://flask-babel.tkte.ch/_modules/flask_babel.html#Babel.list_translations
- if not os.path.isdir(os.path.join(directory, dirname, 'LC_MESSAGES')):
- continue
- tag = dirname.replace('_', '-')
- descr = LOCALE_NAMES.get(tag)
- if not descr:
- locale = babel.Locale.parse(dirname)
- LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
- if locale.text_direction == 'rtl':
- RTL_LOCALES.add(tag)
+ LOCALE_NAMES.update(data.LOCALES["LOCALE_NAMES"])
+ RTL_LOCALES.update(data.LOCALES["RTL_LOCALES"])
def region_tag(locale: babel.Locale) -> str:
@@ -177,7 +168,7 @@ def language_tag(locale: babel.Locale) -> str:
return sxng_lang
-def get_locale(locale_tag: str) -> Optional[babel.Locale]:
+def get_locale(locale_tag: str) -> babel.Locale | None:
"""Returns a :py:obj:`babel.Locale` object parsed from argument
``locale_tag``"""
try:
@@ -190,7 +181,7 @@ def get_locale(locale_tag: str) -> Optional[babel.Locale]:
def get_official_locales(
territory: str, languages=None, regional: bool = False, de_facto: bool = True
-) -> Set[babel.Locale]:
+) -> set[babel.Locale]:
"""Returns a list of :py:obj:`babel.Locale` with languages from
:py:obj:`babel.languages.get_official_languages`.
@@ -376,7 +367,7 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
return default
-def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]:
+def match_locale(searxng_locale: str, locale_tag_list: list[str], fallback: str | None = None) -> str | None:
"""Return tag from ``locale_tag_list`` that best fits to ``searxng_locale``.
:param str searxng_locale: SearXNG's internal representation of locale (de,
@@ -425,7 +416,7 @@ def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Opti
return get_engine_locale(searxng_locale, engine_locales, default=fallback)
-def build_engine_locales(tag_list: List[str]):
+def build_engine_locales(tag_list: list[str]):
"""From a list of locale tags a dictionary is build that can be passed by
argument ``engine_locales`` to :py:obj:`get_engine_locale`. This function
is mainly used by :py:obj:`match_locale` and is similar to what the
diff --git a/searx/sxng_locales.py b/searx/sxng_locales.py
index 1ea673d7c..27f892386 100644
--- a/searx/sxng_locales.py
+++ b/searx/sxng_locales.py
@@ -1,9 +1,11 @@
# -*- coding: utf-8 -*-
'''List of SearXNG's locale codes.
-This file is generated automatically by::
+.. hint::
- ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
+ Don't modify this file, this file is generated by::
+
+ ./manage data.traits
'''
sxng_locales = (