From 6e5f22e5583cfc2a413e0afac66d3c5ea9f628b1 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Thu, 29 Sep 2022 20:54:46 +0200 Subject: [mod] replace engines_languages.json by engines_traits.json Implementations of the *traits* of the engines. Engine's traits are fetched from the origin engine and stored in a JSON file in the *data folder*. Most often traits are languages and region codes and their mapping from SearXNG's representation to the representation in the origin search engine. To load traits from the persistence:: searx.enginelib.traits.EngineTraitsMap.from_data() For new traits new properties can be added to the class:: searx.enginelib.traits.EngineTraits .. hint:: Implementation is downward compatible to the deprecated *supported_languages method* from the vintage implementation. The vintage code is tagged as *deprecated* an can be removed when all engines has been ported to the *traits method*. Signed-off-by: Markus Heiser --- searx/webapp.py | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) (limited to 'searx/webapp.py') diff --git a/searx/webapp.py b/searx/webapp.py index 95c33f704..e62badaf7 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -907,16 +907,11 @@ def autocompleter(): # and there is a query part if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0: - # get language from cookie - language = request.preferences.get_value('language') - if not language or language == 'all': - language = 'en' - else: - language = language.split('-')[0] + # get SearXNG's locale and autocomplete backend from cookie + sxng_locale = request.preferences.get_value('language') + backend_name = request.preferences.get_value('autocomplete') - # run autocompletion - raw_results = search_autocomplete(request.preferences.get_value('autocomplete'), sug_prefix, language) - for result in raw_results: + for result in search_autocomplete(backend_name, sug_prefix, sxng_locale): # attention: this loop will change raw_text_query object and this is # the reason why the sug_prefix was stored before (see above) if result != sug_prefix: @@ -1001,7 +996,9 @@ def preferences(): 'rate80': rate80, 'rate95': rate95, 'warn_timeout': e.timeout > settings['outgoing']['request_timeout'], - 'supports_selected_language': _is_selected_language_supported(e, request.preferences), + 'supports_selected_language': e.traits.is_locale_supported( + str(request.preferences.get_value('language') or 'all') + ), 'result_count': result_count, } # end of stats @@ -1052,7 +1049,9 @@ def preferences(): # supports supports = {} for _, e in filtered_engines.items(): - supports_selected_language = _is_selected_language_supported(e, request.preferences) + supports_selected_language = e.traits.is_locale_supported( + str(request.preferences.get_value('language') or 'all') + ) safesearch = e.safesearch time_range_support = e.time_range_support for checker_test_name in checker_results.get(e.name, {}).get('errors', {}): @@ -1099,16 +1098,6 @@ def preferences(): ) -def _is_selected_language_supported(engine, preferences: Preferences): # pylint: disable=redefined-outer-name - language = preferences.get_value('language') - if language == 'all': - return True - x = match_language( - language, getattr(engine, 'supported_languages', []), getattr(engine, 'language_aliases', {}), None - ) - return bool(x) - - @app.route('/image_proxy', methods=['GET']) def image_proxy(): # pylint: disable=too-many-return-statements, too-many-branches @@ -1327,9 +1316,11 @@ def config(): if not request.preferences.validate_token(engine): continue - supported_languages = engine.supported_languages - if isinstance(engine.supported_languages, dict): - supported_languages = list(engine.supported_languages.keys()) + _languages = engine.traits.languages.keys() + if engine.traits.data_type == 'supported_languages': # vintage / deprecated + _languages = engine.traits.supported_languages + if isinstance(_languages, dict): + _languages = _languages.keys() _engines.append( { @@ -1339,7 +1330,8 @@ def config(): 'enabled': not engine.disabled, 'paging': engine.paging, 'language_support': engine.language_support, - 'supported_languages': supported_languages, + 'languages': list(_languages), + 'regions': list(engine.traits.regions.keys()), 'safesearch': engine.safesearch, 'time_range_support': engine.time_range_support, 'timeout': engine.timeout, -- cgit v1.2.3 From c9cd376186d12d2d281e655d0b5539d1359fe148 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Mon, 10 Oct 2022 19:31:22 +0200 Subject: [mod] replace searx.languages by searx.sxng_locales With the language and region tags from the EngineTraitsMap the handling of SearXNG's tags of languages and regions has been normalized and is no longer a *mystery*. The "languages" became "locales" that are supported by babel and by this, the update_engine_traits.py can be simplified a lot. Other code places can be simplified as well, but these simplifications should (respectively can) only be done when none of the engines work with the deprecated EngineTraits.supported_languages interface anymore. This commit replaces searx.languages by searx.sxng_locales and fix the naming of some names from "language" to "locale" (e.g. language_codes --> sxng_locales). Signed-off-by: Markus Heiser --- searx/webapp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'searx/webapp.py') diff --git a/searx/webapp.py b/searx/webapp.py index e62badaf7..fdd119128 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -121,8 +121,8 @@ from searx.locales import ( # renaming names from searx imports ... from searx.autocomplete import search_autocomplete, backends as autocomplete_backends -from searx.languages import language_codes as languages from searx.redisdb import initialize as redis_initialize +from searx.sxng_locales import sxng_locales from searx.search import SearchWithPlugins, initialize as search_initialize from searx.network import stream as http_stream, set_context_network_name from searx.search.checker import get_result as checker_get_result @@ -438,7 +438,7 @@ def render(template_name: str, **kwargs): kwargs['OTHER_CATEGORY'] = OTHER_CATEGORY # i18n - kwargs['language_codes'] = [l for l in languages if l[0] in settings['search']['languages']] + kwargs['sxng_locales'] = [l for l in sxng_locales if l[0] in settings['search']['languages']] locale = request.preferences.get_value('locale') kwargs['locale_rfc5646'] = _get_locale_rfc5646(locale) -- cgit v1.2.3 From 4d4aa13e1f1d254e5d57c67973a7809d9c1e21f9 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 30 Dec 2022 18:28:02 +0100 Subject: [mod] remove obsolete EngineTraits.supported_languages All engines has been migrated from ``supported_languages`` to the ``fetch_traits`` concept. There is no longer a need for the obsolete code that implements the ``supported_languages`` concept. Signed-off-by: Markus Heiser --- searx/webapp.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'searx/webapp.py') diff --git a/searx/webapp.py b/searx/webapp.py index fdd119128..bc2a50784 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -1317,11 +1317,6 @@ def config(): continue _languages = engine.traits.languages.keys() - if engine.traits.data_type == 'supported_languages': # vintage / deprecated - _languages = engine.traits.supported_languages - if isinstance(_languages, dict): - _languages = _languages.keys() - _engines.append( { 'name': name, -- cgit v1.2.3 From 16f0db44939c23d2980d6fd2e5dfada13d8f5ee9 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 7 Feb 2023 14:11:58 +0100 Subject: [mod] replace utils.match_language by locales.match_locale This patch replaces the *full of magic* ``utils.match_language`` function by a ``locales.match_locale``. The ``locales.match_locale`` function is based on the ``locales.build_engine_locales`` introduced in 9ae409a0 [1]. In the past SearXNG did only support a search by a language but not in a region. This has been changed a long time ago and regions have been added to SearXNG core but not to the engines. The ``utils.match_language`` was the function to handle the different aspects of language/regions in SearXNG core and the supported *languages* in the engine. The ``utils.match_language`` did it with some magic and works good for most use cases but fails in some edge case. To replace the concurrence of languages and regions in the SearXNG core the ``locales.build_engine_locales`` was introduced in 9ae409a0 [1]. With the last patches all engines has been migrated to a ``fetch_traits`` and a language/region concept that is based on ``locales.build_engine_locales``. To summarize: there is no longer a need for the ``locales.match_language``. [1] https://github.com/searxng/searxng/pull/1652 Signed-off-by: Markus Heiser --- searx/webapp.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'searx/webapp.py') diff --git a/searx/webapp.py b/searx/webapp.py index bc2a50784..4ed6c2eb7 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -89,7 +89,6 @@ from searx.utils import ( html_to_text, gen_useragent, dict_subset, - match_language, ) from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH from searx.query import RawTextQuery @@ -117,6 +116,7 @@ from searx.locales import ( RTL_LOCALES, localeselector, locales_initialize, + match_locale, ) # renaming names from searx imports ... @@ -227,7 +227,7 @@ def _get_browser_language(req, lang_list): if '-' in lang: lang_parts = lang.split('-') lang = "{}-{}".format(lang_parts[0], lang_parts[-1].upper()) - locale = match_language(lang, lang_list, fallback=None) + locale = match_locale(lang, lang_list, fallback=None) if locale is not None: return locale return 'en' @@ -407,7 +407,7 @@ def get_client_settings(): def render(template_name: str, **kwargs): - + # pylint: disable=too-many-statements kwargs['client_settings'] = str( base64.b64encode( bytes( @@ -445,10 +445,13 @@ def render(template_name: str, **kwargs): if locale in RTL_LOCALES and 'rtl' not in kwargs: kwargs['rtl'] = True + if 'current_language' not in kwargs: - kwargs['current_language'] = match_language( - request.preferences.get_value('language'), settings['search']['languages'] - ) + _locale = request.preferences.get_value('language') + if _locale in ('auto', 'all'): + kwargs['current_language'] = _locale + else: + kwargs['current_language'] = match_locale(_locale, settings['search']['languages']) # values from settings kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html'] @@ -810,6 +813,13 @@ def search(): ) ) + if search_query.lang in ('auto', 'all'): + current_language = search_query.lang + else: + current_language = match_locale( + search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language") + ) + # search_query.lang contains the user choice (all, auto, en, ...) # when the user choice is "auto", search.search_query.lang contains the detected language # otherwise it is equals to search_query.lang @@ -832,12 +842,8 @@ def search(): result_container.unresponsive_engines ), current_locale = request.preferences.get_value("locale"), - current_language = match_language( - search_query.lang, - settings['search']['languages'], - fallback=request.preferences.get_value("language") - ), - search_language = match_language( + current_language = current_language, + search_language = match_locale( search.search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language") -- cgit v1.2.3