From 2841abaf5571328fd4b4d433f9854aa6908b58cd Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Wed, 16 Mar 2022 18:07:00 +0100 Subject: [mod] add flags to the languages filter Signed-off-by: Markus Heiser --- searxng_extra/update/update_languages.py | 80 ++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 5 deletions(-) (limited to 'searxng_extra') diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py index 754180c47..ebe9c884d 100755 --- a/searxng_extra/update/update_languages.py +++ b/searxng_extra/update/update_languages.py @@ -12,12 +12,13 @@ Output files: :origin:`searx/data/engines_languages.json` and """ # pylint: disable=invalid-name - +from unicodedata import lookup import json from pathlib import Path from pprint import pformat from babel import Locale, UnknownLocaleError from babel.languages import get_global +from babel.core import parse_locale from searx import settings, searx_dir from searx.engines import load_engines, engines @@ -61,6 +62,62 @@ def get_locale(lang_code): return None +lang2emoji = { + 'ha': '\U0001F1F3\U0001F1EA', # Hausa / Niger + 'bs': '\U0001F1E7\U0001F1E6', # Bosnian / Bosnia & Herzegovina + 'jp': '\U0001F1EF\U0001F1F5', # Japanese + 'ua': '\U0001F1FA\U0001F1E6', # Ukrainian + 'he': '\U0001F1EE\U0001F1F7', # Hebrew + 'zh': '\U0001F1E8\U0001F1F3', # China (zh) +} + + +def get_unicode_flag(lang_code): + """Determine a unicode flag (emoji) that fits to the ``lang_code``""" + + emoji = lang2emoji.get(lang_code.lower()) + if emoji: + return emoji + + if len(lang_code) == 2: + l_code = lang_code.lower() + c_code = lang_code.upper() + if c_code == 'EN': + c_code = 'GB' + lang_code = "%s-%s" % (l_code, c_code) + + language = territory = script = variant = '' + try: + language, territory, script, variant = parse_locale(lang_code, '-') + except ValueError as exc: + print(exc) + + # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 + if not territory: + # 
https://www.unicode.org/emoji/charts/emoji-list.html#country-flag + emoji = lang2emoji.get(language) + if not emoji: + print( + "%s --> language: %s / territory: %s / script: %s / variant: %s" + % (lang_code, language, territory, script, variant) + ) + return emoji + + emoji = lang2emoji.get(territory.lower()) + if emoji: + return emoji + + try: + c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0]) + c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1]) + # print("%s --> territory: %s --> %s%s" %(lang_code, territory, c1, c2 )) + except KeyError as exc: + print("%s --> territory: %s --> %s" % (lang_code, territory, exc)) + return None + + return c1 + c2 + + # Join all language lists. def join_language_lists(engines_languages): language_list = {} @@ -113,7 +170,10 @@ def join_language_lists(engines_languages): print("ERROR: %s --> %s" % (locale, exc)) locale = None - language_list[short_code]['countries'][lang_code] = {'country_name': country_name, 'counter': set()} + language_list[short_code]['countries'][lang_code] = { + 'country_name': country_name, + 'counter': set(), + } # count engine for both language_country combination and language alone language_list[short_code]['counter'].add(engine_name) @@ -167,7 +227,7 @@ def filter_language_list(all_languages): # add language without countries too if there's more than one country to choose from if len(filtered_countries) > 1: - filtered_countries[lang] = _copy_lang_data(lang) + filtered_countries[lang] = _copy_lang_data(lang, None) elif len(filtered_countries) == 1: # if there's only one country per language, it's not necessary to show country name lang_country = next(iter(filtered_countries)) @@ -183,15 +243,22 @@ def filter_language_list(all_languages): lang_country = "{lang}-{country}".format(lang=lang, country=country_code) if lang_country: - filtered_countries[lang_country] = _copy_lang_data(lang) + filtered_countries[lang_country] = _copy_lang_data(lang, None) else: - 
filtered_countries[lang] = _copy_lang_data(lang) + filtered_countries[lang] = _copy_lang_data(lang, None) filtered_languages_with_countries.update(filtered_countries) return filtered_languages_with_countries +class UnicodeEscape(str): + """Escape unicode string in :py:obj:`pprint.pformat`""" + + def __repr__(self): + return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'" + + # Write languages.py. def write_languages_file(languages): file_headers = ( @@ -209,11 +276,14 @@ def write_languages_file(languages): if name is None: print("ERROR: languages['%s'] --> %s" % (code, languages[code])) continue + + flag = get_unicode_flag(code) or '' item = ( code, languages[code]['name'].split(' (')[0], languages[code].get('country_name') or '', languages[code].get('english_name') or '', + UnicodeEscape(flag), ) language_codes.append(item) -- cgit v1.2.3 From a25e3767d44d512d14de471d28023333482dbc22 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sat, 19 Mar 2022 12:14:07 +0100 Subject: [fix] don't show flags for languages without region identifier SearXNG shows two different things: region: "de-CH" is the equivalent of "Schweiz (de)" in DDG. languages: "en" doesn't say anything about the location. It is up to the engines to do their best to select English results without a region. 
Suggested-by: @dalf https://github.com/searxng/searxng/pull/967#issuecomment-1072979693 Signed-off-by: Markus Heiser --- searxng_extra/update/update_languages.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'searxng_extra') diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py index ebe9c884d..631f197e4 100755 --- a/searxng_extra/update/update_languages.py +++ b/searxng_extra/update/update_languages.py @@ -68,7 +68,6 @@ lang2emoji = { 'jp': '\U0001F1EF\U0001F1F5', # Japanese 'ua': '\U0001F1FA\U0001F1E6', # Ukrainian 'he': '\U0001F1EE\U0001F1F7', # Hebrew - 'zh': '\U0001F1E8\U0001F1F3', # China (zh) } @@ -80,11 +79,7 @@ def get_unicode_flag(lang_code): return emoji if len(lang_code) == 2: - l_code = lang_code.lower() - c_code = lang_code.upper() - if c_code == 'EN': - c_code = 'GB' - lang_code = "%s-%s" % (l_code, c_code) + return '\U0001F310' language = territory = script = variant = '' try: -- cgit v1.2.3 From 2e4557f3f3de4d423b4ef1fb0e51a98948a79e31 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sat, 19 Mar 2022 16:45:14 +0100 Subject: [fix] languages: show country name even if there is only one country Signed-off-by: Markus Heiser --- searxng_extra/update/update_languages.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'searxng_extra') diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py index 631f197e4..92083f39f 100755 --- a/searxng_extra/update/update_languages.py +++ b/searxng_extra/update/update_languages.py @@ -224,9 +224,7 @@ def filter_language_list(all_languages): if len(filtered_countries) > 1: filtered_countries[lang] = _copy_lang_data(lang, None) elif len(filtered_countries) == 1: - # if there's only one country per language, it's not necessary to show country name lang_country = next(iter(filtered_countries)) - filtered_countries[lang_country]['country_name'] = None # if no country has enough engines try to get most likely 
country code from babel if not filtered_countries: -- cgit v1.2.3