| author | Markus Heiser <markus.heiser@darmarIT.de> | 2023-03-29 09:47:21 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-03-29 09:47:21 +0200 |
| commit | f950119ca87363aec81591dc4985f11371aa2b3e (patch) | |
| tree | ab893ff1f60d8c969ff0f5c2fad0cff49148aa3c /searx/engines | |
| parent | 64fea2f9cb079bd0055c6a23360097d285204515 (diff) | |
| parent | 6f9e678346e5978a09ee453a62fa133cdc0ee0bd (diff) | |
Merge pull request #2269 from return42/locale-revision
Revision of the locale- and language-handling in SearXNG
Diffstat (limited to 'searx/engines')
25 files changed, 2582 insertions, 1394 deletions
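The per-engine changes below all follow one pattern: the old `supported_languages` / `language_aliases` / `_fetch_supported_languages()` machinery is removed, and each engine instead reads locale information from a module-level `traits: EngineTraits` object and provides a `fetch_traits()` function that fills it. The following is a minimal, illustrative sketch of that pattern, assembled only from the names visible in this diff (`traits.get_region`, `traits.get_language`, `fetch_traits`, `EngineTraitsMap.from_data()` / `set_traits()`); the `EngineTraits` class itself lives in `searx/enginelib/traits.py`, which is outside this excerpt, and the URL and locale entries used here are made up for illustration.

```python
# Illustrative sketch of the engine-module pattern introduced by this PR.
# Names are taken from the diff below; the example URL and locale mappings
# are hypothetical.
from searx.enginelib.traits import EngineTraits

traits: EngineTraits
# The engine loader populates this module-level name, see load_engine():
#   trait_map = EngineTraitsMap.from_data()
#   trait_map.set_traits(engine)


def request(query, params):
    # Map SearXNG's locale (params['searxng_locale'], e.g. 'de-CH') to the
    # region / language codes this engine understands, with a fallback.
    eng_region = traits.get_region(params['searxng_locale'], 'en-US')
    eng_lang = traits.get_language(params['searxng_locale'], 'en')
    # hypothetical search URL, just to show where the values end up
    params['url'] = 'https://example.org/search?q=%s&mkt=%s&lang=%s' % (query, eng_region, eng_lang)
    return params


def fetch_traits(engine_traits: EngineTraits):
    # Runs offline when the engine data is (re)generated: query or scrape the
    # engine and record which SearXNG locales it supports.
    engine_traits.regions['de-CH'] = 'de-ch'      # SearXNG region tag -> engine region code
    engine_traits.languages['de'] = 'de'          # SearXNG language tag -> engine language code
    engine_traits.custom['wiki_netloc'] = {}      # engine-specific extras (see archlinux.py)
```

In the diffs that follow, `request()` reads from `traits` (archlinux.py, bing*.py, dailymotion.py, duckduckgo.py), and the new `fetch_traits()` functions replace the removed `_fetch_supported_languages()` hooks.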
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 52bb5f20d..c8e8e7241 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -11,24 +11,22 @@ usage:: """ +from __future__ import annotations + import sys import copy -from typing import Dict, List, Optional - from os.path import realpath, dirname -from babel.localedata import locale_identifiers + +from typing import TYPE_CHECKING, Dict, Optional + from searx import logger, settings -from searx.data import ENGINES_LANGUAGES -from searx.network import get -from searx.utils import load_module, match_language, gen_useragent +from searx.utils import load_module +if TYPE_CHECKING: + from searx.enginelib import Engine logger = logger.getChild('engines') ENGINE_DIR = dirname(realpath(__file__)) -BABEL_LANGS = [ - lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] - for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers()) -] ENGINE_DEFAULT_ARGS = { "engine_type": "online", "inactive": False, @@ -36,8 +34,6 @@ ENGINE_DEFAULT_ARGS = { "timeout": settings["outgoing"]["request_timeout"], "shortcut": "-", "categories": ["general"], - "supported_languages": [], - "language_aliases": {}, "paging": False, "safesearch": False, "time_range_support": False, @@ -52,24 +48,6 @@ ENGINE_DEFAULT_ARGS = { OTHER_CATEGORY = 'other' -class Engine: # pylint: disable=too-few-public-methods - """This class is currently never initialized and only used for type hinting.""" - - name: str - engine: str - shortcut: str - categories: List[str] - supported_languages: List[str] - about: dict - inactive: bool - disabled: bool - language_support: bool - paging: bool - safesearch: bool - time_range_support: bool - timeout: float - - # Defaults for the namespace of an engine module, see :py:func:`load_engine` categories = {'general': []} @@ -136,9 +114,15 @@ def load_engine(engine_data: dict) -> Optional[Engine]: return None update_engine_attributes(engine, engine_data) - set_language_attributes(engine) update_attributes_for_tor(engine) + # avoid cyclic imports + # pylint: disable=import-outside-toplevel + from searx.enginelib.traits import EngineTraitsMap + + trait_map = EngineTraitsMap.from_data() + trait_map.set_traits(engine) + if not is_engine_active(engine): return None @@ -190,60 +174,6 @@ def update_engine_attributes(engine: Engine, engine_data): setattr(engine, arg_name, copy.deepcopy(arg_value)) -def set_language_attributes(engine: Engine): - # assign supported languages from json file - if engine.name in ENGINES_LANGUAGES: - engine.supported_languages = ENGINES_LANGUAGES[engine.name] - - elif engine.engine in ENGINES_LANGUAGES: - # The key of the dictionary ENGINES_LANGUAGES is the *engine name* - # configured in settings.xml. When multiple engines are configured in - # settings.yml to use the same origin engine (python module) these - # additional engines can use the languages from the origin engine. 
- # For this use the configured ``engine: ...`` from settings.yml - engine.supported_languages = ENGINES_LANGUAGES[engine.engine] - - if hasattr(engine, 'language'): - # For an engine, when there is `language: ...` in the YAML settings, the - # engine supports only one language, in this case - # engine.supported_languages should contains this value defined in - # settings.yml - if engine.language not in engine.supported_languages: - raise ValueError( - "settings.yml - engine: '%s' / language: '%s' not supported" % (engine.name, engine.language) - ) - - if isinstance(engine.supported_languages, dict): - engine.supported_languages = {engine.language: engine.supported_languages[engine.language]} - else: - engine.supported_languages = [engine.language] - - # find custom aliases for non standard language codes - for engine_lang in engine.supported_languages: - iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None) - if ( - iso_lang - and iso_lang != engine_lang - and not engine_lang.startswith(iso_lang) - and iso_lang not in engine.supported_languages - ): - engine.language_aliases[iso_lang] = engine_lang - - # language_support - engine.language_support = len(engine.supported_languages) > 0 - - # assign language fetching method if auxiliary method exists - if hasattr(engine, '_fetch_supported_languages'): - headers = { - 'User-Agent': gen_useragent(), - 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language - } - engine.fetch_supported_languages = ( - # pylint: disable=protected-access - lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)) - ) - - def update_attributes_for_tor(engine: Engine) -> bool: if using_tor_proxy(engine) and hasattr(engine, 'onion_url'): engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index b5e426107..56c3b447f 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -1,15 +1,32 @@ # SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint """ - Arch Linux Wiki +Arch Linux Wiki +~~~~~~~~~~~~~~~ + +This implementation does not use a official API: Mediawiki provides API, but +Arch Wiki blocks access to it. 
- API: Mediawiki provides API, but Arch Wiki blocks access to it """ -from urllib.parse import urlencode, urljoin -from lxml import html +from typing import TYPE_CHECKING +from urllib.parse import urlencode, urljoin, urlparse +import lxml +import babel + +from searx import network from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex +from searx.enginelib.traits import EngineTraits +from searx.locales import language_tag + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + -# about about = { "website": 'https://wiki.archlinux.org/', "wikidata_id": 'Q101445877', @@ -22,125 +39,113 @@ about = { # engine dependent config categories = ['it', 'software wikis'] paging = True -base_url = 'https://wiki.archlinux.org' - -# xpath queries -xpath_results = '//ul[@class="mw-search-results"]/li' -xpath_link = './/div[@class="mw-search-result-heading"]/a' - - -# cut 'en' from 'en-US', 'de' from 'de-CH', and so on -def locale_to_lang_code(locale): - if locale.find('-') >= 0: - locale = locale.split('-')[0] - return locale - - -# wikis for some languages were moved off from the main site, we need to make -# requests to correct URLs to be able to get results in those languages -lang_urls = { - # fmt: off - 'all': { - 'base': 'https://wiki.archlinux.org', - 'search': '/index.php?title=Special:Search&offset={offset}&{query}' - }, - 'de': { - 'base': 'https://wiki.archlinux.de', - 'search': '/index.php?title=Spezial:Suche&offset={offset}&{query}' - }, - 'fr': { - 'base': 'https://wiki.archlinux.fr', - 'search': '/index.php?title=Spécial:Recherche&offset={offset}&{query}' - }, - 'ja': { - 'base': 'https://wiki.archlinuxjp.org', - 'search': '/index.php?title=特別:検索&offset={offset}&{query}' - }, - 'ro': { - 'base': 'http://wiki.archlinux.ro', - 'search': '/index.php?title=Special:Căutare&offset={offset}&{query}' - }, - 'tr': { - 'base': 'http://archtr.org/wiki', - 'search': '/index.php?title=Özel:Ara&offset={offset}&{query}' - } - # fmt: on -} - - -# get base & search URLs for selected language -def get_lang_urls(language): - if language in lang_urls: - return lang_urls[language] - return lang_urls['all'] - - -# Language names to build search requests for -# those languages which are hosted on the main site. -main_langs = { - 'ar': 'العربية', - 'bg': 'Български', - 'cs': 'Česky', - 'da': 'Dansk', - 'el': 'Ελληνικά', - 'es': 'Español', - 'he': 'עברית', - 'hr': 'Hrvatski', - 'hu': 'Magyar', - 'it': 'Italiano', - 'ko': '한국어', - 'lt': 'Lietuviškai', - 'nl': 'Nederlands', - 'pl': 'Polski', - 'pt': 'Português', - 'ru': 'Русский', - 'sl': 'Slovenský', - 'th': 'ไทย', - 'uk': 'Українська', - 'zh': '简体中文', -} -supported_languages = dict(lang_urls, **main_langs) +main_wiki = 'wiki.archlinux.org' -# do search-request def request(query, params): - # translate the locale (e.g. 'en-US') to language code ('en') - language = locale_to_lang_code(params['language']) - - # if our language is hosted on the main site, we need to add its name - # to the query in order to narrow the results to that language - if language in main_langs: - query += ' (' + main_langs[language] + ')' - # prepare the request parameters - query = urlencode({'search': query}) + sxng_lang = params['searxng_locale'].split('-')[0] + netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki) + title = traits.custom['title'].get(sxng_lang, 'Special:Search') + base_url = 'https://' + netloc + '/index.php?' 
offset = (params['pageno'] - 1) * 20 - # get request URLs for our language of choice - urls = get_lang_urls(language) - search_url = urls['base'] + urls['search'] - - params['url'] = search_url.format(query=query, offset=offset) + if netloc == main_wiki: + eng_lang: str = traits.get_language(sxng_lang, 'English') + query += ' (' + eng_lang + ')' + elif netloc == 'wiki.archlinuxcn.org': + base_url = 'https://' + netloc + '/wzh/index.php?' + + args = { + 'search': query, + 'title': title, + 'limit': 20, + 'offset': offset, + 'profile': 'default', + } + params['url'] = base_url + urlencode(args) return params -# get response from search-request def response(resp): - # get the base URL for the language in which request was made - language = locale_to_lang_code(resp.search_params['language']) - base_url = get_lang_urls(language)['base'] results = [] + dom = lxml.html.fromstring(resp.text) - dom = html.fromstring(resp.text) + # get the base URL for the language in which request was made + sxng_lang = resp.search_params['searxng_locale'].split('-')[0] + netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki) + base_url = 'https://' + netloc + '/index.php?' + + for result in eval_xpath_list(dom, '//ul[@class="mw-search-results"]/li'): + link = eval_xpath_getindex(result, './/div[@class="mw-search-result-heading"]/a', 0) + content = extract_text(result.xpath('.//div[@class="searchresult"]')) + results.append( + { + 'url': urljoin(base_url, link.get('href')), + 'title': extract_text(link), + 'content': content, + } + ) - # parse results - for result in eval_xpath_list(dom, xpath_results): - link = eval_xpath_getindex(result, xpath_link, 0) - href = urljoin(base_url, link.attrib.get('href')) - title = extract_text(link) + return results - results.append({'url': href, 'title': title}) - return results +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages from Archlinix-Wiki. The location of the Wiki address of a + language is mapped in a :py:obj:`custom field + <searx.enginelib.traits.EngineTraits.custom>` (``wiki_netloc``). Depending + on the location, the ``title`` argument in the request is translated. + + .. code:: python + + "custom": { + "wiki_netloc": { + "de": "wiki.archlinux.de", + # ... + "zh": "wiki.archlinuxcn.org" + } + "title": { + "de": "Spezial:Suche", + # ... 
+ "zh": "Special:\u641c\u7d22" + }, + }, + + """ + + engine_traits.custom['wiki_netloc'] = {} + engine_traits.custom['title'] = {} + + title_map = { + 'de': 'Spezial:Suche', + 'fa': 'ویژه:جستجو', + 'ja': '特別:検索', + 'zh': 'Special:搜索', + } + + resp = network.get('https://wiki.archlinux.org/') + if not resp.ok: + print("ERROR: response from wiki.archlinix.org is not OK.") + + dom = lxml.html.fromstring(resp.text) + for a in eval_xpath_list(dom, "//a[@class='interlanguage-link-target']"): + + sxng_tag = language_tag(babel.Locale.parse(a.get('lang'), sep='-')) + # zh_Hans --> zh + sxng_tag = sxng_tag.split('_')[0] + + netloc = urlparse(a.get('href')).netloc + if netloc != 'wiki.archlinux.org': + title = title_map.get(sxng_tag) + if not title: + print("ERROR: title tag from %s (%s) is unknown" % (netloc, sxng_tag)) + continue + engine_traits.custom['wiki_netloc'][sxng_tag] = netloc + engine_traits.custom['title'][sxng_tag] = title + + eng_tag = extract_text(eval_xpath_list(a, ".//span")) + engine_traits.languages[sxng_tag] = eng_tag + + engine_traits.languages['en'] = 'English' diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 783c0056a..0f85c7036 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -1,16 +1,53 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Bing (Web) +"""This is the implementation of the Bing-WEB engine. Some of this +implementations are shared by other engines: + +- :ref:`bing images engine` +- :ref:`bing news engine` +- :ref:`bing videos engine` + +On the `preference page`_ Bing offers a lot of languages an regions (see section +'Search results languages' and 'Country/region'). However, the abundant choice +does not correspond to reality, where Bing has a full-text indexer only for a +limited number of languages. By example: you can select a language like Māori +but you never get a result in this language. + +What comes a bit closer to the truth are the `search-APIs`_ but they don`t seem +to be completely correct either (if you take a closer look you will find some +inaccuracies there too): + +- :py:obj:`searx.engines.bing.bing_traits_url` +- :py:obj:`searx.engines.bing_videos.bing_traits_url` +- :py:obj:`searx.engines.bing_images.bing_traits_url` +- :py:obj:`searx.engines.bing_news.bing_traits_url` + +.. _preference page: https://www.bing.com/account/general +.. _search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/ -- https://github.com/searx/searx/issues/2019#issuecomment-648227442 """ -# pylint: disable=too-many-branches +# pylint: disable=too-many-branches, invalid-name +from typing import TYPE_CHECKING +import datetime import re -from urllib.parse import urlencode, urlparse, parse_qs +import uuid +from urllib.parse import urlencode from lxml import html -from searx.utils import eval_xpath, extract_text, eval_xpath_list, match_language, eval_xpath_getindex -from searx.network import multi_requests, Request +import babel +import babel.languages + +from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex +from searx import network +from searx.locales import language_tag, region_tag +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits about = { "website": 'https://www.bing.com', @@ -21,56 +58,124 @@ about = { "results": 'HTML', } +send_accept_language_header = True +"""Bing tries to guess user's language and territory from the HTTP +Accept-Language. 
Optional the user can select a search-language (can be +different to the UI language) and a region (market code).""" + # engine dependent config categories = ['general', 'web'] paging = True -time_range_support = False -safesearch = False -send_accept_language_header = True -supported_languages_url = 'https://www.bing.com/account/general' -language_aliases = {} - -# search-url -base_url = 'https://www.bing.com/' +time_range_support = True +safesearch = True +safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} # cookie: ADLT=STRICT -# initial query: https://www.bing.com/search?q=foo&search=&form=QBLH -inital_query = 'search?{query}&search=&form=QBLH' +base_url = 'https://www.bing.com/search' +"""Bing (Web) search URL""" -# following queries: https://www.bing.com/search?q=foo&search=&first=11&FORM=PERE -page_query = 'search?{query}&search=&first={offset}&FORM=PERE' +bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/market-codes' +"""Bing (Web) search API description""" def _get_offset_from_pageno(pageno): return (pageno - 1) * 10 + 1 -def request(query, params): +def set_bing_cookies(params, engine_language, engine_region, SID): + + # set cookies + # ----------- + + params['cookies']['_EDGE_V'] = '1' + + # _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw + _EDGE_S = [ + 'F=1', + 'SID=%s' % SID, + 'mkt=%s' % engine_region.lower(), + 'ui=%s' % engine_language.lower(), + ] + params['cookies']['_EDGE_S'] = '&'.join(_EDGE_S) + logger.debug("cookie _EDGE_S=%s", params['cookies']['_EDGE_S']) + + # "_EDGE_CD": "m=zh-tw", + + _EDGE_CD = [ # pylint: disable=invalid-name + 'm=%s' % engine_region.lower(), # search region: zh-cn + 'u=%s' % engine_language.lower(), # UI: en-us + ] + + params['cookies']['_EDGE_CD'] = '&'.join(_EDGE_CD) + ';' + logger.debug("cookie _EDGE_CD=%s", params['cookies']['_EDGE_CD']) - offset = _get_offset_from_pageno(params.get('pageno', 1)) + SRCHHPGUSR = [ # pylint: disable=invalid-name + 'SRCHLANG=%s' % engine_language, + # Trying to set ADLT cookie here seems not to have any effect, I assume + # there is some age verification by a cookie (and/or session ID) needed, + # to disable the SafeSearch. 
+ 'ADLT=%s' % safesearch_types.get(params['safesearch'], 'DEMOTE'), + ] + params['cookies']['SRCHHPGUSR'] = '&'.join(SRCHHPGUSR) + logger.debug("cookie SRCHHPGUSR=%s", params['cookies']['SRCHHPGUSR']) + + +def request(query, params): + """Assemble a Bing-Web request.""" - # logger.debug("params['pageno'] --> %s", params.get('pageno')) - # logger.debug(" offset --> %s", offset) + engine_region = traits.get_region(params['searxng_locale'], 'en-US') + engine_language = traits.get_language(params['searxng_locale'], 'en') - search_string = page_query - if offset == 1: - search_string = inital_query + SID = uuid.uuid1().hex.upper() + CVID = uuid.uuid1().hex.upper() - if params['language'] == 'all': - lang = 'EN' - else: - lang = match_language(params['language'], supported_languages, language_aliases) + set_bing_cookies(params, engine_language, engine_region, SID) - query = 'language:{} {}'.format(lang.split('-')[0].upper(), query) + # build URL query + # --------------- - search_path = search_string.format(query=urlencode({'q': query}), offset=offset) + # query term + page = int(params.get('pageno', 1)) + query_params = { + # fmt: off + 'q': query, + 'pq': query, + 'cvid': CVID, + 'qs': 'n', + 'sp': '-1' + # fmt: on + } - if offset > 1: - referer = base_url + inital_query.format(query=urlencode({'q': query})) + # page + if page > 1: + referer = base_url + '?' + urlencode(query_params) params['headers']['Referer'] = referer logger.debug("headers.Referer --> %s", referer) - params['url'] = base_url + search_path - params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' + query_params['first'] = _get_offset_from_pageno(page) + + if page == 2: + query_params['FORM'] = 'PERE' + elif page > 2: + query_params['FORM'] = 'PERE%s' % (page - 2) + + filters = '' + if params['time_range']: + query_params['filt'] = 'custom' + + if params['time_range'] == 'day': + filters = 'ex1:"ez1"' + elif params['time_range'] == 'week': + filters = 'ex1:"ez2"' + elif params['time_range'] == 'month': + filters = 'ex1:"ez3"' + elif params['time_range'] == 'year': + epoch_1970 = datetime.date(1970, 1, 1) + today_no = (datetime.date.today() - epoch_1970).days + filters = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no) + + params['url'] = base_url + '?' + urlencode(query_params) + if filters: + params['url'] = params['url'] + '&filters=' + filters return params @@ -107,7 +212,8 @@ def response(resp): url_cite = extract_text(eval_xpath(result, './/div[@class="b_attribution"]/cite')) # Bing can shorten the URL either at the end or in the middle of the string if ( - url_cite.startswith('https://') + url_cite + and url_cite.startswith('https://') and '…' not in url_cite and '...' 
not in url_cite and '›' not in url_cite @@ -127,9 +233,9 @@ def response(resp): # resolve all Bing redirections in parallel request_list = [ - Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve + network.Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve ] - response_list = multi_requests(request_list) + response_list = network.multi_requests(request_list) for i, redirect_response in enumerate(response_list): if not isinstance(redirect_response, Exception): results[url_to_resolve_index[i]]['url'] = redirect_response.headers['location'] @@ -157,27 +263,71 @@ def response(resp): return results -# get supported languages from their site -def _fetch_supported_languages(resp): +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages and regions from Bing-Web.""" + + xpath_market_codes = '//table[1]/tbody/tr/td[3]' + # xpath_country_codes = '//table[2]/tbody/tr/td[2]' + xpath_language_codes = '//table[3]/tbody/tr/td[2]' + + _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes) + + +def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str): + + # insert alias to map from a language (zh) to a language + script (zh_Hans) + engine_traits.languages['zh'] = 'zh-hans' - lang_tags = set() + resp = network.get(url) + + if not resp.ok: + print("ERROR: response from peertube is not OK.") dom = html.fromstring(resp.text) - lang_links = eval_xpath(dom, '//div[@id="language-section"]//li') - for _li in lang_links: + map_lang = {'jp': 'ja'} + for td in eval_xpath(dom, xpath_language_codes): + eng_lang = td.text - href = eval_xpath(_li, './/@href')[0] - (_scheme, _netloc, _path, _params, query, _fragment) = urlparse(href) - query = parse_qs(query, keep_blank_values=True) + if eng_lang in ('en-gb', 'pt-br'): + # language 'en' is already in the list and a language 'en-gb' can't + # be handled in SearXNG, same with pt-br which is covered by pt-pt. 
+ continue - # fmt: off - setlang = query.get('setlang', [None, ])[0] - # example: 'mn-Cyrl-MN' --> '['mn', 'Cyrl-MN'] - lang, nation = (setlang.split('-', maxsplit=1) + [None,])[:2] # fmt: skip - # fmt: on + babel_lang = map_lang.get(eng_lang, eng_lang).replace('-', '_') + try: + sxng_tag = language_tag(babel.Locale.parse(babel_lang)) + except babel.UnknownLocaleError: + print("ERROR: language (%s) is unknown by babel" % (eng_lang)) + continue + conflict = engine_traits.languages.get(sxng_tag) + if conflict: + if conflict != eng_lang: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang)) + continue + engine_traits.languages[sxng_tag] = eng_lang - tag = lang + '-' + nation if nation else lang - lang_tags.add(tag) + map_region = { + 'en-ID': 'id_ID', + 'no-NO': 'nb_NO', + } - return list(lang_tags) + for td in eval_xpath(dom, xpath_market_codes): + eng_region = td.text + babel_region = map_region.get(eng_region, eng_region).replace('-', '_') + + if eng_region == 'en-WW': + engine_traits.all_locale = eng_region + continue + + try: + sxng_tag = region_tag(babel.Locale.parse(babel_region)) + except babel.UnknownLocaleError: + print("ERROR: region (%s) is unknown by babel" % (eng_region)) + continue + conflict = engine_traits.regions.get(sxng_tag) + if conflict: + if conflict != eng_region: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_region)) + continue + engine_traits.regions[sxng_tag] = eng_region diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 107ce3cff..bd3a34aa5 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -1,20 +1,30 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Bing (Images) - +"""Bing-Images: description see :py:obj:`searx.engines.bing`. 
""" +# pylint: disable=invalid-name + -from json import loads +from typing import TYPE_CHECKING +import uuid +import json from urllib.parse import urlencode from lxml import html -from searx.utils import match_language -from searx.engines.bing import language_aliases -from searx.engines.bing import ( # pylint: disable=unused-import - _fetch_supported_languages, - supported_languages_url, +from searx.enginelib.traits import EngineTraits +from searx.engines.bing import ( + set_bing_cookies, + _fetch_traits, ) +from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -31,77 +41,92 @@ categories = ['images', 'web'] paging = True safesearch = True time_range_support = True -send_accept_language_header = True -supported_languages_url = 'https://www.bing.com/account/general' -number_of_results = 28 -# search-url -base_url = 'https://www.bing.com/' -search_string = ( +base_url = 'https://www.bing.com/images/async' +"""Bing (Images) search URL""" + +bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-image-search/reference/market-codes' +"""Bing (Images) search API description""" + +time_map = { # fmt: off - 'images/search' - '?{query}' - '&count={count}' - '&first={first}' - '&tsc=ImageHoverTitle' + 'day': 60 * 24, + 'week': 60 * 24 * 7, + 'month': 60 * 24 * 31, + 'year': 60 * 24 * 365, # fmt: on -) -time_range_string = '&qft=+filterui:age-lt{interval}' -time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'} - -# safesearch definitions -safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} +} -# do search-request def request(query, params): - offset = ((params['pageno'] - 1) * number_of_results) + 1 + """Assemble a Bing-Image request.""" - search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset) + engine_region = traits.get_region(params['searxng_locale'], 'en-US') + engine_language = traits.get_language(params['searxng_locale'], 'en') - language = match_language(params['language'], supported_languages, language_aliases).lower() + SID = uuid.uuid1().hex.upper() + set_bing_cookies(params, engine_language, engine_region, SID) - params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + # build URL query + # - example: https://www.bing.com/images/async?q=foo&first=155&count=35 - params['cookies']['_EDGE_S'] = 'mkt=' + language + '&ui=' + language + '&F=1' + query_params = { + # fmt: off + 'q': query, + 'async' : 'content', + # to simplify the page count lets use the default of 35 images per page + 'first' : (int(params.get('pageno', 1)) - 1) * 35 + 1, + 'count' : 35, + # fmt: on + } - params['url'] = base_url + search_path - if params['time_range'] in time_range_dict: - params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']]) + # time range + # - example: one year (525600 minutes) 'qft=+filterui:age-lt525600' + + if params['time_range']: + query_params['qft'] = 'filterui:age-lt%s' % time_map[params['time_range']] + + params['url'] = base_url + '?' 
+ urlencode(query_params) return params -# get response from search-request def response(resp): - results = [] + """Get response from Bing-Images""" + results = [] dom = html.fromstring(resp.text) - # parse results - for result in dom.xpath('//div[@class="imgpt"]'): - img_format = result.xpath('./div[contains(@class, "img_info")]/span/text()')[0] - # Microsoft seems to experiment with this code so don't make the path too specific, - # just catch the text section for the first anchor in img_info assuming this to be - # the originating site. - source = result.xpath('./div[contains(@class, "img_info")]//a/text()')[0] + for result in dom.xpath('//ul[contains(@class, "dgControl_list")]/li'): - m = loads(result.xpath('./a/@m')[0]) + metadata = result.xpath('.//a[@class="iusc"]/@m') + if not metadata: + continue - # strip 'Unicode private use area' highlighting, they render to Tux - # the Linux penguin and a standing diamond on my machine... - title = m.get('t', '').replace('\ue000', '').replace('\ue001', '') + metadata = json.loads(result.xpath('.//a[@class="iusc"]/@m')[0]) + title = ' '.join(result.xpath('.//div[@class="infnmpt"]//a/text()')).strip() + img_format = ' '.join(result.xpath('.//div[@class="imgpt"]/div/span/text()')).strip() + source = ' '.join(result.xpath('.//div[@class="imgpt"]//div[@class="lnkw"]//a/text()')).strip() results.append( { 'template': 'images.html', - 'url': m['purl'], - 'thumbnail_src': m['turl'], - 'img_src': m['murl'], - 'content': '', + 'url': metadata['purl'], + 'thumbnail_src': metadata['turl'], + 'img_src': metadata['murl'], + 'content': metadata['desc'], 'title': title, 'source': source, 'img_format': img_format, } ) - return results + + +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages and regions from Bing-News.""" + + xpath_market_codes = '//table[1]/tbody/tr/td[3]' + # xpath_country_codes = '//table[2]/tbody/tr/td[2]' + xpath_language_codes = '//table[3]/tbody/tr/td[2]' + + _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes) diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 7eea17bb4..d8c63857a 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -1,24 +1,30 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Bing (News) +"""Bing-News: description see :py:obj:`searx.engines.bing`. 
""" -from urllib.parse import ( - urlencode, - urlparse, - parse_qsl, - quote, -) -from datetime import datetime -from dateutil import parser -from lxml import etree -from lxml.etree import XPath -from searx.utils import match_language, eval_xpath_getindex -from searx.engines.bing import ( # pylint: disable=unused-import - language_aliases, - _fetch_supported_languages, - supported_languages_url, +# pylint: disable=invalid-name + +from typing import TYPE_CHECKING +import uuid +from urllib.parse import urlencode + +from lxml import html + +from searx.enginelib.traits import EngineTraits +from searx.engines.bing import ( + set_bing_cookies, + _fetch_traits, ) +from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + # about about = { @@ -34,108 +40,111 @@ about = { categories = ['news'] paging = True time_range_support = True -send_accept_language_header = True - -# search-url -base_url = 'https://www.bing.com/' -search_string = 'news/search?{query}&first={offset}&format=RSS' -search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS' -time_range_dict = {'day': '7', 'week': '8', 'month': '9'} - - -def url_cleanup(url_string): - """remove click""" - - parsed_url = urlparse(url_string) - if parsed_url.netloc == 'www.bing.com' and parsed_url.path == '/news/apiclick.aspx': - query = dict(parse_qsl(parsed_url.query)) - url_string = query.get('url', None) - return url_string - - -def image_url_cleanup(url_string): - """replace the http://*bing.com/th?id=... by https://www.bing.com/th?id=...""" - - parsed_url = urlparse(url_string) - if parsed_url.netloc.endswith('bing.com') and parsed_url.path == '/th': - query = dict(parse_qsl(parsed_url.query)) - url_string = "https://www.bing.com/th?id=" + quote(query.get('id')) - return url_string - - -def _get_url(query, language, offset, time_range): - if time_range in time_range_dict: - search_path = search_string_with_time.format( - # fmt: off - query = urlencode({ - 'q': query, - 'setmkt': language - }), - offset = offset, - interval = time_range_dict[time_range] - # fmt: on - ) - else: - # e.g. setmkt=de-de&setlang=de - search_path = search_string.format( - # fmt: off - query = urlencode({ - 'q': query, - 'setmkt': language - }), - offset = offset - # fmt: on - ) - return base_url + search_path +time_map = { + 'day': '4', + 'week': '8', + 'month': '9', +} +"""A string '4' means *last hour*. We use *last hour* for ``day`` here since the +difference of *last day* and *last week* in the result list is just marginally. +""" + +base_url = 'https://www.bing.com/news/infinitescrollajax' +"""Bing (News) search URL""" + +bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-news-search/reference/market-codes' +"""Bing (News) search API description""" + +mkt_alias = { + 'zh': 'en-WW', + 'zh-CN': 'en-WW', +} +"""Bing News has an official market code 'zh-CN' but we won't get a result with +this market code. For 'zh' and 'zh-CN' we better use the *Worldwide aggregate* +market code (en-WW). 
+""" def request(query, params): + """Assemble a Bing-News request.""" + + sxng_locale = params['searxng_locale'] + engine_region = traits.get_region(mkt_alias.get(sxng_locale, sxng_locale), traits.all_locale) + engine_language = traits.get_language(sxng_locale, 'en') + + SID = uuid.uuid1().hex.upper() + set_bing_cookies(params, engine_language, engine_region, SID) - if params['time_range'] and params['time_range'] not in time_range_dict: - return params + # build URL query + # + # example: https://www.bing.com/news/infinitescrollajax?q=london&first=1 - offset = (params['pageno'] - 1) * 10 + 1 - if params['language'] == 'all': - language = 'en-US' - else: - language = match_language(params['language'], supported_languages, language_aliases) - params['url'] = _get_url(query, language, offset, params['time_range']) + query_params = { + # fmt: off + 'q': query, + 'InfiniteScroll': 1, + # to simplify the page count lets use the default of 10 images per page + 'first' : (int(params.get('pageno', 1)) - 1) * 10 + 1, + # fmt: on + } + + if params['time_range']: + # qft=interval:"7" + query_params['qft'] = 'qft=interval="%s"' % time_map.get(params['time_range'], '9') + + params['url'] = base_url + '?' + urlencode(query_params) return params def response(resp): - + """Get response from Bing-Video""" results = [] - rss = etree.fromstring(resp.content) - namespaces = rss.nsmap - - for item in rss.xpath('./channel/item'): - # url / title / content - url = url_cleanup(eval_xpath_getindex(item, './link/text()', 0, default=None)) - title = eval_xpath_getindex(item, './title/text()', 0, default=url) - content = eval_xpath_getindex(item, './description/text()', 0, default='') - - # publishedDate - publishedDate = eval_xpath_getindex(item, './pubDate/text()', 0, default=None) - try: - publishedDate = parser.parse(publishedDate, dayfirst=False) - except TypeError: - publishedDate = datetime.now() - except ValueError: - publishedDate = datetime.now() - - # thumbnail - thumbnail = eval_xpath_getindex(item, XPath('./News:Image/text()', namespaces=namespaces), 0, default=None) - if thumbnail is not None: - thumbnail = image_url_cleanup(thumbnail) - - # append result - if thumbnail is not None: - results.append( - {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content, 'img_src': thumbnail} - ) - else: - results.append({'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}) + + if not resp.ok or not resp.text: + return results + + dom = html.fromstring(resp.text) + + for newsitem in dom.xpath('//div[contains(@class, "newsitem")]'): + + url = newsitem.xpath('./@url')[0] + title = ' '.join(newsitem.xpath('.//div[@class="caption"]//a[@class="title"]/text()')).strip() + content = ' '.join(newsitem.xpath('.//div[@class="snippet"]/text()')).strip() + thumbnail = None + author = newsitem.xpath('./@data-author')[0] + metadata = ' '.join(newsitem.xpath('.//div[@class="source"]/span/text()')).strip() + + img_src = newsitem.xpath('.//a[@class="imagelink"]//img/@src') + if img_src: + thumbnail = 'https://www.bing.com/' + img_src[0] + + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'img_src': thumbnail, + 'author': author, + 'metadata': metadata, + } + ) return results + + +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages and regions from Bing-News. + + The :py:obj:`description <searx.engines.bing_news.bing_traits_url>` of the + first table says *"query parameter when calling the Video Search API."* + .. thats why I use the 4. 
table "News Category API markets" for the + ``xpath_market_codes``. + + """ + + xpath_market_codes = '//table[4]/tbody/tr/td[3]' + # xpath_country_codes = '//table[2]/tbody/tr/td[2]' + xpath_language_codes = '//table[3]/tbody/tr/td[2]' + + _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes) diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index 85071de21..8ee0bb66e 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -1,21 +1,30 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Bing (Videos) - +"""Bing-Videos: description see :py:obj:`searx.engines.bing`. """ +# pylint: disable=invalid-name -from json import loads +from typing import TYPE_CHECKING +import uuid +import json from urllib.parse import urlencode from lxml import html -from searx.utils import match_language -from searx.engines.bing import language_aliases - -from searx.engines.bing import ( # pylint: disable=unused-import - _fetch_supported_languages, - supported_languages_url, +from searx.enginelib.traits import EngineTraits +from searx.engines.bing import ( + set_bing_cookies, + _fetch_traits, ) +from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + about = { "website": 'https://www.bing.com/videos', @@ -26,65 +35,76 @@ about = { "results": 'HTML', } +# engine dependent config categories = ['videos', 'web'] paging = True safesearch = True time_range_support = True -send_accept_language_header = True -number_of_results = 28 -base_url = 'https://www.bing.com/' -search_string = ( +base_url = 'https://www.bing.com/videos/asyncv2' +"""Bing (Videos) async search URL.""" + +bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-video-search/reference/market-codes' +"""Bing (Video) search API description""" + +time_map = { # fmt: off - 'videos/search' - '?{query}' - '&count={count}' - '&first={first}' - '&scope=video' - '&FORM=QBLH' + 'day': 60 * 24, + 'week': 60 * 24 * 7, + 'month': 60 * 24 * 31, + 'year': 60 * 24 * 365, # fmt: on -) -time_range_string = '&qft=+filterui:videoage-lt{interval}' -time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'} - -# safesearch definitions -safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} +} -# do search-request def request(query, params): - offset = ((params['pageno'] - 1) * number_of_results) + 1 + """Assemble a Bing-Video request.""" - search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset) + engine_region = traits.get_region(params['searxng_locale'], 'en-US') + engine_language = traits.get_language(params['searxng_locale'], 'en') - # safesearch cookie - params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + SID = uuid.uuid1().hex.upper() + set_bing_cookies(params, engine_language, engine_region, SID) - # language cookie - language = match_language(params['language'], supported_languages, language_aliases).lower() - params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1' + # build URL query + # + # example: https://www.bing.com/videos/asyncv2?q=foo&async=content&first=1&count=35 - # query and paging - params['url'] = base_url + search_path + query_params = { + # fmt: off + 'q': query, + 'async' : 'content', + # to simplify the page count lets use the default of 35 images per page + 'first' : (int(params.get('pageno', 
1)) - 1) * 35 + 1, + 'count' : 35, + # fmt: on + } # time range - if params['time_range'] in time_range_dict: - params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']]) + # + # example: one week (10080 minutes) '&qft= filterui:videoage-lt10080' '&form=VRFLTR' + + if params['time_range']: + query_params['form'] = 'VRFLTR' + query_params['qft'] = ' filterui:videoage-lt%s' % time_map[params['time_range']] + + params['url'] = base_url + '?' + urlencode(query_params) return params -# get response from search-request def response(resp): + """Get response from Bing-Video""" results = [] dom = html.fromstring(resp.text) - for result in dom.xpath('//div[@class="dg_u"]/div[contains(@class, "mc_vtvc")]'): - metadata = loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0]) + for result in dom.xpath('//div[@class="dg_u"]//div[contains(@id, "mc_vtvc_video")]'): + metadata = json.loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0]) info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip() content = '{0} - {1}'.format(metadata['du'], info) - thumbnail = '{0}th?id={1}'.format(base_url, metadata['thid']) + thumbnail = result.xpath('.//div[contains(@class, "mc_vtvc_th")]//img/@src')[0] + results.append( { 'url': metadata['murl'], @@ -96,3 +116,13 @@ def response(resp): ) return results + + +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages and regions from Bing-Videos.""" + + xpath_market_codes = '//table[1]/tbody/tr/td[3]' + # xpath_country_codes = '//table[2]/tbody/tr/td[2]' + xpath_language_codes = '//table[3]/tbody/tr/td[2]' + + _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes) diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 7dd84dd27..d734ec3c8 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -1,17 +1,35 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -"""Dailymotion (Videos) +# lint: pylint +""" +Dailymotion (Videos) +~~~~~~~~~~~~~~~~~~~~ + +.. _REST GET: https://developers.dailymotion.com/tools/ +.. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters +.. _Video filters API: https://developers.dailymotion.com/api/#video-filters +.. _Fields selection: https://developers.dailymotion.com/api/#fields-selection """ -from typing import Set +from typing import TYPE_CHECKING + from datetime import datetime, timedelta from urllib.parse import urlencode import time import babel from searx.exceptions import SearxEngineAPIException -from searx.network import raise_for_httperror +from searx import network from searx.utils import html_to_text +from searx.locales import region_tag, language_tag +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -37,11 +55,24 @@ time_delta_dict = { } safesearch = True -safesearch_params = {2: '&is_created_for_kids=true', 1: '&is_created_for_kids=true', 0: ''} +safesearch_params = { + 2: {'is_created_for_kids': 'true'}, + 1: {'is_created_for_kids': 'true'}, + 0: {}, +} +"""True if this video is "Created for Kids" / intends to target an audience +under the age of 16 (``is_created_for_kids`` in `Video filters API`_ ) +""" -# search-url -# - https://developers.dailymotion.com/tools/ -# - https://www.dailymotion.com/doc/api/obj-video.html +family_filter_map = { + 2: 'true', + 1: 'true', + 0: 'false', +} +"""By default, the family filter is turned on. 
Setting this parameter to +``false`` will stop filtering-out explicit content from searches and global +contexts (``family_filter`` in `Global API Parameters`_ ). +""" result_fields = [ 'allow_embed', @@ -53,27 +84,21 @@ result_fields = [ 'thumbnail_360_url', 'id', ] -search_url = ( - 'https://api.dailymotion.com/videos?' - 'fields={fields}&password_protected={password_protected}&private={private}&sort={sort}&limit={limit}' -).format( - fields=','.join(result_fields), - password_protected='false', - private='false', - sort='relevance', - limit=number_of_results, -) -iframe_src = "https://www.dailymotion.com/embed/video/{video_id}" +"""`Fields selection`_, by default, a few fields are returned. To request more +specific fields, the ``fields`` parameter is used with the list of fields +SearXNG needs in the response to build a video result list. +""" -# The request query filters by 'languages' & 'country', therefore instead of -# fetching only languages we need to fetch locales. -supported_languages_url = 'https://api.dailymotion.com/locales' -supported_languages_iso639: Set[str] = set() +search_url = 'https://api.dailymotion.com/videos?' +"""URL to retrieve a list of videos. +- `REST GET`_ +- `Global API Parameters`_ +- `Video filters API`_ +""" -def init(_engine_settings): - global supported_languages_iso639 - supported_languages_iso639 = set([language.split('_')[0] for language in supported_languages]) +iframe_src = "https://www.dailymotion.com/embed/video/{video_id}" +"""URL template to embed video in SearXNG's result list.""" def request(query, params): @@ -81,34 +106,42 @@ def request(query, params): if not query: return False - language = params['language'] - if language == 'all': - language = 'en-US' - locale = babel.Locale.parse(language, sep='-') + eng_region = traits.get_region(params['searxng_locale'], 'en_US') + eng_lang = traits.get_language(params['searxng_locale'], 'en') - language_iso639 = locale.language - if locale.language not in supported_languages_iso639: - language_iso639 = 'en' - - query_args = { + args = { 'search': query, - 'languages': language_iso639, + 'family_filter': family_filter_map.get(params['safesearch'], 'false'), + 'thumbnail_ratio': 'original', # original|widescreen|square + # https://developers.dailymotion.com/api/#video-filters + 'languages': eng_lang, 'page': params['pageno'], + 'password_protected': 'false', + 'private': 'false', + 'sort': 'relevance', + 'limit': number_of_results, + 'fields': ','.join(result_fields), } - if locale.territory: - localization = locale.language + '_' + locale.territory - if localization in supported_languages: - query_args['country'] = locale.territory + args.update(safesearch_params.get(params['safesearch'], {})) + + # Don't add localization and country arguments if the user does select a + # language (:de, :en, ..) 
+ + if len(params['searxng_locale'].split('-')) > 1: + # https://developers.dailymotion.com/api/#global-parameters + args['localization'] = eng_region + args['country'] = eng_region.split('_')[1] + # Insufficient rights for the `ams_country' parameter of route `GET /videos' + # 'ams_country': eng_region.split('_')[1], time_delta = time_delta_dict.get(params["time_range"]) if time_delta: created_after = datetime.now() - time_delta - query_args['created_after'] = datetime.timestamp(created_after) + args['created_after'] = datetime.timestamp(created_after) - query_str = urlencode(query_args) - params['url'] = search_url + '&' + query_str + safesearch_params.get(params['safesearch'], '') - params['raise_for_httperror'] = False + query_str = urlencode(args) + params['url'] = search_url + query_str return params @@ -123,7 +156,7 @@ def response(resp): if 'error' in search_res: raise SearxEngineAPIException(search_res['error'].get('message')) - raise_for_httperror(resp) + network.raise_for_httperror(resp) # parse results for res in search_res.get('list', []): @@ -167,7 +200,53 @@ def response(resp): return results -# get supported languages from their site -def _fetch_supported_languages(resp): - response_json = resp.json() - return [item['locale'] for item in response_json['list']] +def fetch_traits(engine_traits: EngineTraits): + """Fetch locales & languages from dailymotion. + + Locales fetched from `api/locales <https://api.dailymotion.com/locales>`_. + There are duplications in the locale codes returned from Dailymotion which + can be ignored:: + + en_EN --> en_GB, en_US + ar_AA --> ar_EG, ar_AE, ar_SA + + The language list `api/languages <https://api.dailymotion.com/languages>`_ + contains over 7000 *languages* codes (see PR1071_). We use only those + language codes that are used in the locales. + + .. 
_PR1071: https://github.com/searxng/searxng/pull/1071 + + """ + + resp = network.get('https://api.dailymotion.com/locales') + if not resp.ok: + print("ERROR: response from dailymotion/locales is not OK.") + + for item in resp.json()['list']: + eng_tag = item['locale'] + if eng_tag in ('en_EN', 'ar_AA'): + continue + try: + sxng_tag = region_tag(babel.Locale.parse(eng_tag)) + except babel.UnknownLocaleError: + print("ERROR: item unknown --> %s" % item) + continue + + conflict = engine_traits.regions.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.regions[sxng_tag] = eng_tag + + locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()] + + resp = network.get('https://api.dailymotion.com/languages') + if not resp.ok: + print("ERROR: response from dailymotion/languages is not OK.") + + for item in resp.json()['list']: + eng_tag = item['code'] + if eng_tag in locale_lang_list: + sxng_tag = language_tag(babel.Locale.parse(eng_tag)) + engine_traits.languages[sxng_tag] = eng_tag diff --git a/searx/engines/demo_offline.py b/searx/engines/demo_offline.py index aeb74f443..9d6e3b52d 100644 --- a/searx/engines/demo_offline.py +++ b/searx/engines/demo_offline.py @@ -63,7 +63,7 @@ def search(query, request_params): for row in result_list: entry = { 'query': query, - 'language': request_params['language'], + 'language': request_params['searxng_locale'], 'value': row.get("value"), # choose a result template or comment out to use the *default* 'template': 'key-value.html', diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 2a7956ca8..85e977bdb 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -1,71 +1,207 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""DuckDuckGo Lite +""" +DuckDuckGo Lite +~~~~~~~~~~~~~~~ """ -from json import loads - -from lxml.html import fromstring +from typing import TYPE_CHECKING +from urllib.parse import urlencode +import json +import babel +import lxml.html +from searx import ( + network, + locales, + redislib, +) +from searx import redisdb from searx.utils import ( - dict_subset, eval_xpath, eval_xpath_getindex, extract_text, - match_language, ) -from searx.network import get +from searx.enginelib.traits import EngineTraits +from searx.exceptions import SearxEngineAPIException + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits -# about about = { "website": 'https://lite.duckduckgo.com/lite/', "wikidata_id": 'Q12805', - "official_api_documentation": 'https://duckduckgo.com/api', "use_official_api": False, "require_api_key": False, "results": 'HTML', } +send_accept_language_header = True +"""DuckDuckGo-Lite tries to guess user's prefered language from the HTTP +``Accept-Language``. Optional the user can select a region filter (but not a +language). 
+""" + # engine dependent config categories = ['general', 'web'] paging = True -supported_languages_url = 'https://duckduckgo.com/util/u588.js' time_range_support = True -send_accept_language_header = True +safesearch = True # user can't select but the results are filtered -language_aliases = { - 'ar-SA': 'ar-XA', - 'es-419': 'es-XL', - 'ja': 'jp-JP', - 'ko': 'kr-KR', - 'sl-SI': 'sl-SL', - 'zh-TW': 'tzh-TW', - 'zh-HK': 'tzh-HK', -} +url = 'https://lite.duckduckgo.com/lite/' +# url_ping = 'https://duckduckgo.com/t/sl_l' time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} +form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'} -# search-url -url = 'https://lite.duckduckgo.com/lite/' -url_ping = 'https://duckduckgo.com/t/sl_l' -# match query's language to a region code that duckduckgo will accept -def get_region_code(lang, lang_list=None): - if lang == 'all': - return None +def cache_vqd(query, value): + """Caches a ``vqd`` value from a query. + + The vqd value depends on the query string and is needed for the follow up + pages or the images loaded by a XMLHttpRequest: + + - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...` + - DuckDuckGo Images: `https://duckduckgo.com/i.js??q=...&vqd=...` + + """ + c = redisdb.client() + if c: + logger.debug("cache vqd value: %s", value) + key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query) + c.set(key, value, ex=600) + + +def get_vqd(query, headers): + """Returns the ``vqd`` that fits to the *query*. If there is no ``vqd`` cached + (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the + response. + + """ + value = None + c = redisdb.client() + if c: + key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query) + value = c.get(key) + if value: + value = value.decode('utf-8') + logger.debug("re-use cached vqd value: %s", value) + return value - lang_code = match_language(lang, lang_list or [], language_aliases, 'wt-WT') - lang_parts = lang_code.split('-') + query_url = 'https://duckduckgo.com/?{query}&iar=images'.format(query=urlencode({'q': query})) + res = network.get(query_url, headers=headers) + content = res.text + if content.find('vqd=\'') == -1: + raise SearxEngineAPIException('Request failed') + value = content[content.find('vqd=\'') + 5 :] + value = value[: value.find('\'')] + logger.debug("new vqd value: %s", value) + cache_vqd(query, value) + return value - # country code goes first - return lang_parts[1].lower() + '-' + lang_parts[0].lower() + +def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'): + """Get DuckDuckGo's language identifier from SearXNG's locale. + + DuckDuckGo defines its lanaguages by region codes (see + :py:obj:`fetch_traits`). + + To get region and language of a DDG service use: + + .. code: python + + eng_region = traits.get_region(params['searxng_locale'], traits.all_locale) + eng_lang = get_ddg_lang(traits, params['searxng_locale']) + + It might confuse, but the ``l`` value of the cookie is what SearXNG calls + the *region*: + + .. code:: python + + # !ddi paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'} + params['cookies']['ad'] = eng_lang + params['cookies']['ah'] = eng_region + params['cookies']['l'] = eng_region + + .. hint:: + + `DDG-lite <https://lite.duckduckgo.com/lite>`__ does not offer a language + selection to the user, only a region can be selected by the user + (``eng_region`` from the example above). 
DDG-lite stores the selected + region in a cookie:: + + params['cookies']['kl'] = eng_region # 'ar-es' + + """ + return eng_traits.custom['lang_region'].get(sxng_locale, eng_traits.get_language(sxng_locale, default)) + + +ddg_reg_map = { + 'tw-tzh': 'zh_TW', + 'hk-tzh': 'zh_HK', + 'ct-ca': 'skip', # ct-ca and es-ca both map to ca_ES + 'es-ca': 'ca_ES', + 'id-en': 'id_ID', + 'no-no': 'nb_NO', + 'jp-jp': 'ja_JP', + 'kr-kr': 'ko_KR', + 'xa-ar': 'ar_SA', + 'sl-sl': 'sl_SI', + 'th-en': 'th_TH', + 'vn-en': 'vi_VN', +} + +ddg_lang_map = { + # use ar --> ar_EG (Egypt's arabic) + "ar_DZ": 'lang_region', + "ar_JO": 'lang_region', + "ar_SA": 'lang_region', + # use bn --> bn_BD + 'bn_IN': 'lang_region', + # use de --> de_DE + 'de_CH': 'lang_region', + # use en --> en_US, + 'en_AU': 'lang_region', + 'en_CA': 'lang_region', + 'en_GB': 'lang_region', + # Esperanto + 'eo_XX': 'eo', + # use es --> es_ES, + 'es_AR': 'lang_region', + 'es_CL': 'lang_region', + 'es_CO': 'lang_region', + 'es_CR': 'lang_region', + 'es_EC': 'lang_region', + 'es_MX': 'lang_region', + 'es_PE': 'lang_region', + 'es_UY': 'lang_region', + 'es_VE': 'lang_region', + # use fr --> rf_FR + 'fr_CA': 'lang_region', + 'fr_CH': 'lang_region', + 'fr_BE': 'lang_region', + # use nl --> nl_NL + 'nl_BE': 'lang_region', + # use pt --> pt_PT + 'pt_BR': 'lang_region', + # skip these languages + 'od_IN': 'skip', + 'io_XX': 'skip', + 'tokipona_XX': 'skip', +} def request(query, params): + eng_region = traits.get_region(params['searxng_locale'], traits.all_locale) + # eng_lang = get_ddg_lang(traits, params['searxng_locale']) + params['url'] = url params['method'] = 'POST' - params['data']['q'] = query # The API is not documented, so we do some reverse engineering and emulate @@ -88,23 +224,19 @@ def request(query, params): params['data']['s'] = offset params['data']['dc'] = offset + 1 + # request needs a vqd argument + params['data']['vqd'] = get_vqd(query, params["headers"]) + # initial page does not have additional data in the input form if params['pageno'] > 1: - # request the second page (and more pages) needs 'o' and 'api' arguments - params['data']['o'] = 'json' - params['data']['api'] = 'd.js' - # initial page does not have additional data in the input form - if params['pageno'] > 2: - # request the third page (and more pages) some more arguments - params['data']['nextParams'] = '' - params['data']['v'] = '' - params['data']['vqd'] = '' + params['data']['o'] = form_data.get('o', 'json') + params['data']['api'] = form_data.get('api', 'd.js') + params['data']['nextParams'] = form_data.get('nextParams', '') + params['data']['v'] = form_data.get('v', 'l') - region_code = get_region_code(params['language'], supported_languages) - if region_code: - params['data']['kl'] = region_code - params['cookies']['kl'] = region_code + params['data']['kl'] = eng_region + params['cookies']['kl'] = eng_region params['data']['df'] = '' if params['time_range'] in time_range_dict: @@ -116,26 +248,40 @@ def request(query, params): return params -# get response from search-request def response(resp): - headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie']) - get(url_ping, headers=headers_ping) - if resp.status_code == 303: return [] results = [] - doc = fromstring(resp.text) + doc = lxml.html.fromstring(resp.text) result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table') - if not len(result_table) >= 3: + + if len(result_table) == 2: + # some locales (at least China) does not have a "next page" button 
and + # the layout of the HTML tables is different. + result_table = result_table[1] + elif not len(result_table) >= 3: # no more results return [] - result_table = result_table[2] + else: + result_table = result_table[2] + # update form data from response + form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..') + if len(form): + + form = form[0] + form_data['v'] = eval_xpath(form, '//input[@name="v"]/@value')[0] + form_data['api'] = eval_xpath(form, '//input[@name="api"]/@value')[0] + form_data['o'] = eval_xpath(form, '//input[@name="o"]/@value')[0] + logger.debug('form_data: %s', form_data) + + value = eval_xpath(form, '//input[@name="vqd"]/@value')[0] + query = resp.search_params['data']['q'] + cache_vqd(query, value) tr_rows = eval_xpath(result_table, './/tr') - # In the last <tr> is the form of the 'previous/next page' links tr_rows = tr_rows[:-1] @@ -172,15 +318,105 @@ def response(resp): return results -# get supported languages from their site -def _fetch_supported_languages(resp): +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages & regions from DuckDuckGo. + + SearXNG's ``all`` locale maps DuckDuckGo's "Alle regions" (``wt-wt``). + DuckDuckGo's language "Browsers prefered language" (``wt_WT``) makes no + sense in a SearXNG request since SearXNG's ``all`` will not add a + ``Accept-Language`` HTTP header. The value in ``engine_traits.all_locale`` + is ``wt-wt`` (the region). + + Beside regions DuckDuckGo also defines its lanaguages by region codes. By + example these are the english languages in DuckDuckGo: + + - en_US + - en_AU + - en_CA + - en_GB + + The function :py:obj:`get_ddg_lang` evaluates DuckDuckGo's language from + SearXNG's locale. - # response is a js file with regions as an embedded object - response_page = resp.text - response_page = response_page[response_page.find('regions:{') + 8 :] - response_page = response_page[: response_page.find('}') + 1] + """ + # pylint: disable=too-many-branches, too-many-statements + # fetch regions + + engine_traits.all_locale = 'wt-wt' + + # updated from u588 to u661 / should be updated automatically? 
+ resp = network.get('https://duckduckgo.com/util/u661.js') + + if not resp.ok: + print("ERROR: response from DuckDuckGo is not OK.") + + pos = resp.text.find('regions:{') + 8 + js_code = resp.text[pos:] + pos = js_code.find('}') + 1 + regions = json.loads(js_code[:pos]) + + for eng_tag, name in regions.items(): + + if eng_tag == 'wt-wt': + engine_traits.all_locale = 'wt-wt' + continue + + region = ddg_reg_map.get(eng_tag) + if region == 'skip': + continue - regions_json = loads(response_page) - supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys()) + if not region: + eng_territory, eng_lang = eng_tag.split('-') + region = eng_lang + '_' + eng_territory.upper() - return list(supported_languages) + try: + sxng_tag = locales.region_tag(babel.Locale.parse(region)) + except babel.UnknownLocaleError: + print("ERROR: %s (%s) -> %s is unknown by babel" % (name, eng_tag, region)) + continue + + conflict = engine_traits.regions.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.regions[sxng_tag] = eng_tag + + # fetch languages + + engine_traits.custom['lang_region'] = {} + + pos = resp.text.find('languages:{') + 10 + js_code = resp.text[pos:] + pos = js_code.find('}') + 1 + js_code = '{"' + js_code[1:pos].replace(':', '":').replace(',', ',"') + languages = json.loads(js_code) + + for eng_lang, name in languages.items(): + + if eng_lang == 'wt_WT': + continue + + babel_tag = ddg_lang_map.get(eng_lang, eng_lang) + if babel_tag == 'skip': + continue + + try: + + if babel_tag == 'lang_region': + sxng_tag = locales.region_tag(babel.Locale.parse(eng_lang)) + engine_traits.custom['lang_region'][sxng_tag] = eng_lang + continue + + sxng_tag = locales.language_tag(babel.Locale.parse(babel_tag)) + + except babel.UnknownLocaleError: + print("ERROR: language %s (%s) is unknown by babel" % (name, eng_lang)) + continue + + conflict = engine_traits.languages.get(sxng_tag) + if conflict: + if conflict != eng_lang: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang)) + continue + engine_traits.languages[sxng_tag] = eng_lang diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 7ed0de35c..39fed87e7 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -1,22 +1,33 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""DuckDuckGo (Instant Answer API) +""" +DuckDuckGo Instant Answer API +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The `DDG-API <https://duckduckgo.com/api>`__ is no longer documented but from +reverse engineering we can see that some services (e.g. instant answers) still +in use from the DDG search engine. + +As far we can say the *instant answers* API does not support languages, or at +least we could not find out how language support should work. It seems that +most of the features are based on English terms. 
""" -import json +from typing import TYPE_CHECKING + from urllib.parse import urlencode, urlparse, urljoin from lxml import html from searx.data import WIKIDATA_UNITS -from searx.engines.duckduckgo import language_aliases -from searx.engines.duckduckgo import ( # pylint: disable=unused-import - _fetch_supported_languages, - supported_languages_url, -) -from searx.utils import extract_text, html_to_text, match_language, get_string_replaces_function +from searx.utils import extract_text, html_to_text, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom +if TYPE_CHECKING: + import logging + + logger: logging.Logger + # about about = { "website": 'https://duckduckgo.com/', @@ -37,7 +48,7 @@ replace_http_by_https = get_string_replaces_function({'http:': 'https:'}) def is_broken_text(text): - """duckduckgo may return something like "<a href="xxxx">http://somewhere Related website<a/>" + """duckduckgo may return something like ``<a href="xxxx">http://somewhere Related website<a/>`` The href URL is broken, the "Related website" may contains some HTML. @@ -62,8 +73,6 @@ def result_to_text(text, htmlResult): def request(query, params): params['url'] = URL.format(query=urlencode({'q': query})) - language = match_language(params['language'], supported_languages, language_aliases) - language = language.split('-')[0] return params @@ -71,7 +80,7 @@ def response(resp): # pylint: disable=too-many-locals, too-many-branches, too-many-statements results = [] - search_res = json.loads(resp.text) + search_res = resp.json() # search_res.get('Entity') possible values (not exhaustive) : # * continent / country / department / location / waterfall @@ -235,7 +244,7 @@ def unit_to_str(unit): def area_to_str(area): - """parse {'unit': 'http://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}""" + """parse ``{'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}``""" unit = unit_to_str(area.get('unit')) if unit is not None: try: diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py index 19f649ef4..d8a6f1340 100644 --- a/searx/engines/duckduckgo_images.py +++ b/searx/engines/duckduckgo_images.py @@ -1,26 +1,30 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """ - DuckDuckGo (Images) +DuckDuckGo Images +~~~~~~~~~~~~~~~~~ """ -from json import loads +from typing import TYPE_CHECKING from urllib.parse import urlencode -from searx.exceptions import SearxEngineAPIException -from searx.engines.duckduckgo import get_region_code -from searx.engines.duckduckgo import ( # pylint: disable=unused-import - _fetch_supported_languages, - supported_languages_url, + +from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import +from searx.engines.duckduckgo import ( + get_ddg_lang, + get_vqd, ) -from searx.network import get +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits # about about = { "website": 'https://duckduckgo.com/', "wikidata_id": 'Q12805', - "official_api_documentation": { - 'url': 'https://duckduckgo.com/api', - 'comment': 'but images are not supported', - }, "use_official_api": False, "require_api_key": False, "results": 'JSON (site requires js to get images)', @@ -32,70 +36,64 @@ paging = True safesearch = True send_accept_language_header = True -# search-url -images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}' -site_url = 
'https://duckduckgo.com/?{query}&iar=images&iax=1&ia=images' +safesearch_cookies = {0: '-2', 1: None, 2: '1'} +safesearch_args = {0: '1', 1: None, 2: '1'} -# run query in site to get vqd number needed for requesting images -# TODO: find a way to get this number without an extra request (is it a hash of the query?) -def get_vqd(query, headers): - query_url = site_url.format(query=urlencode({'q': query})) - res = get(query_url, headers=headers) - content = res.text - if content.find('vqd=\'') == -1: - raise SearxEngineAPIException('Request failed') - vqd = content[content.find('vqd=\'') + 5 :] - vqd = vqd[: vqd.find('\'')] - return vqd +def request(query, params): + eng_region = traits.get_region(params['searxng_locale'], traits.all_locale) + eng_lang = get_ddg_lang(traits, params['searxng_locale']) -# do search-request -def request(query, params): - # to avoid running actual external requests when testing - if 'is_test' not in params: - vqd = get_vqd(query, params['headers']) - else: - vqd = '12345' + args = { + 'q': query, + 'o': 'json', + # 'u': 'bing', + 'l': eng_region, + 'vqd': get_vqd(query, params["headers"]), + } - offset = (params['pageno'] - 1) * 50 + if params['pageno'] > 1: + args['s'] = (params['pageno'] - 1) * 100 - safesearch = params['safesearch'] - 1 + params['cookies']['ad'] = eng_lang # zh_CN + params['cookies']['ah'] = eng_region # "us-en,de-de" + params['cookies']['l'] = eng_region # "hk-tzh" + logger.debug("cookies: %s", params['cookies']) - region_code = get_region_code(params['language'], lang_list=supported_languages) - if region_code: - params['url'] = images_url.format( - query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd - ) - else: - params['url'] = images_url.format(query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd) + safe_search = safesearch_cookies.get(params['safesearch']) + if safe_search is not None: + params['cookies']['p'] = safe_search # "-2", "1" + safe_search = safesearch_args.get(params['safesearch']) + if safe_search is not None: + args['p'] = safe_search # "-1", "1" + + args = urlencode(args) + params['url'] = 'https://duckduckgo.com/i.js?{args}&f={f}'.format(args=args, f=',,,,,') + + params['headers']['Accept'] = 'application/json, text/javascript, */*; q=0.01' + params['headers']['Referer'] = 'https://duckduckgo.com/' + params['headers']['X-Requested-With'] = 'XMLHttpRequest' + logger.debug("headers: %s", params['headers']) return params -# get response from search-request def response(resp): results = [] + res_json = resp.json() - content = resp.text - res_json = loads(content) - - # parse results for result in res_json['results']: - title = result['title'] - url = result['url'] - thumbnail = result['thumbnail'] - image = result['image'] - - # append result results.append( { 'template': 'images.html', - 'title': title, + 'title': result['title'], 'content': '', - 'thumbnail_src': thumbnail, - 'img_src': image, - 'url': url, + 'thumbnail_src': result['thumbnail'], + 'img_src': result['image'], + 'url': result['url'], + 'img_format': '%s x %s' % (result['width'], result['height']), + 'source': result['source'], } ) diff --git a/searx/engines/duckduckgo_weather.py b/searx/engines/duckduckgo_weather.py index 0540cbcb5..4f0ce1b49 100644 --- a/searx/engines/duckduckgo_weather.py +++ b/searx/engines/duckduckgo_weather.py @@ -1,13 +1,29 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""DuckDuckGo Weather""" +""" +DuckDuckGo Weather +~~~~~~~~~~~~~~~~~~ +""" +from 
typing import TYPE_CHECKING from json import loads from urllib.parse import quote from datetime import datetime from flask_babel import gettext +from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import +from searx.engines.duckduckgo import get_ddg_lang +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + + about = { "website": 'https://duckduckgo.com/', "wikidata_id": 'Q12805', @@ -17,9 +33,11 @@ about = { "results": "JSON", } -categories = ["others"] +send_accept_language_header = True -url = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}" +# engine dependent config +categories = ["others"] +URL = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}" def generate_condition_table(condition): @@ -72,8 +90,17 @@ def generate_day_table(day): def request(query, params): - params["url"] = url.format(query=quote(query), lang=params['language'].split('-')[0]) + eng_region = traits.get_region(params['searxng_locale'], traits.all_locale) + eng_lang = get_ddg_lang(traits, params['searxng_locale']) + + # !ddw paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'} + params['cookies']['ad'] = eng_lang + params['cookies']['ah'] = eng_region + params['cookies']['l'] = eng_region + logger.debug("cookies: %s", params['cookies']) + + params["url"] = URL.format(query=quote(query), lang=eng_lang.split('_')[0]) return params diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py index 856c93710..f0cb6a794 100644 --- a/searx/engines/gentoo.py +++ b/searx/engines/gentoo.py @@ -25,6 +25,7 @@ base_url = 'https://wiki.gentoo.org' # xpath queries xpath_results = '//ul[@class="mw-search-results"]/li' xpath_link = './/div[@class="mw-search-result-heading"]/a' +xpath_content = './/div[@class="searchresult"]' # cut 'en' from 'en-US', 'de' from 'de-CH', and so on @@ -77,8 +78,6 @@ main_langs = { 'uk': 'Українська', 'zh': '简体中文', } -supported_languages = dict(lang_urls, **main_langs) - # do search-request def request(query, params): @@ -118,7 +117,8 @@ def response(resp): link = result.xpath(xpath_link)[0] href = urljoin(base_url, link.attrib.get('href')) title = extract_text(link) + content = extract_text(result.xpath(xpath_content)) - results.append({'url': href, 'title': title}) + results.append({'url': href, 'title': title, 'content': content}) return results diff --git a/searx/engines/google.py b/searx/engines/google.py index bdb351432..708068f3a 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -1,34 +1,39 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This is the implementation of the google WEB engine. Some of this -implementations are shared by other engines: +"""This is the implementation of the Google WEB engine. Some of this +implementations (manly the :py:obj:`get_google_info`) are shared by other +engines: - :ref:`google images engine` - :ref:`google news engine` - :ref:`google videos engine` - -The google WEB engine itself has a special setup option: - -.. code:: yaml - - - name: google - ... - use_mobile_ui: false - -``use_mobile_ui``: (default: ``false``) - Enables to use *mobile endpoint* to bypass the google blocking (see - :issue:`159`). On the mobile UI of Google Search, the button :guilabel:`More - results` is not affected by Google rate limiting and we can still do requests - while actively blocked by the original Google search. 
By activate - ``use_mobile_ui`` this behavior is simulated by adding the parameter - ``async=use_ac:true,_fmt:pc`` to the :py:func:`request`. +- :ref:`google scholar engine` +- :ref:`google autocomplete` """ +from typing import TYPE_CHECKING + +import re from urllib.parse import urlencode from lxml import html -from searx.utils import match_language, extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex +import babel +import babel.core +import babel.languages + +from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex +from searx.locales import language_tag, region_tag, get_offical_locales +from searx import network from searx.exceptions import SearxEngineCaptchaException +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + # about about = { @@ -45,64 +50,6 @@ categories = ['general', 'web'] paging = True time_range_support = True safesearch = True -send_accept_language_header = True -use_mobile_ui = False -supported_languages_url = 'https://www.google.com/preferences?#languages' - -# based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests -google_domains = { - 'BG': 'google.bg', # Bulgaria - 'CZ': 'google.cz', # Czech Republic - 'DE': 'google.de', # Germany - 'DK': 'google.dk', # Denmark - 'AT': 'google.at', # Austria - 'CH': 'google.ch', # Switzerland - 'GR': 'google.gr', # Greece - 'AU': 'google.com.au', # Australia - 'CA': 'google.ca', # Canada - 'GB': 'google.co.uk', # United Kingdom - 'ID': 'google.co.id', # Indonesia - 'IE': 'google.ie', # Ireland - 'IN': 'google.co.in', # India - 'MY': 'google.com.my', # Malaysia - 'NZ': 'google.co.nz', # New Zealand - 'PH': 'google.com.ph', # Philippines - 'SG': 'google.com.sg', # Singapore - 'US': 'google.com', # United States (google.us) redirects to .com - 'ZA': 'google.co.za', # South Africa - 'AR': 'google.com.ar', # Argentina - 'CL': 'google.cl', # Chile - 'ES': 'google.es', # Spain - 'MX': 'google.com.mx', # Mexico - 'EE': 'google.ee', # Estonia - 'FI': 'google.fi', # Finland - 'BE': 'google.be', # Belgium - 'FR': 'google.fr', # France - 'IL': 'google.co.il', # Israel - 'HR': 'google.hr', # Croatia - 'HU': 'google.hu', # Hungary - 'IT': 'google.it', # Italy - 'JP': 'google.co.jp', # Japan - 'KR': 'google.co.kr', # South Korea - 'LT': 'google.lt', # Lithuania - 'LV': 'google.lv', # Latvia - 'NO': 'google.no', # Norway - 'NL': 'google.nl', # Netherlands - 'PL': 'google.pl', # Poland - 'BR': 'google.com.br', # Brazil - 'PT': 'google.pt', # Portugal - 'RO': 'google.ro', # Romania - 'RU': 'google.ru', # Russia - 'SK': 'google.sk', # Slovakia - 'SI': 'google.si', # Slovenia - 'SE': 'google.se', # Sweden - 'TH': 'google.co.th', # Thailand - 'TR': 'google.com.tr', # Turkey - 'UA': 'google.com.ua', # Ukraine - 'CN': 'google.com.hk', # There is no google.cn, we use .com.hk for zh-CN - 'HK': 'google.com.hk', # Hong Kong - 'TW': 'google.com.tw', # Taiwan -} time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} @@ -112,50 +59,50 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'} # specific xpath variables # ------------------------ -results_xpath = './/div[@data-sokoban-container]' +results_xpath = './/div[contains(@jscontroller, "SC7lYd")]' title_xpath = './/a/h3[1]' href_xpath = './/a[h3]/@href' -content_xpath = './/div[@data-content-feature=1]' - -# google *sections* are no usual *results*, we ignore them -g_section_with_header = './g-section-with-header' - +content_xpath = 
'.//div[@data-sncf]' # Suggestions are links placed in a *card-section*, we extract only the text # from the links not the links itself. suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a' +# UI_ASYNC = 'use_ac:true,_fmt:html' # returns a HTTP 500 when user search for +# # celebrities like '!google natasha allegri' +# # or '!google chris evans' +UI_ASYNC = 'use_ac:true,_fmt:prog' +"""Format of the response from UI's async request.""" + -def get_lang_info(params, lang_list, custom_aliases, supported_any_language): - """Composing various language properties for the google engines. +def get_google_info(params, eng_traits): + """Composing various (language) properties for the google engines (:ref:`google + API`). This function is called by the various google engines (:ref:`google web engine`, :ref:`google images engine`, :ref:`google news engine` and :ref:`google videos engine`). - :param dict param: request parameters of the engine - - :param list lang_list: list of supported languages of the engine - :py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>` - - :param dict lang_list: custom aliases for non standard language codes - (used when calling :py:func:`searx.utils.match_language`) + :param dict param: Request parameters of the engine. At least + a ``searxng_locale`` key should be in the dictionary. - :param bool supported_any_language: When a language is not specified, the - language interpretation is left up to Google to decide how the search - results should be delivered. This argument is ``True`` for the google - engine and ``False`` for the other engines (google-images, -news, - -scholar, -videos). + :param eng_traits: Engine's traits fetched from google preferences + (:py:obj:`searx.enginelib.traits.EngineTraits`) :rtype: dict :returns: Py-Dictionary with the key/value pairs: language: - Return value from :py:func:`searx.utils.match_language` + The language code that is used by google (e.g. ``lang_en`` or + ``lang_zh-TW``) country: - The country code (e.g. US, AT, CA, FR, DE ..) + The country code that is used by google (e.g. ``US`` or ``TW``) + + locale: + A instance of :py:obj:`babel.core.Locale` build from the + ``searxng_locale`` value. subdomain: Google subdomain :py:obj:`google_domains` that fits to the country @@ -165,52 +112,67 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): Py-Dictionary with additional request arguments (can be passed to :py:func:`urllib.parse.urlencode`). + - ``hl`` parameter: specifies the interface language of user interface. + - ``lr`` parameter: restricts search results to documents written in + a particular language. + - ``cr`` parameter: restricts search results to documents + originating in a particular country. + - ``ie`` parameter: sets the character encoding scheme that should + be used to interpret the query string ('utf8'). + - ``oe`` parameter: sets the character encoding scheme that should + be used to decode the XML result ('utf8'). + headers: Py-Dictionary with additional HTTP headers (can be passed to request's headers) + + - ``Accept: '*/*`` + """ + ret_val = { 'language': None, 'country': None, 'subdomain': None, 'params': {}, 'headers': {}, + 'cookies': {}, + 'locale': None, } - # language ... 
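The docstring above lists the keys of the returned dictionary; to make that shape concrete, a rough sketch of what a Google engine might get back for ``searxng_locale = 'de-CH'`` (all values are illustrative, the real ones come from the fetched traits)::

    google_info = get_google_info(params, traits)
    # roughly:
    # {
    #     'language': 'lang_de',
    #     'country': 'CH',
    #     'locale': babel.Locale('de', territory='CH'),
    #     'subdomain': 'www.google.ch',
    #     'params': {'hl': 'de', 'lr': 'lang_de', 'cr': 'countryCH',
    #                'gl': 'CH', 'ie': 'utf8', 'oe': 'utf8'},
    #     'headers': {'Accept': '*/*'},
    #     'cookies': {'CONSENT': 'YES+'},
    # }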
+ sxng_locale = params.get('searxng_locale', 'all') + try: + locale = babel.Locale.parse(sxng_locale, sep='-') + except babel.core.UnknownLocaleError: + locale = None - _lang = params['language'] - _any_language = _lang.lower() == 'all' - if _any_language: - _lang = 'en-US' - language = match_language(_lang, lang_list, custom_aliases) - ret_val['language'] = language + eng_lang = eng_traits.get_language(sxng_locale, 'lang_en') + lang_code = eng_lang.split('_')[-1] # lang_zh-TW --> zh-TW / lang_en --> en + country = eng_traits.get_region(sxng_locale, eng_traits.all_locale) - # country ... + # Test zh_hans & zh_hant --> in the topmost links in the result list of list + # TW and HK you should a find wiktionary.org zh_hant link. In the result + # list of zh-CN should not be no hant link instead you should find + # zh.m.wikipedia.org/zh somewhere in the top. - _l = _lang.split('-') - if len(_l) == 2: - country = _l[1] - else: - country = _l[0].upper() - if country == 'EN': - country = 'US' - ret_val['country'] = country - - # subdomain ... - - ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com') - - # params & headers + # '!go 日 :zh-TW' --> https://zh.m.wiktionary.org/zh-hant/%E6%97%A5 + # '!go 日 :zh-CN' --> https://zh.m.wikipedia.org/zh/%E6%97%A5 - lang_country = '%s-%s' % (language, country) # (en-US, en-EN, de-DE, de-AU, fr-FR ..) + ret_val['language'] = eng_lang + ret_val['country'] = country + ret_val['locale'] = locale + ret_val['subdomain'] = eng_traits.custom['supported_domains'].get(country.upper(), 'www.google.com') # hl parameter: - # https://developers.google.com/custom-search/docs/xml_results#hlsp The - # Interface Language: + # The hl parameter specifies the interface language (host language) of + # your user interface. To improve the performance and the quality of your + # search results, you are strongly encouraged to set this parameter + # explicitly. + # https://developers.google.com/custom-search/docs/xml_results#hlsp + # The Interface Language: # https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages - ret_val['params']['hl'] = lang_list.get(lang_country, language) + ret_val['params']['hl'] = lang_code # lr parameter: # The lr (language restrict) parameter restricts search results to @@ -218,22 +180,72 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): # https://developers.google.com/custom-search/docs/xml_results#lrsp # Language Collection Values: # https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections + # + # To select 'all' languages an empty 'lr' value is used. + # + # Different to other google services, Google Schloar supports to select more + # than one language. The languages are seperated by a pipe '|' (logical OR). + # By example: &lr=lang_zh-TW%7Clang_de selects articles written in + # traditional chinese OR german language. - if _any_language and supported_any_language: + ret_val['params']['lr'] = eng_lang + if sxng_locale == 'all': + ret_val['params']['lr'] = '' - # interpretation is left up to Google (based on whoogle) - # - # - add parameter ``source=lnt`` - # - don't use parameter ``lr`` - # - don't add a ``Accept-Language`` HTTP header. + # cr parameter: + # The cr parameter restricts search results to documents originating in a + # particular country. 
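    # e.g. cr=countryCH when the SearXNG locale is de-CH; left empty when the
    # locale is 'all' (see the assignment below).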
+ # https://developers.google.com/custom-search/docs/xml_results#crsp - ret_val['params']['source'] = 'lnt' + ret_val['params']['cr'] = 'country' + country + if sxng_locale == 'all': + ret_val['params']['cr'] = '' - else: + # gl parameter: (mandatory by Geeogle News) + # The gl parameter value is a two-letter country code. For WebSearch + # results, the gl parameter boosts search results whose country of origin + # matches the parameter value. See the Country Codes section for a list of + # valid values. + # Specifying a gl parameter value in WebSearch requests should improve the + # relevance of results. This is particularly true for international + # customers and, even more specifically, for customers in English-speaking + # countries other than the United States. + # https://developers.google.com/custom-search/docs/xml_results#glsp + + ret_val['params']['gl'] = country + + # ie parameter: + # The ie parameter sets the character encoding scheme that should be used + # to interpret the query string. The default ie value is latin1. + # https://developers.google.com/custom-search/docs/xml_results#iesp + + ret_val['params']['ie'] = 'utf8' - # restricts search results to documents written in a particular - # language. - ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language) + # oe parameter: + # The oe parameter sets the character encoding scheme that should be used + # to decode the XML result. The default oe value is latin1. + # https://developers.google.com/custom-search/docs/xml_results#oesp + + ret_val['params']['oe'] = 'utf8' + + # num parameter: + # The num parameter identifies the number of search results to return. + # The default num value is 10, and the maximum value is 20. If you request + # more than 20 results, only 20 results will be returned. + # https://developers.google.com/custom-search/docs/xml_results#numsp + + # HINT: seems to have no effect (tested in google WEB & Images) + # ret_val['params']['num'] = 20 + + # HTTP headers + + ret_val['headers']['Accept'] = '*/*' + + # Cookies + + # - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746 + # - https://github.com/searxng/searxng/issues/1555 + ret_val['cookies']['CONSENT'] = "YES+" return ret_val @@ -245,33 +257,34 @@ def detect_google_sorry(resp): def request(query, params): """Google search request""" - + # pylint: disable=line-too-long offset = (params['pageno'] - 1) * 10 - - lang_info = get_lang_info(params, supported_languages, language_aliases, True) - - additional_parameters = {} - if use_mobile_ui: - additional_parameters = { - 'asearch': 'arc', - 'async': 'use_ac:true,_fmt:prog', - } + google_info = get_google_info(params, traits) # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium query_url = ( 'https://' - + lang_info['subdomain'] + + google_info['subdomain'] + '/search' + "?" 
+ urlencode( { 'q': query, - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - 'start': offset, + **google_info['params'], 'filter': '0', - **additional_parameters, + 'start': offset, + # 'vet': '12ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0QxK8CegQIARAC..i', + # 'ved': '2ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0Q_skCegQIARAG', + # 'cs' : 1, + # 'sa': 'N', + # 'yv': 3, + # 'prmd': 'vin', + # 'ei': 'GASaY6TxOcy_xc8PtYeY6AE', + # 'sa': 'N', + # 'sstk': 'AcOHfVkD7sWCSAheZi-0tx_09XDO55gTWY0JNq3_V26cNN-c8lfD45aZYPI8s_Bqp8s57AHz5pxchDtAGCA_cikAWSjy9kw3kgg' + # formally known as use_mobile_ui + 'asearch': 'arc', + 'async': UI_ASYNC, } ) ) @@ -282,25 +295,38 @@ def request(query, params): query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]}) params['url'] = query_url - params['cookies']['CONSENT'] = "YES+" - params['headers'].update(lang_info['headers']) - if use_mobile_ui: - params['headers']['Accept'] = '*/*' - else: - params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - + params['cookies'] = google_info['cookies'] + params['headers'].update(google_info['headers']) return params +# =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;data:image/jpeg;base64,/9j/4AAQSkZJRgABA +# ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26; +RE_DATA_IMAGE = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);') + + +def _parse_data_images(dom): + data_image_map = {} + for img_id, data_image in RE_DATA_IMAGE.findall(dom.text_content()): + end_pos = data_image.rfind('=') + if end_pos > 0: + data_image = data_image[: end_pos + 1] + data_image_map[img_id] = data_image + logger.debug('data:image objects --> %s', list(data_image_map.keys())) + return data_image_map + + def response(resp): """Get response from google's search request""" - + # pylint: disable=too-many-branches, too-many-statements detect_google_sorry(resp) results = [] # convert the text to dom dom = html.fromstring(resp.text) + data_image_map = _parse_data_images(dom) + # results --> answer answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]') if answer_list: @@ -309,25 +335,9 @@ def response(resp): else: logger.debug("did not find 'answer'") - # results --> number_of_results - if not use_mobile_ui: - try: - _txt = eval_xpath_getindex(dom, '//div[@id="result-stats"]//text()', 0) - _digit = ''.join([n for n in _txt if n.isdigit()]) - number_of_results = int(_digit) - results.append({'number_of_results': number_of_results}) - except Exception as e: # pylint: disable=broad-except - logger.debug("did not 'number_of_results'") - logger.error(e, exc_info=True) - # parse results - for result in eval_xpath_list(dom, results_xpath): - - # google *sections* - if extract_text(eval_xpath(result, g_section_with_header)): - logger.debug("ignoring <g-section-with-header>") - continue + for result in eval_xpath_list(dom, results_xpath): # pylint: disable=too-many-nested-blocks try: title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None) @@ -336,16 +346,30 @@ def response(resp): logger.debug('ignoring item from the result_xpath list: missing title') continue title = extract_text(title_tag) + url = eval_xpath_getindex(result, href_xpath, 0, None) if url is None: + logger.debug('ignoring item from the result_xpath list: missing url of title "%s"', title) continue - content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True) - if content is None: + + content_nodes = eval_xpath(result, content_xpath) + content = extract_text(content_nodes) + + if 
not content: logger.debug('ignoring item from the result_xpath list: missing content of title "%s"', title) continue - logger.debug('add link to results: %s', title) - results.append({'url': url, 'title': title, 'content': content}) + img_src = content_nodes[0].xpath('.//img/@src') + if img_src: + img_src = img_src[0] + if img_src.startswith('data:image'): + img_id = content_nodes[0].xpath('.//img/@id') + if img_id: + img_src = data_image_map.get(img_id[0]) + else: + img_src = None + + results.append({'url': url, 'title': title, 'content': content, 'img_src': img_src}) except Exception as e: # pylint: disable=broad-except logger.error(e, exc_info=True) @@ -361,15 +385,107 @@ def response(resp): # get supported languages from their site -def _fetch_supported_languages(resp): - ret_val = {} + + +skip_countries = [ + # official language of google-country not in google-languages + 'AL', # Albanien (sq) + 'AZ', # Aserbaidschan (az) + 'BD', # Bangladesch (bn) + 'BN', # Brunei Darussalam (ms) + 'BT', # Bhutan (dz) + 'ET', # Äthiopien (am) + 'GE', # Georgien (ka, os) + 'GL', # Grönland (kl) + 'KH', # Kambodscha (km) + 'LA', # Laos (lo) + 'LK', # Sri Lanka (si, ta) + 'ME', # Montenegro (sr) + 'MK', # Nordmazedonien (mk, sq) + 'MM', # Myanmar (my) + 'MN', # Mongolei (mn) + 'MV', # Malediven (dv) // dv_MV is unknown by babel + 'MY', # Malaysia (ms) + 'NP', # Nepal (ne) + 'TJ', # Tadschikistan (tg) + 'TM', # Turkmenistan (tk) + 'UZ', # Usbekistan (uz) +] + + +def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True): + """Fetch languages from Google.""" + # pylint: disable=import-outside-toplevel, too-many-branches + + engine_traits.custom['supported_domains'] = {} + + resp = network.get('https://www.google.com/preferences') + if not resp.ok: + raise RuntimeError("Response from Google's preferences is not OK.") + dom = html.fromstring(resp.text) - radio_buttons = eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]') + # supported language codes - for x in radio_buttons: - name = x.get("data-name") - code = x.get("value").split('_')[-1] - ret_val[code] = {"name": name} + lang_map = {'no': 'nb'} + for x in eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]'): - return ret_val + eng_lang = x.get("value").split('_')[-1] + try: + locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-') + except babel.UnknownLocaleError: + print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang)) + continue + sxng_lang = language_tag(locale) + + conflict = engine_traits.languages.get(sxng_lang) + if conflict: + if conflict != eng_lang: + print("CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang)) + continue + engine_traits.languages[sxng_lang] = 'lang_' + eng_lang + + # alias languages + engine_traits.languages['zh'] = 'lang_zh-CN' + + # supported region codes + + for x in eval_xpath_list(dom, '//*[@name="region"]/..//input[@name="region"]'): + eng_country = x.get("value") + + if eng_country in skip_countries: + continue + if eng_country == 'ZZ': + engine_traits.all_locale = 'ZZ' + continue + + sxng_locales = get_offical_locales(eng_country, engine_traits.languages.keys(), regional=True) + + if not sxng_locales: + print("ERROR: can't map from google country %s (%s) to a babel region." 
% (x.get('data-name'), eng_country)) + continue + + for sxng_locale in sxng_locales: + engine_traits.regions[region_tag(sxng_locale)] = eng_country + + # alias regions + engine_traits.regions['zh-CN'] = 'HK' + + # supported domains + + if add_domains: + resp = network.get('https://www.google.com/supported_domains') + if not resp.ok: + raise RuntimeError("Response from https://www.google.com/supported_domains is not OK.") + + for domain in resp.text.split(): + domain = domain.strip() + if not domain or domain in [ + '.google.com', + ]: + continue + region = domain.split('.')[-1].upper() + engine_traits.custom['supported_domains'][region] = 'www' + domain + if region == 'HK': + # There is no google.cn, we use .com.hk for zh-CN + engine_traits.custom['supported_domains']['CN'] = 'www' + domain diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 528f8d21d..e6445b1c4 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -1,31 +1,38 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This is the implementation of the google images engine using the google -internal API used the Google Go Android app. +"""This is the implementation of the Google Images engine using the internal +Google API used by the Google Go Android app. This internal API offer results in -- JSON (_fmt:json) -- Protobuf (_fmt:pb) -- Protobuf compressed? (_fmt:pc) -- HTML (_fmt:html) -- Protobuf encoded in JSON (_fmt:jspb). +- JSON (``_fmt:json``) +- Protobuf_ (``_fmt:pb``) +- Protobuf_ compressed? (``_fmt:pc``) +- HTML (``_fmt:html``) +- Protobuf_ encoded in JSON (``_fmt:jspb``). +.. _Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers """ +from typing import TYPE_CHECKING + from urllib.parse import urlencode from json import loads +from searx.engines.google import fetch_traits # pylint: disable=unused-import from searx.engines.google import ( - get_lang_info, + get_google_info, time_range_dict, detect_google_sorry, ) -# pylint: disable=unused-import -from searx.engines.google import supported_languages_url, _fetch_supported_languages +if TYPE_CHECKING: + import logging + from searx.enginelib.traits import EngineTraits + + logger: logging.Logger + traits: EngineTraits -# pylint: enable=unused-import # about about = { @@ -40,7 +47,6 @@ about = { # engine dependent config categories = ['images', 'web'] paging = True -use_locale_domain = True time_range_support = True safesearch = True send_accept_language_header = True @@ -51,20 +57,18 @@ filter_mapping = {0: 'images', 1: 'active', 2: 'active'} def request(query, params): """Google-Image search request""" - lang_info = get_lang_info(params, supported_languages, language_aliases, False) + google_info = get_google_info(params, traits) query_url = ( 'https://' - + lang_info['subdomain'] + + google_info['subdomain'] + '/search' + "?" 
+ urlencode( { 'q': query, 'tbm': "isch", - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", + **google_info['params'], 'asearch': 'isch', 'async': '_fmt:json,p:1,ijn:' + str(params['pageno']), } @@ -77,9 +81,8 @@ def request(query, params): query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]}) params['url'] = query_url - params['headers'].update(lang_info['headers']) - params['headers']['User-Agent'] = 'NSTN/3.60.474802233.release Dalvik/2.1.0 (Linux; U; Android 12; US) gzip' - params['headers']['Accept'] = '*/*' + params['cookies'] = google_info['cookies'] + params['headers'].update(google_info['headers']) return params @@ -111,7 +114,11 @@ def response(resp): copyright_notice = item["result"].get('iptc', {}).get('copyright_notice') if copyright_notice: - result_item['source'] += ' / ' + copyright_notice + result_item['source'] += ' | ' + copyright_notice + + freshness_date = item["result"].get("freshness_date") + if freshness_date: + result_item['source'] += ' | ' + freshness_date file_size = item.get('gsa', {}).get('file_size') if file_size: diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 1ada2d64d..ae55ca9cb 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -1,24 +1,40 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This is the implementation of the google news engine. The google news API -ignores some parameters from the common :ref:`google API`: +"""This is the implementation of the Google News engine. -- num_ : the number of search results is ignored +Google News has a different region handling compared to Google WEB. + +- the ``ceid`` argument has to be set (:py:obj:`ceid_list`) +- the hl_ argument has to be set correctly (and different to Google WEB) +- the gl_ argument is mandatory + +If one of this argument is not set correctly, the request is redirected to +CONSENT dialog:: + + https://consent.google.com/m?continue= + +The google news API ignores some parameters from the common :ref:`google API`: + +- num_ : the number of search results is ignored / there is no paging all + results for a query term are in the first response. - save_ : is ignored / Google-News results are always *SafeSearch* +.. _hl: https://developers.google.com/custom-search/docs/xml_results#hlsp +.. _gl: https://developers.google.com/custom-search/docs/xml_results#glsp .. _num: https://developers.google.com/custom-search/docs/xml_results#numsp .. 
_save: https://developers.google.com/custom-search/docs/xml_results#safesp - """ -# pylint: disable=invalid-name +from typing import TYPE_CHECKING import binascii import re from urllib.parse import urlencode from base64 import b64decode from lxml import html +import babel +from searx import locales from searx.utils import ( eval_xpath, eval_xpath_list, @@ -26,18 +42,19 @@ from searx.utils import ( extract_text, ) -# pylint: disable=unused-import +from searx.engines.google import fetch_traits as _fetch_traits # pylint: disable=unused-import from searx.engines.google import ( - supported_languages_url, - _fetch_supported_languages, + get_google_info, + detect_google_sorry, ) +from searx.enginelib.traits import EngineTraits -# pylint: enable=unused-import +if TYPE_CHECKING: + import logging -from searx.engines.google import ( - get_lang_info, - detect_google_sorry, -) + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -49,70 +66,77 @@ about = { "results": 'HTML', } -# compared to other google engines google-news has a different time range -# support. The time range is included in the search term. -time_range_dict = { - 'day': 'when:1d', - 'week': 'when:7d', - 'month': 'when:1m', - 'year': 'when:1y', -} - # engine dependent config - categories = ['news'] paging = False -use_locale_domain = True -time_range_support = True +time_range_support = False # Google-News results are always *SafeSearch*. Option 'safesearch' is set to # False here, otherwise checker will report safesearch-errors:: # # safesearch : results are identitical for safesearch=0 and safesearch=2 -safesearch = False -send_accept_language_header = True +safesearch = True +# send_accept_language_header = True def request(query, params): """Google-News search request""" - lang_info = get_lang_info(params, supported_languages, language_aliases, False) + sxng_locale = params.get('searxng_locale', 'en-US') + ceid = locales.get_engine_locale(sxng_locale, traits.custom['ceid'], default='US:en') + google_info = get_google_info(params, traits) + google_info['subdomain'] = 'news.google.com' # google news has only one domain - # google news has only one domain - lang_info['subdomain'] = 'news.google.com' + ceid_region, ceid_lang = ceid.split(':') + ceid_lang, ceid_suffix = ( + ceid_lang.split('-') + + [ + None, + ] + )[:2] - ceid = "%s:%s" % (lang_info['country'], lang_info['language']) + google_info['params']['hl'] = ceid_lang - # google news redirects en to en-US - if lang_info['params']['hl'] == 'en': - lang_info['params']['hl'] = 'en-US' + if ceid_suffix and ceid_suffix not in ['Hans', 'Hant']: - # Very special to google-news compared to other google engines, the time - # range is included in the search term. - if params['time_range']: - query += ' ' + time_range_dict[params['time_range']] + if ceid_region.lower() == ceid_lang: + google_info['params']['hl'] = ceid_lang + '-' + ceid_region + else: + google_info['params']['hl'] = ceid_lang + '-' + ceid_suffix + + elif ceid_region.lower() != ceid_lang: + + if ceid_region in ['AT', 'BE', 'CH', 'IL', 'SA', 'IN', 'BD', 'PT']: + google_info['params']['hl'] = ceid_lang + else: + google_info['params']['hl'] = ceid_lang + '-' + ceid_region + + google_info['params']['lr'] = 'lang_' + ceid_lang.split('-')[0] + google_info['params']['gl'] = ceid_region query_url = ( 'https://' - + lang_info['subdomain'] - + '/search' - + "?" - + urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'gl': lang_info['country']}) + + google_info['subdomain'] + + "/search?" 
+ + urlencode( + { + 'q': query, + **google_info['params'], + } + ) + # ceid includes a ':' character which must not be urlencoded + ('&ceid=%s' % ceid) - ) # ceid includes a ':' character which must not be urlencoded - params['url'] = query_url - - params['cookies']['CONSENT'] = "YES+" - params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' + ) + params['url'] = query_url + params['cookies'] = google_info['cookies'] + params['headers'].update(google_info['headers']) return params def response(resp): """Get response from google's search request""" results = [] - detect_google_sorry(resp) # convert the text to dom @@ -152,8 +176,8 @@ def response(resp): # The pub_date is mostly a string like 'yesertday', not a real # timezone date or time. Therefore we can't use publishedDate. - pub_date = extract_text(eval_xpath(result, './article/div[1]/div[1]/time')) - pub_origin = extract_text(eval_xpath(result, './article/div[1]/div[1]/a')) + pub_date = extract_text(eval_xpath(result, './article//time')) + pub_origin = extract_text(eval_xpath(result, './article//a[@data-n-tid]')) content = ' / '.join([x for x in [pub_origin, pub_date] if x]) @@ -174,3 +198,127 @@ def response(resp): # return results return results + + +ceid_list = [ + 'AE:ar', + 'AR:es-419', + 'AT:de', + 'AU:en', + 'BD:bn', + 'BE:fr', + 'BE:nl', + 'BG:bg', + 'BR:pt-419', + 'BW:en', + 'CA:en', + 'CA:fr', + 'CH:de', + 'CH:fr', + 'CL:es-419', + 'CN:zh-Hans', + 'CO:es-419', + 'CU:es-419', + 'CZ:cs', + 'DE:de', + 'EG:ar', + 'ES:es', + 'ET:en', + 'FR:fr', + 'GB:en', + 'GH:en', + 'GR:el', + 'HK:zh-Hant', + 'HU:hu', + 'ID:en', + 'ID:id', + 'IE:en', + 'IL:en', + 'IL:he', + 'IN:bn', + 'IN:en', + 'IN:hi', + 'IN:ml', + 'IN:mr', + 'IN:ta', + 'IN:te', + 'IT:it', + 'JP:ja', + 'KE:en', + 'KR:ko', + 'LB:ar', + 'LT:lt', + 'LV:en', + 'LV:lv', + 'MA:fr', + 'MX:es-419', + 'MY:en', + 'NA:en', + 'NG:en', + 'NL:nl', + 'NO:no', + 'NZ:en', + 'PE:es-419', + 'PH:en', + 'PK:en', + 'PL:pl', + 'PT:pt-150', + 'RO:ro', + 'RS:sr', + 'RU:ru', + 'SA:ar', + 'SE:sv', + 'SG:en', + 'SI:sl', + 'SK:sk', + 'SN:fr', + 'TH:th', + 'TR:tr', + 'TW:zh-Hant', + 'TZ:en', + 'UA:ru', + 'UA:uk', + 'UG:en', + 'US:en', + 'US:es-419', + 'VE:es-419', + 'VN:vi', + 'ZA:en', + 'ZW:en', +] +"""List of region/language combinations supported by Google News. 
Values of the +``ceid`` argument of the Google News REST API.""" + + +_skip_values = [ + 'ET:en', # english (ethiopia) + 'ID:en', # english (indonesia) + 'LV:en', # english (latvia) +] + +_ceid_locale_map = {'NO:no': 'nb-NO'} + + +def fetch_traits(engine_traits: EngineTraits): + _fetch_traits(engine_traits, add_domains=False) + + engine_traits.custom['ceid'] = {} + + for ceid in ceid_list: + if ceid in _skip_values: + continue + + region, lang = ceid.split(':') + x = lang.split('-') + if len(x) > 1: + if x[1] not in ['Hant', 'Hans']: + lang = x[0] + + sxng_locale = _ceid_locale_map.get(ceid, lang + '-' + region) + try: + locale = babel.Locale.parse(sxng_locale, sep='-') + except babel.UnknownLocaleError: + print("ERROR: %s -> %s is unknown by babel" % (ceid, sxng_locale)) + continue + + engine_traits.custom['ceid'][locales.region_tag(locale)] = ceid diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py index c07cd4cea..6f33d1e1a 100644 --- a/searx/engines/google_scholar.py +++ b/searx/engines/google_scholar.py @@ -1,19 +1,18 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Google (Scholar) +"""This is the implementation of the Google Scholar engine. -For detailed description of the *REST-full* API see: `Query Parameter -Definitions`_. - -.. _Query Parameter Definitions: - https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions +Compared to other Google services the Scholar engine has a simple GET REST-API +and there does not exists `async` API. Even though the API slightly vintage we +can make use of the :ref:`google API` to assemble the arguments of the GET +request. """ -# pylint: disable=invalid-name +from typing import TYPE_CHECKING +from typing import Optional from urllib.parse import urlencode from datetime import datetime -from typing import Optional from lxml import html from searx.utils import ( @@ -23,19 +22,21 @@ from searx.utils import ( extract_text, ) +from searx.exceptions import SearxEngineCaptchaException + +from searx.engines.google import fetch_traits # pylint: disable=unused-import from searx.engines.google import ( - get_lang_info, + get_google_info, time_range_dict, - detect_google_sorry, ) +from searx.enginelib.traits import EngineTraits -# pylint: disable=unused-import -from searx.engines.google import ( - supported_languages_url, - _fetch_supported_languages, -) +if TYPE_CHECKING: + import logging -# pylint: enable=unused-import + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -51,53 +52,62 @@ about = { categories = ['science', 'scientific publications'] paging = True language_support = True -use_locale_domain = True time_range_support = True safesearch = False send_accept_language_header = True -def time_range_url(params): - """Returns a URL query component for a google-Scholar time range based on - ``params['time_range']``. Google-Scholar does only support ranges in years. - To have any effect, all the Searx ranges (*day*, *week*, *month*, *year*) - are mapped to *year*. If no range is set, an empty string is returned. - Example:: +def time_range_args(params): + """Returns a dictionary with a time range arguments based on + ``params['time_range']``. - &as_ylo=2019 - """ - # as_ylo=2016&as_yhi=2019 - ret_val = '' - if params['time_range'] in time_range_dict: - ret_val = urlencode({'as_ylo': datetime.now().year - 1}) - return '&' + ret_val + Google Scholar supports a detailed search by year. 
Searching by *last + month* or *last week* (as offered by SearXNG) is uncommon for scientific + publications and is not supported by Google Scholar. + To limit the result list when the users selects a range, all the SearXNG + ranges (*day*, *week*, *month*, *year*) are mapped to *year*. If no range + is set an empty dictionary of arguments is returned. Example; when + user selects a time range (current year minus one in 2022): -def request(query, params): - """Google-Scholar search request""" + .. code:: python - offset = (params['pageno'] - 1) * 10 - lang_info = get_lang_info(params, supported_languages, language_aliases, False) + { 'as_ylo' : 2021 } - # subdomain is: scholar.google.xy - lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.") + """ + ret_val = {} + if params['time_range'] in time_range_dict: + ret_val['as_ylo'] = datetime.now().year - 1 + return ret_val - query_url = ( - 'https://' - + lang_info['subdomain'] - + '/scholar' - + "?" - + urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'start': offset}) - ) - query_url += time_range_url(params) - params['url'] = query_url +def detect_google_captcha(dom): + """In case of CAPTCHA Google Scholar open its own *not a Robot* dialog and is + not redirected to ``sorry.google.com``. + """ + if eval_xpath(dom, "//form[@id='gs_captcha_f']"): + raise SearxEngineCaptchaException() + - params['cookies']['CONSENT'] = "YES+" - params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' +def request(query, params): + """Google-Scholar search request""" - # params['google_subdomain'] = subdomain + google_info = get_google_info(params, traits) + # subdomain is: scholar.google.xy + google_info['subdomain'] = google_info['subdomain'].replace("www.", "scholar.") + + args = { + 'q': query, + **google_info['params'], + 'start': (params['pageno'] - 1) * 10, + 'as_sdt': '2007', # include patents / to disable set '0,5' + 'as_vis': '0', # include citations / to disable set '1' + } + args.update(time_range_args(params)) + + params['url'] = 'https://' + google_info['subdomain'] + '/scholar?' + urlencode(args) + params['cookies'] = google_info['cookies'] + params['headers'].update(google_info['headers']) return params @@ -138,19 +148,15 @@ def parse_gs_a(text: Optional[str]): def response(resp): # pylint: disable=too-many-locals - """Get response from google's search request""" + """Parse response from Google Scholar""" results = [] - detect_google_sorry(resp) - - # which subdomain ? 
- # subdomain = resp.search_params.get('google_subdomain') - # convert the text to dom dom = html.fromstring(resp.text) + detect_google_captcha(dom) # parse results - for result in eval_xpath_list(dom, '//div[@data-cid]'): + for result in eval_xpath_list(dom, '//div[@data-rp]'): title = extract_text(eval_xpath(result, './/h3[1]//a')) @@ -158,7 +164,7 @@ def response(resp): # pylint: disable=too-many-locals # this is a [ZITATION] block continue - pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]')) + pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]')) if pub_type: pub_type = pub_type[1:-1].lower() diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index fc574bd48..985189df5 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This is the implementation of the google videos engine. +"""This is the implementation of the Google Videos engine. .. admonition:: Content-Security-Policy (CSP) @@ -14,9 +14,8 @@ """ -# pylint: disable=invalid-name +from typing import TYPE_CHECKING -import re from urllib.parse import urlencode from lxml import html @@ -27,20 +26,22 @@ from searx.utils import ( extract_text, ) +from searx.engines.google import fetch_traits # pylint: disable=unused-import from searx.engines.google import ( - get_lang_info, + get_google_info, time_range_dict, filter_mapping, - g_section_with_header, - title_xpath, suggestion_xpath, detect_google_sorry, ) +from searx.enginelib.traits import EngineTraits -# pylint: disable=unused-import -from searx.engines.google import supported_languages_url, _fetch_supported_languages +if TYPE_CHECKING: + import logging -# pylint: enable=unused-import + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -55,70 +56,32 @@ about = { # engine dependent config categories = ['videos', 'web'] -paging = False +paging = True language_support = True -use_locale_domain = True time_range_support = True safesearch = True -send_accept_language_header = True - -RE_CACHE = {} - - -def _re(regexpr): - """returns compiled regular expression""" - RE_CACHE[regexpr] = RE_CACHE.get(regexpr, re.compile(regexpr)) - return RE_CACHE[regexpr] - - -def scrap_out_thumbs_src(dom): - ret_val = {} - thumb_name = 'dimg_' - for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'): - _script = script.text - # "dimg_35":"https://i.ytimg.c....", - _dimurl = _re("s='([^']*)").findall(_script) - for k, v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)').findall(_script): - v = v.replace(r'\u003d', '=') - v = v.replace(r'\u0026', '&') - ret_val[k] = v - logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys()) - return ret_val - - -def scrap_out_thumbs(dom): - """Scrap out thumbnail data from <script> tags.""" - ret_val = {} - thumb_name = 'dimg_' - - for script in eval_xpath_list(dom, '//script[contains(., "_setImagesSrc")]'): - _script = script.text - - # var s='data:image/jpeg;base64, ...' 
- _imgdata = _re("s='([^']*)").findall(_script) - if not _imgdata: - continue - - # var ii=['dimg_17'] - for _vidthumb in _re(r"(%s\d+)" % thumb_name).findall(_script): - # At least the equal sign in the URL needs to be decoded - ret_val[_vidthumb] = _imgdata[0].replace(r"\x3d", "=") - - logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys()) - return ret_val def request(query, params): """Google-Video search request""" - lang_info = get_lang_info(params, supported_languages, language_aliases, False) + google_info = get_google_info(params, traits) query_url = ( 'https://' - + lang_info['subdomain'] + + google_info['subdomain'] + '/search' + "?" - + urlencode({'q': query, 'tbm': "vid", **lang_info['params'], 'ie': "utf8", 'oe': "utf8"}) + + urlencode( + { + 'q': query, + 'tbm': "vid", + 'start': 10 * params['pageno'], + **google_info['params'], + 'asearch': 'arc', + 'async': 'use_ac:true,_fmt:html', + } + ) ) if params['time_range'] in time_range_dict: @@ -127,9 +90,8 @@ def request(query, params): query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]}) params['url'] = query_url - params['cookies']['CONSENT'] = "YES+" - params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' + params['cookies'] = google_info['cookies'] + params['headers'].update(google_info['headers']) return params @@ -141,43 +103,30 @@ def response(resp): # convert the text to dom dom = html.fromstring(resp.text) - vidthumb_imgdata = scrap_out_thumbs(dom) - thumbs_src = scrap_out_thumbs_src(dom) - logger.debug(str(thumbs_src)) # parse results for result in eval_xpath_list(dom, '//div[contains(@class, "g ")]'): - # ignore google *sections* - if extract_text(eval_xpath(result, g_section_with_header)): - logger.debug("ignoring <g-section-with-header>") - continue - - # ingnore articles without an image id / e.g. 
news articles - img_id = eval_xpath_getindex(result, './/g-img/img/@id', 0, default=None) - if img_id is None: - logger.error("no img_id found in item %s (news article?)", len(results) + 1) + img_src = eval_xpath_getindex(result, './/img/@src', 0, None) + if img_src is None: continue - img_src = vidthumb_imgdata.get(img_id, None) - if not img_src: - img_src = thumbs_src.get(img_id, "") + title = extract_text(eval_xpath_getindex(result, './/a/h3[1]', 0)) + url = eval_xpath_getindex(result, './/a/h3[1]/../@href', 0) - title = extract_text(eval_xpath_getindex(result, title_xpath, 0)) - url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0) - length = extract_text(eval_xpath(result, './/div[contains(@class, "P7xzyf")]/span/span')) c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0) content = extract_text(c_node) - pub_info = extract_text(eval_xpath(result, './/div[@class="Zg1NU"]')) + pub_info = extract_text(eval_xpath(result, './/div[@class="P7xzyf"]')) + length = extract_text(eval_xpath(result, './/div[@class="J1mWY"]')) results.append( { 'url': url, 'title': title, 'content': content, - 'length': length, 'author': pub_info, 'thumbnail': img_src, + 'length': length, 'template': 'videos.html', } ) diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py index 345c2f991..87b386d7a 100644 --- a/searx/engines/peertube.py +++ b/searx/engines/peertube.py @@ -1,18 +1,30 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - peertube (Videos) +# lint: pylint +"""Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share +(more or less) the same REST API and the schema of the JSON result is identical. + """ -from json import loads -from datetime import datetime +import re from urllib.parse import urlencode +from datetime import datetime +from dateutil.parser import parse +from dateutil.relativedelta import relativedelta + +import babel + +from searx import network +from searx.locales import language_tag from searx.utils import html_to_text +from searx.enginelib.traits import EngineTraits + +traits: EngineTraits -# about about = { + # pylint: disable=line-too-long "website": 'https://joinpeertube.org', "wikidata_id": 'Q50938515', - "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html', + "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos', "use_official_api": True, "require_api_key": False, "results": 'JSON', @@ -22,66 +34,155 @@ about = { categories = ["videos"] paging = True base_url = "https://peer.tube" -supported_languages_url = 'https://peer.tube/api/v1/videos/languages' +"""Base URL of the Peertube instance. 
A list of instances is available at: + +- https://instances.joinpeertube.org/instances +""" + +time_range_support = True +time_range_table = { + 'day': relativedelta(), + 'week': relativedelta(weeks=-1), + 'month': relativedelta(months=-1), + 'year': relativedelta(years=-1), +} + +safesearch = True +safesearch_table = {0: 'both', 1: 'false', 2: 'false'} + + +def minute_to_hm(minute): + if isinstance(minute, int): + return "%d:%02d" % (divmod(minute, 60)) + return None -# do search-request def request(query, params): - sanitized_url = base_url.rstrip("/") - pageno = (params["pageno"] - 1) * 15 - search_url = sanitized_url + "/api/v1/search/videos/?pageno={pageno}&{query}" - query_dict = {"search": query} - language = params["language"].split("-")[0] - if "all" != language and language in supported_languages: - query_dict["languageOneOf"] = language - params["url"] = search_url.format(query=urlencode(query_dict), pageno=pageno) - return params + """Assemble request for the Peertube API""" + + if not query: + return False + + # eng_region = traits.get_region(params['searxng_locale'], 'en_US') + eng_lang = traits.get_language(params['searxng_locale'], None) + + params['url'] = ( + base_url.rstrip("/") + + "/api/v1/search/videos?" + + urlencode( + { + 'search': query, + 'searchTarget': 'search-index', # Vidiversum + 'resultType': 'videos', + 'start': (params['pageno'] - 1) * 10, + 'count': 10, + # -createdAt: sort by date ascending / createdAt: date descending + 'sort': '-match', # sort by *match descending* + 'nsfw': safesearch_table[params['safesearch']], + } + ) + ) + + if eng_lang is not None: + params['url'] += '&languageOneOf[]=' + eng_lang + params['url'] += '&boostLanguages[]=' + eng_lang + if params['time_range'] in time_range_table: + time = datetime.now().date() + time_range_table[params['time_range']] + params['url'] += '&startDate=' + time.isoformat() -def _get_offset_from_pageno(pageno): - return (pageno - 1) * 15 + 1 + return params -# get response from search-request def response(resp): - sanitized_url = base_url.rstrip("/") + return video_response(resp) + + +def video_response(resp): + """Parse video response from SepiaSearch and Peertube instances.""" results = [] - search_res = loads(resp.text) + json_data = resp.json() - # return empty array if there are no results - if "data" not in search_res: + if 'data' not in json_data: return [] - # parse results - for res in search_res["data"]: - title = res["name"] - url = sanitized_url + "/videos/watch/" + res["uuid"] - description = res["description"] - if description: - content = html_to_text(res["description"]) - else: - content = "" - thumbnail = sanitized_url + res["thumbnailPath"] - publishedDate = datetime.strptime(res["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ") + for result in json_data['data']: + metadata = [ + x + for x in [ + result.get('channel', {}).get('displayName'), + result.get('channel', {}).get('name') + '@' + result.get('channel', {}).get('host'), + ', '.join(result.get('tags', [])), + ] + if x + ] results.append( { - "template": "videos.html", - "url": url, - "title": title, - "content": content, - "publishedDate": publishedDate, - "iframe_src": sanitized_url + res["embedPath"], - "thumbnail": thumbnail, + 'url': result['url'], + 'title': result['name'], + 'content': html_to_text(result.get('description') or ''), + 'author': result.get('account', {}).get('displayName'), + 'length': minute_to_hm(result.get('duration')), + 'template': 'videos.html', + 'publishedDate': parse(result['publishedAt']), + 'iframe_src': 
result.get('embedUrl'), + 'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'), + 'metadata': ' | '.join(metadata), } ) - # return results return results -def _fetch_supported_languages(resp): - videolanguages = resp.json() - peertube_languages = list(videolanguages.keys()) - return peertube_languages +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages from peertube's search-index source code. + + See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_ + + .. _8ed5c729 - Refactor and redesign client: + https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729 + .. _videoLanguages: + https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291 + """ + + resp = network.get( + 'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue', + # the response from search-index repository is very slow + timeout=60, + ) + + if not resp.ok: + print("ERROR: response from peertube is not OK.") + return + + js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL) + if not js_lang: + print("ERROR: can't determine languages from peertube") + return + + for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)): + try: + eng_tag = lang.group(1) + if eng_tag == 'oc': + # Occitanis not known by babel, its closest relative is Catalan + # but 'ca' is already in the list of engine_traits.languages --> + # 'oc' will be ignored. + continue + + sxng_tag = language_tag(babel.Locale.parse(eng_tag)) + + except babel.UnknownLocaleError: + print("ERROR: %s is unknown by babel" % eng_tag) + continue + + conflict = engine_traits.languages.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.languages[sxng_tag] = eng_tag + + engine_traits.languages['zh_Hans'] = 'zh' + engine_traits.languages['zh_Hant'] = 'zh' diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 6de2176d0..4a41676c5 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -34,7 +34,9 @@ import babel from searx.exceptions import SearxEngineAPIException from searx.network import raise_for_httperror -from searx.locales import get_engine_locale +from searx.enginelib.traits import EngineTraits + +traits: EngineTraits # about about = { @@ -49,7 +51,6 @@ about = { # engine dependent config categories = [] paging = True -supported_languages_url = about['website'] qwant_categ = None # web|news|inages|videos safesearch = True @@ -95,7 +96,7 @@ def request(query, params): ) # add quant's locale - q_locale = get_engine_locale(params['language'], supported_languages, default='en_US') + q_locale = traits.get_region(params["searxng_locale"], default='en_US') params['url'] += '&locale=' + q_locale # add safesearch option @@ -243,15 +244,20 @@ def response(resp): return results -def _fetch_supported_languages(resp): +def fetch_traits(engine_traits: EngineTraits): + + # pylint: disable=import-outside-toplevel + from searx import network + from searx.locales import region_tag + resp = network.get(about['website']) text = resp.text text = text[text.find('INITIAL_PROPS') :] text = text[text.find('{') : text.find('</script>')] q_initial_props = loads(text) q_locales = q_initial_props.get('locales') - q_valid_locales = [] + eng_tag_list = set() for country, v in q_locales.items(): for lang in v['langs']: @@ -261,25 +267,18 @@ def _fetch_supported_languages(resp): 
# qwant-news does not support all locales from qwant-web: continue - q_valid_locales.append(_locale) - - supported_languages = {} + eng_tag_list.add(_locale) - for q_locale in q_valid_locales: + for eng_tag in eng_tag_list: try: - locale = babel.Locale.parse(q_locale, sep='_') - except babel.core.UnknownLocaleError: - print("ERROR: can't determine babel locale of quant's locale %s" % q_locale) + sxng_tag = region_tag(babel.Locale.parse(eng_tag, sep='_')) + except babel.UnknownLocaleError: + print("ERROR: can't determine babel locale of quant's locale %s" % eng_tag) continue - # note: supported_languages (dict) - # - # dict's key is a string build up from a babel.Locale object / the - # notation 'xx-XX' (and 'xx') conforms to SearXNG's locale (and - # language) notation and dict's values are the locale strings used by - # the engine. - - searxng_locale = locale.language + '-' + locale.territory # --> params['language'] - supported_languages[searxng_locale] = q_locale - - return supported_languages + conflict = engine_traits.regions.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.regions[sxng_tag] = eng_tag diff --git a/searx/engines/sepiasearch.py b/searx/engines/sepiasearch.py index 9c45d6c43..72157b253 100644 --- a/searx/engines/sepiasearch.py +++ b/searx/engines/sepiasearch.py @@ -1,70 +1,80 @@ # SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""SepiaSearch uses the same languages as :py:obj:`Peertube +<searx.engines.peertube>` and the response is identical to the response from the +peertube engines. + """ - SepiaSearch (Videos) -""" -from json import loads -from dateutil import parser, relativedelta +from typing import TYPE_CHECKING + from urllib.parse import urlencode from datetime import datetime -# about +from searx.engines.peertube import fetch_traits # pylint: disable=unused-import +from searx.engines.peertube import ( + # pylint: disable=unused-import + video_response, + safesearch_table, + time_range_table, +) +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + about = { + # pylint: disable=line-too-long "website": 'https://sepiasearch.org', "wikidata_id": None, - "official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api", # NOQA + "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos', "use_official_api": True, "require_api_key": False, "results": 'JSON', } +# engine dependent config categories = ['videos'] paging = True + +base_url = 'https://sepiasearch.org' + time_range_support = True safesearch = True -supported_languages = [ - # fmt: off - 'en', 'fr', 'ja', 'eu', 'ca', 'cs', 'eo', 'el', - 'de', 'it', 'nl', 'es', 'oc', 'gd', 'zh', 'pt', - 'sv', 'pl', 'fi', 'ru' - # fmt: on -] -base_url = 'https://sepiasearch.org/api/v1/search/videos' - -safesearch_table = {0: 'both', 1: 'false', 2: 'false'} - -time_range_table = { - 'day': relativedelta.relativedelta(), - 'week': relativedelta.relativedelta(weeks=-1), - 'month': relativedelta.relativedelta(months=-1), - 'year': relativedelta.relativedelta(years=-1), -} -def minute_to_hm(minute): - if isinstance(minute, int): - return "%d:%02d" % (divmod(minute, 60)) - return None +def request(query, params): + """Assemble request for the SepiaSearch API""" + + if not query: + return False + # eng_region = 
traits.get_region(params['searxng_locale'], 'en_US') + eng_lang = traits.get_language(params['searxng_locale'], None) -def request(query, params): params['url'] = ( - base_url - + '?' + base_url.rstrip("/") + + "/api/v1/search/videos?" + urlencode( { 'search': query, 'start': (params['pageno'] - 1) * 10, 'count': 10, - 'sort': '-match', + # -createdAt: sort by date ascending / createdAt: date descending + 'sort': '-match', # sort by *match descending* 'nsfw': safesearch_table[params['safesearch']], } ) ) - language = params['language'].split('-')[0] - if language in supported_languages: - params['url'] += '&languageOneOf[]=' + language + if eng_lang is not None: + params['url'] += '&languageOneOf[]=' + eng_lang + params['url'] += '&boostLanguages[]=' + eng_lang + if params['time_range'] in time_range_table: time = datetime.now().date() + time_range_table[params['time_range']] params['url'] += '&startDate=' + time.isoformat() @@ -73,34 +83,4 @@ def request(query, params): def response(resp): - results = [] - - search_results = loads(resp.text) - - if 'data' not in search_results: - return [] - - for result in search_results['data']: - title = result['name'] - content = result['description'] - thumbnail = result['thumbnailUrl'] - publishedDate = parser.parse(result['publishedAt']) - author = result.get('account', {}).get('displayName') - length = minute_to_hm(result.get('duration')) - url = result['url'] - - results.append( - { - 'url': url, - 'title': title, - 'content': content, - 'author': author, - 'length': length, - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'iframe_src': result.get('embedUrl'), - 'thumbnail': thumbnail, - } - ) - - return results + return video_response(resp) diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index f857f7b6d..2813d0bf3 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -1,28 +1,108 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Startpage (Web) +"""Startpage's language & region selectors are a mess .. + +.. _startpage regions: + +Startpage regions +================= + +In the list of regions there are tags we need to map to common region tags:: + + pt-BR_BR --> pt_BR + zh-CN_CN --> zh_Hans_CN + zh-TW_TW --> zh_Hant_TW + zh-TW_HK --> zh_Hant_HK + en-GB_GB --> en_GB + +and there is at least one tag with a three letter language tag (ISO 639-2):: + + fil_PH --> fil_PH + +The locale code ``no_NO`` from Startpage does not exists and is mapped to +``nb-NO``:: + + babel.core.UnknownLocaleError: unknown locale 'no_NO' + +For reference see languages-subtag at iana; ``no`` is the macrolanguage [1]_ and +W3C recommends subtag over macrolanguage [2]_. + +.. [1] `iana: language-subtag-registry + <https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry>`_ :: + + type: language + Subtag: nb + Description: Norwegian Bokmål + Added: 2005-10-16 + Suppress-Script: Latn + Macrolanguage: no + +.. [2] + Use macrolanguages with care. Some language subtags have a Scope field set to + macrolanguage, i.e. this primary language subtag encompasses a number of more + specific primary language subtags in the registry. ... As we recommended for + the collection subtags mentioned above, in most cases you should try to use + the more specific subtags ... `W3: The primary language subtag + <https://www.w3.org/International/questions/qa-choosing-language-tags#langsubtag>`_ + +.. 
_startpage languages: + +Startpage languages +=================== + +:py:obj:`send_accept_language_header`: + The displayed name in Startpage's settings page depend on the location of the + IP when ``Accept-Language`` HTTP header is unset. In :py:obj:`fetch_traits` + we use:: + + 'Accept-Language': "en-US,en;q=0.5", + .. + + to get uniform names independent from the IP). + +.. _startpage categories: + +Startpage categories +==================== + +Startpage's category (for Web-search, News, Videos, ..) is set by +:py:obj:`startpage_categ` in settings.yml:: + + - name: startpage + engine: startpage + startpage_categ: web + ... + +.. hint:: + + The default category is ``web`` .. and other categories than ``web`` are not + yet implemented. """ +from typing import TYPE_CHECKING +from collections import OrderedDict import re -from time import time - -from urllib.parse import urlencode from unicodedata import normalize, combining +from time import time from datetime import datetime, timedelta -from dateutil import parser -from lxml import html -from babel import Locale -from babel.localedata import locale_identifiers +import dateutil.parser +import lxml.html +import babel + +from searx import network +from searx.utils import extract_text, eval_xpath, gen_useragent +from searx.exceptions import SearxEngineCaptchaException +from searx.locales import region_tag +from searx.enginelib.traits import EngineTraits -from searx.network import get -from searx.utils import extract_text, eval_xpath, match_language -from searx.exceptions import ( - SearxEngineResponseException, - SearxEngineCaptchaException, -) +if TYPE_CHECKING: + import logging + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -34,18 +114,28 @@ about = { "results": 'HTML', } +startpage_categ = 'web' +"""Startpage's category, visit :ref:`startpage categories`. +""" + +send_accept_language_header = True +"""Startpage tries to guess user's language and territory from the HTTP +``Accept-Language``. Optional the user can select a search-language (can be +different to the UI language) and a region filter. +""" + # engine dependent config categories = ['general', 'web'] -# there is a mechanism to block "bot" search -# (probably the parameter qid), require -# storing of qid's between mulitble search-calls - paging = True -supported_languages_url = 'https://www.startpage.com/do/settings' +time_range_support = True +safesearch = True + +time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} +safesearch_dict = {0: '0', 1: '1', 2: '1'} # search-url -base_url = 'https://startpage.com/' -search_url = base_url + 'sp/search?' +base_url = 'https://www.startpage.com' +search_url = base_url + '/sp/search' # specific xpath variables # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] @@ -53,92 +143,193 @@ search_url = base_url + 'sp/search?' results_xpath = '//div[@class="w-gl__result__main"]' link_xpath = './/a[@class="w-gl__result-title result-link"]' content_xpath = './/p[@class="w-gl__description"]' +search_form_xpath = '//form[@id="search"]' +"""XPath of Startpage's origin search form + +.. 
code: html + + <form action="/sp/search" method="post"> + <input type="text" name="query" value="" ..> + <input type="hidden" name="t" value="device"> + <input type="hidden" name="lui" value="english"> + <input type="hidden" name="sc" value="Q7Mt5TRqowKB00"> + <input type="hidden" name="cat" value="web"> + <input type="hidden" class="abp" id="abp-input" name="abp" value="1"> + </form> +""" # timestamp of the last fetch of 'sc' code sc_code_ts = 0 sc_code = '' +sc_code_cache_sec = 30 +"""Time in seconds the sc-code is cached in memory :py:obj:`get_sc_code`.""" -def raise_captcha(resp): +def get_sc_code(searxng_locale, params): + """Get an actual ``sc`` argument from Startpage's search form (HTML page). - if str(resp.url).startswith('https://www.startpage.com/sp/captcha'): - raise SearxEngineCaptchaException() + Startpage puts a ``sc`` argument on every HTML :py:obj:`search form + <search_form_xpath>`. Without this argument Startpage considers the request + is from a bot. We do not know what is encoded in the value of the ``sc`` + argument, but it seems to be a kind of a *time-stamp*. + Startpage's search form generates a new sc-code on each request. This + function scrap a new sc-code from Startpage's home page every + :py:obj:`sc_code_cache_sec` seconds. -def get_sc_code(headers): - """Get an actual `sc` argument from startpage's home page. + """ - Startpage puts a `sc` argument on every link. Without this argument - startpage considers the request is from a bot. We do not know what is - encoded in the value of the `sc` argument, but it seems to be a kind of a - *time-stamp*. This *time-stamp* is valid for a few hours. + global sc_code_ts, sc_code # pylint: disable=global-statement - This function scrap a new *time-stamp* from startpage's home page every hour - (3000 sec). + if sc_code and (time() < (sc_code_ts + sc_code_cache_sec)): + logger.debug("get_sc_code: reuse '%s'", sc_code) + return sc_code + + headers = {**params['headers']} + headers['Origin'] = base_url + headers['Referer'] = base_url + '/' + # headers['Connection'] = 'keep-alive' + # headers['Accept-Encoding'] = 'gzip, deflate, br' + # headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8' + # headers['User-Agent'] = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0' + + # add Accept-Language header + if searxng_locale == 'all': + searxng_locale = 'en-US' + locale = babel.Locale.parse(searxng_locale, sep='-') + + if send_accept_language_header: + ac_lang = locale.language + if locale.territory: + ac_lang = "%s-%s,%s;q=0.9,*;q=0.5" % ( + locale.language, + locale.territory, + locale.language, + ) + headers['Accept-Language'] = ac_lang + + get_sc_url = base_url + '/?sc=%s' % (sc_code) + logger.debug("query new sc time-stamp ... %s", get_sc_url) + logger.debug("headers: %s", headers) + resp = network.get(get_sc_url, headers=headers) + + # ?? x = network.get('https://www.startpage.com/sp/cdn/images/filter-chevron.svg', headers=headers) + # ?? https://www.startpage.com/sp/cdn/images/filter-chevron.svg + # ?? 
ping-back URL: https://www.startpage.com/sp/pb?sc=TLsB0oITjZ8F21 - """ + if str(resp.url).startswith('https://www.startpage.com/sp/captcha'): + raise SearxEngineCaptchaException( + message="get_sc_code: got redirected to https://www.startpage.com/sp/captcha", + ) + + dom = lxml.html.fromstring(resp.text) + + try: + sc_code = eval_xpath(dom, search_form_xpath + '//input[@name="sc"]/@value')[0] + except IndexError as exc: + logger.debug("suspend startpage API --> https://github.com/searxng/searxng/pull/695") + raise SearxEngineCaptchaException( + message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url, + ) from exc + + sc_code_ts = time() + logger.debug("get_sc_code: new value is: %s", sc_code) + return sc_code - global sc_code_ts, sc_code # pylint: disable=global-statement - if time() > (sc_code_ts + 3000): - logger.debug("query new sc time-stamp ...") +def request(query, params): + """Assemble a Startpage request. - resp = get(base_url, headers=headers) - raise_captcha(resp) - dom = html.fromstring(resp.text) + To avoid CAPTCHA we need to send a well formed HTTP POST request with a + cookie. We need to form a request that is identical to the request build by + Startpage's search form: - try: - # <input type="hidden" name="sc" value="..."> - sc_code = eval_xpath(dom, '//input[@name="sc"]/@value')[0] - except IndexError as exc: - # suspend startpage API --> https://github.com/searxng/searxng/pull/695 - raise SearxEngineResponseException( - suspended_time=7 * 24 * 3600, message="PR-695: query new sc time-stamp failed!" - ) from exc + - in the cookie the **region** is selected + - in the HTTP POST data the **language** is selected - sc_code_ts = time() - logger.debug("new value is: %s", sc_code) + Additionally the arguments form Startpage's search form needs to be set in + HTML POST data / compare ``<input>`` elements: :py:obj:`search_form_xpath`. 
+ """ + if startpage_categ == 'web': + return _request_cat_web(query, params) - return sc_code + logger.error("Startpages's category '%' is not yet implemented.", startpage_categ) + return params -# do search-request -def request(query, params): +def _request_cat_web(query, params): - # pylint: disable=line-too-long - # The format string from Startpage's FFox add-on [1]:: - # - # https://www.startpage.com/do/dsearch?query={searchTerms}&cat=web&pl=ext-ff&language=__MSG_extensionUrlLanguage__&extVersion=1.3.0 - # - # [1] https://addons.mozilla.org/en-US/firefox/addon/startpage-private-search/ + engine_region = traits.get_region(params['searxng_locale'], 'en-US') + engine_language = traits.get_language(params['searxng_locale'], 'en') + # build arguments args = { 'query': query, - 'page': params['pageno'], 'cat': 'web', - # 'pl': 'ext-ff', - # 'extVersion': '1.3.0', - # 'abp': "-1", - 'sc': get_sc_code(params['headers']), + 't': 'device', + 'sc': get_sc_code(params['searxng_locale'], params), # hint: this func needs HTTP headers, + 'with_date': time_range_dict.get(params['time_range'], ''), } - # set language if specified - if params['language'] != 'all': - lang_code = match_language(params['language'], supported_languages, fallback=None) - if lang_code: - language_name = supported_languages[lang_code]['alias'] - args['language'] = language_name - args['lui'] = language_name + if engine_language: + args['language'] = engine_language + args['lui'] = engine_language + + args['abp'] = '1' + if params['pageno'] > 1: + args['page'] = params['pageno'] + + # build cookie + lang_homepage = 'en' + cookie = OrderedDict() + cookie['date_time'] = 'world' + cookie['disable_family_filter'] = safesearch_dict[params['safesearch']] + cookie['disable_open_in_new_window'] = '0' + cookie['enable_post_method'] = '1' # hint: POST + cookie['enable_proxy_safety_suggest'] = '1' + cookie['enable_stay_control'] = '1' + cookie['instant_answers'] = '1' + cookie['lang_homepage'] = 's/device/%s/' % lang_homepage + cookie['num_of_results'] = '10' + cookie['suggestions'] = '1' + cookie['wt_unit'] = 'celsius' + + if engine_language: + cookie['language'] = engine_language + cookie['language_ui'] = engine_language + + if engine_region: + cookie['search_results_region'] = engine_region + + params['cookies']['preferences'] = 'N1N'.join(["%sEEE%s" % x for x in cookie.items()]) + logger.debug('cookie preferences: %s', params['cookies']['preferences']) + + # POST request + logger.debug("data: %s", args) + params['data'] = args + params['method'] = 'POST' + params['url'] = search_url + params['headers']['Origin'] = base_url + params['headers']['Referer'] = base_url + '/' + # is the Accept header needed? 
+ # params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - params['url'] = search_url + urlencode(args) return params # get response from search-request def response(resp): - results = [] + dom = lxml.html.fromstring(resp.text) - dom = html.fromstring(resp.text) + if startpage_categ == 'web': + return _response_cat_web(dom) + + logger.error("Startpages's category '%' is not yet implemented.", startpage_categ) + return [] + + +def _response_cat_web(dom): + results = [] # parse results for result in eval_xpath(dom, results_xpath): @@ -173,7 +364,7 @@ def response(resp): content = content[date_pos:] try: - published_date = parser.parse(date_string, dayfirst=True) + published_date = dateutil.parser.parse(date_string, dayfirst=True) except ValueError: pass @@ -199,62 +390,103 @@ def response(resp): return results -# get supported languages from their site -def _fetch_supported_languages(resp): - # startpage's language selector is a mess each option has a displayed name - # and a value, either of which may represent the language name in the native - # script, the language name in English, an English transliteration of the - # native name, the English name of the writing script used by the language, - # or occasionally something else entirely. - - # this cases are so special they need to be hardcoded, a couple of them are misspellings - language_names = { - 'english_uk': 'en-GB', - 'fantizhengwen': ['zh-TW', 'zh-HK'], - 'hangul': 'ko', - 'malayam': 'ml', - 'norsk': 'nb', - 'sinhalese': 'si', - 'sudanese': 'su', +def fetch_traits(engine_traits: EngineTraits): + """Fetch :ref:`languages <startpage languages>` and :ref:`regions <startpage + regions>` from Startpage.""" + # pylint: disable=too-many-branches + + headers = { + 'User-Agent': gen_useragent(), + 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language } + resp = network.get('https://www.startpage.com/do/settings', headers=headers) - # get the English name of every language known by babel - language_names.update( - { - # fmt: off - name.lower(): lang_code - # pylint: disable=protected-access - for lang_code, name in Locale('en')._data['languages'].items() - # fmt: on - } - ) + if not resp.ok: + print("ERROR: response from Startpage is not OK.") + + dom = lxml.html.fromstring(resp.text) + + # regions + + sp_region_names = [] + for option in dom.xpath('//form[@name="settings"]//select[@name="search_results_region"]/option'): + sp_region_names.append(option.get('value')) + + for eng_tag in sp_region_names: + if eng_tag == 'all': + continue + babel_region_tag = {'no_NO': 'nb_NO'}.get(eng_tag, eng_tag) # norway + + if '-' in babel_region_tag: + l, r = babel_region_tag.split('-') + r = r.split('_')[-1] + sxng_tag = region_tag(babel.Locale.parse(l + '_' + r, sep='_')) + + else: + try: + sxng_tag = region_tag(babel.Locale.parse(babel_region_tag, sep='_')) + + except babel.UnknownLocaleError: + print("ERROR: can't determine babel locale of startpage's locale %s" % eng_tag) + continue + + conflict = engine_traits.regions.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.regions[sxng_tag] = eng_tag + + # languages + + catalog_engine2code = {name.lower(): lang_code for lang_code, name in babel.Locale('en').languages.items()} # get the native name of every language known by babel - for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, locale_identifiers()): - 
native_name = Locale(lang_code).get_language_name().lower() + + for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()): + native_name = babel.Locale(lang_code).get_language_name().lower() # add native name exactly as it is - language_names[native_name] = lang_code + catalog_engine2code[native_name] = lang_code # add "normalized" language name (i.e. français becomes francais and español becomes espanol) unaccented_name = ''.join(filter(lambda c: not combining(c), normalize('NFKD', native_name))) if len(unaccented_name) == len(unaccented_name.encode()): # add only if result is ascii (otherwise "normalization" didn't work) - language_names[unaccented_name] = lang_code + catalog_engine2code[unaccented_name] = lang_code + + # values that can't be determined by babel's languages names + + catalog_engine2code.update( + { + # traditional chinese used in .. + 'fantizhengwen': 'zh_Hant', + # Korean alphabet + 'hangul': 'ko', + # Malayalam is one of 22 scheduled languages of India. + 'malayam': 'ml', + 'norsk': 'nb', + 'sinhalese': 'si', + } + ) + + skip_eng_tags = { + 'english_uk', # SearXNG lang 'en' already maps to 'english' + } - dom = html.fromstring(resp.text) - sp_lang_names = [] for option in dom.xpath('//form[@name="settings"]//select[@name="language"]/option'): - sp_lang_names.append((option.get('value'), extract_text(option).lower())) - - supported_languages = {} - for sp_option_value, sp_option_text in sp_lang_names: - lang_code = language_names.get(sp_option_value) or language_names.get(sp_option_text) - if isinstance(lang_code, str): - supported_languages[lang_code] = {'alias': sp_option_value} - elif isinstance(lang_code, list): - for _lc in lang_code: - supported_languages[_lc] = {'alias': sp_option_value} - else: - print('Unknown language option in Startpage: {} ({})'.format(sp_option_value, sp_option_text)) - return supported_languages + eng_tag = option.get('value') + if eng_tag in skip_eng_tags: + continue + name = extract_text(option).lower() + + sxng_tag = catalog_engine2code.get(eng_tag) + if sxng_tag is None: + sxng_tag = catalog_engine2code[name] + + conflict = engine_traits.languages.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.languages[sxng_tag] = eng_tag diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 8d3b0839a..6ea77f092 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -1,9 +1,12 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Wikidata +"""This module implements the Wikidata engine. Some implementations are shared +from :ref:`wikipedia engine`. 
+ """ # pylint: disable=missing-class-docstring +from typing import TYPE_CHECKING from hashlib import md5 from urllib.parse import urlencode, unquote from json import loads @@ -13,12 +16,17 @@ from babel.dates import format_datetime, format_date, format_time, get_datetime_ from searx.data import WIKIDATA_UNITS from searx.network import post, get -from searx.utils import match_language, searx_useragent, get_string_replaces_function +from searx.utils import searx_useragent, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom -from searx.engines.wikipedia import ( # pylint: disable=unused-import - _fetch_supported_languages, - supported_languages_url, -) +from searx.engines.wikipedia import fetch_traits as _fetch_traits +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -154,33 +162,35 @@ def send_wikidata_query(query, method='GET'): def request(query, params): - language = params['language'].split('-')[0] - if language == 'all': - language = 'en' - else: - language = match_language(params['language'], supported_languages, language_aliases).split('-')[0] + + # wikidata does not support zh-classical (zh_Hans) / zh-TW, zh-HK and zh-CN + # mapped to zh + sxng_lang = params['searxng_locale'].split('-')[0] + language = traits.get_language(sxng_lang, 'en') query, attributes = get_query(query, language) + logger.debug("request --> language %s // len(attributes): %s", language, len(attributes)) params['method'] = 'POST' params['url'] = SPARQL_ENDPOINT_URL params['data'] = {'query': query} params['headers'] = get_headers() - params['language'] = language params['attributes'] = attributes + return params def response(resp): + results = [] jsonresponse = loads(resp.content.decode()) - language = resp.search_params['language'].lower() + language = resp.search_params['language'] attributes = resp.search_params['attributes'] + logger.debug("request --> language %s // len(attributes): %s", language, len(attributes)) seen_entities = set() - for result in jsonresponse.get('results', {}).get('bindings', []): attribute_result = {key: value['value'] for key, value in result.items()} entity_url = attribute_result['item'] @@ -756,3 +766,15 @@ def init(engine_settings=None): # pylint: disable=unused-argument lang = result['name']['xml:lang'] entity_id = result['item']['value'].replace('http://www.wikidata.org/entity/', '') WIKIDATA_PROPERTIES[(entity_id, lang)] = name.capitalize() + + +def fetch_traits(engine_traits: EngineTraits): + """Use languages evaluated from :py:obj:`wikipedia.fetch_traits + <searx.engines.wikipedia.fetch_traits>` except zh-classical (zh_Hans) what + is not supported by wikidata.""" + + _fetch_traits(engine_traits) + # wikidata does not support zh-classical (zh_Hans) + engine_traits.languages.pop('zh_Hans') + # wikidata does not have net-locations for the languages + engine_traits.custom['wiki_netloc'] = {} diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index ca841e8b3..9d2d30afa 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -1,13 +1,26 @@ # SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""This module implements the Wikipedia engine. Some of this implementations +are shared by other engines: + +- :ref:`wikidata engine` + +The list of supported languages is fetched from the article linked by +:py:obj:`wikipedia_article_depth`. 
Unlike traditional search engines, wikipedia +does not support one Wikipedia for all the languages, but there is one Wikipedia +for every language (:py:obj:`fetch_traits`). """ - Wikipedia (Web) -""" -from urllib.parse import quote -from json import loads -from lxml.html import fromstring -from searx.utils import match_language, searx_useragent -from searx.network import raise_for_httperror +import urllib.parse +import babel + +from lxml import html + +from searx import network +from searx.locales import language_tag +from searx.enginelib.traits import EngineTraits + +traits: EngineTraits # about about = { @@ -19,32 +32,40 @@ about = { "results": 'JSON', } - send_accept_language_header = True -# search-url -search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' -supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' -language_variants = {"zh": ("zh-cn", "zh-hk", "zh-mo", "zh-my", "zh-sg", "zh-tw")} +wikipedia_article_depth = 'https://meta.wikimedia.org/wiki/Wikipedia_article_depth' +"""The *editing depth* of Wikipedia is one of several possible rough indicators +of the encyclopedia's collaborative quality, showing how frequently its articles +are updated. The measurement of depth was introduced after some limitations of +the classic measurement of article count were realized. +""" + +# example: https://zh-classical.wikipedia.org/api/rest_v1/page/summary/日 +rest_v1_summary_url = 'https://{wiki_netloc}/api/rest_v1/page/summary/{title}' +"""`wikipedia rest_v1 summary API`_: The summary response includes an extract of +the first paragraph of the page in plain text and HTML as well as the type of +page. This is useful for page previews (fka. Hovercards, aka. Popups) on the web +and link previews in the apps. +.. 
_wikipedia rest_v1 summary API: https://en.wikipedia.org/api/rest_v1/#/Page%20content/get_page_summary__title_ -# set language in base_url -def url_lang(lang): - lang_pre = lang.split('-')[0] - if lang_pre == 'all' or lang_pre not in supported_languages and lang_pre not in language_aliases: - return 'en' - return match_language(lang, supported_languages, language_aliases).split('-')[0] +""" -# do search-request def request(query, params): + """Assemble a request (`wikipedia rest_v1 summary API`_).""" if query.islower(): query = query.title() - language = url_lang(params['language']) - params['url'] = search_url.format(title=quote(query), language=language) + engine_language = traits.get_language(params['searxng_locale'], 'en') + wiki_netloc = traits.custom['wiki_netloc'].get(engine_language, 'https://en.wikipedia.org/wiki/') + title = urllib.parse.quote(query) + + # '!wikipedia 日 :zh-TW' --> https://zh-classical.wikipedia.org/ + # '!wikipedia 日 :zh' --> https://zh.wikipedia.org/ + params['url'] = rest_v1_summary_url.format(wiki_netloc=wiki_netloc, title=title) - params['headers']['User-Agent'] = searx_useragent() params['raise_for_httperror'] = False params['soft_max_redirects'] = 2 @@ -53,13 +74,14 @@ def request(query, params): # get response from search-request def response(resp): + + results = [] if resp.status_code == 404: return [] - if resp.status_code == 400: try: - api_result = loads(resp.text) - except: + api_result = resp.json() + except Exception: # pylint: disable=broad-except pass else: if ( @@ -68,49 +90,135 @@ def response(resp): ): return [] - raise_for_httperror(resp) - - results = [] - api_result = loads(resp.text) - - # skip disambiguation pages - if api_result.get('type') != 'standard': - return [] + network.raise_for_httperror(resp) + api_result = resp.json() title = api_result['title'] wikipedia_link = api_result['content_urls']['desktop']['page'] - - results.append({'url': wikipedia_link, 'title': title}) - - results.append( - { - 'infobox': title, - 'id': wikipedia_link, - 'content': api_result.get('extract', ''), - 'img_src': api_result.get('thumbnail', {}).get('source'), - 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}], - } - ) + results.append({'url': wikipedia_link, 'title': title, 'content': api_result.get('description', '')}) + + if api_result.get('type') == 'standard': + results.append( + { + 'infobox': title, + 'id': wikipedia_link, + 'content': api_result.get('extract', ''), + 'img_src': api_result.get('thumbnail', {}).get('source'), + 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}], + } + ) return results -# get supported languages from their site -def _fetch_supported_languages(resp): - supported_languages = {} - dom = fromstring(resp.text) - tables = dom.xpath('//table[contains(@class,"sortable")]') - for table in tables: - # exclude header row - trs = table.xpath('.//tr')[1:] - for tr in trs: - td = tr.xpath('./td') - code = td[3].xpath('./a')[0].text - name = td[1].xpath('./a')[0].text - english_name = td[1].xpath('./a')[0].text - articles = int(td[4].xpath('./a')[0].text.replace(',', '')) +# Nonstandard language codes +# +# These Wikipedias use language codes that do not conform to the ISO 639 +# standard (which is how wiki subdomains are chosen nowadays). 
+ +lang_map = { + 'be-tarask': 'bel', + 'ak': 'aka', + 'als': 'gsw', + 'bat-smg': 'sgs', + 'cbk-zam': 'cbk', + 'fiu-vro': 'vro', + 'map-bms': 'map', + 'nrm': 'nrf', + 'roa-rup': 'rup', + 'nds-nl': 'nds', + #'simple: – invented code used for the Simple English Wikipedia (not the official IETF code en-simple) + 'zh-min-nan': 'nan', + 'zh-yue': 'yue', + 'an': 'arg', + 'zh-classical': 'zh-Hant', # babel maps classical to zh-Hans (for whatever reason) +} + +unknown_langs = [ + 'an', # Aragonese + 'ba', # Bashkir + 'bar', # Bavarian + 'bcl', # Central Bicolano + 'be-tarask', # Belarusian variant / Belarusian is already covered by 'be' + 'bpy', # Bishnupriya Manipuri is unknown by babel + 'hif', # Fiji Hindi + 'ilo', # Ilokano + 'li', # Limburgish + 'sco', # Scots (sco) is not known by babel, Scottish Gaelic (gd) is known by babel + 'sh', # Serbo-Croatian + 'simple', # simple english is not know as a natural language different to english (babel) + 'vo', # Volapük + 'wa', # Walloon +] + + +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages from Wikipedia. + + The location of the Wikipedia address of a language is mapped in a + :py:obj:`custom field <searx.enginelib.traits.EngineTraits.custom>` + (``wiki_netloc``). Here is a reduced example: + + .. code:: python + + traits.custom['wiki_netloc'] = { + "en": "en.wikipedia.org", + .. + "gsw": "als.wikipedia.org", + .. + "zh": "zh.wikipedia.org", + "zh-classical": "zh-classical.wikipedia.org" + } + + """ + + engine_traits.custom['wiki_netloc'] = {} + + # insert alias to map from a region like zh-CN to a language zh_Hans + engine_traits.languages['zh_Hans'] = 'zh' + + resp = network.get(wikipedia_article_depth) + if not resp.ok: + print("ERROR: response from Wikipedia is not OK.") + + dom = html.fromstring(resp.text) + for row in dom.xpath('//table[contains(@class,"sortable")]//tbody/tr'): + + cols = row.xpath('./td') + if not cols: + continue + cols = [c.text_content().strip() for c in cols] + + depth = float(cols[3].replace('-', '0').replace(',', '')) + articles = int(cols[4].replace(',', '').replace(',', '')) + + if articles < 10000: # exclude languages with too few articles - if articles >= 100: - supported_languages[code] = {"name": name, "english_name": english_name} + continue + + if int(depth) < 20: + # Rough indicator of a Wikipedia’s quality, showing how frequently + # its articles are updated. 
+ continue - return supported_languages + eng_tag = cols[2] + wiki_url = row.xpath('./td[3]/a/@href')[0] + wiki_url = urllib.parse.urlparse(wiki_url) + + if eng_tag in unknown_langs: + continue + + try: + sxng_tag = language_tag(babel.Locale.parse(lang_map.get(eng_tag, eng_tag), sep='-')) + except babel.UnknownLocaleError: + print("ERROR: %s [%s] is unknown by babel" % (cols[0], eng_tag)) + continue + + conflict = engine_traits.languages.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + + engine_traits.languages[sxng_tag] = eng_tag + engine_traits.custom['wiki_netloc'][eng_tag] = wiki_url.netloc diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index c13ce6d78..0fdeacec2 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -17,8 +17,10 @@ from searx.utils import ( eval_xpath_getindex, eval_xpath_list, extract_text, - match_language, ) +from searx.enginelib.traits import EngineTraits + +traits: EngineTraits # about about = { @@ -34,8 +36,7 @@ about = { categories = ['general', 'web'] paging = True time_range_support = True -supported_languages_url = 'https://search.yahoo.com/preferences/languages' -"""Supported languages are read from Yahoo preference page.""" +# send_accept_language_header = True time_range_dict = { 'day': ('1d', 'd'), @@ -43,15 +44,10 @@ time_range_dict = { 'month': ('1m', 'm'), } -language_aliases = { - 'zh-HK': 'zh_chs', - 'zh-CN': 'zh_chs', # dead since 2015 / routed to hk.search.yahoo.com - 'zh-TW': 'zh_cht', -} - lang2domain = { 'zh_chs': 'hk.search.yahoo.com', 'zh_cht': 'tw.search.yahoo.com', + 'any': 'search.yahoo.com', 'en': 'search.yahoo.com', 'bg': 'search.yahoo.com', 'cs': 'search.yahoo.com', @@ -67,21 +63,23 @@ lang2domain = { } """Map language to domain""" - -def _get_language(params): - - lang = language_aliases.get(params['language']) - if lang is None: - lang = match_language(params['language'], supported_languages, language_aliases) - lang = lang.split('-')[0] - logger.debug("params['language']: %s --> %s", params['language'], lang) - return lang +locale_aliases = { + 'zh': 'zh_Hans', + 'zh-HK': 'zh_Hans', + 'zh-CN': 'zh_Hans', # dead since 2015 / routed to hk.search.yahoo.com + 'zh-TW': 'zh_Hant', +} def request(query, params): """build request""" + + lang = locale_aliases.get(params['language'], None) + if not lang: + lang = params['language'].split('-')[0] + lang = traits.get_language(lang, traits.all_locale) + offset = (params['pageno'] - 1) * 7 + 1 - lang = _get_language(params) age, btf = time_range_dict.get(params['time_range'], ('', '')) args = urlencode( @@ -154,13 +152,37 @@ def response(resp): return results -# get supported languages from their site -def _fetch_supported_languages(resp): - supported_languages = [] +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages from yahoo""" + + # pylint: disable=import-outside-toplevel + import babel + from searx import network + from searx.locales import language_tag + + engine_traits.all_locale = 'any' + + resp = network.get('https://search.yahoo.com/preferences/languages') + if not resp.ok: + print("ERROR: response from peertube is not OK.") + dom = html.fromstring(resp.text) offset = len('lang_') + eng2sxng = {'zh_chs': 'zh_Hans', 'zh_cht': 'zh_Hant'} + for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'): - supported_languages.append(val[offset:]) + eng_tag = val[offset:] + + try: + sxng_tag = 
language_tag(babel.Locale.parse(eng2sxng.get(eng_tag, eng_tag))) + except babel.UnknownLocaleError: + print('ERROR: unknown language --> %s' % eng_tag) + continue - return supported_languages + conflict = engine_traits.languages.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.languages[sxng_tag] = eng_tag
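
All of the engines touched by this revision converge on the same traits pattern: fetch_traits() maps the tags an engine actually understands to SearXNG's babel-based tags (engine_traits.languages / engine_traits.regions, plus engine_traits.custom for extras such as wiki_netloc), and request() later resolves params['searxng_locale'] back to an engine tag via traits.get_language() or traits.get_region(). Below is a minimal sketch of that pattern for a hypothetical engine; the tag list and the example.org URL are placeholders, only EngineTraits, language_tag and the traits getters are taken from the hunks above.

# illustrative sketch only -- 'example.org' and the tag list are made up,
# they stand in for whatever a real engine would expose in its preferences
# page or API (compare the peertube, qwant, startpage and yahoo hunks above)
from urllib.parse import urlencode

import babel

from searx.locales import language_tag
from searx.enginelib.traits import EngineTraits

traits: EngineTraits  # filled in by the engine loader from the traits map


def fetch_traits(engine_traits: EngineTraits):
    """Map the engine's own language tags to SearXNG (babel) tags."""
    engine_tags = ['en', 'fr', 'pt-br', 'xx']  # placeholder data

    for eng_tag in engine_tags:
        try:
            sxng_tag = language_tag(babel.Locale.parse(eng_tag, sep='-'))
        except babel.UnknownLocaleError:
            print("ERROR: %s is unknown by babel" % eng_tag)  # e.g. 'xx'
            continue

        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            if conflict != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
            continue
        engine_traits.languages[sxng_tag] = eng_tag


def request(query, params):
    """Resolve the SearXNG locale back to a tag the engine understands."""
    eng_lang = traits.get_language(params['searxng_locale'], 'en')
    params['url'] = 'https://example.org/search?' + urlencode({'q': query, 'lang': eng_lang})
    return params

The conflict check mirrors the one used in the peertube, qwant, startpage, wikipedia and yahoo hunks: when two engine tags map to the same SearXNG tag, the first mapping wins and a CONFLICT message is printed instead of overwriting it.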