summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/data-update.yml2
-rw-r--r--docs/admin/engines/configured_engines.rst2
-rw-r--r--docs/admin/engines/settings.rst7
-rw-r--r--docs/conf.py5
-rw-r--r--docs/dev/engine_overview.rst42
-rw-r--r--docs/dev/searxng_extra/update.rst8
-rw-r--r--docs/src/searx.engine.archlinux.rst9
-rw-r--r--docs/src/searx.engine.dailymotion.rst8
-rw-r--r--docs/src/searx.engine.duckduckgo.rst22
-rw-r--r--docs/src/searx.enginelib.rst17
-rw-r--r--docs/src/searx.engines.bing.rst43
-rw-r--r--docs/src/searx.engines.google.rst27
-rw-r--r--docs/src/searx.engines.peertube.rst27
-rw-r--r--docs/src/searx.engines.rst8
-rw-r--r--docs/src/searx.engines.startpage.rst13
-rw-r--r--docs/src/searx.engines.wikipedia.rst27
-rw-r--r--docs/src/searx.locales.rst12
-rw-r--r--docs/src/searx.search.processors.rst47
-rwxr-xr-xmanage2
-rw-r--r--requirements.txt2
-rw-r--r--searx/autocomplete.py130
-rw-r--r--searx/data/__init__.py4
-rw-r--r--searx/data/engine_traits.json3810
-rw-r--r--searx/data/engines_languages.json4381
-rw-r--r--searx/enginelib/__init__.py136
-rw-r--r--searx/enginelib/traits.py250
-rw-r--r--searx/engines/__init__.py100
-rw-r--r--searx/engines/archlinux.py225
-rw-r--r--searx/engines/bing.py258
-rw-r--r--searx/engines/bing_images.py129
-rw-r--r--searx/engines/bing_news.py225
-rw-r--r--searx/engines/bing_videos.py112
-rw-r--r--searx/engines/dailymotion.py175
-rw-r--r--searx/engines/demo_offline.py2
-rw-r--r--searx/engines/duckduckgo.py360
-rw-r--r--searx/engines/duckduckgo_definitions.py35
-rw-r--r--searx/engines/duckduckgo_images.py112
-rw-r--r--searx/engines/duckduckgo_weather.py35
-rw-r--r--searx/engines/gentoo.py6
-rw-r--r--searx/engines/google.py494
-rw-r--r--searx/engines/google_images.py49
-rw-r--r--searx/engines/google_news.py250
-rw-r--r--searx/engines/google_scholar.py120
-rw-r--r--searx/engines/google_videos.py115
-rw-r--r--searx/engines/peertube.py195
-rw-r--r--searx/engines/qwant.py45
-rw-r--r--searx/engines/sepiasearch.py110
-rw-r--r--searx/engines/startpage.py472
-rw-r--r--searx/engines/wikidata.py50
-rw-r--r--searx/engines/wikipedia.py232
-rw-r--r--searx/engines/yahoo.py70
-rw-r--r--searx/locales.py190
-rw-r--r--searx/preferences.py2
-rw-r--r--searx/query.py6
-rw-r--r--searx/search/processors/__init__.py5
-rw-r--r--searx/search/processors/abstract.py12
-rw-r--r--searx/search/processors/online.py10
-rw-r--r--searx/search/processors/online_currency.py4
-rw-r--r--searx/search/processors/online_dictionary.py5
-rw-r--r--searx/search/processors/online_url_search.py5
-rw-r--r--searx/settings.yml18
-rw-r--r--searx/settings_defaults.py8
-rw-r--r--searx/sxng_locales.py (renamed from searx/languages.py)91
-rw-r--r--searx/templates/simple/filters/languages.html8
-rw-r--r--searx/templates/simple/preferences.html8
-rw-r--r--searx/utils.py98
-rwxr-xr-xsearx/webapp.py73
-rw-r--r--searx/webutils.py9
-rwxr-xr-xsearxng_extra/update/update_engine_descriptions.py8
-rwxr-xr-xsearxng_extra/update/update_engine_traits.py198
-rwxr-xr-xsearxng_extra/update/update_languages.py313
-rwxr-xr-xsearxng_extra/update/update_osm_keys_tags.py4
-rw-r--r--tests/unit/test_locales.py111
-rw-r--r--tests/unit/test_utils.py33
-rw-r--r--utils/templates/etc/searxng/settings.yml3
75 files changed, 7824 insertions, 6415 deletions
diff --git a/.github/workflows/data-update.yml b/.github/workflows/data-update.yml
index d20cd6c63..0ffb1498e 100644
--- a/.github/workflows/data-update.yml
+++ b/.github/workflows/data-update.yml
@@ -17,7 +17,7 @@ jobs:
- update_currencies.py
- update_external_bangs.py
- update_firefox_version.py
- - update_languages.py
+ - update_engine_traits.py
- update_wikidata_units.py
- update_engine_descriptions.py
steps:
diff --git a/docs/admin/engines/configured_engines.rst b/docs/admin/engines/configured_engines.rst
index c7b6a1f52..fa1e5a4b0 100644
--- a/docs/admin/engines/configured_engines.rst
+++ b/docs/admin/engines/configured_engines.rst
@@ -42,7 +42,7 @@ Explanation of the :ref:`general engine configuration` shown in the table
- Timeout
- Weight
- Paging
- - Language
+ - Language, Region
- Safe search
- Time range
diff --git a/docs/admin/engines/settings.rst b/docs/admin/engines/settings.rst
index 099e449e0..0d9e14e57 100644
--- a/docs/admin/engines/settings.rst
+++ b/docs/admin/engines/settings.rst
@@ -569,10 +569,13 @@ engine is shown. Most of the options have a default value or even are optional.
To disable by default the engine, but not deleting it. It will allow the user
to manually activate it in the settings.
+``inactive`` : optional
+ Remove the engine from the settings (*disabled & removed*).
+
``language`` : optional
If you want to use another language for a specific engine, you can define it
- by using the full ISO code of language and country, like ``fr_FR``, ``en_US``,
- ``de_DE``.
+ by using the ISO code of language (and region), like ``fr``, ``en-US``,
+ ``de-DE``.
``tokens`` : optional
A list of secret tokens to make this engine *private*, more details see
diff --git a/docs/conf.py b/docs/conf.py
index 8e0c3ab1b..1d71b7f8a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -127,6 +127,10 @@ extensions = [
'notfound.extension', # https://github.com/readthedocs/sphinx-notfound-page
]
+autodoc_default_options = {
+ 'member-order': 'groupwise',
+}
+
myst_enable_extensions = [
"replacements", "smartquotes"
]
@@ -135,6 +139,7 @@ suppress_warnings = ['myst.domains']
intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
+ "babel" : ("https://babel.readthedocs.io/en/latest/", None),
"flask": ("https://flask.palletsprojects.com/", None),
"flask_babel": ("https://python-babel.github.io/flask-babel/", None),
# "werkzeug": ("https://werkzeug.palletsprojects.com/", None),
diff --git a/docs/dev/engine_overview.rst b/docs/dev/engine_overview.rst
index 95ed267e2..930fd0813 100644
--- a/docs/dev/engine_overview.rst
+++ b/docs/dev/engine_overview.rst
@@ -54,6 +54,7 @@ Engine File
- ``offline`` :ref:`[ref] <offline engines>`
- ``online_dictionary``
- ``online_currency``
+ - ``online_url_search``
======================= =========== ========================================================
.. _engine settings:
@@ -131,8 +132,10 @@ Passed Arguments (request)
These arguments can be used to construct the search query. Furthermore,
parameters with default value can be redefined for special purposes.
+.. _engine request online:
-.. table:: If the ``engine_type`` is ``online``
+.. table:: If the ``engine_type`` is :py:obj:`online
+ <searx.search.processors.online.OnlineProcessor.get_params>`
:width: 100%
====================== ============== ========================================================================
@@ -149,12 +152,16 @@ parameters with default value can be redefined for special purposes.
safesearch int ``0``, between ``0`` and ``2`` (normal, moderate, strict)
time_range Optional[str] ``None``, can be ``day``, ``week``, ``month``, ``year``
pageno int current pagenumber
- language str specific language code like ``'en_US'``, or ``'all'`` if unspecified
+ searxng_locale str SearXNG's locale selected by user. Specific language code like
+ ``'en'``, ``'en-US'``, or ``'all'`` if unspecified.
====================== ============== ========================================================================
-.. table:: If the ``engine_type`` is ``online_dictionary``, in addition to the
- ``online`` arguments:
+.. _engine request online_dictionary:
+
+.. table:: If the ``engine_type`` is :py:obj:`online_dictionary
+ <searx.search.processors.online_dictionary.OnlineDictionaryProcessor.get_params>`,
+ in addition to the :ref:`online <engine request online>` arguments:
:width: 100%
====================== ============== ========================================================================
@@ -165,8 +172,11 @@ parameters with default value can be redefined for special purposes.
query str the text query without the languages
====================== ============== ========================================================================
-.. table:: If the ``engine_type`` is ``online_currency```, in addition to the
- ``online`` arguments:
+.. _engine request online_currency:
+
+.. table:: If the ``engine_type`` is :py:obj:`online_currency
+ <searx.search.processors.online_currency.OnlineCurrencyProcessor.get_params>`,
+ in addition to the :ref:`online <engine request online>` arguments:
:width: 100%
====================== ============== ========================================================================
@@ -179,6 +189,26 @@ parameters with default value can be redefined for special purposes.
to_name str currency name
====================== ============== ========================================================================
+.. _engine request online_url_search:
+
+.. table:: If the ``engine_type`` is :py:obj:`online_url_search
+ <searx.search.processors.online_url_search.OnlineUrlSearchProcessor.get_params>`,
+ in addition to the :ref:`online <engine request online>` arguments:
+ :width: 100%
+
+ ====================== ============== ========================================================================
+ argument type default-value, information
+ ====================== ============== ========================================================================
+ search_url dict URLs from the search query:
+
+ .. code:: python
+
+ {
+ 'http': str,
+ 'ftp': str,
+ 'data:image': str
+ }
+ ====================== ============== ========================================================================
Specify Request
---------------
diff --git a/docs/dev/searxng_extra/update.rst b/docs/dev/searxng_extra/update.rst
index d05c81409..a125303e0 100644
--- a/docs/dev/searxng_extra/update.rst
+++ b/docs/dev/searxng_extra/update.rst
@@ -52,12 +52,12 @@ Scripts to update static data in :origin:`searx/data/`
:members:
-``update_languages.py``
-=======================
+``update_engine_traits.py``
+===========================
-:origin:`[source] <searxng_extra/update/update_languages.py>`
+:origin:`[source] <searxng_extra/update/update_engine_traits.py>`
-.. automodule:: searxng_extra.update.update_languages
+.. automodule:: searxng_extra.update.update_engine_traits
:members:
diff --git a/docs/src/searx.engine.archlinux.rst b/docs/src/searx.engine.archlinux.rst
new file mode 100644
index 000000000..be48b1859
--- /dev/null
+++ b/docs/src/searx.engine.archlinux.rst
@@ -0,0 +1,9 @@
+.. _archlinux engine:
+
+==========
+Arch Linux
+==========
+
+.. automodule:: searx.engines.archlinux
+ :members:
+
diff --git a/docs/src/searx.engine.dailymotion.rst b/docs/src/searx.engine.dailymotion.rst
new file mode 100644
index 000000000..84348e2d0
--- /dev/null
+++ b/docs/src/searx.engine.dailymotion.rst
@@ -0,0 +1,8 @@
+.. _dailymotion engine:
+
+===========
+Dailymotion
+===========
+
+.. automodule:: searx.engines.dailymotion
+ :members:
diff --git a/docs/src/searx.engine.duckduckgo.rst b/docs/src/searx.engine.duckduckgo.rst
new file mode 100644
index 000000000..1646d4984
--- /dev/null
+++ b/docs/src/searx.engine.duckduckgo.rst
@@ -0,0 +1,22 @@
+.. _duckduckgo engines:
+
+==================
+DuckDuckGo engines
+==================
+
+.. contents:: Contents
+ :depth: 2
+ :local:
+ :backlinks: entry
+
+.. automodule:: searx.engines.duckduckgo
+ :members:
+
+.. automodule:: searx.engines.duckduckgo_images
+ :members:
+
+.. automodule:: searx.engines.duckduckgo_definitions
+ :members:
+
+.. automodule:: searx.engines.duckduckgo_weather
+ :members:
diff --git a/docs/src/searx.enginelib.rst b/docs/src/searx.enginelib.rst
new file mode 100644
index 000000000..651a04e68
--- /dev/null
+++ b/docs/src/searx.enginelib.rst
@@ -0,0 +1,17 @@
+.. _searx.enginelib:
+
+============
+Engine model
+============
+
+.. automodule:: searx.enginelib
+ :members:
+
+.. _searx.enginelib.traits:
+
+=============
+Engine traits
+=============
+
+.. automodule:: searx.enginelib.traits
+ :members:
diff --git a/docs/src/searx.engines.bing.rst b/docs/src/searx.engines.bing.rst
new file mode 100644
index 000000000..6b7bba8f2
--- /dev/null
+++ b/docs/src/searx.engines.bing.rst
@@ -0,0 +1,43 @@
+.. _bing engines:
+
+============
+Bing Engines
+============
+
+.. contents:: Contents
+ :depth: 2
+ :local:
+ :backlinks: entry
+
+
+.. _bing web engine:
+
+Bing WEB
+========
+
+.. automodule:: searx.engines.bing
+ :members:
+
+.. _bing images engine:
+
+Bing Images
+===========
+
+.. automodule:: searx.engines.bing_images
+ :members:
+
+.. _bing videos engine:
+
+Bing Videos
+===========
+
+.. automodule:: searx.engines.bing_videos
+ :members:
+
+.. _bing news engine:
+
+Bing News
+=========
+
+.. automodule:: searx.engines.bing_news
+ :members:
diff --git a/docs/src/searx.engines.google.rst b/docs/src/searx.engines.google.rst
index 2d10b5eea..9c15325f8 100644
--- a/docs/src/searx.engines.google.rst
+++ b/docs/src/searx.engines.google.rst
@@ -12,15 +12,21 @@ Google Engines
.. _google API:
-google API
+Google API
==========
.. _Query Parameter Definitions:
https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
+SearXNG's implementation of the Google API is mainly done in
+:py:obj:`get_google_info <searx.engines.google.get_google_info>`.
+
For detailed description of the *REST-full* API see: `Query Parameter
-Definitions`_. Not all parameters can be appied and some engines are *special*
-(e.g. :ref:`google news engine`).
+Definitions`_. The linked API documentation can sometimes be helpful during
+reverse engineering. However, we cannot use it in the freely accessible WEB
+services; not all parameters can be applied and some engines are more *special*
+than others (e.g. :ref:`google news engine`).
+
.. _google web engine:
@@ -30,6 +36,13 @@ Google WEB
.. automodule:: searx.engines.google
:members:
+.. _google autocomplete:
+
+Google Autocomplete
+====================
+
+.. autofunction:: searx.autocomplete.google_complete
+
.. _google images engine:
Google Images
@@ -53,3 +66,11 @@ Google News
.. automodule:: searx.engines.google_news
:members:
+
+.. _google scholar engine:
+
+Google Scholar
+==============
+
+.. automodule:: searx.engines.google_scholar
+ :members:
diff --git a/docs/src/searx.engines.peertube.rst b/docs/src/searx.engines.peertube.rst
new file mode 100644
index 000000000..8e1576ea0
--- /dev/null
+++ b/docs/src/searx.engines.peertube.rst
@@ -0,0 +1,27 @@
+.. _peertube engines:
+
+================
+Peertube Engines
+================
+
+.. contents:: Contents
+ :depth: 2
+ :local:
+ :backlinks: entry
+
+
+.. _peertube video engine:
+
+Peertube Video
+==============
+
+.. automodule:: searx.engines.peertube
+ :members:
+
+.. _sepiasearch engine:
+
+SepiaSearch
+===========
+
+.. automodule:: searx.engines.sepiasearch
+ :members:
diff --git a/docs/src/searx.engines.rst b/docs/src/searx.engines.rst
index 687fdb0b2..4ce96b27e 100644
--- a/docs/src/searx.engines.rst
+++ b/docs/src/searx.engines.rst
@@ -1,8 +1,8 @@
-.. _load_engines:
+.. _searx.engines:
-============
-Load Engines
-============
+=================
+SearXNG's engines
+=================
.. automodule:: searx.engines
:members:
diff --git a/docs/src/searx.engines.startpage.rst b/docs/src/searx.engines.startpage.rst
new file mode 100644
index 000000000..c885d8f1b
--- /dev/null
+++ b/docs/src/searx.engines.startpage.rst
@@ -0,0 +1,13 @@
+.. _startpage engines:
+
+=================
+Startpage engines
+=================
+
+.. contents:: Contents
+ :depth: 2
+ :local:
+ :backlinks: entry
+
+.. automodule:: searx.engines.startpage
+ :members:
diff --git a/docs/src/searx.engines.wikipedia.rst b/docs/src/searx.engines.wikipedia.rst
new file mode 100644
index 000000000..e644cd645
--- /dev/null
+++ b/docs/src/searx.engines.wikipedia.rst
@@ -0,0 +1,27 @@
+.. _wikimedia engines:
+
+=========
+Wikimedia
+=========
+
+.. contents:: Contents
+ :depth: 2
+ :local:
+ :backlinks: entry
+
+
+.. _wikipedia engine:
+
+Wikipedia
+=========
+
+.. automodule:: searx.engines.wikipedia
+ :members:
+
+.. _wikidata engine:
+
+Wikidata
+=========
+
+.. automodule:: searx.engines.wikidata
+ :members:
diff --git a/docs/src/searx.locales.rst b/docs/src/searx.locales.rst
index 579247aff..2f13bfca1 100644
--- a/docs/src/searx.locales.rst
+++ b/docs/src/searx.locales.rst
@@ -4,5 +4,17 @@
Locales
=======
+.. contents:: Contents
+ :depth: 2
+ :local:
+ :backlinks: entry
+
.. automodule:: searx.locales
:members:
+
+
+SearXNG's locale codes
+======================
+
+.. automodule:: searx.sxng_locales
+ :members:
diff --git a/docs/src/searx.search.processors.rst b/docs/src/searx.search.processors.rst
new file mode 100644
index 000000000..390680657
--- /dev/null
+++ b/docs/src/searx.search.processors.rst
@@ -0,0 +1,47 @@
+.. _searx.search.processors:
+
+=================
+Search processors
+=================
+
+.. contents:: Contents
+ :depth: 2
+ :local:
+ :backlinks: entry
+
+
+Abstract processor class
+========================
+
+.. automodule:: searx.search.processors.abstract
+ :members:
+
+Offline processor
+=================
+
+.. automodule:: searx.search.processors.offline
+ :members:
+
+Online processor
+================
+
+.. automodule:: searx.search.processors.online
+ :members:
+
+Online currency processor
+=========================
+
+.. automodule:: searx.search.processors.online_currency
+ :members:
+
+Online Dictionary processor
+===========================
+
+.. automodule:: searx.search.processors.online_dictionary
+ :members:
+
+Online URL search processor
+===========================
+
+.. automodule:: searx.search.processors.online_url_search
+ :members:
diff --git a/manage b/manage
index c51d76ddb..8d61e1328 100755
--- a/manage
+++ b/manage
@@ -63,7 +63,7 @@ PYLINT_SEARXNG_DISABLE_OPTION="\
I,C,R,\
W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\
E1136"
-PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="supported_languages,language_aliases,logger,categories"
+PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="traits,supported_languages,language_aliases,logger,categories"
PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc"
help() {
diff --git a/requirements.txt b/requirements.txt
index adccfa3f5..3ee417d7c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
certifi==2022.12.7
-babel==2.11.0
+babel==2.12.1
flask-babel==3.0.1
flask==2.2.3
jinja2==3.1.2
diff --git a/searx/autocomplete.py b/searx/autocomplete.py
index aeb697a14..ad9903f36 100644
--- a/searx/autocomplete.py
+++ b/searx/autocomplete.py
@@ -5,20 +5,20 @@
"""
# pylint: disable=use-dict-literal
-from json import loads
+import json
from urllib.parse import urlencode
-from lxml import etree
+import lxml
from httpx import HTTPError
from searx import settings
-from searx.data import ENGINES_LANGUAGES
+from searx.engines import (
+ engines,
+ google,
+)
from searx.network import get as http_get
from searx.exceptions import SearxEngineResponseException
-# a fetch_supported_languages() for XPath engines isn't available right now
-# _brave = ENGINES_LANGUAGES['brave'].keys()
-
def get(*args, **kwargs):
if 'timeout' not in kwargs:
@@ -55,34 +55,58 @@ def dbpedia(query, _lang):
results = []
if response.ok:
- dom = etree.fromstring(response.content)
+ dom = lxml.etree.fromstring(response.content)
results = dom.xpath('//Result/Label//text()')
return results
-def duckduckgo(query, _lang):
- # duckduckgo autocompleter
- url = 'https://ac.duckduckgo.com/ac/?{0}&type=list'
+def duckduckgo(query, sxng_locale):
+ """Autocomplete from DuckDuckGo. Supports DuckDuckGo's languages"""
- resp = loads(get(url.format(urlencode(dict(q=query)))).text)
- if len(resp) > 1:
- return resp[1]
- return []
+ traits = engines['duckduckgo'].traits
+ args = {
+ 'q': query,
+ 'kl': traits.get_region(sxng_locale, traits.all_locale),
+ }
+ url = 'https://duckduckgo.com/ac/?type=list&' + urlencode(args)
+ resp = get(url)
-def google(query, lang):
- # google autocompleter
- autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'
+ ret_val = []
+ if resp.ok:
+ j = resp.json()
+ if len(j) > 1:
+ ret_val = j[1]
+ return ret_val
- response = get(autocomplete_url + urlencode(dict(hl=lang, q=query)))
- results = []
+def google_complete(query, sxng_locale):
+ """Autocomplete from Google. Supports Google's languages and subdomains
+ (:py:obj:`searx.engines.google.get_google_info`) by using the async REST
+ API::
- if response.ok:
- dom = etree.fromstring(response.text)
- results = dom.xpath('//suggestion/@data')
+ https://{subdomain}/complete/search?{args}
+ """
+
+ google_info = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits)
+
+ url = 'https://{subdomain}/complete/search?{args}'
+ args = urlencode(
+ {
+ 'q': query,
+ 'client': 'gws-wiz',
+ 'hl': google_info['params']['hl'],
+ }
+ )
+ results = []
+ resp = get(url.format(subdomain=google_info['subdomain'], args=args))
+ if resp.ok:
+ json_txt = resp.text[resp.text.find('[') : resp.text.find(']', -3) + 1]
+ data = json.loads(json_txt)
+ for item in data[0]:
+ results.append(lxml.html.fromstring(item[0]).text_content())
return results
@@ -109,9 +133,9 @@ def seznam(query, _lang):
]
-def startpage(query, lang):
- # startpage autocompleter
- lui = ENGINES_LANGUAGES['startpage'].get(lang, 'english')
+def startpage(query, sxng_locale):
+ """Autocomplete from Startpage. Supports Startpage's languages"""
+ lui = engines['startpage'].traits.get_language(sxng_locale, 'english')
url = 'https://startpage.com/suggestions?{query}'
resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
data = resp.json()
@@ -122,20 +146,20 @@ def swisscows(query, _lang):
# swisscows autocompleter
url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'
- resp = loads(get(url.format(query=urlencode({'query': query}))).text)
+ resp = json.loads(get(url.format(query=urlencode({'query': query}))).text)
return resp
-def qwant(query, lang):
- # qwant autocompleter (additional parameter : lang=en_en&count=xxx )
- url = 'https://api.qwant.com/api/suggest?{query}'
-
- resp = get(url.format(query=urlencode({'q': query, 'lang': lang})))
-
+def qwant(query, sxng_locale):
+ """Autocomplete from Qwant. Supports Qwant's regions."""
results = []
+ locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
+ url = 'https://api.qwant.com/v3/suggest?{query}'
+ resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'})))
+
if resp.ok:
- data = loads(resp.text)
+ data = resp.json()
if data['status'] == 'success':
for item in data['data']['items']:
results.append(item['value'])
@@ -143,21 +167,38 @@ def qwant(query, lang):
return results
-def wikipedia(query, lang):
- # wikipedia autocompleter
- url = 'https://' + lang + '.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json'
+def wikipedia(query, sxng_locale):
+ """Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc)."""
+ results = []
+ eng_traits = engines['wikipedia'].traits
+ wiki_lang = eng_traits.get_language(sxng_locale, 'en')
+ wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org')
+
+ url = 'https://{wiki_netloc}/w/api.php?{args}'
+ args = urlencode(
+ {
+ 'action': 'opensearch',
+ 'format': 'json',
+ 'formatversion': '2',
+ 'search': query,
+ 'namespace': '0',
+ 'limit': '10',
+ }
+ )
+ resp = get(url.format(args=args, wiki_netloc=wiki_netloc))
+ if resp.ok:
+ data = resp.json()
+ if len(data) > 1:
+ results = data[1]
- resp = loads(get(url.format(urlencode(dict(search=query)))).text)
- if len(resp) > 1:
- return resp[1]
- return []
+ return results
def yandex(query, _lang):
# yandex autocompleter
url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"
- resp = loads(get(url.format(urlencode(dict(part=query)))).text)
+ resp = json.loads(get(url.format(urlencode(dict(part=query)))).text)
if len(resp) > 1:
return resp[1]
return []
@@ -166,7 +207,7 @@ def yandex(query, _lang):
backends = {
'dbpedia': dbpedia,
'duckduckgo': duckduckgo,
- 'google': google,
+ 'google': google_complete,
'seznam': seznam,
'startpage': startpage,
'swisscows': swisscows,
@@ -177,12 +218,11 @@ backends = {
}
-def search_autocomplete(backend_name, query, lang):
+def search_autocomplete(backend_name, query, sxng_locale):
backend = backends.get(backend_name)
if backend is None:
return []
-
try:
- return backend(query, lang)
+ return backend(query, sxng_locale)
except (HTTPError, SearxEngineResponseException):
return []
diff --git a/searx/data/__init__.py b/searx/data/__init__.py
index 424440a71..0822f4ac8 100644
--- a/searx/data/__init__.py
+++ b/searx/data/__init__.py
@@ -7,7 +7,7 @@
"""
__all__ = [
- 'ENGINES_LANGUAGES',
+ 'ENGINE_TRAITS',
'CURRENCIES',
'USER_AGENTS',
'EXTERNAL_URLS',
@@ -42,7 +42,6 @@ def ahmia_blacklist_loader():
return f.read().split()
-ENGINES_LANGUAGES = _load('engines_languages.json')
CURRENCIES = _load('currencies.json')
USER_AGENTS = _load('useragents.json')
EXTERNAL_URLS = _load('external_urls.json')
@@ -50,3 +49,4 @@ WIKIDATA_UNITS = _load('wikidata_units.json')
EXTERNAL_BANGS = _load('external_bangs.json')
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
+ENGINE_TRAITS = _load('engine_traits.json')
diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json
new file mode 100644
index 000000000..ea3724c10
--- /dev/null
+++ b/searx/data/engine_traits.json
@@ -0,0 +1,3810 @@
+{
+ "arch linux wiki": {
+ "all_locale": null,
+ "custom": {
+ "title": {
+ "de": "Spezial:Suche",
+ "fa": "\u0648\u06cc\u0698\u0647:\u062c\u0633\u062a\u062c\u0648",
+ "ja": "\u7279\u5225:\u691c\u7d22",
+ "zh": "Special:\u641c\u7d22"
+ },
+ "wiki_netloc": {
+ "de": "wiki.archlinux.de",
+ "fa": "wiki.archusers.ir",
+ "ja": "wiki.archlinux.jp",
+ "zh": "wiki.archlinuxcn.org"
+ }
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "ar": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
+ "bg": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438",
+ "bs": "Bosanski",
+ "cs": "\u010ce\u0161tina",
+ "da": "Dansk",
+ "de": "Deutsch",
+ "el": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac",
+ "en": "English",
+ "es": "Espa\u00f1ol",
+ "fa": "\u0641\u0627\u0631\u0633\u06cc",
+ "fi": "Suomi",
+ "fr": "Fran\u00e7ais",
+ "he": "\u05e2\u05d1\u05e8\u05d9\u05ea",
+ "hr": "Hrvatski",
+ "hu": "Magyar",
+ "id": "Bahasa Indonesia",
+ "it": "Italiano",
+ "ja": "\u65e5\u672c\u8a9e",
+ "ko": "\ud55c\uad6d\uc5b4",
+ "lt": "Lietuvi\u0173",
+ "nl": "Nederlands",
+ "pl": "Polski",
+ "pt": "Portugu\u00eas",
+ "ru": "\u0420\u0443\u0441\u0441\u043a\u0438\u0439",
+ "sk": "Sloven\u010dina",
+ "sr": "\u0421\u0440\u043f\u0441\u043a\u0438 / srpski",
+ "sv": "Svenska",
+ "th": "\u0e44\u0e17\u0e22",
+ "tr": "T\u00fcrk\u00e7e",
+ "uk": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430",
+ "zh": "\u4e2d\u6587\uff08\u7e41\u9ad4\uff09"
+ },
+ "regions": {}
+ },
+ "bing": {
+ "all_locale": null,
+ "custom": {},
+ "data_type": "traits_v1",
+ "languages": {
+ "ar": "ar",
+ "bg": "bg",
+ "bn": "bn",
+ "ca": "ca",
+ "cs": "cs",
+ "da": "da",
+ "de": "de",
+ "en": "en",
+ "es": "es",
+ "et": "et",
+ "eu": "eu",
+ "fi": "fi",
+ "fr": "fr",
+ "gl": "gl",
+ "gu": "gu",
+ "he": "he",
+ "hi": "hi",
+ "hr": "hr",
+ "hu": "hu",
+ "is": "is",
+ "it": "it",
+ "ja": "jp",
+ "kn": "kn",
+ "ko": "ko",
+ "lt": "lt",
+ "lv": "lv",
+ "ml": "ml",
+ "mr": "mr",
+ "ms": "ms",
+ "nb": "nb",
+ "nl": "nl",
+ "pa": "pa",
+ "pl": "pl",
+ "pt": "pt-pt",
+ "ro": "ro",
+ "ru": "ru",
+ "sk": "sk",
+ "sl": "sl",
+ "sr": "sr",
+ "sv": "sv",
+ "ta": "ta",
+ "te": "te",
+ "th": "th",
+ "tr": "tr",
+ "uk": "uk",
+ "vi": "vi",
+ "zh": "zh-hans",
+ "zh_Hans": "zh-hans",
+ "zh_Hant": "zh-hant"
+ },
+ "regions": {
+ "da-DK": "da-DK",
+ "de-AT": "de-AT",
+ "de-CH": "de-CH",
+ "de-DE": "de-DE",
+ "en-AU": "en-AU",
+ "en-CA": "en-CA",
+ "en-GB": "en-GB",
+ "en-IN": "en-IN",
+ "en-MY": "en-MY",
+ "en-NZ": "en-NZ",
+ "en-PH": "en-PH",
+ "en-US": "en-US",
+ "en-ZA": "en-ZA",
+ "es-AR": "es-AR",
+ "es-CL": "es-CL",
+ "es-ES": "es-ES",
+ "es-MX": "es-MX",
+ "es-US": "es-US",
+ "fi-FI": "fi-FI",
+ "fr-BE": "fr-BE",
+ "fr-CA": "fr-CA",
+ "fr-CH": "fr-CH",
+ "fr-FR": "fr-FR",
+ "id-ID": "en-ID",
+ "it-IT": "it-IT",
+ "ja-JP": "ja-JP",
+ "ko-KR": "ko-KR",
+ "nb-NO": "no-NO",
+ "nl-BE": "nl-BE",
+ "nl-NL": "nl-NL",
+ "pl-PL": "pl-PL",
+ "pt-BR": "pt-BR",
+ "ru-RU": "ru-RU",
+ "sv-SE": "sv-SE",
+ "tr-TR": "tr-TR",
+ "zh-CN": "zh-CN",
+ "zh-HK": "zh-HK",
+ "zh-TW": "zh-TW"
+ }
+ },
+ "bing images": {
+ "all_locale": null,
+ "custom": {},
+ "data_type": "traits_v1",
+ "languages": {
+ "ar": "ar",
+ "bg": "bg",
+ "bn": "bn",
+ "ca": "ca",
+ "cs": "cs",
+ "da": "da",
+ "de": "de",
+ "en": "en",
+ "es": "es",
+ "et": "et",
+ "eu": "eu",
+ "fi": "fi",
+ "fr": "fr",
+ "gl": "gl",
+ "gu": "gu",
+ "he": "he",
+ "hi": "hi",
+ "hr": "hr",
+ "hu": "hu",
+ "is": "is",
+ "it": "it",
+ "ja": "jp",
+ "kn": "kn",
+ "ko": "ko",
+ "lt": "lt",
+ "lv": "lv",
+ "ml": "ml",
+ "mr": "mr",
+ "ms": "ms",
+ "nb": "nb",
+ "nl": "nl",
+ "pa": "pa",
+ "pl": "pl",
+ "pt": "pt-pt",
+ "ro": "ro",
+ "ru": "ru",
+ "sk": "sk",
+ "sl": "sl",
+ "sr": "sr",
+ "sv": "sv",
+ "ta": "ta",
+ "te": "te",
+ "th": "th",
+ "tr": "tr",
+ "uk": "uk",
+ "vi": "vi",
+ "zh": "zh-hans",
+ "zh_Hans": "zh-hans",
+ "zh_Hant": "zh-hant"
+ },
+ "regions": {
+ "da-DK": "da-DK",
+ "de-AT": "de-AT",
+ "de-CH": "de-CH",
+ "de-DE": "de-DE",
+ "en-AU": "en-AU",
+ "en-CA": "en-CA",
+ "en-GB": "en-GB",
+ "en-IN": "en-IN",
+ "en-MY": "en-MY",
+ "en-NZ": "en-NZ",
+ "en-PH": "en-PH",
+ "en-US": "en-US",
+ "en-ZA": "en-ZA",
+ "es-AR": "es-AR",
+ "es-CL": "es-CL",
+ "es-ES": "es-ES",
+ "es-MX": "es-MX",
+ "es-US": "es-US",
+ "fi-FI": "fi-FI",
+ "fr-BE": "fr-BE",
+ "fr-CA": "fr-CA",
+ "fr-CH": "fr-CH",
+ "fr-FR": "fr-FR",
+ "id-ID": "en-ID",
+ "it-IT": "it-IT",
+ "ja-JP": "ja-JP",
+ "ko-KR": "ko-KR",
+ "nb-NO": "no-NO",
+ "nl-BE": "nl-BE",
+ "nl-NL": "nl-NL",
+ "pl-PL": "pl-PL",
+ "pt-BR": "pt-BR",
+ "ru-RU": "ru-RU",
+ "sv-SE": "sv-SE",
+ "tr-TR": "tr-TR",
+ "zh-CN": "zh-CN",
+ "zh-HK": "zh-HK",
+ "zh-TW": "zh-TW"
+ }
+ },
+ "bing news": {
+ "all_locale": "en-WW",
+ "custom": {},
+ "data_type": "traits_v1",
+ "languages": {
+ "ar": "ar",
+ "bg": "bg",
+ "bn": "bn",
+ "ca": "ca",
+ "cs": "cs",
+ "da": "da",
+ "de": "de",
+ "en": "en",
+ "es": "es",
+ "et": "et",
+ "eu": "eu",
+ "fi": "fi",
+ "fr": "fr",
+ "gl": "gl",
+ "gu": "gu",
+ "he": "he",
+ "hi": "hi",
+ "hr": "hr",
+ "hu": "hu",
+ "is": "is",
+ "it": "it",
+ "ja": "jp",
+ "kn": "kn",
+ "ko": "ko",
+ "lt": "lt",
+ "lv": "lv",
+ "ml": "ml",
+ "mr": "mr",
+ "ms": "ms",
+ "nb": "nb",
+ "nl": "nl",
+ "pa": "pa",
+ "pl": "pl",
+ "pt": "pt-pt",
+ "ro": "ro",
+ "ru": "ru",
+ "sk": "sk",
+ "sl": "sl",
+ "sr": "sr",
+ "sv": "sv",
+ "ta": "ta",
+ "te": "te",
+ "th": "th",
+ "tr": "tr",
+ "uk": "uk",
+ "vi": "vi",
+ "zh": "zh-hans",
+ "zh_Hans": "zh-hans",
+ "zh_Hant": "zh-hant"
+ },
+ "regions": {
+ "da-DK": "da-DK",
+ "de-DE": "de-DE",
+ "en-AU": "en-AU",
+ "en-GB": "en-GB",
+ "en-US": "en-US",
+ "es-CL": "es-CL",
+ "es-MX": "es-MX",
+ "es-US": "es-US",
+ "fi-FI": "fi-FI",
+ "fr-CA": "fr-CA",
+ "fr-FR": "fr-FR",
+ "it-IT": "it-IT",
+ "pt-BR": "pt-BR",
+ "zh-CN": "zh-CN"
+ }
+ },
+ "bing videos": {
+ "all_locale": null,
+ "custom": {},
+ "data_type": "traits_v1",
+ "languages": {
+ "ar": "ar",
+ "bg": "bg",
+ "bn": "bn",
+ "ca": "ca",
+ "cs": "cs",
+ "da": "da",
+ "de": "de",
+ "en": "en",
+ "es": "es",
+ "et": "et",
+ "eu": "eu",
+ "fi": "fi",
+ "fr": "fr",
+ "gl": "gl",
+ "gu": "gu",
+ "he": "he",
+ "hi": "hi",
+ "hr": "hr",
+ "hu": "hu",
+ "is": "is",
+ "it": "it",
+ "ja": "jp",
+ "kn": "kn",
+ "ko": "ko",
+ "lt": "lt",
+ "lv": "lv",
+ "ml": "ml",
+ "mr": "mr",
+ "ms": "ms",
+ "nb": "nb",
+ "nl": "nl",
+ "pa": "pa",
+ "pl": "pl",
+ "pt": "pt-pt",
+ "ro": "ro",
+ "ru": "ru",
+ "sk": "sk",
+ "sl": "sl",
+ "sr": "sr",
+ "sv": "sv",
+ "ta": "ta",
+ "te": "te",
+ "th": "th",
+ "tr": "tr",
+ "uk": "uk",
+ "vi": "vi",
+ "zh": "zh-hans",
+ "zh_Hans": "zh-hans",
+ "zh_Hant": "zh-hant"
+ },
+ "regions": {
+ "da-DK": "da-DK",
+ "de-AT": "de-AT",
+ "de-CH": "de-CH",
+ "de-DE": "de-DE",
+ "en-AU": "en-AU",
+ "en-CA": "en-CA",
+ "en-GB": "en-GB",
+ "en-IN": "en-IN",
+ "en-MY": "en-MY",
+ "en-NZ": "en-NZ",
+ "en-PH": "en-PH",
+ "en-US": "en-US",
+ "en-ZA": "en-ZA",
+ "es-AR": "es-AR",
+ "es-CL": "es-CL",
+ "es-ES": "es-ES",
+ "es-MX": "es-MX",
+ "es-US": "es-US",
+ "fi-FI": "fi-FI",
+ "fr-BE": "fr-BE",
+ "fr-CA": "fr-CA",
+ "fr-CH": "fr-CH",
+ "fr-FR": "fr-FR",
+ "id-ID": "en-ID",
+ "it-IT": "it-IT",
+ "ja-JP": "ja-JP",
+ "ko-KR": "ko-KR",
+ "nb-NO": "no-NO",
+ "nl-BE": "nl-BE",
+ "nl-NL": "nl-NL",
+ "pl-PL": "pl-PL",
+ "pt-BR": "pt-BR",
+ "ru-RU": "ru-RU",
+ "sv-SE": "sv-SE",
+ "tr-TR": "tr-TR",
+ "zh-CN": "zh-CN",
+ "zh-HK": "zh-HK",
+ "zh-TW": "zh-TW"
+ }
+ },
+ "dailymotion": {
+ "all_locale": null,
+ "custom": {},
+ "data_type": "traits_v1",
+ "languages": {
+ "ar": "ar",
+ "de": "de",
+ "el": "el",
+ "en": "en",
+ "es": "es",
+ "fr": "fr",
+ "id": "id",
+ "it": "it",
+ "ja": "ja",
+ "ko": "ko",
+ "ms": "ms",
+ "nl": "nl",
+ "pl": "pl",
+ "pt": "pt",
+ "ro": "ro",
+ "ru": "ru",
+ "th": "th",
+ "tr": "tr",
+ "vi": "vi",
+ "zh": "zh"
+ },
+ "regions": {
+ "ar-AE": "ar_AE",
+ "ar-EG": "ar_EG",
+ "ar-SA": "ar_SA",
+ "de-AT": "de_AT",
+ "de-CH": "de_CH",
+ "de-DE": "de_DE",
+ "el-GR": "el_GR",
+ "en-AU": "en_AU",
+ "en-CA": "en_CA",
+ "en-GB": "en_GB",
+ "en-HK": "en_HK",
+ "en-IE": "en_IE",
+ "en-IN": "en_IN",
+ "en-NG": "en_NG",
+ "en-PH": "en_PH",
+ "en-PK": "en_PK",
+ "en-SG": "en_SG",
+ "en-US": "en_US",
+ "en-ZA": "en_ZA",
+ "es-AR": "es_AR",
+ "es-ES": "es_ES",
+ "es-MX": "es_MX",
+ "fr-BE": "fr_BE",
+ "fr-CA": "fr_CA",
+ "fr-CH": "fr_CH",
+ "fr-CI": "fr_CI",
+ "fr-FR": "fr_FR",
+ "fr-MA": "fr_MA",
+ "fr-SN": "fr_SN",
+ "fr-TN": "fr_TN",
+ "id-ID": "id_ID",
+ "it-CH": "it_CH",
+ "it-IT": "it_IT",
+ "ja-JP": "ja_JP",
+ "ko-KR": "ko_KR",
+ "ms-MY": "ms_MY",
+ "nl-BE": "nl_BE",
+ "nl-NL": "nl_NL",
+ "pl-PL": "pl_PL",
+ "pt-BR": "pt_BR",
+ "pt-PT": "pt_PT",
+ "ro-RO": "ro_RO",
+ "ru-RU": "ru_RU",
+ "th-TH": "th_TH",
+ "tr-TR": "tr_TR",
+ "vi-VN": "vi_VN",
+ "zh-CN": "zh_CN",
+ "zh-TW": "zh_TW"
+ }
+ },
+ "duckduckgo": {
+ "all_locale": "wt-wt",
+ "custom": {
+ "lang_region": {
+ "ar-DZ": "ar_DZ",
+ "ar-JO": "ar_JO",
+ "ar-SA": "ar_SA",
+ "bn-IN": "bn_IN",
+ "de-CH": "de_CH",
+ "en-AU": "en_AU",
+ "en-CA": "en_CA",
+ "en-GB": "en_GB",
+ "es-AR": "es_AR",
+ "es-CL": "es_CL",
+ "es-CO": "es_CO",
+ "es-CR": "es_CR",
+ "es-EC": "es_EC",
+ "es-MX": "es_MX",
+ "es-PE": "es_PE",
+ "es-UY": "es_UY",
+ "es-VE": "es_VE",
+ "fr-BE": "fr_BE",
+ "fr-CA": "fr_CA",
+ "fr-CH": "fr_CH",
+ "nl-BE": "nl_BE",
+ "pt-BR": "pt_BR"
+ }
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "af_ZA",
+ "ar": "ar_EG",
+ "ast": "ast_ES",
+ "az_Latn": "az_AZ",
+ "be": "be_BY",
+ "bg": "bg_BG",
+ "bn": "bn_BD",
+ "br": "br_FR",
+ "bs_Latn": "bs_BA",
+ "ca": "ca_ES",
+ "cs": "cs_CZ",
+ "cy": "cy_GB",
+ "da": "da_DK",
+ "de": "de_DE",
+ "el": "el_GR",
+ "en": "en_US",
+ "eo": "eo_XX",
+ "es": "es_ES",
+ "et": "et_EE",
+ "eu": "eu_ES",
+ "fa": "fa_IR",
+ "fi": "fi_FI",
+ "fil": "tl_PH",
+ "fr": "fr_FR",
+ "ga": "ga_IE",
+ "gd": "gd_GB",
+ "gl": "gl_ES",
+ "he": "he_IL",
+ "hi": "hi_IN",
+ "hr": "hr_HR",
+ "hu": "hu_HU",
+ "hy": "hy_AM",
+ "id": "id_ID",
+ "is": "is_IS",
+ "it": "it_IT",
+ "ja": "ja_JP",
+ "kab": "kab_DZ",
+ "kn": "kn_IN",
+ "ko": "ko_KR",
+ "ku": "ku",
+ "kw": "kw_GB",
+ "lt": "lt_LT",
+ "lv": "lv_LV",
+ "ml": "ml_IN",
+ "mr": "mr_IN",
+ "ms": "ms_MY",
+ "nb": "nb_NO",
+ "nl": "nl_NL",
+ "nn": "nn_NO",
+ "pl": "pl_PL",
+ "pt": "pt_PT",
+ "ro": "ro_RO",
+ "ru": "ru_RU",
+ "sc": "sc_IT",
+ "si": "si_LK",
+ "sk": "sk_SK",
+ "sl": "sl_SI",
+ "sq": "sq_AL",
+ "sr_Cyrl": "sr_RS",
+ "sv": "sv_SE",
+ "ta": "ta_IN",
+ "te": "te_IN",
+ "th": "th_TH",
+ "tr": "tr_TR",
+ "uk": "uk_UA",
+ "ur": "ur_PK",
+ "vi": "vi_VN",
+ "zh_Hans": "zh_CN",
+ "zh_Hant": "zh_TW"
+ },
+ "regions": {
+ "ar-SA": "xa-ar",
+ "bg-BG": "bg-bg",
+ "ca-ES": "es-ca",
+ "cs-CZ": "cz-cs",
+ "da-DK": "dk-da",
+ "de-AT": "at-de",
+ "de-CH": "ch-de",
+ "de-DE": "de-de",
+ "el-GR": "gr-el",
+ "en-AU": "au-en",
+ "en-CA": "ca-en",
+ "en-GB": "uk-en",
+ "en-IE": "ie-en",
+ "en-IL": "il-en",
+ "en-IN": "in-en",
+ "en-MY": "my-en",
+ "en-NZ": "nz-en",
+ "en-PH": "ph-en",
+ "en-PK": "pk-en",
+ "en-SG": "sg-en",
+ "en-US": "us-en",
+ "en-ZA": "za-en",
+ "es-AR": "ar-es",
+ "es-CL": "cl-es",
+ "es-CO": "co-es",
+ "es-ES": "es-es",
+ "es-MX": "mx-es",
+ "es-PE": "pe-es",
+ "es-US": "us-es",
+ "et-EE": "ee-et",
+ "fi-FI": "fi-fi",
+ "fr-BE": "be-fr",
+ "fr-CA": "ca-fr",
+ "fr-CH": "ch-fr",
+ "fr-FR": "fr-fr",
+ "hr-HR": "hr-hr",
+ "hu-HU": "hu-hu",
+ "id-ID": "id-en",
+ "it-IT": "it-it",
+ "ja-JP": "jp-jp",
+ "ko-KR": "kr-kr",
+ "lt-LT": "lt-lt",
+ "lv-LV": "lv-lv",
+ "nb-NO": "no-no",
+ "nl-BE": "be-nl",
+ "nl-NL": "nl-nl",
+ "pl-PL": "pl-pl",
+ "pt-BR": "br-pt",
+ "pt-PT": "pt-pt",
+ "ro-RO": "ro-ro",
+ "ru-RU": "ru-ru",
+ "sk-SK": "sk-sk",
+ "sl-SI": "sl-sl",
+ "sv-SE": "se-sv",
+ "th-TH": "th-en",
+ "tr-TR": "tr-tr",
+ "uk-UA": "ua-uk",
+ "vi-VN": "vn-en",
+ "zh-CN": "cn-zh",
+ "zh-HK": "hk-tzh",
+ "zh-TW": "tw-tzh"
+ }
+ },
+ "duckduckgo images": {
+ "all_locale": "wt-wt",
+ "custom": {
+ "lang_region": {
+ "ar-DZ": "ar_DZ",
+ "ar-JO": "ar_JO",
+ "ar-SA": "ar_SA",
+ "bn-IN": "bn_IN",
+ "de-CH": "de_CH",
+ "en-AU": "en_AU",
+ "en-CA": "en_CA",
+ "en-GB": "en_GB",
+ "es-AR": "es_AR",
+ "es-CL": "es_CL",
+ "es-CO": "es_CO",
+ "es-CR": "es_CR",
+ "es-EC": "es_EC",
+ "es-MX": "es_MX",
+ "es-PE": "es_PE",
+ "es-UY": "es_UY",
+ "es-VE": "es_VE",
+ "fr-BE": "fr_BE",
+ "fr-CA": "fr_CA",
+ "fr-CH": "fr_CH",
+ "nl-BE": "nl_BE",
+ "pt-BR": "pt_BR"
+ }
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "af_ZA",
+ "ar": "ar_EG",
+ "ast": "ast_ES",
+ "az_Latn": "az_AZ",
+ "be": "be_BY",
+ "bg": "bg_BG",
+ "bn": "bn_BD",
+ "br": "br_FR",
+ "bs_Latn": "bs_BA",
+ "ca": "ca_ES",
+ "cs": "cs_CZ",
+ "cy": "cy_GB",
+ "da": "da_DK",
+ "de": "de_DE",
+ "el": "el_GR",
+ "en": "en_US",
+ "eo": "eo_XX",
+ "es": "es_ES",
+ "et": "et_EE",
+ "eu": "eu_ES",
+ "fa": "fa_IR",
+ "fi": "fi_FI",
+ "fil": "tl_PH",
+ "fr": "fr_FR",
+ "ga": "ga_IE",
+ "gd": "gd_GB",
+ "gl": "gl_ES",
+ "he": "he_IL",
+ "hi": "hi_IN",
+ "hr": "hr_HR",
+ "hu": "hu_HU",
+ "hy": "hy_AM",
+ "id": "id_ID",
+ "is": "is_IS",
+ "it": "it_IT",
+ "ja": "ja_JP",
+ "kab": "kab_DZ",
+ "kn": "kn_IN",
+ "ko": "ko_KR",
+ "ku": "ku",
+ "kw": "kw_GB",
+ "lt": "lt_LT",
+ "lv": "lv_LV",
+ "ml": "ml_IN",
+ "mr": "mr_IN",
+ "ms": "ms_MY",
+ "nb": "nb_NO",
+ "nl": "nl_NL",
+ "nn": "nn_NO",
+ "pl": "pl_PL",
+ "pt": "pt_PT",
+ "ro": "ro_RO",
+ "ru": "ru_RU",
+ "sc": "sc_IT",
+ "si": "si_LK",
+ "sk": "sk_SK",
+ "sl": "sl_SI",
+ "sq": "sq_AL",
+ "sr_Cyrl": "sr_RS",
+ "sv": "sv_SE",
+ "ta": "ta_IN",
+ "te": "te_IN",
+ "th": "th_TH",
+ "tr": "tr_TR",
+ "uk": "uk_UA",
+ "ur": "ur_PK",
+ "vi": "vi_VN",
+ "zh_Hans": "zh_CN",
+ "zh_Hant": "zh_TW"
+ },
+ "regions": {
+ "ar-SA": "xa-ar",
+ "bg-BG": "bg-bg",
+ "ca-ES": "es-ca",
+ "cs-CZ": "cz-cs",
+ "da-DK": "dk-da",
+ "de-AT": "at-de",
+ "de-CH": "ch-de",
+ "de-DE": "de-de",
+ "el-GR": "gr-el",
+ "en-AU": "au-en",
+ "en-CA": "ca-en",
+ "en-GB": "uk-en",
+ "en-IE": "ie-en",
+ "en-IL": "il-en",
+ "en-IN": "in-en",
+ "en-MY": "my-en",
+ "en-NZ": "nz-en",
+ "en-PH": "ph-en",
+ "en-PK": "pk-en",
+ "en-SG": "sg-en",
+ "en-US": "us-en",
+ "en-ZA": "za-en",
+ "es-AR": "ar-es",
+ "es-CL": "cl-es",
+ "es-CO": "co-es",
+ "es-ES": "es-es",
+ "es-MX": "mx-es",
+ "es-PE": "pe-es",
+ "es-US": "us-es",
+ "et-EE": "ee-et",
+ "fi-FI": "fi-fi",
+ "fr-BE": "be-fr",
+ "fr-CA": "ca-fr",
+ "fr-CH": "ch-fr",
+ "fr-FR": "fr-fr",
+ "hr-HR": "hr-hr",
+ "hu-HU": "hu-hu",
+ "id-ID": "id-en",
+ "it-IT": "it-it",
+ "ja-JP": "jp-jp",
+ "ko-KR": "kr-kr",
+ "lt-LT": "lt-lt",
+ "lv-LV": "lv-lv",
+ "nb-NO": "no-no",
+ "nl-BE": "be-nl",
+ "nl-NL": "nl-nl",
+ "pl-PL": "pl-pl",
+ "pt-BR": "br-pt",
+ "pt-PT": "pt-pt",
+ "ro-RO": "ro-ro",
+ "ru-RU": "ru-ru",
+ "sk-SK": "sk-sk",
+ "sl-SI": "sl-sl",
+ "sv-SE": "se-sv",
+ "th-TH": "th-en",
+ "tr-TR": "tr-tr",
+ "uk-UA": "ua-uk",
+ "vi-VN": "vn-en",
+ "zh-CN": "cn-zh",
+ "zh-HK": "hk-tzh",
+ "zh-TW": "tw-tzh"
+ }
+ },
+ "duckduckgo weather": {
+ "all_locale": "wt-wt",
+ "custom": {
+ "lang_region": {
+ "ar-DZ": "ar_DZ",
+ "ar-JO": "ar_JO",
+ "ar-SA": "ar_SA",
+ "bn-IN": "bn_IN",
+ "de-CH": "de_CH",
+ "en-AU": "en_AU",
+ "en-CA": "en_CA",
+ "en-GB": "en_GB",
+ "es-AR": "es_AR",
+ "es-CL": "es_CL",
+ "es-CO": "es_CO",
+ "es-CR": "es_CR",
+ "es-EC": "es_EC",
+ "es-MX": "es_MX",
+ "es-PE": "es_PE",
+ "es-UY": "es_UY",
+ "es-VE": "es_VE",
+ "fr-BE": "fr_BE",
+ "fr-CA": "fr_CA",
+ "fr-CH": "fr_CH",
+ "nl-BE": "nl_BE",
+ "pt-BR": "pt_BR"
+ }
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "af_ZA",
+ "ar": "ar_EG",
+ "ast": "ast_ES",
+ "az_Latn": "az_AZ",
+ "be": "be_BY",
+ "bg": "bg_BG",
+ "bn": "bn_BD",
+ "br": "br_FR",
+ "bs_Latn": "bs_BA",
+ "ca": "ca_ES",
+ "cs": "cs_CZ",
+ "cy": "cy_GB",
+ "da": "da_DK",
+ "de": "de_DE",
+ "el": "el_GR",
+ "en": "en_US",
+ "eo": "eo_XX",
+ "es": "es_ES",
+ "et": "et_EE",
+ "eu": "eu_ES",
+ "fa": "fa_IR",
+ "fi": "fi_FI",
+ "fil": "tl_PH",
+ "fr": "fr_FR",
+ "ga": "ga_IE",
+ "gd": "gd_GB",
+ "gl": "gl_ES",
+ "he": "he_IL",
+ "hi": "hi_IN",
+ "hr": "hr_HR",
+ "hu": "hu_HU",
+ "hy": "hy_AM",
+ "id": "id_ID",
+ "is": "is_IS",
+ "it": "it_IT",
+ "ja": "ja_JP",
+ "kab": "kab_DZ",
+ "kn": "kn_IN",
+ "ko": "ko_KR",
+ "ku": "ku",
+ "kw": "kw_GB",
+ "lt": "lt_LT",
+ "lv": "lv_LV",
+ "ml": "ml_IN",
+ "mr": "mr_IN",
+ "ms": "ms_MY",
+ "nb": "nb_NO",
+ "nl": "nl_NL",
+ "nn": "nn_NO",
+ "pl": "pl_PL",
+ "pt": "pt_PT",
+ "ro": "ro_RO",
+ "ru": "ru_RU",
+ "sc": "sc_IT",
+ "si": "si_LK",
+ "sk": "sk_SK",
+ "sl": "sl_SI",
+ "sq": "sq_AL",
+ "sr_Cyrl": "sr_RS",
+ "sv": "sv_SE",
+ "ta": "ta_IN",
+ "te": "te_IN",
+ "th": "th_TH",
+ "tr": "tr_TR",
+ "uk": "uk_UA",
+ "ur": "ur_PK",
+ "vi": "vi_VN",
+ "zh_Hans": "zh_CN",
+ "zh_Hant": "zh_TW"
+ },
+ "regions": {
+ "ar-SA": "xa-ar",
+ "bg-BG": "bg-bg",
+ "ca-ES": "es-ca",
+ "cs-CZ": "cz-cs",
+ "da-DK": "dk-da",
+ "de-AT": "at-de",
+ "de-CH": "ch-de",
+ "de-DE": "de-de",
+ "el-GR": "gr-el",
+ "en-AU": "au-en",
+ "en-CA": "ca-en",
+ "en-GB": "uk-en",
+ "en-IE": "ie-en",
+ "en-IL": "il-en",
+ "en-IN": "in-en",
+ "en-MY": "my-en",
+ "en-NZ": "nz-en",
+ "en-PH": "ph-en",
+ "en-PK": "pk-en",
+ "en-SG": "sg-en",
+ "en-US": "us-en",
+ "en-ZA": "za-en",
+ "es-AR": "ar-es",
+ "es-CL": "cl-es",
+ "es-CO": "co-es",
+ "es-ES": "es-es",
+ "es-MX": "mx-es",
+ "es-PE": "pe-es",
+ "es-US": "us-es",
+ "et-EE": "ee-et",
+ "fi-FI": "fi-fi",
+ "fr-BE": "be-fr",
+ "fr-CA": "ca-fr",
+ "fr-CH": "ch-fr",
+ "fr-FR": "fr-fr",
+ "hr-HR": "hr-hr",
+ "hu-HU": "hu-hu",
+ "id-ID": "id-en",
+ "it-IT": "it-it",
+ "ja-JP": "jp-jp",
+ "ko-KR": "kr-kr",
+ "lt-LT": "lt-lt",
+ "lv-LV": "lv-lv",
+ "nb-NO": "no-no",
+ "nl-BE": "be-nl",
+ "nl-NL": "nl-nl",
+ "pl-PL": "pl-pl",
+ "pt-BR": "br-pt",
+ "pt-PT": "pt-pt",
+ "ro-RO": "ro-ro",
+ "ru-RU": "ru-ru",
+ "sk-SK": "sk-sk",
+ "sl-SI": "sl-sl",
+ "sv-SE": "se-sv",
+ "th-TH": "th-en",
+ "tr-TR": "tr-tr",
+ "uk-UA": "ua-uk",
+ "vi-VN": "vn-en",
+ "zh-CN": "cn-zh",
+ "zh-HK": "hk-tzh",
+ "zh-TW": "tw-tzh"
+ }
+ },
+ "google": {
+ "all_locale": "ZZ",
+ "custom": {
+ "supported_domains": {
+ "AD": "www.google.ad",
+ "AE": "www.google.ae",
+ "AF": "www.google.com.af",
+ "AG": "www.google.com.ag",
+ "AI": "www.google.com.ai",
+ "AL": "www.google.al",
+ "AM": "www.google.am",
+ "AO": "www.google.co.ao",
+ "AR": "www.google.com.ar",
+ "AS": "www.google.as",
+ "AT": "www.google.at",
+ "AU": "www.google.com.au",
+ "AZ": "www.google.az",
+ "BA": "www.google.ba",
+ "BD": "www.google.com.bd",
+ "BE": "www.google.be",
+ "BF": "www.google.bf",
+ "BG": "www.google.bg",
+ "BH": "www.google.com.bh",
+ "BI": "www.google.bi",
+ "BJ": "www.google.bj",
+ "BN": "www.google.com.bn",
+ "BO": "www.google.com.bo",
+ "BR": "www.google.com.br",
+ "BS": "www.google.bs",
+ "BT": "www.google.bt",
+ "BW": "www.google.co.bw",
+ "BY": "www.google.by",
+ "BZ": "www.google.com.bz",
+ "CA": "www.google.ca",
+ "CAT": "www.google.cat",
+ "CD": "www.google.cd",
+ "CF": "www.google.cf",
+ "CG": "www.google.cg",
+ "CH": "www.google.ch",
+ "CI": "www.google.ci",
+ "CK": "www.google.co.ck",
+ "CL": "www.google.cl",
+ "CM": "www.google.cm",
+ "CN": "www.google.com.hk",
+ "CO": "www.google.com.co",
+ "CR": "www.google.co.cr",
+ "CU": "www.google.com.cu",
+ "CV": "www.google.cv",
+ "CY": "www.google.com.cy",
+ "CZ": "www.google.cz",
+ "DE": "www.google.de",
+ "DJ": "www.google.dj",
+ "DK": "www.google.dk",
+ "DM": "www.google.dm",
+ "DO": "www.google.com.do",
+ "DZ": "www.google.dz",
+ "EC": "www.google.com.ec",
+ "EE": "www.google.ee",
+ "EG": "www.google.com.eg",
+ "ES": "www.google.es",
+ "ET": "www.google.com.et",
+ "FI": "www.google.fi",
+ "FJ": "www.google.com.fj",
+ "FM": "www.google.fm",
+ "FR": "www.google.fr",
+ "GA": "www.google.ga",
+ "GE": "www.google.ge",
+ "GG": "www.google.gg",
+ "GH": "www.google.com.gh",
+ "GI": "www.google.com.gi",
+ "GL": "www.google.gl",
+ "GM": "www.google.gm",
+ "GR": "www.google.gr",
+ "GT": "www.google.com.gt",
+ "GY": "www.google.gy",
+ "HK": "www.google.com.hk",
+ "HN": "www.google.hn",
+ "HR": "www.google.hr",
+ "HT": "www.google.ht",
+ "HU": "www.google.hu",
+ "ID": "www.google.co.id",
+ "IE": "www.google.ie",
+ "IL": "www.google.co.il",
+ "IM": "www.google.im",
+ "IN": "www.google.co.in",
+ "IQ": "www.google.iq",
+ "IS": "www.google.is",
+ "IT": "www.google.it",
+ "JE": "www.google.je",
+ "JM": "www.google.com.jm",
+ "JO": "www.google.jo",
+ "JP": "www.google.co.jp",
+ "KE": "www.google.co.ke",
+ "KG": "www.google.kg",
+ "KH": "www.google.com.kh",
+ "KI": "www.google.ki",
+ "KR": "www.google.co.kr",
+ "KW": "www.google.com.kw",
+ "KZ": "www.google.kz",
+ "LA": "www.google.la",
+ "LB": "www.google.com.lb",
+ "LI": "www.google.li",
+ "LK": "www.google.lk",
+ "LS": "www.google.co.ls",
+ "LT": "www.google.lt",
+ "LU": "www.google.lu",
+ "LV": "www.google.lv",
+ "LY": "www.google.com.ly",
+ "MA": "www.google.co.ma",
+ "MD": "www.google.md",
+ "ME": "www.google.me",
+ "MG": "www.google.mg",
+ "MK": "www.google.mk",
+ "ML": "www.google.ml",
+ "MM": "www.google.com.mm",
+ "MN": "www.google.mn",
+ "MS": "www.google.ms",
+ "MT": "www.google.com.mt",
+ "MU": "www.google.mu",
+ "MV": "www.google.mv",
+ "MW": "www.google.mw",
+ "MX": "www.google.com.mx",
+ "MY": "www.google.com.my",
+ "MZ": "www.google.co.mz",
+ "NA": "www.google.com.na",
+ "NE": "www.google.ne",
+ "NG": "www.google.com.ng",
+ "NI": "www.google.com.ni",
+ "NL": "www.google.nl",
+ "NO": "www.google.no",
+ "NP": "www.google.com.np",
+ "NR": "www.google.nr",
+ "NU": "www.google.nu",
+ "NZ": "www.google.co.nz",
+ "OM": "www.google.com.om",
+ "PA": "www.google.com.pa",
+ "PE": "www.google.com.pe",
+ "PG": "www.google.com.pg",
+ "PH": "www.google.com.ph",
+ "PK": "www.google.com.pk",
+ "PL": "www.google.pl",
+ "PN": "www.google.pn",
+ "PR": "www.google.com.pr",
+ "PS": "www.google.ps",
+ "PT": "www.google.pt",
+ "PY": "www.google.com.py",
+ "QA": "www.google.com.qa",
+ "RO": "www.google.ro",
+ "RS": "www.google.rs",
+ "RU": "www.google.ru",
+ "RW": "www.google.rw",
+ "SA": "www.google.com.sa",
+ "SB": "www.google.com.sb",
+ "SC": "www.google.sc",
+ "SE": "www.google.se",
+ "SG": "www.google.com.sg",
+ "SH": "www.google.sh",
+ "SI": "www.google.si",
+ "SK": "www.google.sk",
+ "SL": "www.google.com.sl",
+ "SM": "www.google.sm",
+ "SN": "www.google.sn",
+ "SO": "www.google.so",
+ "SR": "www.google.sr",
+ "ST": "www.google.st",
+ "SV": "www.google.com.sv",
+ "TD": "www.google.td",
+ "TG": "www.google.tg",
+ "TH": "www.google.co.th",
+ "TJ": "www.google.com.tj",
+ "TL": "www.google.tl",
+ "TM": "www.google.tm",
+ "TN": "www.google.tn",
+ "TO": "www.google.to",
+ "TR": "www.google.com.tr",
+ "TT": "www.google.tt",
+ "TW": "www.google.com.tw",
+ "TZ": "www.google.co.tz",
+ "UA": "www.google.com.ua",
+ "UG": "www.google.co.ug",
+ "UK": "www.google.co.uk",
+ "UY": "www.google.com.uy",
+ "UZ": "www.google.co.uz",
+ "VC": "www.google.com.vc",
+ "VE": "www.google.co.ve",
+ "VG": "www.google.vg",
+ "VI": "www.google.co.vi",
+ "VN": "www.google.com.vn",
+ "VU": "www.google.vu",
+ "WS": "www.google.ws",
+ "ZA": "www.google.co.za",
+ "ZM": "www.google.co.zm",
+ "ZW": "www.google.co.zw"
+ }
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "lang_af",
+ "ar": "lang_ar",
+ "be": "lang_be",
+ "bg": "lang_bg",
+ "ca": "lang_ca",
+ "cs": "lang_cs",
+ "da": "lang_da",
+ "de": "lang_de",
+ "el": "lang_el",
+ "en": "lang_en",
+ "eo": "lang_eo",
+ "es": "lang_es",
+ "et": "lang_et",
+ "fa": "lang_fa",
+ "fi": "lang_fi",
+ "fil": "lang_tl",
+ "fr": "lang_fr",
+ "he": "lang_iw",
+ "hi": "lang_hi",
+ "hr": "lang_hr",
+ "hu": "lang_hu",
+ "hy": "lang_hy",
+ "id": "lang_id",
+ "is": "lang_is",
+ "it": "lang_it",
+ "ja": "lang_ja",
+ "ko": "lang_ko",
+ "lt": "lang_lt",
+ "lv": "lang_lv",
+ "nb": "lang_no",
+ "nl": "lang_nl",
+ "pl": "lang_pl",
+ "pt": "lang_pt",
+ "ro": "lang_ro",
+ "ru": "lang_ru",
+ "sk": "lang_sk",
+ "sl": "lang_sl",
+ "sr": "lang_sr",
+ "sv": "lang_sv",
+ "sw": "lang_sw",
+ "th": "lang_th",
+ "tr": "lang_tr",
+ "uk": "lang_uk",
+ "vi": "lang_vi",
+ "zh": "lang_zh-CN",
+ "zh_Hans": "lang_zh-CN",
+ "zh_Hant": "lang_zh-TW"
+ },
+ "regions": {
+ "af-ZA": "ZA",
+ "ar-AE": "AE",
+ "ar-BH": "BH",
+ "ar-DJ": "DJ",
+ "ar-DZ": "DZ",
+ "ar-EG": "EG",
+ "ar-IL": "IL",
+ "ar-IQ": "IQ",
+ "ar-JO": "JO",
+ "ar-KW": "KW",
+ "ar-LB": "LB",
+ "ar-LY": "LY",
+ "ar-MA": "MA",
+ "ar-OM": "OM",
+ "ar-PS": "PS",
+ "ar-QA": "QA",
+ "ar-SA": "SA",
+ "ar-SO": "SO",
+ "ar-TD": "TD",
+ "ar-TN": "TN",
+ "be-BY": "BY",
+ "bg-BG": "BG",
+ "ca-AD": "AD",
+ "ca-ES": "ES",
+ "cs-CZ": "CZ",
+ "da-DK": "DK",
+ "de-AT": "AT",
+ "de-BE": "BE",
+ "de-CH": "CH",
+ "de-DE": "DE",
+ "de-LI": "LI",
+ "de-LU": "LU",
+ "el-CY": "CY",
+ "el-GR": "GR",
+ "en-AG": "AG",
+ "en-AI": "AI",
+ "en-AS": "AS",
+ "en-AU": "AU",
+ "en-BI": "BI",
+ "en-BS": "BS",
+ "en-BW": "BW",
+ "en-BZ": "BZ",
+ "en-CA": "CA",
+ "en-CK": "CK",
+ "en-CM": "CM",
+ "en-DM": "DM",
+ "en-FJ": "FJ",
+ "en-FM": "FM",
+ "en-GB": "GB",
+ "en-GG": "GG",
+ "en-GH": "GH",
+ "en-GI": "GI",
+ "en-GM": "GM",
+ "en-GY": "GY",
+ "en-HK": "HK",
+ "en-IE": "IE",
+ "en-IM": "IM",
+ "en-IN": "IN",
+ "en-JE": "JE",
+ "en-JM": "JM",
+ "en-KE": "KE",
+ "en-KI": "KI",
+ "en-LS": "LS",
+ "en-MG": "MG",
+ "en-MS": "MS",
+ "en-MT": "MT",
+ "en-MU": "MU",
+ "en-MW": "MW",
+ "en-NA": "NA",
+ "en-NG": "NG",
+ "en-NR": "NR",
+ "en-NU": "NU",
+ "en-NZ": "NZ",
+ "en-PG": "PG",
+ "en-PH": "PH",
+ "en-PK": "PK",
+ "en-PN": "PN",
+ "en-PR": "PR",
+ "en-RW": "RW",
+ "en-SB": "SB",
+ "en-SC": "SC",
+ "en-SG": "SG",
+ "en-SH": "SH",
+ "en-SL": "SL",
+ "en-TO": "TO",
+ "en-TT": "TT",
+ "en-TZ": "TZ",
+ "en-UG": "UG",
+ "en-US": "US",
+ "en-VC": "VC",
+ "en-VG": "VG",
+ "en-VI": "VI",
+ "en-VU": "VU",
+ "en-WS": "WS",
+ "en-ZA": "ZA",
+ "en-ZM": "ZM",
+ "en-ZW": "ZW",
+ "es-AR": "AR",
+ "es-BO": "BO",
+ "es-CL": "CL",
+ "es-CO": "CO",
+ "es-CR": "CR",
+ "es-CU": "CU",
+ "es-DO": "DO",
+ "es-EC": "EC",
+ "es-ES": "ES",
+ "es-GT": "GT",
+ "es-HN": "HN",
+ "es-MX": "MX",
+ "es-NI": "NI",
+ "es-PA": "PA",
+ "es-PE": "PE",
+ "es-PR": "PR",
+ "es-PY": "PY",
+ "es-SV": "SV",
+ "es-US": "US",
+ "es-UY": "UY",
+ "es-VE": "VE",
+ "et-EE": "EE",
+ "fa-AF": "AF",
+ "fi-FI": "FI",
+ "fil-PH": "PH",
+ "fr-BE": "BE",
+ "fr-BF": "BF",
+ "fr-BI": "BI",
+ "fr-BJ": "BJ",
+ "fr-CA": "CA",
+ "fr-CD": "CD",
+ "fr-CF": "CF",
+ "fr-CG": "CG",
+ "fr-CH": "CH",
+ "fr-CI": "CI",
+ "fr-CM": "CM",
+ "fr-DJ": "DJ",
+ "fr-DZ": "DZ",
+ "fr-FR": "FR",
+ "fr-GA": "GA",
+ "fr-HT": "HT",
+ "fr-LU": "LU",
+ "fr-MA": "MA",
+ "fr-MG": "MG",
+ "fr-ML": "ML",
+ "fr-MU": "MU",
+ "fr-NE": "NE",
+ "fr-RW": "RW",
+ "fr-SC": "SC",
+ "fr-SN": "SN",
+ "fr-TD": "TD",
+ "fr-TG": "TG",
+ "fr-TN": "TN",
+ "fr-VU": "VU",
+ "he-IL": "IL",
+ "hi-IN": "IN",
+ "hr-BA": "BA",
+ "hr-HR": "HR",
+ "hu-HU": "HU",
+ "hy-AM": "AM",
+ "id-ID": "ID",
+ "is-IS": "IS",
+ "it-CH": "CH",
+ "it-IT": "IT",
+ "it-SM": "SM",
+ "ja-JP": "JP",
+ "ko-KR": "KR",
+ "lt-LT": "LT",
+ "lv-LV": "LV",
+ "nb-NO": "NO",
+ "nl-BE": "BE",
+ "nl-NL": "NL",
+ "nl-SR": "SR",
+ "pl-PL": "PL",
+ "pt-AO": "AO",
+ "pt-BR": "BR",
+ "pt-CV": "CV",
+ "pt-MZ": "MZ",
+ "pt-PT": "PT",
+ "pt-ST": "ST",
+ "pt-TL": "TL",
+ "ro-MD": "MD",
+ "ro-RO": "RO",
+ "ru-BY": "BY",
+ "ru-KG": "KG",
+ "ru-KZ": "KZ",
+ "ru-RU": "RU",
+ "ru-UA": "UA",
+ "sk-SK": "SK",
+ "sl-SI": "SI",
+ "sr-BA": "BA",
+ "sr-RS": "RS",
+ "sv-FI": "FI",
+ "sv-SE": "SE",
+ "sw-CD": "CD",
+ "sw-KE": "KE",
+ "sw-TZ": "TZ",
+ "sw-UG": "UG",
+ "th-TH": "TH",
+ "tr-CY": "CY",
+ "tr-TR": "TR",
+ "uk-UA": "UA",
+ "vi-VN": "VN",
+ "zh-CN": "HK",
+ "zh-HK": "HK",
+ "zh-SG": "SG",
+ "zh-TW": "TW"
+ }
+ },
+ "google images": {
+ "all_locale": "ZZ",
+ "custom": {
+ "supported_domains": {
+ "AD": "www.google.ad",
+ "AE": "www.google.ae",
+ "AF": "www.google.com.af",
+ "AG": "www.google.com.ag",
+ "AI": "www.google.com.ai",
+ "AL": "www.google.al",
+ "AM": "www.google.am",
+ "AO": "www.google.co.ao",
+ "AR": "www.google.com.ar",
+ "AS": "www.google.as",
+ "AT": "www.google.at",
+ "AU": "www.google.com.au",
+ "AZ": "www.google.az",
+ "BA": "www.google.ba",
+ "BD": "www.google.com.bd",
+ "BE": "www.google.be",
+ "BF": "www.google.bf",
+ "BG": "www.google.bg",
+ "BH": "www.google.com.bh",
+ "BI": "www.google.bi",
+ "BJ": "www.google.bj",
+ "BN": "www.google.com.bn",
+ "BO": "www.google.com.bo",
+ "BR": "www.google.com.br",
+ "BS": "www.google.bs",
+ "BT": "www.google.bt",
+ "BW": "www.google.co.bw",
+ "BY": "www.google.by",
+ "BZ": "www.google.com.bz",
+ "CA": "www.google.ca",
+ "CAT": "www.google.cat",
+ "CD": "www.google.cd",
+ "CF": "www.google.cf",
+ "CG": "www.google.cg",
+ "CH": "www.google.ch",
+ "CI": "www.google.ci",
+ "CK": "www.google.co.ck",
+ "CL": "www.google.cl",
+ "CM": "www.google.cm",
+ "CN": "www.google.com.hk",
+ "CO": "www.google.com.co",
+ "CR": "www.google.co.cr",
+ "CU": "www.google.com.cu",
+ "CV": "www.google.cv",
+ "CY": "www.google.com.cy",
+ "CZ": "www.google.cz",
+ "DE": "www.google.de",
+ "DJ": "www.google.dj",
+ "DK": "www.google.dk",
+ "DM": "www.google.dm",
+ "DO": "www.google.com.do",
+ "DZ": "www.google.dz",
+ "EC": "www.google.com.ec",
+ "EE": "www.google.ee",
+ "EG": "www.google.com.eg",
+ "ES": "www.google.es",
+ "ET": "www.google.com.et",
+ "FI": "www.google.fi",
+ "FJ": "www.google.com.fj",
+ "FM": "www.google.fm",
+ "FR": "www.google.fr",
+ "GA": "www.google.ga",
+ "GE": "www.google.ge",
+ "GG": "www.google.gg",
+ "GH": "www.google.com.gh",
+ "GI": "www.google.com.gi",
+ "GL": "www.google.gl",
+ "GM": "www.google.gm",
+ "GR": "www.google.gr",
+ "GT": "www.google.com.gt",
+ "GY": "www.google.gy",
+ "HK": "www.google.com.hk",
+ "HN": "www.google.hn",
+ "HR": "www.google.hr",
+ "HT": "www.google.ht",
+ "HU": "www.google.hu",
+ "ID": "www.google.co.id",
+ "IE": "www.google.ie",
+ "IL": "www.google.co.il",
+ "IM": "www.google.im",
+ "IN": "www.google.co.in",
+ "IQ": "www.google.iq",
+ "IS": "www.google.is",
+ "IT": "www.google.it",
+ "JE": "www.google.je",
+ "JM": "www.google.com.jm",
+ "JO": "www.google.jo",
+ "JP": "www.google.co.jp",
+ "KE": "www.google.co.ke",
+ "KG": "www.google.kg",
+ "KH": "www.google.com.kh",
+ "KI": "www.google.ki",
+ "KR": "www.google.co.kr",
+ "KW": "www.google.com.kw",
+ "KZ": "www.google.kz",
+ "LA": "www.google.la",
+ "LB": "www.google.com.lb",
+ "LI": "www.google.li",
+ "LK": "www.google.lk",
+ "LS": "www.google.co.ls",
+ "LT": "www.google.lt",
+ "LU": "www.google.lu",
+ "LV": "www.google.lv",
+ "LY": "www.google.com.ly",
+ "MA": "www.google.co.ma",
+ "MD": "www.google.md",
+ "ME": "www.google.me",
+ "MG": "www.google.mg",
+ "MK": "www.google.mk",
+ "ML": "www.google.ml",
+ "MM": "www.google.com.mm",
+ "MN": "www.google.mn",
+ "MS": "www.google.ms",
+ "MT": "www.google.com.mt",
+ "MU": "www.google.mu",
+ "MV": "www.google.mv",
+ "MW": "www.google.mw",
+ "MX": "www.google.com.mx",
+ "MY": "www.google.com.my",
+ "MZ": "www.google.co.mz",
+ "NA": "www.google.com.na",
+ "NE": "www.google.ne",
+ "NG": "www.google.com.ng",
+ "NI": "www.google.com.ni",
+ "NL": "www.google.nl",
+ "NO": "www.google.no",
+ "NP": "www.google.com.np",
+ "NR": "www.google.nr",
+ "NU": "www.google.nu",
+ "NZ": "www.google.co.nz",
+ "OM": "www.google.com.om",
+ "PA": "www.google.com.pa",
+ "PE": "www.google.com.pe",
+ "PG": "www.google.com.pg",
+ "PH": "www.google.com.ph",
+ "PK": "www.google.com.pk",
+ "PL": "www.google.pl",
+ "PN": "www.google.pn",
+ "PR": "www.google.com.pr",
+ "PS": "www.google.ps",
+ "PT": "www.google.pt",
+ "PY": "www.google.com.py",
+ "QA": "www.google.com.qa",
+ "RO": "www.google.ro",
+ "RS": "www.google.rs",
+ "RU": "www.google.ru",
+ "RW": "www.google.rw",
+ "SA": "www.google.com.sa",
+ "SB": "www.google.com.sb",
+ "SC": "www.google.sc",
+ "SE": "www.google.se",
+ "SG": "www.google.com.sg",
+ "SH": "www.google.sh",
+ "SI": "www.google.si",
+ "SK": "www.google.sk",
+ "SL": "www.google.com.sl",
+ "SM": "www.google.sm",
+ "SN": "www.google.sn",
+ "SO": "www.google.so",
+ "SR": "www.google.sr",
+ "ST": "www.google.st",
+ "SV": "www.google.com.sv",
+ "TD": "www.google.td",
+ "TG": "www.google.tg",
+ "TH": "www.google.co.th",
+ "TJ": "www.google.com.tj",
+ "TL": "www.google.tl",
+ "TM": "www.google.tm",
+ "TN": "www.google.tn",
+ "TO": "www.google.to",
+ "TR": "www.google.com.tr",
+ "TT": "www.google.tt",
+ "TW": "www.google.com.tw",
+ "TZ": "www.google.co.tz",
+ "UA": "www.google.com.ua",
+ "UG": "www.google.co.ug",
+ "UK": "www.google.co.uk",
+ "UY": "www.google.com.uy",
+ "UZ": "www.google.co.uz",
+ "VC": "www.google.com.vc",
+ "VE": "www.google.co.ve",
+ "VG": "www.google.vg",
+ "VI": "www.google.co.vi",
+ "VN": "www.google.com.vn",
+ "VU": "www.google.vu",
+ "WS": "www.google.ws",
+ "ZA": "www.google.co.za",
+ "ZM": "www.google.co.zm",
+ "ZW": "www.google.co.zw"
+ }
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "lang_af",
+ "ar": "lang_ar",
+ "be": "lang_be",
+ "bg": "lang_bg",
+ "ca": "lang_ca",
+ "cs": "lang_cs",
+ "da": "lang_da",
+ "de": "lang_de",
+ "el": "lang_el",
+ "en": "lang_en",
+ "eo": "lang_eo",
+ "es": "lang_es",
+ "et": "lang_et",
+ "fa": "lang_fa",
+ "fi": "lang_fi",
+ "fil": "lang_tl",
+ "fr": "lang_fr",
+ "he": "lang_iw",
+ "hi": "lang_hi",
+ "hr": "lang_hr",
+ "hu": "lang_hu",
+ "hy": "lang_hy",
+ "id": "lang_id",
+ "is": "lang_is",
+ "it": "lang_it",
+ "ja": "lang_ja",
+ "ko": "lang_ko",
+ "lt": "lang_lt",
+ "lv": "lang_lv",
+ "nb": "lang_no",
+ "nl": "lang_nl",
+ "pl": "lang_pl",
+ "pt": "lang_pt",
+ "ro": "lang_ro",
+ "ru": "lang_ru",
+ "sk": "lang_sk",
+ "sl": "lang_sl",
+ "sr": "lang_sr",
+ "sv": "lang_sv",
+ "sw": "lang_sw",
+ "th": "lang_th",
+ "tr": "lang_tr",
+ "uk": "lang_uk",
+ "vi": "lang_vi",
+ "zh": "lang_zh-CN",
+ "zh_Hans": "lang_zh-CN",
+ "zh_Hant": "lang_zh-TW"
+ },
+ "regions": {
+ "af-ZA": "ZA",
+ "ar-AE": "AE",
+ "ar-BH": "BH",
+ "ar-DJ": "DJ",
+ "ar-DZ": "DZ",
+ "ar-EG": "EG",
+ "ar-IL": "IL",
+ "ar-IQ": "IQ",
+ "ar-JO": "JO",
+ "ar-KW": "KW",
+ "ar-LB": "LB",
+ "ar-LY": "LY",
+ "ar-MA": "MA",
+ "ar-OM": "OM",
+ "ar-PS": "PS",
+ "ar-QA": "QA",
+ "ar-SA": "SA",
+ "ar-SO": "SO",
+ "ar-TD": "TD",
+ "ar-TN": "TN",
+ "be-BY": "BY",
+ "bg-BG": "BG",
+ "ca-AD": "AD",
+ "ca-ES": "ES",
+ "cs-CZ": "CZ",
+ "da-DK": "DK",
+ "de-AT": "AT",
+ "de-BE": "BE",
+ "de-CH": "CH",
+ "de-DE": "DE",
+ "de-LI": "LI",
+ "de-LU": "LU",
+ "el-CY": "CY",
+ "el-GR": "GR",
+ "en-AG": "AG",
+ "en-AI": "AI",
+ "en-AS": "AS",
+ "en-AU": "AU",
+ "en-BI": "BI",
+ "en-BS": "BS",
+ "en-BW": "BW",
+ "en-BZ": "BZ",
+ "en-CA": "CA",
+ "en-CK": "CK",
+ "en-CM": "CM",
+ "en-DM": "DM",
+ "en-FJ": "FJ",
+ "en-FM": "FM",
+ "en-GB": "GB",
+ "en-GG": "GG",
+ "en-GH": "GH",
+ "en-GI": "GI",
+ "en-GM": "GM",
+ "en-GY": "GY",
+ "en-HK": "HK",
+ "en-IE": "IE",
+ "en-IM": "IM",
+ "en-IN": "IN",
+ "en-JE": "JE",
+ "en-JM": "JM",
+ "en-KE": "KE",
+ "en-KI": "KI",
+ "en-LS": "LS",
+ "en-MG": "MG",
+ "en-MS": "MS",
+ "en-MT": "MT",
+ "en-MU": "MU",
+ "en-MW": "MW",
+ "en-NA": "NA",
+ "en-NG": "NG",
+ "en-NR": "NR",
+ "en-NU": "NU",
+ "en-NZ": "NZ",
+ "en-PG": "PG",
+ "en-PH": "PH",
+ "en-PK": "PK",
+ "en-PN": "PN",
+ "en-PR": "PR",
+ "en-RW": "RW",
+ "en-SB": "SB",
+ "en-SC": "SC",
+ "en-SG": "SG",
+ "en-SH": "SH",
+ "en-SL": "SL",
+ "en-TO": "TO",
+ "en-TT": "TT",
+ "en-TZ": "TZ",
+ "en-UG": "UG",
+ "en-US": "US",
+ "en-VC": "VC",
+ "en-VG": "VG",
+ "en-VI": "VI",
+ "en-VU": "VU",
+ "en-WS": "WS",
+ "en-ZA": "ZA",
+ "en-ZM": "ZM",
+ "en-ZW": "ZW",
+ "es-AR": "AR",
+ "es-BO": "BO",
+ "es-CL": "CL",
+ "es-CO": "CO",
+ "es-CR": "CR",
+ "es-CU": "CU",
+ "es-DO": "DO",
+ "es-EC": "EC",
+ "es-ES": "ES",
+ "es-GT": "GT",
+ "es-HN": "HN",
+ "es-MX": "MX",
+ "es-NI": "NI",
+ "es-PA": "PA",
+ "es-PE": "PE",
+ "es-PR": "PR",
+ "es-PY": "PY",
+ "es-SV": "SV",
+ "es-US": "US",
+ "es-UY": "UY",
+ "es-VE": "VE",
+ "et-EE": "EE",
+ "fa-AF": "AF",
+ "fi-FI": "FI",
+ "fil-PH": "PH",
+ "fr-BE": "BE",
+ "fr-BF": "BF",
+ "fr-BI": "BI",
+ "fr-BJ": "BJ",
+ "fr-CA": "CA",
+ "fr-CD": "CD",
+ "fr-CF": "CF",
+ "fr-CG": "CG",
+ "fr-CH": "CH",
+ "fr-CI": "CI",
+ "fr-CM": "CM",
+ "fr-DJ": "DJ",
+ "fr-DZ": "DZ",
+ "fr-FR": "FR",
+ "fr-GA": "GA",
+ "fr-HT": "HT",
+ "fr-LU": "LU",
+ "fr-MA": "MA",
+ "fr-MG": "MG",
+ "fr-ML": "ML",
+ "fr-MU": "MU",
+ "fr-NE": "NE",
+ "fr-RW": "RW",
+ "fr-SC": "SC",
+ "fr-SN": "SN",
+ "fr-TD": "TD",
+ "fr-TG": "TG",
+ "fr-TN": "TN",
+ "fr-VU": "VU",
+ "he-IL": "IL",
+ "hi-IN": "IN",
+ "hr-BA": "BA",
+ "hr-HR": "HR",
+ "hu-HU": "HU",
+ "hy-AM": "AM",
+ "id-ID": "ID",
+ "is-IS": "IS",
+ "it-CH": "CH",
+ "it-IT": "IT",
+ "it-SM": "SM",
+ "ja-JP": "JP",
+ "ko-KR": "KR",
+ "lt-LT": "LT",
+ "lv-LV": "LV",
+ "nb-NO": "NO",
+ "nl-BE": "BE",
+ "nl-NL": "NL",
+ "nl-SR": "SR",
+ "pl-PL": "PL",
+ "pt-AO": "AO",
+ "pt-BR": "BR",
+ "pt-CV": "CV",
+ "pt-MZ": "MZ",
+ "pt-PT": "PT",
+ "pt-ST": "ST",
+ "pt-TL": "TL",
+ "ro-MD": "MD",
+ "ro-RO": "RO",
+ "ru-BY": "BY",
+ "ru-KG": "KG",
+ "ru-KZ": "KZ",
+ "ru-RU": "RU",
+ "ru-UA": "UA",
+ "sk-SK": "SK",
+ "sl-SI": "SI",
+ "sr-BA": "BA",
+ "sr-RS": "RS",
+ "sv-FI": "FI",
+ "sv-SE": "SE",
+ "sw-CD": "CD",
+ "sw-KE": "KE",
+ "sw-TZ": "TZ",
+ "sw-UG": "UG",
+ "th-TH": "TH",
+ "tr-CY": "CY",
+ "tr-TR": "TR",
+ "uk-UA": "UA",
+ "vi-VN": "VN",
+ "zh-CN": "HK",
+ "zh-HK": "HK",
+ "zh-SG": "SG",
+ "zh-TW": "TW"
+ }
+ },
+ "google news": {
+ "all_locale": "ZZ",
+ "custom": {
+ "ceid": {
+ "ar-AE": "AE:ar",
+ "ar-EG": "EG:ar",
+ "ar-LB": "LB:ar",
+ "ar-SA": "SA:ar",
+ "bg-BG": "BG:bg",
+ "bn-BD": "BD:bn",
+ "bn-IN": "IN:bn",
+ "cs-CZ": "CZ:cs",
+ "de-AT": "AT:de",
+ "de-CH": "CH:de",
+ "de-DE": "DE:de",
+ "el-GR": "GR:el",
+ "en-AU": "AU:en",
+ "en-BW": "BW:en",
+ "en-CA": "CA:en",
+ "en-GB": "GB:en",
+ "en-GH": "GH:en",
+ "en-IE": "IE:en",
+ "en-IL": "IL:en",
+ "en-IN": "IN:en",
+ "en-KE": "KE:en",
+ "en-MY": "MY:en",
+ "en-NA": "NA:en",
+ "en-NG": "NG:en",
+ "en-NZ": "NZ:en",
+ "en-PH": "PH:en",
+ "en-PK": "PK:en",
+ "en-SG": "SG:en",
+ "en-TZ": "TZ:en",
+ "en-UG": "UG:en",
+ "en-US": "US:en",
+ "en-ZA": "ZA:en",
+ "en-ZW": "ZW:en",
+ "es-AR": "AR:es-419",
+ "es-CL": "CL:es-419",
+ "es-CO": "CO:es-419",
+ "es-CU": "CU:es-419",
+ "es-ES": "ES:es",
+ "es-MX": "MX:es-419",
+ "es-PE": "PE:es-419",
+ "es-US": "US:es-419",
+ "es-VE": "VE:es-419",
+ "fr-BE": "BE:fr",
+ "fr-CA": "CA:fr",
+ "fr-CH": "CH:fr",
+ "fr-FR": "FR:fr",
+ "fr-MA": "MA:fr",
+ "fr-SN": "SN:fr",
+ "he-IL": "IL:he",
+ "hi-IN": "IN:hi",
+ "hu-HU": "HU:hu",
+ "id-ID": "ID:id",
+ "it-IT": "IT:it",
+ "ja-JP": "JP:ja",
+ "ko-KR": "KR:ko",
+ "lt-LT": "LT:lt",
+ "lv-LV": "LV:lv",
+ "ml-IN": "IN:ml",
+ "mr-IN": "IN:mr",
+ "nb-NO": "NO:no",
+ "nl-BE": "BE:nl",
+ "nl-NL": "NL:nl",
+ "pl-PL": "PL:pl",
+ "pt-BR": "BR:pt-419",
+ "pt-PT": "PT:pt-150",
+ "ro-RO": "RO:ro",
+ "ru-RU": "RU:ru",
+ "ru-UA": "UA:ru",
+ "sk-SK": "SK:sk",
+ "sl-SI": "SI:sl",
+ "sr-RS": "RS:sr",
+ "sv-SE": "SE:sv",
+ "ta-IN": "IN:ta",
+ "te-IN": "IN:te",
+ "th-TH": "TH:th",
+ "tr-TR": "TR:tr",
+ "uk-UA": "UA:uk",
+ "vi-VN": "VN:vi",
+ "zh-CN": "CN:zh-Hans",
+ "zh-HK": "HK:zh-Hant",
+ "zh-TW": "TW:zh-Hant"
+ },
+ "supported_domains": {}
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "lang_af",
+ "ar": "lang_ar",
+ "be": "lang_be",
+ "bg": "lang_bg",
+ "ca": "lang_ca",
+ "cs": "lang_cs",
+ "da": "lang_da",
+ "de": "lang_de",
+ "el": "lang_el",
+ "en": "lang_en",
+ "eo": "lang_eo",
+ "es": "lang_es",
+ "et": "lang_et",
+ "fa": "lang_fa",
+ "fi": "lang_fi",
+ "fil": "lang_tl",
+ "fr": "lang_fr",
+ "he": "lang_iw",
+ "hi": "lang_hi",
+ "hr": "lang_hr",
+ "hu": "lang_hu",
+ "hy": "lang_hy",
+ "id": "lang_id",
+ "is": "lang_is",
+ "it": "lang_it",
+ "ja": "lang_ja",
+ "ko": "lang_ko",
+ "lt": "lang_lt",
+ "lv": "lang_lv",
+ "nb": "lang_no",
+ "nl": "lang_nl",
+ "pl": "lang_pl",
+ "pt": "lang_pt",
+ "ro": "lang_ro",
+ "ru": "lang_ru",
+ "sk": "lang_sk",
+ "sl": "lang_sl",
+ "sr": "lang_sr",
+ "sv": "lang_sv",
+ "sw": "lang_sw",
+ "th": "lang_th",
+ "tr": "lang_tr",
+ "uk": "lang_uk",
+ "vi": "lang_vi",
+ "zh": "lang_zh-CN",
+ "zh_Hans": "lang_zh-CN",
+ "zh_Hant": "lang_zh-TW"
+ },
+ "regions": {
+ "af-ZA": "ZA",
+ "ar-AE": "AE",
+ "ar-BH": "BH",
+ "ar-DJ": "DJ",
+ "ar-DZ": "DZ",
+ "ar-EG": "EG",
+ "ar-IL": "IL",
+ "ar-IQ": "IQ",
+ "ar-JO": "JO",
+ "ar-KW": "KW",
+ "ar-LB": "LB",
+ "ar-LY": "LY",
+ "ar-MA": "MA",
+ "ar-OM": "OM",
+ "ar-PS": "PS",
+ "ar-QA": "QA",
+ "ar-SA": "SA",
+ "ar-SO": "SO",
+ "ar-TD": "TD",
+ "ar-TN": "TN",
+ "be-BY": "BY",
+ "bg-BG": "BG",
+ "ca-AD": "AD",
+ "ca-ES": "ES",
+ "cs-CZ": "CZ",
+ "da-DK": "DK",
+ "de-AT": "AT",
+ "de-BE": "BE",
+ "de-CH": "CH",
+ "de-DE": "DE",
+ "de-LI": "LI",
+ "de-LU": "LU",
+ "el-CY": "CY",
+ "el-GR": "GR",
+ "en-AG": "AG",
+ "en-AI": "AI",
+ "en-AS": "AS",
+ "en-AU": "AU",
+ "en-BI": "BI",
+ "en-BS": "BS",
+ "en-BW": "BW",
+ "en-BZ": "BZ",
+ "en-CA": "CA",
+ "en-CK": "CK",
+ "en-CM": "CM",
+ "en-DM": "DM",
+ "en-FJ": "FJ",
+ "en-FM": "FM",
+ "en-GB": "GB",
+ "en-GG": "GG",
+ "en-GH": "GH",
+ "en-GI": "GI",
+ "en-GM": "GM",
+ "en-GY": "GY",
+ "en-HK": "HK",
+ "en-IE": "IE",
+ "en-IM": "IM",
+ "en-IN": "IN",
+ "en-JE": "JE",
+ "en-JM": "JM",
+ "en-KE": "KE",
+ "en-KI": "KI",
+ "en-LS": "LS",
+ "en-MG": "MG",
+ "en-MS": "MS",
+ "en-MT": "MT",
+ "en-MU": "MU",
+ "en-MW": "MW",
+ "en-NA": "NA",
+ "en-NG": "NG",
+ "en-NR": "NR",
+ "en-NU": "NU",
+ "en-NZ": "NZ",
+ "en-PG": "PG",
+ "en-PH": "PH",
+ "en-PK": "PK",
+ "en-PN": "PN",
+ "en-PR": "PR",
+ "en-RW": "RW",
+ "en-SB": "SB",
+ "en-SC": "SC",
+ "en-SG": "SG",
+ "en-SH": "SH",
+ "en-SL": "SL",
+ "en-TO": "TO",
+ "en-TT": "TT",
+ "en-TZ": "TZ",
+ "en-UG": "UG",
+ "en-US": "US",
+ "en-VC": "VC",
+ "en-VG": "VG",
+ "en-VI": "VI",
+ "en-VU": "VU",
+ "en-WS": "WS",
+ "en-ZA": "ZA",
+ "en-ZM": "ZM",
+ "en-ZW": "ZW",
+ "es-AR": "AR",
+ "es-BO": "BO",
+ "es-CL": "CL",
+ "es-CO": "CO",
+ "es-CR": "CR",
+ "es-CU": "CU",
+ "es-DO": "DO",
+ "es-EC": "EC",
+ "es-ES": "ES",
+ "es-GT": "GT",
+ "es-HN": "HN",
+ "es-MX": "MX",
+ "es-NI": "NI",
+ "es-PA": "PA",
+ "es-PE": "PE",
+ "es-PR": "PR",
+ "es-PY": "PY",
+ "es-SV": "SV",
+ "es-US": "US",
+ "es-UY": "UY",
+ "es-VE": "VE",
+ "et-EE": "EE",
+ "fa-AF": "AF",
+ "fi-FI": "FI",
+ "fil-PH": "PH",
+ "fr-BE": "BE",
+ "fr-BF": "BF",
+ "fr-BI": "BI",
+ "fr-BJ": "BJ",
+ "fr-CA": "CA",
+ "fr-CD": "CD",
+ "fr-CF": "CF",
+ "fr-CG": "CG",
+ "fr-CH": "CH",
+ "fr-CI": "CI",
+ "fr-CM": "CM",
+ "fr-DJ": "DJ",
+ "fr-DZ": "DZ",
+ "fr-FR": "FR",
+ "fr-GA": "GA",
+ "fr-HT": "HT",
+ "fr-LU": "LU",
+ "fr-MA": "MA",
+ "fr-MG": "MG",
+ "fr-ML": "ML",
+ "fr-MU": "MU",
+ "fr-NE": "NE",
+ "fr-RW": "RW",
+ "fr-SC": "SC",
+ "fr-SN": "SN",
+ "fr-TD": "TD",
+ "fr-TG": "TG",
+ "fr-TN": "TN",
+ "fr-VU": "VU",
+ "he-IL": "IL",
+ "hi-IN": "IN",
+ "hr-BA": "BA",
+ "hr-HR": "HR",
+ "hu-HU": "HU",
+ "hy-AM": "AM",
+ "id-ID": "ID",
+ "is-IS": "IS",
+ "it-CH": "CH",
+ "it-IT": "IT",
+ "it-SM": "SM",
+ "ja-JP": "JP",
+ "ko-KR": "KR",
+ "lt-LT": "LT",
+ "lv-LV": "LV",
+ "nb-NO": "NO",
+ "nl-BE": "BE",
+ "nl-NL": "NL",
+ "nl-SR": "SR",
+ "pl-PL": "PL",
+ "pt-AO": "AO",
+ "pt-BR": "BR",
+ "pt-CV": "CV",
+ "pt-MZ": "MZ",
+ "pt-PT": "PT",
+ "pt-ST": "ST",
+ "pt-TL": "TL",
+ "ro-MD": "MD",
+ "ro-RO": "RO",
+ "ru-BY": "BY",
+ "ru-KG": "KG",
+ "ru-KZ": "KZ",
+ "ru-RU": "RU",
+ "ru-UA": "UA",
+ "sk-SK": "SK",
+ "sl-SI": "SI",
+ "sr-BA": "BA",
+ "sr-RS": "RS",
+ "sv-FI": "FI",
+ "sv-SE": "SE",
+ "sw-CD": "CD",
+ "sw-KE": "KE",
+ "sw-TZ": "TZ",
+ "sw-UG": "UG",
+ "th-TH": "TH",
+ "tr-CY": "CY",
+ "tr-TR": "TR",
+ "uk-UA": "UA",
+ "vi-VN": "VN",
+ "zh-CN": "HK",
+ "zh-HK": "HK",
+ "zh-SG": "SG",
+ "zh-TW": "TW"
+ }
+ },
+ "google scholar": {
+ "all_locale": "ZZ",
+ "custom": {
+ "supported_domains": {
+ "AD": "www.google.ad",
+ "AE": "www.google.ae",
+ "AF": "www.google.com.af",
+ "AG": "www.google.com.ag",
+ "AI": "www.google.com.ai",
+ "AL": "www.google.al",
+ "AM": "www.google.am",
+ "AO": "www.google.co.ao",
+ "AR": "www.google.com.ar",
+ "AS": "www.google.as",
+ "AT": "www.google.at",
+ "AU": "www.google.com.au",
+ "AZ": "www.google.az",
+ "BA": "www.google.ba",
+ "BD": "www.google.com.bd",
+ "BE": "www.google.be",
+ "BF": "www.google.bf",
+ "BG": "www.google.bg",
+ "BH": "www.google.com.bh",
+ "BI": "www.google.bi",
+ "BJ": "www.google.bj",
+ "BN": "www.google.com.bn",
+ "BO": "www.google.com.bo",
+ "BR": "www.google.com.br",
+ "BS": "www.google.bs",
+ "BT": "www.google.bt",
+ "BW": "www.google.co.bw",
+ "BY": "www.google.by",
+ "BZ": "www.google.com.bz",
+ "CA": "www.google.ca",
+ "CAT": "www.google.cat",
+ "CD": "www.google.cd",
+ "CF": "www.google.cf",
+ "CG": "www.google.cg",
+ "CH": "www.google.ch",
+ "CI": "www.google.ci",
+ "CK": "www.google.co.ck",
+ "CL": "www.google.cl",
+ "CM": "www.google.cm",
+ "CN": "www.google.com.hk",
+ "CO": "www.google.com.co",
+ "CR": "www.google.co.cr",
+ "CU": "www.google.com.cu",
+ "CV": "www.google.cv",
+ "CY": "www.google.com.cy",
+ "CZ": "www.google.cz",
+ "DE": "www.google.de",
+ "DJ": "www.google.dj",
+ "DK": "www.google.dk",
+ "DM": "www.google.dm",
+ "DO": "www.google.com.do",
+ "DZ": "www.google.dz",
+ "EC": "www.google.com.ec",
+ "EE": "www.google.ee",
+ "EG": "www.google.com.eg",
+ "ES": "www.google.es",
+ "ET": "www.google.com.et",
+ "FI": "www.google.fi",
+ "FJ": "www.google.com.fj",
+ "FM": "www.google.fm",
+ "FR": "www.google.fr",
+ "GA": "www.google.ga",
+ "GE": "www.google.ge",
+ "GG": "www.google.gg",
+ "GH": "www.google.com.gh",
+ "GI": "www.google.com.gi",
+ "GL": "www.google.gl",
+ "GM": "www.google.gm",
+ "GR": "www.google.gr",
+ "GT": "www.google.com.gt",
+ "GY": "www.google.gy",
+ "HK": "www.google.com.hk",
+ "HN": "www.google.hn",
+ "HR": "www.google.hr",
+ "HT": "www.google.ht",
+ "HU": "www.google.hu",
+ "ID": "www.google.co.id",
+ "IE": "www.google.ie",
+ "IL": "www.google.co.il",
+ "IM": "www.google.im",
+ "IN": "www.google.co.in",
+ "IQ": "www.google.iq",
+ "IS": "www.google.is",
+ "IT": "www.google.it",
+ "JE": "www.google.je",
+ "JM": "www.google.com.jm",
+ "JO": "www.google.jo",
+ "JP": "www.google.co.jp",
+ "KE": "www.google.co.ke",
+ "KG": "www.google.kg",
+ "KH": "www.google.com.kh",
+ "KI": "www.google.ki",
+ "KR": "www.google.co.kr",
+ "KW": "www.google.com.kw",
+ "KZ": "www.google.kz",
+ "LA": "www.google.la",
+ "LB": "www.google.com.lb",
+ "LI": "www.google.li",
+ "LK": "www.google.lk",
+ "LS": "www.google.co.ls",
+ "LT": "www.google.lt",
+ "LU": "www.google.lu",
+ "LV": "www.google.lv",
+ "LY": "www.google.com.ly",
+ "MA": "www.google.co.ma",
+ "MD": "www.google.md",
+ "ME": "www.google.me",
+ "MG": "www.google.mg",
+ "MK": "www.google.mk",
+ "ML": "www.google.ml",
+ "MM": "www.google.com.mm",
+ "MN": "www.google.mn",
+ "MS": "www.google.ms",
+ "MT": "www.google.com.mt",
+ "MU": "www.google.mu",
+ "MV": "www.google.mv",
+ "MW": "www.google.mw",
+ "MX": "www.google.com.mx",
+ "MY": "www.google.com.my",
+ "MZ": "www.google.co.mz",
+ "NA": "www.google.com.na",
+ "NE": "www.google.ne",
+ "NG": "www.google.com.ng",
+ "NI": "www.google.com.ni",
+ "NL": "www.google.nl",
+ "NO": "www.google.no",
+ "NP": "www.google.com.np",
+ "NR": "www.google.nr",
+ "NU": "www.google.nu",
+ "NZ": "www.google.co.nz",
+ "OM": "www.google.com.om",
+ "PA": "www.google.com.pa",
+ "PE": "www.google.com.pe",
+ "PG": "www.google.com.pg",
+ "PH": "www.google.com.ph",
+ "PK": "www.google.com.pk",
+ "PL": "www.google.pl",
+ "PN": "www.google.pn",
+ "PR": "www.google.com.pr",
+ "PS": "www.google.ps",
+ "PT": "www.google.pt",
+ "PY": "www.google.com.py",
+ "QA": "www.google.com.qa",
+ "RO": "www.google.ro",
+ "RS": "www.google.rs",
+ "RU": "www.google.ru",
+ "RW": "www.google.rw",
+ "SA": "www.google.com.sa",
+ "SB": "www.google.com.sb",
+ "SC": "www.google.sc",
+ "SE": "www.google.se",
+ "SG": "www.google.com.sg",
+ "SH": "www.google.sh",
+ "SI": "www.google.si",
+ "SK": "www.google.sk",
+ "SL": "www.google.com.sl",
+ "SM": "www.google.sm",
+ "SN": "www.google.sn",
+ "SO": "www.google.so",
+ "SR": "www.google.sr",
+ "ST": "www.google.st",
+ "SV": "www.google.com.sv",
+ "TD": "www.google.td",
+ "TG": "www.google.tg",
+ "TH": "www.google.co.th",
+ "TJ": "www.google.com.tj",
+ "TL": "www.google.tl",
+ "TM": "www.google.tm",
+ "TN": "www.google.tn",
+ "TO": "www.google.to",
+ "TR": "www.google.com.tr",
+ "TT": "www.google.tt",
+ "TW": "www.google.com.tw",
+ "TZ": "www.google.co.tz",
+ "UA": "www.google.com.ua",
+ "UG": "www.google.co.ug",
+ "UK": "www.google.co.uk",
+ "UY": "www.google.com.uy",
+ "UZ": "www.google.co.uz",
+ "VC": "www.google.com.vc",
+ "VE": "www.google.co.ve",
+ "VG": "www.google.vg",
+ "VI": "www.google.co.vi",
+ "VN": "www.google.com.vn",
+ "VU": "www.google.vu",
+ "WS": "www.google.ws",
+ "ZA": "www.google.co.za",
+ "ZM": "www.google.co.zm",
+ "ZW": "www.google.co.zw"
+ }
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "lang_af",
+ "ar": "lang_ar",
+ "be": "lang_be",
+ "bg": "lang_bg",
+ "ca": "lang_ca",
+ "cs": "lang_cs",
+ "da": "lang_da",
+ "de": "lang_de",
+ "el": "lang_el",
+ "en": "lang_en",
+ "eo": "lang_eo",
+ "es": "lang_es",
+ "et": "lang_et",
+ "fa": "lang_fa",
+ "fi": "lang_fi",
+ "fil": "lang_tl",
+ "fr": "lang_fr",
+ "he": "lang_iw",
+ "hi": "lang_hi",
+ "hr": "lang_hr",
+ "hu": "lang_hu",
+ "hy": "lang_hy",
+ "id": "lang_id",
+ "is": "lang_is",
+ "it": "lang_it",
+ "ja": "lang_ja",
+ "ko": "lang_ko",
+ "lt": "lang_lt",
+ "lv": "lang_lv",
+ "nb": "lang_no",
+ "nl": "lang_nl",
+ "pl": "lang_pl",
+ "pt": "lang_pt",
+ "ro": "lang_ro",
+ "ru": "lang_ru",
+ "sk": "lang_sk",
+ "sl": "lang_sl",
+ "sr": "lang_sr",
+ "sv": "lang_sv",
+ "sw": "lang_sw",
+ "th": "lang_th",
+ "tr": "lang_tr",
+ "uk": "lang_uk",
+ "vi": "lang_vi",
+ "zh": "lang_zh-CN",
+ "zh_Hans": "lang_zh-CN",
+ "zh_Hant": "lang_zh-TW"
+ },
+ "regions": {
+ "af-ZA": "ZA",
+ "ar-AE": "AE",
+ "ar-BH": "BH",
+ "ar-DJ": "DJ",
+ "ar-DZ": "DZ",
+ "ar-EG": "EG",
+ "ar-IL": "IL",
+ "ar-IQ": "IQ",
+ "ar-JO": "JO",
+ "ar-KW": "KW",
+ "ar-LB": "LB",
+ "ar-LY": "LY",
+ "ar-MA": "MA",
+ "ar-OM": "OM",
+ "ar-PS": "PS",
+ "ar-QA": "QA",
+ "ar-SA": "SA",
+ "ar-SO": "SO",
+ "ar-TD": "TD",
+ "ar-TN": "TN",
+ "be-BY": "BY",
+ "bg-BG": "BG",
+ "ca-AD": "AD",
+ "ca-ES": "ES",
+ "cs-CZ": "CZ",
+ "da-DK": "DK",
+ "de-AT": "AT",
+ "de-BE": "BE",
+ "de-CH": "CH",
+ "de-DE": "DE",
+ "de-LI": "LI",
+ "de-LU": "LU",
+ "el-CY": "CY",
+ "el-GR": "GR",
+ "en-AG": "AG",
+ "en-AI": "AI",
+ "en-AS": "AS",
+ "en-AU": "AU",
+ "en-BI": "BI",
+ "en-BS": "BS",
+ "en-BW": "BW",
+ "en-BZ": "BZ",
+ "en-CA": "CA",
+ "en-CK": "CK",
+ "en-CM": "CM",
+ "en-DM": "DM",
+ "en-FJ": "FJ",
+ "en-FM": "FM",
+ "en-GB": "GB",
+ "en-GG": "GG",
+ "en-GH": "GH",
+ "en-GI": "GI",
+ "en-GM": "GM",
+ "en-GY": "GY",
+ "en-HK": "HK",
+ "en-IE": "IE",
+ "en-IM": "IM",
+ "en-IN": "IN",
+ "en-JE": "JE",
+ "en-JM": "JM",
+ "en-KE": "KE",
+ "en-KI": "KI",
+ "en-LS": "LS",
+ "en-MG": "MG",
+ "en-MS": "MS",
+ "en-MT": "MT",
+ "en-MU": "MU",
+ "en-MW": "MW",
+ "en-NA": "NA",
+ "en-NG": "NG",
+ "en-NR": "NR",
+ "en-NU": "NU",
+ "en-NZ": "NZ",
+ "en-PG": "PG",
+ "en-PH": "PH",
+ "en-PK": "PK",
+ "en-PN": "PN",
+ "en-PR": "PR",
+ "en-RW": "RW",
+ "en-SB": "SB",
+ "en-SC": "SC",
+ "en-SG": "SG",
+ "en-SH": "SH",
+ "en-SL": "SL",
+ "en-TO": "TO",
+ "en-TT": "TT",
+ "en-TZ": "TZ",
+ "en-UG": "UG",
+ "en-US": "US",
+ "en-VC": "VC",
+ "en-VG": "VG",
+ "en-VI": "VI",
+ "en-VU": "VU",
+ "en-WS": "WS",
+ "en-ZA": "ZA",
+ "en-ZM": "ZM",
+ "en-ZW": "ZW",
+ "es-AR": "AR",
+ "es-BO": "BO",
+ "es-CL": "CL",
+ "es-CO": "CO",
+ "es-CR": "CR",
+ "es-CU": "CU",
+ "es-DO": "DO",
+ "es-EC": "EC",
+ "es-ES": "ES",
+ "es-GT": "GT",
+ "es-HN": "HN",
+ "es-MX": "MX",
+ "es-NI": "NI",
+ "es-PA": "PA",
+ "es-PE": "PE",
+ "es-PR": "PR",
+ "es-PY": "PY",
+ "es-SV": "SV",
+ "es-US": "US",
+ "es-UY": "UY",
+ "es-VE": "VE",
+ "et-EE": "EE",
+ "fa-AF": "AF",
+ "fi-FI": "FI",
+ "fil-PH": "PH",
+ "fr-BE": "BE",
+ "fr-BF": "BF",
+ "fr-BI": "BI",
+ "fr-BJ": "BJ",
+ "fr-CA": "CA",
+ "fr-CD": "CD",
+ "fr-CF": "CF",
+ "fr-CG": "CG",
+ "fr-CH": "CH",
+ "fr-CI": "CI",
+ "fr-CM": "CM",
+ "fr-DJ": "DJ",
+ "fr-DZ": "DZ",
+ "fr-FR": "FR",
+ "fr-GA": "GA",
+ "fr-HT": "HT",
+ "fr-LU": "LU",
+ "fr-MA": "MA",
+ "fr-MG": "MG",
+ "fr-ML": "ML",
+ "fr-MU": "MU",
+ "fr-NE": "NE",
+ "fr-RW": "RW",
+ "fr-SC": "SC",
+ "fr-SN": "SN",
+ "fr-TD": "TD",
+ "fr-TG": "TG",
+ "fr-TN": "TN",
+ "fr-VU": "VU",
+ "he-IL": "IL",
+ "hi-IN": "IN",
+ "hr-BA": "BA",
+ "hr-HR": "HR",
+ "hu-HU": "HU",
+ "hy-AM": "AM",
+ "id-ID": "ID",
+ "is-IS": "IS",
+ "it-CH": "CH",
+ "it-IT": "IT",
+ "it-SM": "SM",
+ "ja-JP": "JP",
+ "ko-KR": "KR",
+ "lt-LT": "LT",
+ "lv-LV": "LV",
+ "nb-NO": "NO",
+ "nl-BE": "BE",
+ "nl-NL": "NL",
+ "nl-SR": "SR",
+ "pl-PL": "PL",
+ "pt-AO": "AO",
+ "pt-BR": "BR",
+ "pt-CV": "CV",
+ "pt-MZ": "MZ",
+ "pt-PT": "PT",
+ "pt-ST": "ST",
+ "pt-TL": "TL",
+ "ro-MD": "MD",
+ "ro-RO": "RO",
+ "ru-BY": "BY",
+ "ru-KG": "KG",
+ "ru-KZ": "KZ",
+ "ru-RU": "RU",
+ "ru-UA": "UA",
+ "sk-SK": "SK",
+ "sl-SI": "SI",
+ "sr-BA": "BA",
+ "sr-RS": "RS",
+ "sv-FI": "FI",
+ "sv-SE": "SE",
+ "sw-CD": "CD",
+ "sw-KE": "KE",
+ "sw-TZ": "TZ",
+ "sw-UG": "UG",
+ "th-TH": "TH",
+ "tr-CY": "CY",
+ "tr-TR": "TR",
+ "uk-UA": "UA",
+ "vi-VN": "VN",
+ "zh-CN": "HK",
+ "zh-HK": "HK",
+ "zh-SG": "SG",
+ "zh-TW": "TW"
+ }
+ },
+ "google videos": {
+ "all_locale": "ZZ",
+ "custom": {
+ "supported_domains": {
+ "AD": "www.google.ad",
+ "AE": "www.google.ae",
+ "AF": "www.google.com.af",
+ "AG": "www.google.com.ag",
+ "AI": "www.google.com.ai",
+ "AL": "www.google.al",
+ "AM": "www.google.am",
+ "AO": "www.google.co.ao",
+ "AR": "www.google.com.ar",
+ "AS": "www.google.as",
+ "AT": "www.google.at",
+ "AU": "www.google.com.au",
+ "AZ": "www.google.az",
+ "BA": "www.google.ba",
+ "BD": "www.google.com.bd",
+ "BE": "www.google.be",
+ "BF": "www.google.bf",
+ "BG": "www.google.bg",
+ "BH": "www.google.com.bh",
+ "BI": "www.google.bi",
+ "BJ": "www.google.bj",
+ "BN": "www.google.com.bn",
+ "BO": "www.google.com.bo",
+ "BR": "www.google.com.br",
+ "BS": "www.google.bs",
+ "BT": "www.google.bt",
+ "BW": "www.google.co.bw",
+ "BY": "www.google.by",
+ "BZ": "www.google.com.bz",
+ "CA": "www.google.ca",
+ "CAT": "www.google.cat",
+ "CD": "www.google.cd",
+ "CF": "www.google.cf",
+ "CG": "www.google.cg",
+ "CH": "www.google.ch",
+ "CI": "www.google.ci",
+ "CK": "www.google.co.ck",
+ "CL": "www.google.cl",
+ "CM": "www.google.cm",
+ "CN": "www.google.com.hk",
+ "CO": "www.google.com.co",
+ "CR": "www.google.co.cr",
+ "CU": "www.google.com.cu",
+ "CV": "www.google.cv",
+ "CY": "www.google.com.cy",
+ "CZ": "www.google.cz",
+ "DE": "www.google.de",
+ "DJ": "www.google.dj",
+ "DK": "www.google.dk",
+ "DM": "www.google.dm",
+ "DO": "www.google.com.do",
+ "DZ": "www.google.dz",
+ "EC": "www.google.com.ec",
+ "EE": "www.google.ee",
+ "EG": "www.google.com.eg",
+ "ES": "www.google.es",
+ "ET": "www.google.com.et",
+ "FI": "www.google.fi",
+ "FJ": "www.google.com.fj",
+ "FM": "www.google.fm",
+ "FR": "www.google.fr",
+ "GA": "www.google.ga",
+ "GE": "www.google.ge",
+ "GG": "www.google.gg",
+ "GH": "www.google.com.gh",
+ "GI": "www.google.com.gi",
+ "GL": "www.google.gl",
+ "GM": "www.google.gm",
+ "GR": "www.google.gr",
+ "GT": "www.google.com.gt",
+ "GY": "www.google.gy",
+ "HK": "www.google.com.hk",
+ "HN": "www.google.hn",
+ "HR": "www.google.hr",
+ "HT": "www.google.ht",
+ "HU": "www.google.hu",
+ "ID": "www.google.co.id",
+ "IE": "www.google.ie",
+ "IL": "www.google.co.il",
+ "IM": "www.google.im",
+ "IN": "www.google.co.in",
+ "IQ": "www.google.iq",
+ "IS": "www.google.is",
+ "IT": "www.google.it",
+ "JE": "www.google.je",
+ "JM": "www.google.com.jm",
+ "JO": "www.google.jo",
+ "JP": "www.google.co.jp",
+ "KE": "www.google.co.ke",
+ "KG": "www.google.kg",
+ "KH": "www.google.com.kh",
+ "KI": "www.google.ki",
+ "KR": "www.google.co.kr",
+ "KW": "www.google.com.kw",
+ "KZ": "www.google.kz",
+ "LA": "www.google.la",
+ "LB": "www.google.com.lb",
+ "LI": "www.google.li",
+ "LK": "www.google.lk",
+ "LS": "www.google.co.ls",
+ "LT": "www.google.lt",
+ "LU": "www.google.lu",
+ "LV": "www.google.lv",
+ "LY": "www.google.com.ly",
+ "MA": "www.google.co.ma",
+ "MD": "www.google.md",
+ "ME": "www.google.me",
+ "MG": "www.google.mg",
+ "MK": "www.google.mk",
+ "ML": "www.google.ml",
+ "MM": "www.google.com.mm",
+ "MN": "www.google.mn",
+ "MS": "www.google.ms",
+ "MT": "www.google.com.mt",
+ "MU": "www.google.mu",
+ "MV": "www.google.mv",
+ "MW": "www.google.mw",
+ "MX": "www.google.com.mx",
+ "MY": "www.google.com.my",
+ "MZ": "www.google.co.mz",
+ "NA": "www.google.com.na",
+ "NE": "www.google.ne",
+ "NG": "www.google.com.ng",
+ "NI": "www.google.com.ni",
+ "NL": "www.google.nl",
+ "NO": "www.google.no",
+ "NP": "www.google.com.np",
+ "NR": "www.google.nr",
+ "NU": "www.google.nu",
+ "NZ": "www.google.co.nz",
+ "OM": "www.google.com.om",
+ "PA": "www.google.com.pa",
+ "PE": "www.google.com.pe",
+ "PG": "www.google.com.pg",
+ "PH": "www.google.com.ph",
+ "PK": "www.google.com.pk",
+ "PL": "www.google.pl",
+ "PN": "www.google.pn",
+ "PR": "www.google.com.pr",
+ "PS": "www.google.ps",
+ "PT": "www.google.pt",
+ "PY": "www.google.com.py",
+ "QA": "www.google.com.qa",
+ "RO": "www.google.ro",
+ "RS": "www.google.rs",
+ "RU": "www.google.ru",
+ "RW": "www.google.rw",
+ "SA": "www.google.com.sa",
+ "SB": "www.google.com.sb",
+ "SC": "www.google.sc",
+ "SE": "www.google.se",
+ "SG": "www.google.com.sg",
+ "SH": "www.google.sh",
+ "SI": "www.google.si",
+ "SK": "www.google.sk",
+ "SL": "www.google.com.sl",
+ "SM": "www.google.sm",
+ "SN": "www.google.sn",
+ "SO": "www.google.so",
+ "SR": "www.google.sr",
+ "ST": "www.google.st",
+ "SV": "www.google.com.sv",
+ "TD": "www.google.td",
+ "TG": "www.google.tg",
+ "TH": "www.google.co.th",
+ "TJ": "www.google.com.tj",
+ "TL": "www.google.tl",
+ "TM": "www.google.tm",
+ "TN": "www.google.tn",
+ "TO": "www.google.to",
+ "TR": "www.google.com.tr",
+ "TT": "www.google.tt",
+ "TW": "www.google.com.tw",
+ "TZ": "www.google.co.tz",
+ "UA": "www.google.com.ua",
+ "UG": "www.google.co.ug",
+ "UK": "www.google.co.uk",
+ "UY": "www.google.com.uy",
+ "UZ": "www.google.co.uz",
+ "VC": "www.google.com.vc",
+ "VE": "www.google.co.ve",
+ "VG": "www.google.vg",
+ "VI": "www.google.co.vi",
+ "VN": "www.google.com.vn",
+ "VU": "www.google.vu",
+ "WS": "www.google.ws",
+ "ZA": "www.google.co.za",
+ "ZM": "www.google.co.zm",
+ "ZW": "www.google.co.zw"
+ }
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "lang_af",
+ "ar": "lang_ar",
+ "be": "lang_be",
+ "bg": "lang_bg",
+ "ca": "lang_ca",
+ "cs": "lang_cs",
+ "da": "lang_da",
+ "de": "lang_de",
+ "el": "lang_el",
+ "en": "lang_en",
+ "eo": "lang_eo",
+ "es": "lang_es",
+ "et": "lang_et",
+ "fa": "lang_fa",
+ "fi": "lang_fi",
+ "fil": "lang_tl",
+ "fr": "lang_fr",
+ "he": "lang_iw",
+ "hi": "lang_hi",
+ "hr": "lang_hr",
+ "hu": "lang_hu",
+ "hy": "lang_hy",
+ "id": "lang_id",
+ "is": "lang_is",
+ "it": "lang_it",
+ "ja": "lang_ja",
+ "ko": "lang_ko",
+ "lt": "lang_lt",
+ "lv": "lang_lv",
+ "nb": "lang_no",
+ "nl": "lang_nl",
+ "pl": "lang_pl",
+ "pt": "lang_pt",
+ "ro": "lang_ro",
+ "ru": "lang_ru",
+ "sk": "lang_sk",
+ "sl": "lang_sl",
+ "sr": "lang_sr",
+ "sv": "lang_sv",
+ "sw": "lang_sw",
+ "th": "lang_th",
+ "tr": "lang_tr",
+ "uk": "lang_uk",
+ "vi": "lang_vi",
+ "zh": "lang_zh-CN",
+ "zh_Hans": "lang_zh-CN",
+ "zh_Hant": "lang_zh-TW"
+ },
+ "regions": {
+ "af-ZA": "ZA",
+ "ar-AE": "AE",
+ "ar-BH": "BH",
+ "ar-DJ": "DJ",
+ "ar-DZ": "DZ",
+ "ar-EG": "EG",
+ "ar-IL": "IL",
+ "ar-IQ": "IQ",
+ "ar-JO": "JO",
+ "ar-KW": "KW",
+ "ar-LB": "LB",
+ "ar-LY": "LY",
+ "ar-MA": "MA",
+ "ar-OM": "OM",
+ "ar-PS": "PS",
+ "ar-QA": "QA",
+ "ar-SA": "SA",
+ "ar-SO": "SO",
+ "ar-TD": "TD",
+ "ar-TN": "TN",
+ "be-BY": "BY",
+ "bg-BG": "BG",
+ "ca-AD": "AD",
+ "ca-ES": "ES",
+ "cs-CZ": "CZ",
+ "da-DK": "DK",
+ "de-AT": "AT",
+ "de-BE": "BE",
+ "de-CH": "CH",
+ "de-DE": "DE",
+ "de-LI": "LI",
+ "de-LU": "LU",
+ "el-CY": "CY",
+ "el-GR": "GR",
+ "en-AG": "AG",
+ "en-AI": "AI",
+ "en-AS": "AS",
+ "en-AU": "AU",
+ "en-BI": "BI",
+ "en-BS": "BS",
+ "en-BW": "BW",
+ "en-BZ": "BZ",
+ "en-CA": "CA",
+ "en-CK": "CK",
+ "en-CM": "CM",
+ "en-DM": "DM",
+ "en-FJ": "FJ",
+ "en-FM": "FM",
+ "en-GB": "GB",
+ "en-GG": "GG",
+ "en-GH": "GH",
+ "en-GI": "GI",
+ "en-GM": "GM",
+ "en-GY": "GY",
+ "en-HK": "HK",
+ "en-IE": "IE",
+ "en-IM": "IM",
+ "en-IN": "IN",
+ "en-JE": "JE",
+ "en-JM": "JM",
+ "en-KE": "KE",
+ "en-KI": "KI",
+ "en-LS": "LS",
+ "en-MG": "MG",
+ "en-MS": "MS",
+ "en-MT": "MT",
+ "en-MU": "MU",
+ "en-MW": "MW",
+ "en-NA": "NA",
+ "en-NG": "NG",
+ "en-NR": "NR",
+ "en-NU": "NU",
+ "en-NZ": "NZ",
+ "en-PG": "PG",
+ "en-PH": "PH",
+ "en-PK": "PK",
+ "en-PN": "PN",
+ "en-PR": "PR",
+ "en-RW": "RW",
+ "en-SB": "SB",
+ "en-SC": "SC",
+ "en-SG": "SG",
+ "en-SH": "SH",
+ "en-SL": "SL",
+ "en-TO": "TO",
+ "en-TT": "TT",
+ "en-TZ": "TZ",
+ "en-UG": "UG",
+ "en-US": "US",
+ "en-VC": "VC",
+ "en-VG": "VG",
+ "en-VI": "VI",
+ "en-VU": "VU",
+ "en-WS": "WS",
+ "en-ZA": "ZA",
+ "en-ZM": "ZM",
+ "en-ZW": "ZW",
+ "es-AR": "AR",
+ "es-BO": "BO",
+ "es-CL": "CL",
+ "es-CO": "CO",
+ "es-CR": "CR",
+ "es-CU": "CU",
+ "es-DO": "DO",
+ "es-EC": "EC",
+ "es-ES": "ES",
+ "es-GT": "GT",
+ "es-HN": "HN",
+ "es-MX": "MX",
+ "es-NI": "NI",
+ "es-PA": "PA",
+ "es-PE": "PE",
+ "es-PR": "PR",
+ "es-PY": "PY",
+ "es-SV": "SV",
+ "es-US": "US",
+ "es-UY": "UY",
+ "es-VE": "VE",
+ "et-EE": "EE",
+ "fa-AF": "AF",
+ "fi-FI": "FI",
+ "fil-PH": "PH",
+ "fr-BE": "BE",
+ "fr-BF": "BF",
+ "fr-BI": "BI",
+ "fr-BJ": "BJ",
+ "fr-CA": "CA",
+ "fr-CD": "CD",
+ "fr-CF": "CF",
+ "fr-CG": "CG",
+ "fr-CH": "CH",
+ "fr-CI": "CI",
+ "fr-CM": "CM",
+ "fr-DJ": "DJ",
+ "fr-DZ": "DZ",
+ "fr-FR": "FR",
+ "fr-GA": "GA",
+ "fr-HT": "HT",
+ "fr-LU": "LU",
+ "fr-MA": "MA",
+ "fr-MG": "MG",
+ "fr-ML": "ML",
+ "fr-MU": "MU",
+ "fr-NE": "NE",
+ "fr-RW": "RW",
+ "fr-SC": "SC",
+ "fr-SN": "SN",
+ "fr-TD": "TD",
+ "fr-TG": "TG",
+ "fr-TN": "TN",
+ "fr-VU": "VU",
+ "he-IL": "IL",
+ "hi-IN": "IN",
+ "hr-BA": "BA",
+ "hr-HR": "HR",
+ "hu-HU": "HU",
+ "hy-AM": "AM",
+ "id-ID": "ID",
+ "is-IS": "IS",
+ "it-CH": "CH",
+ "it-IT": "IT",
+ "it-SM": "SM",
+ "ja-JP": "JP",
+ "ko-KR": "KR",
+ "lt-LT": "LT",
+ "lv-LV": "LV",
+ "nb-NO": "NO",
+ "nl-BE": "BE",
+ "nl-NL": "NL",
+ "nl-SR": "SR",
+ "pl-PL": "PL",
+ "pt-AO": "AO",
+ "pt-BR": "BR",
+ "pt-CV": "CV",
+ "pt-MZ": "MZ",
+ "pt-PT": "PT",
+ "pt-ST": "ST",
+ "pt-TL": "TL",
+ "ro-MD": "MD",
+ "ro-RO": "RO",
+ "ru-BY": "BY",
+ "ru-KG": "KG",
+ "ru-KZ": "KZ",
+ "ru-RU": "RU",
+ "ru-UA": "UA",
+ "sk-SK": "SK",
+ "sl-SI": "SI",
+ "sr-BA": "BA",
+ "sr-RS": "RS",
+ "sv-FI": "FI",
+ "sv-SE": "SE",
+ "sw-CD": "CD",
+ "sw-KE": "KE",
+ "sw-TZ": "TZ",
+ "sw-UG": "UG",
+ "th-TH": "TH",
+ "tr-CY": "CY",
+ "tr-TR": "TR",
+ "uk-UA": "UA",
+ "vi-VN": "VN",
+ "zh-CN": "HK",
+ "zh-HK": "HK",
+ "zh-SG": "SG",
+ "zh-TW": "TW"
+ }
+ },
+ "peertube": {
+ "all_locale": null,
+ "custom": {},
+ "data_type": "traits_v1",
+ "languages": {
+ "ca": "ca",
+ "cs": "cs",
+ "de": "de",
+ "el": "el",
+ "en": "en",
+ "eo": "eo",
+ "es": "es",
+ "eu": "eu",
+ "fi": "fi",
+ "fr": "fr",
+ "gd": "gd",
+ "it": "it",
+ "ja": "ja",
+ "nl": "nl",
+ "pl": "pl",
+ "pt": "pt",
+ "ru": "ru",
+ "sv": "sv",
+ "zh": "zh",
+ "zh_Hans": "zh",
+ "zh_Hant": "zh"
+ },
+ "regions": {}
+ },
+ "qwant": {
+ "all_locale": null,
+ "custom": {},
+ "data_type": "traits_v1",
+ "languages": {},
+ "regions": {
+ "bg-BG": "bg_BG",
+ "ca-ES": "ca_ES",
+ "cs-CZ": "cs_CZ",
+ "da-DK": "da_DK",
+ "de-AT": "de_AT",
+ "de-CH": "de_CH",
+ "de-DE": "de_DE",
+ "el-GR": "el_GR",
+ "en-AU": "en_AU",
+ "en-CA": "en_CA",
+ "en-GB": "en_GB",
+ "en-IE": "en_IE",
+ "en-MY": "en_MY",
+ "en-NZ": "en_NZ",
+ "en-US": "en_US",
+ "es-AR": "es_AR",
+ "es-CL": "es_CL",
+ "es-ES": "es_ES",
+ "es-MX": "es_MX",
+ "et-EE": "et_EE",
+ "fi-FI": "fi_FI",
+ "fr-BE": "fr_BE",
+ "fr-CA": "fr_CA",
+ "fr-CH": "fr_CH",
+ "fr-FR": "fr_FR",
+ "hu-HU": "hu_HU",
+ "it-CH": "it_CH",
+ "it-IT": "it_IT",
+ "ko-KR": "ko_KR",
+ "nb-NO": "nb_NO",
+ "nl-BE": "nl_BE",
+ "nl-NL": "nl_NL",
+ "pl-PL": "pl_PL",
+ "pt-PT": "pt_PT",
+ "ro-RO": "ro_RO",
+ "sv-SE": "sv_SE",
+ "th-TH": "th_TH",
+ "zh-CN": "zh_CN",
+ "zh-HK": "zh_HK"
+ }
+ },
+ "qwant images": {
+ "all_locale": null,
+ "custom": {},
+ "data_type": "traits_v1",
+ "languages": {},
+ "regions": {
+ "bg-BG": "bg_BG",
+ "ca-ES": "ca_ES",
+ "cs-CZ": "cs_CZ",
+ "da-DK": "da_DK",
+ "de-AT": "de_AT",
+ "de-CH": "de_CH",
+ "de-DE": "de_DE",
+ "el-GR": "el_GR",
+ "en-AU": "en_AU",
+ "en-CA": "en_CA",
+ "en-GB": "en_GB",
+ "en-IE": "en_IE",
+ "en-MY": "en_MY",
+ "en-NZ": "en_NZ",
+ "en-US": "en_US",
+ "es-AR": "es_AR",
+ "es-CL": "es_CL",
+ "es-ES": "es_ES",
+ "es-MX": "es_MX",
+ "et-EE": "et_EE",
+ "fi-FI": "fi_FI",
+ "fr-BE": "fr_BE",
+ "fr-CA": "fr_CA",
+ "fr-CH": "fr_CH",
+ "fr-FR": "fr_FR",
+ "hu-HU": "hu_HU",
+ "it-CH": "it_CH",
+ "it-IT": "it_IT",
+ "ko-KR": "ko_KR",
+ "nb-NO": "nb_NO",
+ "nl-BE": "nl_BE",
+ "nl-NL": "nl_NL",
+ "pl-PL": "pl_PL",
+ "pt-PT": "pt_PT",
+ "ro-RO": "ro_RO",
+ "sv-SE": "sv_SE",
+ "th-TH": "th_TH",
+ "zh-CN": "zh_CN",
+ "zh-HK": "zh_HK"
+ }
+ },
+ "qwant news": {
+ "all_locale": null,
+ "custom": {},
+ "data_type": "traits_v1",
+ "languages": {},
+ "regions": {
+ "ca-ES": "ca_ES",
+ "de-AT": "de_AT",
+ "de-CH": "de_CH",
+ "de-DE": "de_DE",
+ "en-AU": "en_AU",
+ "en-CA": "en_CA",
+ "en-GB": "en_GB",
+ "en-IE": "en_IE",
+ "en-MY": "en_MY",
+ "en-NZ": "en_NZ",
+ "en-US": "en_US",
+ "es-AR": "es_AR",
+ "es-CL": "es_CL",
+ "es-ES": "es_ES",
+ "es-MX": "es_MX",
+ "fr-BE": "fr_BE",
+ "fr-CA": "fr_CA",
+ "fr-CH": "fr_CH",
+ "fr-FR": "fr_FR",
+ "it-CH": "it_CH",
+ "it-IT": "it_IT",
+ "nl-BE": "nl_BE",
+ "nl-NL": "nl_NL",
+ "pt-PT": "pt_PT"
+ }
+ },
+ "qwant videos": {
+ "all_locale": null,
+ "custom": {},
+ "data_type": "traits_v1",
+ "languages": {},
+ "regions": {
+ "bg-BG": "bg_BG",
+ "ca-ES": "ca_ES",
+ "cs-CZ": "cs_CZ",
+ "da-DK": "da_DK",
+ "de-AT": "de_AT",
+ "de-CH": "de_CH",
+ "de-DE": "de_DE",
+ "el-GR": "el_GR",
+ "en-AU": "en_AU",
+ "en-CA": "en_CA",
+ "en-GB": "en_GB",
+ "en-IE": "en_IE",
+ "en-MY": "en_MY",
+ "en-NZ": "en_NZ",
+ "en-US": "en_US",
+ "es-AR": "es_AR",
+ "es-CL": "es_CL",
+ "es-ES": "es_ES",
+ "es-MX": "es_MX",
+ "et-EE": "et_EE",
+ "fi-FI": "fi_FI",
+ "fr-BE": "fr_BE",
+ "fr-CA": "fr_CA",
+ "fr-CH": "fr_CH",
+ "fr-FR": "fr_FR",
+ "hu-HU": "hu_HU",
+ "it-CH": "it_CH",
+ "it-IT": "it_IT",
+ "ko-KR": "ko_KR",
+ "nb-NO": "nb_NO",
+ "nl-BE": "nl_BE",
+ "nl-NL": "nl_NL",
+ "pl-PL": "pl_PL",
+ "pt-PT": "pt_PT",
+ "ro-RO": "ro_RO",
+ "sv-SE": "sv_SE",
+ "th-TH": "th_TH",
+ "zh-CN": "zh_CN",
+ "zh-HK": "zh_HK"
+ }
+ },
+ "sepiasearch": {
+ "all_locale": null,
+ "custom": {},
+ "data_type": "traits_v1",
+ "languages": {
+ "ca": "ca",
+ "cs": "cs",
+ "de": "de",
+ "el": "el",
+ "en": "en",
+ "eo": "eo",
+ "es": "es",
+ "eu": "eu",
+ "fi": "fi",
+ "fr": "fr",
+ "gd": "gd",
+ "it": "it",
+ "ja": "ja",
+ "nl": "nl",
+ "pl": "pl",
+ "pt": "pt",
+ "ru": "ru",
+ "sv": "sv",
+ "zh": "zh",
+ "zh_Hans": "zh",
+ "zh_Hant": "zh"
+ },
+ "regions": {}
+ },
+ "startpage": {
+ "all_locale": null,
+ "custom": {},
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "afrikaans",
+ "am": "amharic",
+ "ar": "arabic",
+ "az": "azerbaijani",
+ "be": "belarusian",
+ "bg": "bulgarian",
+ "bn": "bengali",
+ "bs": "bosnian",
+ "ca": "catalan",
+ "cs": "czech",
+ "cy": "welsh",
+ "da": "dansk",
+ "de": "deutsch",
+ "el": "greek",
+ "en": "english",
+ "eo": "esperanto",
+ "es": "espanol",
+ "et": "estonian",
+ "eu": "basque",
+ "fa": "persian",
+ "fi": "suomi",
+ "fo": "faroese",
+ "fr": "francais",
+ "fy": "frisian",
+ "ga": "irish",
+ "gd": "gaelic",
+ "gl": "galician",
+ "gu": "gujarati",
+ "he": "hebrew",
+ "hi": "hindi",
+ "hr": "croatian",
+ "hu": "hungarian",
+ "ia": "interlingua",
+ "id": "indonesian",
+ "is": "icelandic",
+ "it": "italiano",
+ "ja": "nihongo",
+ "jv": "javanese",
+ "ka": "georgian",
+ "kn": "kannada",
+ "ko": "hangul",
+ "la": "latin",
+ "lt": "lithuanian",
+ "lv": "latvian",
+ "mai": "bihari",
+ "mk": "macedonian",
+ "ml": "malayalam",
+ "mr": "marathi",
+ "ms": "malay",
+ "mt": "maltese",
+ "nb": "norsk",
+ "ne": "nepali",
+ "nl": "nederlands",
+ "oc": "occitan",
+ "pa": "punjabi",
+ "pl": "polski",
+ "pt": "portugues",
+ "ro": "romanian",
+ "ru": "russian",
+ "si": "sinhalese",
+ "sk": "slovak",
+ "sl": "slovenian",
+ "sq": "albanian",
+ "sr": "serbian",
+ "su": "sudanese",
+ "sv": "svenska",
+ "sw": "swahili",
+ "ta": "tamil",
+ "te": "telugu",
+ "th": "thai",
+ "ti": "tigrinya",
+ "tl": "tagalog",
+ "tr": "turkce",
+ "uk": "ukrainian",
+ "ur": "urdu",
+ "uz": "uzbek",
+ "vi": "vietnamese",
+ "xh": "xhosa",
+ "zh": "jiantizhongwen",
+ "zh_Hant": "fantizhengwen",
+ "zu": "zulu"
+ },
+ "regions": {
+ "ar-EG": "ar_EG",
+ "bg-BG": "bg_BG",
+ "ca-ES": "ca_ES",
+ "cs-CZ": "cs_CZ",
+ "da-DK": "da_DK",
+ "de-AT": "de_AT",
+ "de-CH": "de_CH",
+ "de-DE": "de_DE",
+ "el-GR": "el_GR",
+ "en-AU": "en_AU",
+ "en-CA": "en_CA",
+ "en-GB": "en-GB_GB",
+ "en-IE": "en_IE",
+ "en-IN": "en_IN",
+ "en-MY": "en_MY",
+ "en-NZ": "en_NZ",
+ "en-PH": "en_PH",
+ "en-US": "en_US",
+ "en-ZA": "en_ZA",
+ "es-AR": "es_AR",
+ "es-CL": "es_CL",
+ "es-CO": "es_CO",
+ "es-ES": "es_ES",
+ "es-MX": "es_MX",
+ "es-PE": "es_PE",
+ "es-US": "es_US",
+ "es-UY": "es_UY",
+ "es-VE": "es_VE",
+ "fi-FI": "fi_FI",
+ "fil-PH": "fil_PH",
+ "fr-BE": "fr_BE",
+ "fr-CA": "fr_CA",
+ "fr-CH": "fr_CH",
+ "fr-FR": "fr_FR",
+ "hi-IN": "hi_IN",
+ "id-ID": "id_ID",
+ "it-CH": "it_CH",
+ "it-IT": "it_IT",
+ "ja-JP": "ja_JP",
+ "ko-KR": "ko_KR",
+ "ms-MY": "ms_MY",
+ "nb-NO": "no_NO",
+ "nl-BE": "nl_BE",
+ "nl-NL": "nl_NL",
+ "pl-PL": "pl_PL",
+ "pt-BR": "pt-BR_BR",
+ "pt-PT": "pt_PT",
+ "ro-RO": "ro_RO",
+ "ru-BY": "ru_BY",
+ "ru-RU": "ru_RU",
+ "sv-SE": "sv_SE",
+ "tr-TR": "tr_TR",
+ "uk-UA": "uk_UA",
+ "vi-VN": "vi_VN",
+ "zh-CN": "zh-CN_CN",
+ "zh-HK": "zh-TW_HK",
+ "zh-TW": "zh-TW_TW"
+ }
+ },
+ "wikidata": {
+ "all_locale": null,
+ "custom": {
+ "wiki_netloc": {}
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "af",
+ "am": "am",
+ "ar": "ar",
+ "as": "as",
+ "az": "az",
+ "be": "be",
+ "bg": "bg",
+ "bn": "bn",
+ "bs": "bs",
+ "ca": "ca",
+ "ckb": "ckb",
+ "cs": "cs",
+ "da": "da",
+ "de": "de",
+ "el": "el",
+ "en": "en",
+ "es": "es",
+ "et": "et",
+ "fa": "fa",
+ "fi": "fi",
+ "fil": "tl",
+ "fo": "fo",
+ "fr": "fr",
+ "fy": "fy",
+ "gl": "gl",
+ "gsw": "als",
+ "gu": "gu",
+ "he": "he",
+ "hi": "hi",
+ "hsb": "hsb",
+ "hu": "hu",
+ "hy": "hy",
+ "id": "id",
+ "is": "is",
+ "it": "it",
+ "ja": "ja",
+ "jv": "jv",
+ "ka": "ka",
+ "kn": "kn",
+ "ko": "ko",
+ "lb": "lb",
+ "lt": "lt",
+ "lv": "lv",
+ "mai": "mai",
+ "mk": "mk",
+ "ml": "ml",
+ "mn": "mn",
+ "mr": "mr",
+ "ne": "ne",
+ "no": "no",
+ "or": "or",
+ "os": "os",
+ "pa": "pa",
+ "pl": "pl",
+ "ps": "ps",
+ "pt": "pt",
+ "qu": "qu",
+ "ro": "ro",
+ "ru": "ru",
+ "sa": "sa",
+ "sah": "sah",
+ "sd": "sd",
+ "si": "si",
+ "sk": "sk",
+ "sl": "sl",
+ "sq": "sq",
+ "sr": "sr",
+ "ta": "ta",
+ "te": "te",
+ "th": "th",
+ "tr": "tr",
+ "uk": "uk",
+ "ur": "ur",
+ "uz": "uz",
+ "vi": "vi",
+ "yi": "yi",
+ "zh": "zh",
+ "zh_Hant": "zh-classical"
+ },
+ "regions": {}
+ },
+ "wikipedia": {
+ "all_locale": null,
+ "custom": {
+ "wiki_netloc": {
+ "af": "af.wikipedia.org",
+ "als": "als.wikipedia.org",
+ "am": "am.wikipedia.org",
+ "ar": "ar.wikipedia.org",
+ "as": "as.wikipedia.org",
+ "az": "az.wikipedia.org",
+ "be": "be.wikipedia.org",
+ "bg": "bg.wikipedia.org",
+ "bn": "bn.wikipedia.org",
+ "bs": "bs.wikipedia.org",
+ "ca": "ca.wikipedia.org",
+ "ckb": "ckb.wikipedia.org",
+ "cs": "cs.wikipedia.org",
+ "da": "da.wikipedia.org",
+ "de": "de.wikipedia.org",
+ "el": "el.wikipedia.org",
+ "en": "en.wikipedia.org",
+ "es": "es.wikipedia.org",
+ "et": "et.wikipedia.org",
+ "fa": "fa.wikipedia.org",
+ "fi": "fi.wikipedia.org",
+ "fo": "fo.wikipedia.org",
+ "fr": "fr.wikipedia.org",
+ "fy": "fy.wikipedia.org",
+ "gl": "gl.wikipedia.org",
+ "gu": "gu.wikipedia.org",
+ "he": "he.wikipedia.org",
+ "hi": "hi.wikipedia.org",
+ "hsb": "hsb.wikipedia.org",
+ "hu": "hu.wikipedia.org",
+ "hy": "hy.wikipedia.org",
+ "id": "id.wikipedia.org",
+ "is": "is.wikipedia.org",
+ "it": "it.wikipedia.org",
+ "ja": "ja.wikipedia.org",
+ "jv": "jv.wikipedia.org",
+ "ka": "ka.wikipedia.org",
+ "kn": "kn.wikipedia.org",
+ "ko": "ko.wikipedia.org",
+ "lb": "lb.wikipedia.org",
+ "lt": "lt.wikipedia.org",
+ "lv": "lv.wikipedia.org",
+ "mai": "mai.wikipedia.org",
+ "mk": "mk.wikipedia.org",
+ "ml": "ml.wikipedia.org",
+ "mn": "mn.wikipedia.org",
+ "mr": "mr.wikipedia.org",
+ "ne": "ne.wikipedia.org",
+ "no": "no.wikipedia.org",
+ "or": "or.wikipedia.org",
+ "os": "os.wikipedia.org",
+ "pa": "pa.wikipedia.org",
+ "pl": "pl.wikipedia.org",
+ "ps": "ps.wikipedia.org",
+ "pt": "pt.wikipedia.org",
+ "qu": "qu.wikipedia.org",
+ "ro": "ro.wikipedia.org",
+ "ru": "ru.wikipedia.org",
+ "sa": "sa.wikipedia.org",
+ "sah": "sah.wikipedia.org",
+ "sd": "sd.wikipedia.org",
+ "si": "si.wikipedia.org",
+ "sk": "sk.wikipedia.org",
+ "sl": "sl.wikipedia.org",
+ "sq": "sq.wikipedia.org",
+ "sr": "sr.wikipedia.org",
+ "ta": "ta.wikipedia.org",
+ "te": "te.wikipedia.org",
+ "th": "th.wikipedia.org",
+ "tl": "tl.wikipedia.org",
+ "tr": "tr.wikipedia.org",
+ "uk": "uk.wikipedia.org",
+ "ur": "ur.wikipedia.org",
+ "uz": "uz.wikipedia.org",
+ "vi": "vi.wikipedia.org",
+ "yi": "yi.wikipedia.org",
+ "zh": "zh.wikipedia.org",
+ "zh-classical": "zh-classical.wikipedia.org"
+ }
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "af",
+ "am": "am",
+ "ar": "ar",
+ "as": "as",
+ "az": "az",
+ "be": "be",
+ "bg": "bg",
+ "bn": "bn",
+ "bs": "bs",
+ "ca": "ca",
+ "ckb": "ckb",
+ "cs": "cs",
+ "da": "da",
+ "de": "de",
+ "el": "el",
+ "en": "en",
+ "es": "es",
+ "et": "et",
+ "fa": "fa",
+ "fi": "fi",
+ "fil": "tl",
+ "fo": "fo",
+ "fr": "fr",
+ "fy": "fy",
+ "gl": "gl",
+ "gsw": "als",
+ "gu": "gu",
+ "he": "he",
+ "hi": "hi",
+ "hsb": "hsb",
+ "hu": "hu",
+ "hy": "hy",
+ "id": "id",
+ "is": "is",
+ "it": "it",
+ "ja": "ja",
+ "jv": "jv",
+ "ka": "ka",
+ "kn": "kn",
+ "ko": "ko",
+ "lb": "lb",
+ "lt": "lt",
+ "lv": "lv",
+ "mai": "mai",
+ "mk": "mk",
+ "ml": "ml",
+ "mn": "mn",
+ "mr": "mr",
+ "ne": "ne",
+ "no": "no",
+ "or": "or",
+ "os": "os",
+ "pa": "pa",
+ "pl": "pl",
+ "ps": "ps",
+ "pt": "pt",
+ "qu": "qu",
+ "ro": "ro",
+ "ru": "ru",
+ "sa": "sa",
+ "sah": "sah",
+ "sd": "sd",
+ "si": "si",
+ "sk": "sk",
+ "sl": "sl",
+ "sq": "sq",
+ "sr": "sr",
+ "ta": "ta",
+ "te": "te",
+ "th": "th",
+ "tr": "tr",
+ "uk": "uk",
+ "ur": "ur",
+ "uz": "uz",
+ "vi": "vi",
+ "yi": "yi",
+ "zh": "zh",
+ "zh_Hans": "zh",
+ "zh_Hant": "zh-classical"
+ },
+ "regions": {}
+ },
+ "yahoo": {
+ "all_locale": "any",
+ "custom": {},
+ "data_type": "traits_v1",
+ "languages": {
+ "ar": "ar",
+ "bg": "bg",
+ "cs": "cs",
+ "da": "da",
+ "de": "de",
+ "el": "el",
+ "en": "en",
+ "es": "es",
+ "et": "et",
+ "fi": "fi",
+ "fr": "fr",
+ "he": "he",
+ "hr": "hr",
+ "hu": "hu",
+ "it": "it",
+ "ja": "ja",
+ "ko": "ko",
+ "lt": "lt",
+ "lv": "lv",
+ "nl": "nl",
+ "no": "no",
+ "pl": "pl",
+ "pt": "pt",
+ "ro": "ro",
+ "ru": "ru",
+ "sk": "sk",
+ "sl": "sl",
+ "sv": "sv",
+ "th": "th",
+ "tr": "tr",
+ "zh_Hans": "zh_chs",
+ "zh_Hant": "zh_cht"
+ },
+ "regions": {}
+ }
+} \ No newline at end of file
diff --git a/searx/data/engines_languages.json b/searx/data/engines_languages.json
deleted file mode 100644
index acd36439c..000000000
--- a/searx/data/engines_languages.json
+++ /dev/null
@@ -1,4381 +0,0 @@
-{
- "bing": [
- "af",
- "am",
- "ar",
- "as",
- "az-latn",
- "be",
- "bg",
- "bn",
- "bs-latn",
- "ca",
- "ca-es-valencia",
- "chr-cher",
- "cs",
- "cy",
- "da",
- "de",
- "el",
- "en",
- "es",
- "et",
- "eu",
- "fa",
- "fi",
- "fil",
- "fr",
- "ga",
- "gd",
- "gl",
- "gu",
- "ha-latn",
- "he",
- "hi",
- "hr",
- "hu",
- "hy",
- "id",
- "ig",
- "is",
- "it",
- "ja",
- "ka",
- "kk",
- "km",
- "kn",
- "ko",
- "kok",
- "ku-arab",
- "ky",
- "lb",
- "lo",
- "lt",
- "lv",
- "mi",
- "mk",
- "ml",
- "mn-Cyrl-MN",
- "mr",
- "ms",
- "mt",
- "nb",
- "ne",
- "nl",
- "nn",
- "nso",
- "or",
- "pa-arab",
- "pa-guru",
- "pl",
- "prs",
- "pt-BR",
- "pt-PT",
- "quc",
- "quz",
- "ro",
- "ru",
- "rw",
- "sd-arab",
- "si",
- "sk",
- "sl",
- "sq",
- "sr-cyrl",
- "sr-latn",
- "sv",
- "sw",
- "ta",
- "te",
- "tg-cyrl",
- "th",
- "ti",
- "tk",
- "tn",
- "tr",
- "tt",
- "ug",
- "uk",
- "ur",
- "uz-latn",
- "vi",
- "wo",
- "xh",
- "yo",
- "zh-Hans",
- "zh-Hant",
- "zu"
- ],
- "bing images": [
- "af",
- "am",
- "ar",
- "as",
- "az-latn",
- "be",
- "bg",
- "bn",
- "bs-latn",
- "ca",
- "ca-es-valencia",
- "chr-cher",
- "cs",
- "cy",
- "da",
- "de",
- "el",
- "en",
- "es",
- "et",
- "eu",
- "fa",
- "fi",
- "fil",
- "fr",
- "ga",
- "gd",
- "gl",
- "gu",
- "ha-latn",
- "he",
- "hi",
- "hr",
- "hu",
- "hy",
- "id",
- "ig",
- "is",
- "it",
- "ja",
- "ka",
- "kk",
- "km",
- "kn",
- "ko",
- "kok",
- "ku-arab",
- "ky",
- "lb",
- "lo",
- "lt",
- "lv",
- "mi",
- "mk",
- "ml",
- "mn-Cyrl-MN",
- "mr",
- "ms",
- "mt",
- "nb",
- "ne",
- "nl",
- "nn",
- "nso",
- "or",
- "pa-arab",
- "pa-guru",
- "pl",
- "prs",
- "pt-BR",
- "pt-PT",
- "quc",
- "quz",
- "ro",
- "ru",
- "rw",
- "sd-arab",
- "si",
- "sk",
- "sl",
- "sq",
- "sr-cyrl",
- "sr-latn",
- "sv",
- "sw",
- "ta",
- "te",
- "tg-cyrl",
- "th",
- "ti",
- "tk",
- "tn",
- "tr",
- "tt",
- "ug",
- "uk",
- "ur",
- "uz-latn",
- "vi",
- "wo",
- "xh",
- "yo",
- "zh-Hans",
- "zh-Hant",
- "zu"
- ],
- "bing news": [
- "af",
- "am",
- "ar",
- "as",
- "az-latn",
- "be",
- "bg",
- "bn",
- "bs-latn",
- "ca",
- "ca-es-valencia",
- "chr-cher",
- "cs",
- "cy",
- "da",
- "de",
- "el",
- "en",
- "es",
- "et",
- "eu",
- "fa",
- "fi",
- "fil",
- "fr",
- "ga",
- "gd",
- "gl",
- "gu",
- "ha-latn",
- "he",
- "hi",
- "hr",
- "hu",
- "hy",
- "id",
- "ig",
- "is",
- "it",
- "ja",
- "ka",
- "kk",
- "km",
- "kn",
- "ko",
- "kok",
- "ku-arab",
- "ky",
- "lb",
- "lo",
- "lt",
- "lv",
- "mi",
- "mk",
- "ml",
- "mn-Cyrl-MN",
- "mr",
- "ms",
- "mt",
- "nb",
- "ne",
- "nl",
- "nn",
- "nso",
- "or",
- "pa-arab",
- "pa-guru",
- "pl",
- "prs",
- "pt-BR",
- "pt-PT",
- "quc",
- "quz",
- "ro",
- "ru",
- "rw",
- "sd-arab",
- "si",
- "sk",
- "sl",
- "sq",
- "sr-cyrl",
- "sr-latn",
- "sv",
- "sw",
- "ta",
- "te",
- "tg-cyrl",
- "th",
- "ti",
- "tk",
- "tn",
- "tr",
- "tt",
- "ug",
- "uk",
- "ur",
- "uz-latn",
- "vi",
- "wo",
- "xh",
- "yo",
- "zh-Hans",
- "zh-Hant",
- "zu"
- ],
- "bing videos": [
- "af",
- "am",
- "ar",
- "as",
- "az-latn",
- "be",
- "bg",
- "bn",
- "bs-latn",
- "ca",
- "ca-es-valencia",
- "chr-cher",
- "cs",
- "cy",
- "da",
- "de",
- "el",
- "en",
- "es",
- "et",
- "eu",
- "fa",
- "fi",
- "fil",
- "fr",
- "ga",
- "gd",
- "gl",
- "gu",
- "ha-latn",
- "he",
- "hi",
- "hr",
- "hu",
- "hy",
- "id",
- "ig",
- "is",
- "it",
- "ja",
- "ka",
- "kk",
- "km",
- "kn",
- "ko",
- "kok",
- "ku-arab",
- "ky",
- "lb",
- "lo",
- "lt",
- "lv",
- "mi",
- "mk",
- "ml",
- "mn-Cyrl-MN",
- "mr",
- "ms",
- "mt",
- "nb",
- "ne",
- "nl",
- "nn",
- "nso",
- "or",
- "pa-arab",
- "pa-guru",
- "pl",
- "prs",
- "pt-BR",
- "pt-PT",
- "quc",
- "quz",
- "ro",
- "ru",
- "rw",
- "sd-arab",
- "si",
- "sk",
- "sl",
- "sq",
- "sr-cyrl",
- "sr-latn",
- "sv",
- "sw",
- "ta",
- "te",
- "tg-cyrl",
- "th",
- "ti",
- "tk",
- "tn",
- "tr",
- "tt",
- "ug",
- "uk",
- "ur",
- "uz-latn",
- "vi",
- "wo",
- "xh",
- "yo",
- "zh-Hans",
- "zh-Hant",
- "zu"
- ],
- "dailymotion": [
- "ar_AA",
- "ar_AE",
- "ar_EG",
- "ar_SA",
- "de_AT",
- "de_CH",
- "de_DE",
- "el_GR",
- "en_AU",
- "en_CA",
- "en_EN",
- "en_GB",
- "en_HK",
- "en_IE",
- "en_IN",
- "en_NG",
- "en_PH",
- "en_PK",
- "en_SG",
- "en_US",
- "en_ZA",
- "es_AR",
- "es_ES",
- "es_MX",
- "fr_BE",
- "fr_CA",
- "fr_CH",
- "fr_CI",
- "fr_FR",
- "fr_MA",
- "fr_SN",
- "fr_TN",
- "id_ID",
- "it_CH",
- "it_IT",
- "ja_JP",
- "ko_KR",
- "ms_MY",
- "nl_BE",
- "nl_NL",
- "pl_PL",
- "pt_BR",
- "pt_PT",
- "ro_RO",
- "ru_RU",
- "th_TH",
- "tr_TR",
- "vi_VN",
- "zh_CN",
- "zh_TW"
- ],
- "ddg definitions": [
- "ar-XA",
- "bg-BG",
- "ca-CT",
- "ca-ES",
- "cs-CZ",
- "da-DK",
- "de-AT",
- "de-CH",
- "de-DE",
- "el-GR",
- "en-AU",
- "en-CA",
- "en-ID",
- "en-IE",
- "en-IL",
- "en-IN",
- "en-MY",
- "en-NZ",
- "en-PH",
- "en-PK",
- "en-SG",
- "en-TH",
- "en-UK",
- "en-US",
- "en-VN",
- "en-ZA",
- "es-AR",
- "es-CL",
- "es-CO",
- "es-ES",
- "es-MX",
- "es-PE",
- "es-US",
- "et-EE",
- "fi-FI",
- "fr-BE",
- "fr-CA",
- "fr-CH",
- "fr-FR",
- "hr-HR",
- "hu-HU",
- "it-IT",
- "jp-JP",
- "kr-KR",
- "lt-LT",
- "lv-LV",
- "nl-BE",
- "nl-NL",
- "no-NO",
- "pl-PL",
- "pt-BR",
- "pt-PT",
- "ro-RO",
- "ru-RU",
- "sk-SK",
- "sl-SL",
- "sv-SE",
- "tr-TR",
- "tzh-HK",
- "tzh-TW",
- "uk-UA",
- "wt-WT",
- "zh-CN"
- ],
- "duckduckgo": [
- "ar-XA",
- "bg-BG",
- "ca-CT",
- "ca-ES",
- "cs-CZ",
- "da-DK",
- "de-AT",
- "de-CH",
- "de-DE",
- "el-GR",
- "en-AU",
- "en-CA",
- "en-ID",
- "en-IE",
- "en-IL",
- "en-IN",
- "en-MY",
- "en-NZ",
- "en-PH",
- "en-PK",
- "en-SG",
- "en-TH",
- "en-UK",
- "en-US",
- "en-VN",
- "en-ZA",
- "es-AR",
- "es-CL",
- "es-CO",
- "es-ES",
- "es-MX",
- "es-PE",
- "es-US",
- "et-EE",
- "fi-FI",
- "fr-BE",
- "fr-CA",
- "fr-CH",
- "fr-FR",
- "hr-HR",
- "hu-HU",
- "it-IT",
- "jp-JP",
- "kr-KR",
- "lt-LT",
- "lv-LV",
- "nl-BE",
- "nl-NL",
- "no-NO",
- "pl-PL",
- "pt-BR",
- "pt-PT",
- "ro-RO",
- "ru-RU",
- "sk-SK",
- "sl-SL",
- "sv-SE",
- "tr-TR",
- "tzh-HK",
- "tzh-TW",
- "uk-UA",
- "wt-WT",
- "zh-CN"
- ],
- "duckduckgo images": [
- "ar-XA",
- "bg-BG",
- "ca-CT",
- "ca-ES",
- "cs-CZ",
- "da-DK",
- "de-AT",
- "de-CH",
- "de-DE",
- "el-GR",
- "en-AU",
- "en-CA",
- "en-ID",
- "en-IE",
- "en-IL",
- "en-IN",
- "en-MY",
- "en-NZ",
- "en-PH",
- "en-PK",
- "en-SG",
- "en-TH",
- "en-UK",
- "en-US",
- "en-VN",
- "en-ZA",
- "es-AR",
- "es-CL",
- "es-CO",
- "es-ES",
- "es-MX",
- "es-PE",
- "es-US",
- "et-EE",
- "fi-FI",
- "fr-BE",
- "fr-CA",
- "fr-CH",
- "fr-FR",
- "hr-HR",
- "hu-HU",
- "it-IT",
- "jp-JP",
- "kr-KR",
- "lt-LT",
- "lv-LV",
- "nl-BE",
- "nl-NL",
- "no-NO",
- "pl-PL",
- "pt-BR",
- "pt-PT",
- "ro-RO",
- "ru-RU",
- "sk-SK",
- "sl-SL",
- "sv-SE",
- "tr-TR",
- "tzh-HK",
- "tzh-TW",
- "uk-UA",
- "wt-WT",
- "zh-CN"
- ],
- "google": {
- "af": {
- "name": "Afrikaans"
- },
- "ar": {
- "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
- },
- "be": {
- "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
- },
- "bg": {
- "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
- },
- "ca": {
- "name": "catal\u00e0"
- },
- "cs": {
- "name": "\u010de\u0161tina"
- },
- "da": {
- "name": "dansk"
- },
- "de": {
- "name": "Deutsch"
- },
- "el": {
- "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
- },
- "en": {
- "name": "English"
- },
- "eo": {
- "name": "esperanto"
- },
- "es": {
- "name": "espa\u00f1ol"
- },
- "et": {
- "name": "eesti"
- },
- "fa": {
- "name": "\u0641\u0627\u0631\u0633\u06cc"
- },
- "fi": {
- "name": "suomi"
- },
- "fr": {
- "name": "fran\u00e7ais"
- },
- "hi": {
- "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
- },
- "hr": {
- "name": "hrvatski"
- },
- "hu": {
- "name": "magyar"
- },
- "hy": {
- "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576"
- },
- "id": {
- "name": "Indonesia"
- },
- "is": {
- "name": "\u00edslenska"
- },
- "it": {
- "name": "italiano"
- },
- "iw": {
- "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
- },
- "ja": {
- "name": "\u65e5\u672c\u8a9e"
- },
- "ko": {
- "name": "\ud55c\uad6d\uc5b4"
- },
- "lt": {
- "name": "lietuvi\u0173"
- },
- "lv": {
- "name": "latvie\u0161u"
- },
- "nl": {
- "name": "Nederlands"
- },
- "no": {
- "name": "norsk"
- },
- "pl": {
- "name": "polski"
- },
- "pt": {
- "name": "portugu\u00eas"
- },
- "ro": {
- "name": "rom\u00e2n\u0103"
- },
- "ru": {
- "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439"
- },
- "sk": {
- "name": "sloven\u010dina"
- },
- "sl": {
- "name": "sloven\u0161\u010dina"
- },
- "sr": {
- "name": "\u0441\u0440\u043f\u0441\u043a\u0438"
- },
- "sv": {
- "name": "svenska"
- },
- "sw": {
- "name": "Kiswahili"
- },
- "th": {
- "name": "\u0e44\u0e17\u0e22"
- },
- "tl": {
- "name": "Filipino"
- },
- "tr": {
- "name": "T\u00fcrk\u00e7e"
- },
- "uk": {
- "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
- },
- "vi": {
- "name": "Ti\u1ebfng Vi\u1ec7t"
- },
- "zh-CN": {
- "name": "\u4e2d\u6587 (\u7b80\u4f53)"
- },
- "zh-TW": {
- "name": "\u4e2d\u6587 (\u7e41\u9ad4)"
- }
- },
- "google images": {
- "af": {
- "name": "Afrikaans"
- },
- "ar": {
- "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
- },
- "be": {
- "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
- },
- "bg": {
- "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
- },
- "ca": {
- "name": "catal\u00e0"
- },
- "cs": {
- "name": "\u010de\u0161tina"
- },
- "da": {
- "name": "dansk"
- },
- "de": {
- "name": "Deutsch"
- },
- "el": {
- "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
- },
- "en": {
- "name": "English"
- },
- "eo": {
- "name": "esperanto"
- },
- "es": {
- "name": "espa\u00f1ol"
- },
- "et": {
- "name": "eesti"
- },
- "fa": {
- "name": "\u0641\u0627\u0631\u0633\u06cc"
- },
- "fi": {
- "name": "suomi"
- },
- "fr": {
- "name": "fran\u00e7ais"
- },
- "hi": {
- "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
- },
- "hr": {
- "name": "hrvatski"
- },
- "hu": {
- "name": "magyar"
- },
- "hy": {
- "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576"
- },
- "id": {
- "name": "Indonesia"
- },
- "is": {
- "name": "\u00edslenska"
- },
- "it": {
- "name": "italiano"
- },
- "iw": {
- "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
- },
- "ja": {
- "name": "\u65e5\u672c\u8a9e"
- },
- "ko": {
- "name": "\ud55c\uad6d\uc5b4"
- },
- "lt": {
- "name": "lietuvi\u0173"
- },
- "lv": {
- "name": "latvie\u0161u"
- },
- "nl": {
- "name": "Nederlands"
- },
- "no": {
- "name": "norsk"
- },
- "pl": {
- "name": "polski"
- },
- "pt": {
- "name": "portugu\u00eas"
- },
- "ro": {
- "name": "rom\u00e2n\u0103"
- },
- "ru": {
- "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439"
- },
- "sk": {
- "name": "sloven\u010dina"
- },
- "sl": {
- "name": "sloven\u0161\u010dina"
- },
- "sr": {
- "name": "\u0441\u0440\u043f\u0441\u043a\u0438"
- },
- "sv": {
- "name": "svenska"
- },
- "sw": {
- "name": "Kiswahili"
- },
- "th": {
- "name": "\u0e44\u0e17\u0e22"
- },
- "tl": {
- "name": "Filipino"
- },
- "tr": {
- "name": "T\u00fcrk\u00e7e"
- },
- "uk": {
- "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
- },
- "vi": {
- "name": "Ti\u1ebfng Vi\u1ec7t"
- },
- "zh-CN": {
- "name": "\u4e2d\u6587 (\u7b80\u4f53)"
- },
- "zh-TW": {
- "name": "\u4e2d\u6587 (\u7e41\u9ad4)"
- }
- },
- "google news": {
- "af": {
- "name": "Afrikaans"
- },
- "ar": {
- "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
- },
- "be": {
- "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
- },
- "bg": {
- "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
- },
- "ca": {
- "name": "catal\u00e0"
- },
- "cs": {
- "name": "\u010de\u0161tina"
- },
- "da": {
- "name": "dansk"
- },
- "de": {
- "name": "Deutsch"
- },
- "el": {
- "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
- },
- "en": {
- "name": "English"
- },
- "eo": {
- "name": "esperanto"
- },
- "es": {
- "name": "espa\u00f1ol"
- },
- "et": {
- "name": "eesti"
- },
- "fa": {
- "name": "\u0641\u0627\u0631\u0633\u06cc"
- },
- "fi": {
- "name": "suomi"
- },
- "fr": {
- "name": "fran\u00e7ais"
- },
- "hi": {
- "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
- },
- "hr": {
- "name": "hrvatski"
- },
- "hu": {
- "name": "magyar"
- },
- "hy": {
- "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576"
- },
- "id": {
- "name": "Indonesia"
- },
- "is": {
- "name": "\u00edslenska"
- },
- "it": {
- "name": "italiano"
- },
- "iw": {
- "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
- },
- "ja": {
- "name": "\u65e5\u672c\u8a9e"
- },
- "ko": {
- "name": "\ud55c\uad6d\uc5b4"
- },
- "lt": {
- "name": "lietuvi\u0173"
- },
- "lv": {
- "name": "latvie\u0161u"
- },
- "nl": {
- "name": "Nederlands"
- },
- "no": {
- "name": "norsk"
- },
- "pl": {
- "name": "polski"
- },
- "pt": {
- "name": "portugu\u00eas"
- },
- "ro": {
- "name": "rom\u00e2n\u0103"
- },
- "ru": {
- "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439"
- },
- "sk": {
- "name": "sloven\u010dina"
- },
- "sl": {
- "name": "sloven\u0161\u010dina"
- },
- "sr": {
- "name": "\u0441\u0440\u043f\u0441\u043a\u0438"
- },
- "sv": {
- "name": "svenska"
- },
- "sw": {
- "name": "Kiswahili"
- },
- "th": {
- "name": "\u0e44\u0e17\u0e22"
- },
- "tl": {
- "name": "Filipino"
- },
- "tr": {
- "name": "T\u00fcrk\u00e7e"
- },
- "uk": {
- "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
- },
- "vi": {
- "name": "Ti\u1ebfng Vi\u1ec7t"
- },
- "zh-CN": {
- "name": "\u4e2d\u6587 (\u7b80\u4f53)"
- },
- "zh-TW": {
- "name": "\u4e2d\u6587 (\u7e41\u9ad4)"
- }
- },
- "google scholar": {
- "af": {
- "name": "Afrikaans"
- },
- "ar": {
- "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
- },
- "be": {
- "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
- },
- "bg": {
- "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
- },
- "ca": {
- "name": "catal\u00e0"
- },
- "cs": {
- "name": "\u010de\u0161tina"
- },
- "da": {
- "name": "dansk"
- },
- "de": {
- "name": "Deutsch"
- },
- "el": {
- "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
- },
- "en": {
- "name": "English"
- },
- "eo": {
- "name": "esperanto"
- },
- "es": {
- "name": "espa\u00f1ol"
- },
- "et": {
- "name": "eesti"
- },
- "fa": {
- "name": "\u0641\u0627\u0631\u0633\u06cc"
- },
- "fi": {
- "name": "suomi"
- },
- "fr": {
- "name": "fran\u00e7ais"
- },
- "hi": {
- "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
- },
- "hr": {
- "name": "hrvatski"
- },
- "hu": {
- "name": "magyar"
- },
- "hy": {
- "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576"
- },
- "id": {
- "name": "Indonesia"
- },
- "is": {
- "name": "\u00edslenska"
- },
- "it": {
- "name": "italiano"
- },
- "iw": {
- "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
- },
- "ja": {
- "name": "\u65e5\u672c\u8a9e"
- },
- "ko": {
- "name": "\ud55c\uad6d\uc5b4"
- },
- "lt": {
- "name": "lietuvi\u0173"
- },
- "lv": {
- "name": "latvie\u0161u"
- },
- "nl": {
- "name": "Nederlands"
- },
- "no": {
- "name": "norsk"
- },
- "pl": {
- "name": "polski"
- },
- "pt": {
- "name": "portugu\u00eas"
- },
- "ro": {
- "name": "rom\u00e2n\u0103"
- },
- "ru": {
- "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439"
- },
- "sk": {
- "name": "sloven\u010dina"
- },
- "sl": {
- "name": "sloven\u0161\u010dina"
- },
- "sr": {
- "name": "\u0441\u0440\u043f\u0441\u043a\u0438"
- },
- "sv": {
- "name": "svenska"
- },
- "sw": {
- "name": "Kiswahili"
- },
- "th": {
- "name": "\u0e44\u0e17\u0e22"
- },
- "tl": {
- "name": "Filipino"
- },
- "tr": {
- "name": "T\u00fcrk\u00e7e"
- },
- "uk": {
- "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
- },
- "vi": {
- "name": "Ti\u1ebfng Vi\u1ec7t"
- },
- "zh-CN": {
- "name": "\u4e2d\u6587 (\u7b80\u4f53)"
- },
- "zh-TW": {
- "name": "\u4e2d\u6587 (\u7e41\u9ad4)"
- }
- },
- "google videos": {
- "af": {
- "name": "Afrikaans"
- },
- "ar": {
- "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
- },
- "be": {
- "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
- },
- "bg": {
- "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
- },
- "ca": {
- "name": "catal\u00e0"
- },
- "cs": {
- "name": "\u010de\u0161tina"
- },
- "da": {
- "name": "dansk"
- },
- "de": {
- "name": "Deutsch"
- },
- "el": {
- "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
- },
- "en": {
- "name": "English"
- },
- "eo": {
- "name": "esperanto"
- },
- "es": {
- "name": "espa\u00f1ol"
- },
- "et": {
- "name": "eesti"
- },
- "fa": {
- "name": "\u0641\u0627\u0631\u0633\u06cc"
- },
- "fi": {
- "name": "suomi"
- },
- "fr": {
- "name": "fran\u00e7ais"
- },
- "hi": {
- "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
- },
- "hr": {
- "name": "hrvatski"
- },
- "hu": {
- "name": "magyar"
- },
- "hy": {
- "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576"
- },
- "id": {
- "name": "Indonesia"
- },
- "is": {
- "name": "\u00edslenska"
- },
- "it": {
- "name": "italiano"
- },
- "iw": {
- "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
- },
- "ja": {
- "name": "\u65e5\u672c\u8a9e"
- },
- "ko": {
- "name": "\ud55c\uad6d\uc5b4"
- },
- "lt": {
- "name": "lietuvi\u0173"
- },
- "lv": {
- "name": "latvie\u0161u"
- },
- "nl": {
- "name": "Nederlands"
- },
- "no": {
- "name": "norsk"
- },
- "pl": {
- "name": "polski"
- },
- "pt": {
- "name": "portugu\u00eas"
- },
- "ro": {
- "name": "rom\u00e2n\u0103"
- },
- "ru": {
- "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439"
- },
- "sk": {
- "name": "sloven\u010dina"
- },
- "sl": {
- "name": "sloven\u0161\u010dina"
- },
- "sr": {
- "name": "\u0441\u0440\u043f\u0441\u043a\u0438"
- },
- "sv": {
- "name": "svenska"
- },
- "sw": {
- "name": "Kiswahili"
- },
- "th": {
- "name": "\u0e44\u0e17\u0e22"
- },
- "tl": {
- "name": "Filipino"
- },
- "tr": {
- "name": "T\u00fcrk\u00e7e"
- },
- "uk": {
- "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
- },
- "vi": {
- "name": "Ti\u1ebfng Vi\u1ec7t"
- },
- "zh-CN": {
- "name": "\u4e2d\u6587 (\u7b80\u4f53)"
- },
- "zh-TW": {
- "name": "\u4e2d\u6587 (\u7e41\u9ad4)"
- }
- },
- "peertube": [
- "ca",
- "cs",
- "de",
- "el",
- "en",
- "eo",
- "es",
- "eu",
- "fi",
- "fr",
- "gd",
- "it",
- "ja",
- "nl",
- "oc",
- "pl",
- "pt",
- "ru",
- "sv",
- "zh"
- ],
- "qwant": {
- "bg-BG": "bg_BG",
- "ca-ES": "ca_ES",
- "cs-CZ": "cs_CZ",
- "da-DK": "da_DK",
- "de-AT": "de_AT",
- "de-CH": "de_CH",
- "de-DE": "de_DE",
- "el-GR": "el_GR",
- "en-AU": "en_AU",
- "en-CA": "en_CA",
- "en-GB": "en_GB",
- "en-IE": "en_IE",
- "en-MY": "en_MY",
- "en-NZ": "en_NZ",
- "en-US": "en_US",
- "es-AR": "es_AR",
- "es-CL": "es_CL",
- "es-ES": "es_ES",
- "es-MX": "es_MX",
- "et-EE": "et_EE",
- "fi-FI": "fi_FI",
- "fr-BE": "fr_BE",
- "fr-CA": "fr_CA",
- "fr-CH": "fr_CH",
- "fr-FR": "fr_FR",
- "hu-HU": "hu_HU",
- "it-CH": "it_CH",
- "it-IT": "it_IT",
- "ko-KR": "ko_KR",
- "nb-NO": "nb_NO",
- "nl-BE": "nl_BE",
- "nl-NL": "nl_NL",
- "pl-PL": "pl_PL",
- "pt-PT": "pt_PT",
- "ro-RO": "ro_RO",
- "sv-SE": "sv_SE",
- "th-TH": "th_TH",
- "zh-CN": "zh_CN",
- "zh-HK": "zh_HK"
- },
- "qwant images": {
- "bg-BG": "bg_BG",
- "ca-ES": "ca_ES",
- "cs-CZ": "cs_CZ",
- "da-DK": "da_DK",
- "de-AT": "de_AT",
- "de-CH": "de_CH",
- "de-DE": "de_DE",
- "el-GR": "el_GR",
- "en-AU": "en_AU",
- "en-CA": "en_CA",
- "en-GB": "en_GB",
- "en-IE": "en_IE",
- "en-MY": "en_MY",
- "en-NZ": "en_NZ",
- "en-US": "en_US",
- "es-AR": "es_AR",
- "es-CL": "es_CL",
- "es-ES": "es_ES",
- "es-MX": "es_MX",
- "et-EE": "et_EE",
- "fi-FI": "fi_FI",
- "fr-BE": "fr_BE",
- "fr-CA": "fr_CA",
- "fr-CH": "fr_CH",
- "fr-FR": "fr_FR",
- "hu-HU": "hu_HU",
- "it-CH": "it_CH",
- "it-IT": "it_IT",
- "ko-KR": "ko_KR",
- "nb-NO": "nb_NO",
- "nl-BE": "nl_BE",
- "nl-NL": "nl_NL",
- "pl-PL": "pl_PL",
- "pt-PT": "pt_PT",
- "ro-RO": "ro_RO",
- "sv-SE": "sv_SE",
- "th-TH": "th_TH",
- "zh-CN": "zh_CN",
- "zh-HK": "zh_HK"
- },
- "qwant news": {
- "ca-ES": "ca_ES",
- "de-AT": "de_AT",
- "de-CH": "de_CH",
- "de-DE": "de_DE",
- "en-AU": "en_AU",
- "en-CA": "en_CA",
- "en-GB": "en_GB",
- "en-IE": "en_IE",
- "en-MY": "en_MY",
- "en-NZ": "en_NZ",
- "en-US": "en_US",
- "es-AR": "es_AR",
- "es-CL": "es_CL",
- "es-ES": "es_ES",
- "es-MX": "es_MX",
- "fr-BE": "fr_BE",
- "fr-CA": "fr_CA",
- "fr-CH": "fr_CH",
- "fr-FR": "fr_FR",
- "it-CH": "it_CH",
- "it-IT": "it_IT",
- "nl-BE": "nl_BE",
- "nl-NL": "nl_NL",
- "pt-PT": "pt_PT"
- },
- "qwant videos": {
- "bg-BG": "bg_BG",
- "ca-ES": "ca_ES",
- "cs-CZ": "cs_CZ",
- "da-DK": "da_DK",
- "de-AT": "de_AT",
- "de-CH": "de_CH",
- "de-DE": "de_DE",
- "el-GR": "el_GR",
- "en-AU": "en_AU",
- "en-CA": "en_CA",
- "en-GB": "en_GB",
- "en-IE": "en_IE",
- "en-MY": "en_MY",
- "en-NZ": "en_NZ",
- "en-US": "en_US",
- "es-AR": "es_AR",
- "es-CL": "es_CL",
- "es-ES": "es_ES",
- "es-MX": "es_MX",
- "et-EE": "et_EE",
- "fi-FI": "fi_FI",
- "fr-BE": "fr_BE",
- "fr-CA": "fr_CA",
- "fr-CH": "fr_CH",
- "fr-FR": "fr_FR",
- "hu-HU": "hu_HU",
- "it-CH": "it_CH",
- "it-IT": "it_IT",
- "ko-KR": "ko_KR",
- "nb-NO": "nb_NO",
- "nl-BE": "nl_BE",
- "nl-NL": "nl_NL",
- "pl-PL": "pl_PL",
- "pt-PT": "pt_PT",
- "ro-RO": "ro_RO",
- "sv-SE": "sv_SE",
- "th-TH": "th_TH",
- "zh-CN": "zh_CN",
- "zh-HK": "zh_HK"
- },
- "startpage": {
- "af": {
- "alias": "afrikaans"
- },
- "am": {
- "alias": "amharic"
- },
- "ar": {
- "alias": "arabic"
- },
- "az": {
- "alias": "azerbaijani"
- },
- "be": {
- "alias": "belarusian"
- },
- "bg": {
- "alias": "bulgarian"
- },
- "bn": {
- "alias": "bengali"
- },
- "bs": {
- "alias": "bosnian"
- },
- "ca": {
- "alias": "catalan"
- },
- "cs": {
- "alias": "czech"
- },
- "cy": {
- "alias": "welsh"
- },
- "da": {
- "alias": "dansk"
- },
- "de": {
- "alias": "deutsch"
- },
- "el": {
- "alias": "greek"
- },
- "en": {
- "alias": "english"
- },
- "en-GB": {
- "alias": "english_uk"
- },
- "eo": {
- "alias": "esperanto"
- },
- "es": {
- "alias": "espanol"
- },
- "et": {
- "alias": "estonian"
- },
- "eu": {
- "alias": "basque"
- },
- "fa": {
- "alias": "persian"
- },
- "fi": {
- "alias": "suomi"
- },
- "fo": {
- "alias": "faroese"
- },
- "fr": {
- "alias": "francais"
- },
- "fy": {
- "alias": "frisian"
- },
- "ga": {
- "alias": "irish"
- },
- "gd": {
- "alias": "gaelic"
- },
- "gl": {
- "alias": "galician"
- },
- "gu": {
- "alias": "gujarati"
- },
- "he": {
- "alias": "hebrew"
- },
- "hi": {
- "alias": "hindi"
- },
- "hr": {
- "alias": "croatian"
- },
- "hu": {
- "alias": "hungarian"
- },
- "ia": {
- "alias": "interlingua"
- },
- "id": {
- "alias": "indonesian"
- },
- "is": {
- "alias": "icelandic"
- },
- "it": {
- "alias": "italiano"
- },
- "ja": {
- "alias": "nihongo"
- },
- "jv": {
- "alias": "javanese"
- },
- "ka": {
- "alias": "georgian"
- },
- "kn": {
- "alias": "kannada"
- },
- "ko": {
- "alias": "hangul"
- },
- "la": {
- "alias": "latin"
- },
- "lt": {
- "alias": "lithuanian"
- },
- "lv": {
- "alias": "latvian"
- },
- "mai": {
- "alias": "bihari"
- },
- "mk": {
- "alias": "macedonian"
- },
- "ml": {
- "alias": "malayalam"
- },
- "mr": {
- "alias": "marathi"
- },
- "ms": {
- "alias": "malay"
- },
- "mt": {
- "alias": "maltese"
- },
- "ne": {
- "alias": "nepali"
- },
- "nl": {
- "alias": "nederlands"
- },
- "no": {
- "alias": "norsk"
- },
- "oc": {
- "alias": "occitan"
- },
- "pa": {
- "alias": "punjabi"
- },
- "pl": {
- "alias": "polski"
- },
- "pt": {
- "alias": "portugues"
- },
- "ro": {
- "alias": "romanian"
- },
- "ru": {
- "alias": "russian"
- },
- "si": {
- "alias": "sinhalese"
- },
- "sk": {
- "alias": "slovak"
- },
- "sl": {
- "alias": "slovenian"
- },
- "sq": {
- "alias": "albanian"
- },
- "sr": {
- "alias": "serbian"
- },
- "su": {
- "alias": "sudanese"
- },
- "sv": {
- "alias": "svenska"
- },
- "sw": {
- "alias": "swahili"
- },
- "ta": {
- "alias": "tamil"
- },
- "te": {
- "alias": "telugu"
- },
- "th": {
- "alias": "thai"
- },
- "ti": {
- "alias": "tigrinya"
- },
- "tl": {
- "alias": "tagalog"
- },
- "tr": {
- "alias": "turkce"
- },
- "uk": {
- "alias": "ukrainian"
- },
- "ur": {
- "alias": "urdu"
- },
- "uz": {
- "alias": "uzbek"
- },
- "vi": {
- "alias": "vietnamese"
- },
- "xh": {
- "alias": "xhosa"
- },
- "zh": {
- "alias": "jiantizhongwen"
- },
- "zh-HK": {
- "alias": "fantizhengwen"
- },
- "zh-TW": {
- "alias": "fantizhengwen"
- },
- "zu": {
- "alias": "zulu"
- }
- },
- "wikidata": {
- "ab": {
- "english_name": "Abkhazian",
- "name": "\u0410\u0525\u0441\u0443\u0430"
- },
- "ace": {
- "english_name": "Acehnese",
- "name": "Basa Ac\u00e8h"
- },
- "ady": {
- "english_name": "Adyghe",
- "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d"
- },
- "af": {
- "english_name": "Afrikaans",
- "name": "Afrikaans"
- },
- "ak": {
- "english_name": "Akan",
- "name": "Akana"
- },
- "als": {
- "english_name": "Alemannic",
- "name": "Alemannisch"
- },
- "alt": {
- "english_name": "Southern Altai",
- "name": "\u0410\u043b\u0442\u0430\u0439"
- },
- "am": {
- "english_name": "Amharic",
- "name": "\u12a0\u121b\u122d\u129b"
- },
- "ami": {
- "english_name": "Amis",
- "name": "Pangcah"
- },
- "an": {
- "english_name": "Aragonese",
- "name": "Aragon\u00e9s"
- },
- "ang": {
- "english_name": "Anglo-Saxon",
- "name": "\u00c6nglisc"
- },
- "ar": {
- "english_name": "Arabic",
- "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
- },
- "arc": {
- "english_name": "Aramaic",
- "name": "\u0710\u072a\u0721\u071d\u0710"
- },
- "ary": {
- "english_name": "Moroccan Arabic",
- "name": "\u062f\u0627\u0631\u064a\u062c\u0629"
- },
- "arz": {
- "english_name": "Egyptian Arabic",
- "name": "\u0645\u0635\u0631\u0649 (Ma\u1e63ri)"
- },
- "as": {
- "english_name": "Assamese",
- "name": "\u0985\u09b8\u09ae\u09c0\u09af\u09bc\u09be"
- },
- "ast": {
- "english_name": "Asturian",
- "name": "Asturianu"
- },
- "atj": {
- "english_name": "Atikamekw",
- "name": "Atikamekw"
- },
- "av": {
- "english_name": "Avar",
- "name": "\u0410\u0432\u0430\u0440"
- },
- "avk": {
- "english_name": "Kotava",
- "name": "Kotava"
- },
- "awa": {
- "english_name": "Awadhi",
- "name": "\u0905\u0935\u0927\u0940"
- },
- "ay": {
- "english_name": "Aymara",
- "name": "Aymar"
- },
- "az": {
- "english_name": "Azerbaijani",
- "name": "Az\u0259rbaycanca"
- },
- "azb": {
- "english_name": "South Azerbaijani",
- "name": "\u062a\u06c6\u0631\u06a9\u062c\u0647"
- },
- "ba": {
- "english_name": "Bashkir",
- "name": "\u0411\u0430\u0448\u04a1\u043e\u0440\u0442"
- },
- "ban": {
- "english_name": "Balinese",
- "name": "Bali"
- },
- "bar": {
- "english_name": "Bavarian",
- "name": "Boarisch"
- },
- "bat-smg": {
- "english_name": "Samogitian",
- "name": "\u017demait\u0117\u0161ka"
- },
- "bcl": {
- "english_name": "Central Bicolano",
- "name": "Bikol"
- },
- "be": {
- "english_name": "Belarusian",
- "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
- },
- "be-tarask": {
- "english_name": "Belarusian (Tara\u0161kievica)",
- "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f (\u0442\u0430\u0440\u0430\u0448\u043a\u0435\u0432\u0456\u0446\u0430)"
- },
- "bg": {
- "english_name": "Bulgarian",
- "name": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
- },
- "bh": {
- "english_name": "Bhojpuri",
- "name": "\u092d\u094b\u091c\u092a\u0941\u0930\u0940"
- },
- "bi": {
- "english_name": "Bislama",
- "name": "Bislama"
- },
- "bjn": {
- "english_name": "Banjar",
- "name": "Bahasa Banjar"
- },
- "blk": {
- "english_name": "Pa'O",
- "name": "\u1015\u1021\u102d\u102f\u101d\u103a\u108f\u1018\u102c\u108f\u101e\u102c\u108f"
- },
- "bm": {
- "english_name": "Bambara",
- "name": "Bamanankan"
- },
- "bn": {
- "english_name": "Bengali",
- "name": "\u09ac\u09be\u0982\u09b2\u09be"
- },
- "bo": {
- "english_name": "Tibetan",
- "name": "\u0f56\u0f7c\u0f51\u0f0b\u0f66\u0f90\u0f51"
- },
- "bpy": {
- "english_name": "Bishnupriya Manipuri",
- "name": "\u0987\u09ae\u09be\u09b0 \u09a0\u09be\u09b0/\u09ac\u09bf\u09b7\u09cd\u09a3\u09c1\u09aa\u09cd\u09b0\u09bf\u09af\u09bc\u09be \u09ae\u09a3\u09bf\u09aa\u09c1\u09b0\u09c0"
- },
- "br": {
- "english_name": "Breton",
- "name": "Brezhoneg"
- },
- "bs": {
- "english_name": "Bosnian",
- "name": "Bosanski"
- },
- "bug": {
- "english_name": "Buginese",
- "name": "Basa Ugi"
- },
- "bxr": {
- "english_name": "Buryat",
- "name": "\u0411\u0443\u0440\u044f\u0430\u0434"
- },
- "ca": {
- "english_name": "Catalan",
- "name": "Catal\u00e0"
- },
- "cbk-zam": {
- "english_name": "Zamboanga Chavacano",
- "name": "Chavacano de Zamboanga"
- },
- "cdo": {
- "english_name": "Min Dong",
- "name": "M\u00ecng-d\u0115\u0324ng-ng\u1e73\u0304"
- },
- "ce": {
- "english_name": "Chechen",
- "name": "\u041d\u043e\u0445\u0447\u0438\u0439\u043d"
- },
- "ceb": {
- "english_name": "Cebuano",
- "name": "Sinugboanong Binisaya"
- },
- "ch": {
- "english_name": "Chamorro",
- "name": "Chamoru"
- },
- "chr": {
- "english_name": "Cherokee",
- "name": "\u13e3\u13b3\u13a9"
- },
- "chy": {
- "english_name": "Cheyenne",
- "name": "Tsets\u00eahest\u00e2hese"
- },
- "ckb": {
- "english_name": "Sorani",
- "name": "Soran\u00ee / \u06a9\u0648\u0631\u062f\u06cc"
- },
- "co": {
- "english_name": "Corsican",
- "name": "Corsu"
- },
- "cr": {
- "english_name": "Cree",
- "name": "Nehiyaw"
- },
- "crh": {
- "english_name": "Crimean Tatar",
- "name": "Q\u0131r\u0131mtatarca"
- },
- "cs": {
- "english_name": "Czech",
- "name": "\u010ce\u0161tina"
- },
- "csb": {
- "english_name": "Kashubian",
- "name": "Kasz\u00ebbsczi"
- },
- "cu": {
- "english_name": "Old Church Slavonic",
- "name": "\u0421\u043b\u043e\u0432\u0463\u043d\u044c\u0441\u043a\u044a"
- },
- "cv": {
- "english_name": "Chuvash",
- "name": "\u0427\u0103\u0432\u0430\u0448"
- },
- "cy": {
- "english_name": "Welsh",
- "name": "Cymraeg"
- },
- "da": {
- "english_name": "Danish",
- "name": "Dansk"
- },
- "dag": {
- "english_name": "Dagbani",
- "name": "Dagbanli"
- },
- "de": {
- "english_name": "German",
- "name": "Deutsch"
- },
- "din": {
- "english_name": "Dinka",
- "name": "Thu\u0254\u014bj\u00e4\u014b"
- },
- "diq": {
- "english_name": "Zazaki",
- "name": "Zazaki"
- },
- "dsb": {
- "english_name": "Lower Sorbian",
- "name": "Dolnoserbski"
- },
- "dty": {
- "english_name": "Doteli",
- "name": "\u0921\u094b\u091f\u0947\u0932\u0940"
- },
- "dv": {
- "english_name": "Divehi",
- "name": "\u078b\u07a8\u0788\u07ac\u0780\u07a8\u0784\u07a6\u0790\u07b0"
- },
- "dz": {
- "english_name": "Dzongkha",
- "name": "\u0f47\u0f7c\u0f44\u0f0b\u0f41"
- },
- "ee": {
- "english_name": "Ewe",
- "name": "E\u028begbe"
- },
- "el": {
- "english_name": "Greek",
- "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
- },
- "eml": {
- "english_name": "Emilian-Romagnol",
- "name": "Emili\u00e0n e rumagn\u00f2l"
- },
- "en": {
- "english_name": "English",
- "name": "English"
- },
- "eo": {
- "english_name": "Esperanto",
- "name": "Esperanto"
- },
- "es": {
- "english_name": "Spanish",
- "name": "Espa\u00f1ol"
- },
- "et": {
- "english_name": "Estonian",
- "name": "Eesti"
- },
- "eu": {
- "english_name": "Basque",
- "name": "Euskara"
- },
- "ext": {
- "english_name": "Extremaduran",
- "name": "Estreme\u00f1u"
- },
- "fa": {
- "english_name": "Persian",
- "name": "\u0641\u0627\u0631\u0633\u06cc"
- },
- "ff": {
- "english_name": "Fula",
- "name": "Fulfulde"
- },
- "fi": {
- "english_name": "Finnish",
- "name": "Suomi"
- },
- "fiu-vro": {
- "english_name": "V\u00f5ro",
- "name": "V\u00f5ro"
- },
- "fj": {
- "english_name": "Fijian",
- "name": "Na Vosa Vakaviti"
- },
- "fo": {
- "english_name": "Faroese",
- "name": "F\u00f8royskt"
- },
- "fr": {
- "english_name": "French",
- "name": "Fran\u00e7ais"
- },
- "frp": {
- "english_name": "Franco-Proven\u00e7al",
- "name": "Arpetan"
- },
- "frr": {
- "english_name": "North Frisian",
- "name": "Nordfrasch"
- },
- "fur": {
- "english_name": "Friulian",
- "name": "Furlan"
- },
- "fy": {
- "english_name": "West Frisian",
- "name": "Frysk"
- },
- "ga": {
- "english_name": "Irish",
- "name": "Gaeilge"
- },
- "gag": {
- "english_name": "Gagauz",
- "name": "Gagauz"
- },
- "gan": {
- "english_name": "Gan",
- "name": "\u8d1b\u8a9e"
- },
- "gcr": {
- "english_name": "Guianan Creole",
- "name": "Kriy\u00f2l Gwiyannen"
- },
- "gd": {
- "english_name": "Scottish Gaelic",
- "name": "G\u00e0idhlig"
- },
- "gl": {
- "english_name": "Galician",
- "name": "Galego"
- },
- "glk": {
- "english_name": "Gilaki",
- "name": "\u06af\u06cc\u0644\u06a9\u06cc"
- },
- "gn": {
- "english_name": "Guarani",
- "name": "Ava\u00f1e'\u1ebd"
- },
- "gom": {
- "english_name": "Goan Konkani",
- "name": "\u0917\u094b\u0902\u092f\u091a\u0940 \u0915\u094b\u0902\u0915\u0923\u0940 / G\u00f5ychi Konknni"
- },
- "gor": {
- "english_name": "Gorontalo",
- "name": "Hulontalo"
- },
- "got": {
- "english_name": "Gothic",
- "name": "\ud800\udf32\ud800\udf3f\ud800\udf44\ud800\udf39\ud800\udf43\ud800\udf3a"
- },
- "gu": {
- "english_name": "Gujarati",
- "name": "\u0a97\u0ac1\u0a9c\u0ab0\u0abe\u0aa4\u0ac0"
- },
- "guw": {
- "english_name": "Gun",
- "name": "Gungbe"
- },
- "gv": {
- "english_name": "Manx",
- "name": "Gaelg"
- },
- "ha": {
- "english_name": "Hausa",
- "name": "Hausa / \u0647\u064e\u0648\u064f\u0633\u064e"
- },
- "hak": {
- "english_name": "Hakka",
- "name": "Hak-k\u00e2-fa / \u5ba2\u5bb6\u8a71"
- },
- "haw": {
- "english_name": "Hawaiian",
- "name": "Hawai\u02bbi"
- },
- "he": {
- "english_name": "Hebrew",
- "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
- },
- "hi": {
- "english_name": "Hindi",
- "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
- },
- "hif": {
- "english_name": "Fiji Hindi",
- "name": "Fiji Hindi"
- },
- "hr": {
- "english_name": "Croatian",
- "name": "Hrvatski"
- },
- "hsb": {
- "english_name": "Upper Sorbian",
- "name": "Hornjoserbsce"
- },
- "ht": {
- "english_name": "Haitian",
- "name": "Kr\u00e8yol ayisyen"
- },
- "hu": {
- "english_name": "Hungarian",
- "name": "Magyar"
- },
- "hy": {
- "english_name": "Armenian",
- "name": "\u0540\u0561\u0575\u0565\u0580\u0565\u0576"
- },
- "hyw": {
- "english_name": "Western Armenian",
- "name": "\u0531\u0580\u0565\u0582\u0574\u057f\u0561\u0570\u0561\u0575\u0565\u0580\u0567\u0576"
- },
- "ia": {
- "english_name": "Interlingua",
- "name": "Interlingua"
- },
- "id": {
- "english_name": "Indonesian",
- "name": "Bahasa Indonesia"
- },
- "ie": {
- "english_name": "Interlingue",
- "name": "Interlingue"
- },
- "ig": {
- "english_name": "Igbo",
- "name": "\u00ccgb\u00f2"
- },
- "ik": {
- "english_name": "Inupiak",
- "name": "I\u00f1upiatun"
- },
- "ilo": {
- "english_name": "Ilokano",
- "name": "Ilokano"
- },
- "inh": {
- "english_name": "Ingush",
- "name": "\u0413\u04c0\u0430\u043b\u0433\u04c0\u0430\u0439"
- },
- "io": {
- "english_name": "Ido",
- "name": "Ido"
- },
- "is": {
- "english_name": "Icelandic",
- "name": "\u00cdslenska"
- },
- "it": {
- "english_name": "Italian",
- "name": "Italiano"
- },
- "iu": {
- "english_name": "Inuktitut",
- "name": "\u1403\u14c4\u1483\u144e\u1450\u1466"
- },
- "ja": {
- "english_name": "Japanese",
- "name": "\u65e5\u672c\u8a9e"
- },
- "jam": {
- "english_name": "Jamaican Patois",
- "name": "Jumiekan Kryuol"
- },
- "jbo": {
- "english_name": "Lojban",
- "name": "Lojban"
- },
- "jv": {
- "english_name": "Javanese",
- "name": "Basa Jawa"
- },
- "ka": {
- "english_name": "Georgian",
- "name": "\u10e5\u10d0\u10e0\u10d7\u10e3\u10da\u10d8"
- },
- "kaa": {
- "english_name": "Karakalpak",
- "name": "Qaraqalpaqsha"
- },
- "kab": {
- "english_name": "Kabyle",
- "name": "Taqbaylit"
- },
- "kbd": {
- "english_name": "Kabardian Circassian",
- "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d (Adighabze)"
- },
- "kbp": {
- "english_name": "Kabiye",
- "name": "Kab\u0269y\u025b"
- },
- "kcg": {
- "english_name": "Tyap",
- "name": "Tyap"
- },
- "kg": {
- "english_name": "Kongo",
- "name": "Kik\u00f4ngo"
- },
- "ki": {
- "english_name": "Kikuyu",
- "name": "G\u0129k\u0169y\u0169"
- },
- "kk": {
- "english_name": "Kazakh",
- "name": "\u049a\u0430\u0437\u0430\u049b\u0448\u0430"
- },
- "kl": {
- "english_name": "Greenlandic",
- "name": "Kalaallisut"
- },
- "km": {
- "english_name": "Khmer",
- "name": "\u1797\u17b6\u179f\u17b6\u1781\u17d2\u1798\u17c2\u179a"
- },
- "kn": {
- "english_name": "Kannada",
- "name": "\u0c95\u0ca8\u0ccd\u0ca8\u0ca1"
- },
- "ko": {
- "english_name": "Korean",
- "name": "\ud55c\uad6d\uc5b4"
- },
- "koi": {
- "english_name": "Komi-Permyak",
- "name": "\u041f\u0435\u0440\u0435\u043c \u041a\u043e\u043c\u0438 (Perem Komi)"
- },
- "krc": {
- "english_name": "Karachay-Balkar",
- "name": "\u041a\u044a\u0430\u0440\u0430\u0447\u0430\u0439-\u041c\u0430\u043b\u043a\u044a\u0430\u0440 (Qarachay-Malqar)"
- },
- "ks": {
- "english_name": "Kashmiri",
- "name": "\u0915\u0936\u094d\u092e\u0940\u0930\u0940 / \u0643\u0634\u0645\u064a\u0631\u064a"
- },
- "ksh": {
- "english_name": "Ripuarian",
- "name": "Ripoarisch"
- },
- "ku": {
- "english_name": "Kurdish",
- "name": "Kurd\u00ee / \u0643\u0648\u0631\u062f\u06cc"
- },
- "kv": {
- "english_name": "Komi",
- "name": "\u041a\u043e\u043c\u0438"
- },
- "kw": {
- "english_name": "Cornish",
- "name": "Kernowek/Karnuack"
- },
- "ky": {
- "english_name": "Kyrgyz",
- "name": "\u041a\u044b\u0440\u0433\u044b\u0437\u0447\u0430"
- },
- "la": {
- "english_name": "Latin",
- "name": "Latina"
- },
- "lad": {
- "english_name": "Ladino",
- "name": "Dzhudezmo"
- },
- "lb": {
- "english_name": "Luxembourgish",
- "name": "L\u00ebtzebuergesch"
- },
- "lbe": {
- "english_name": "Lak",
- "name": "\u041b\u0430\u043a\u043a\u0443"
- },
- "lez": {
- "english_name": "Lezgian",
- "name": "\u041b\u0435\u0437\u0433\u0438 \u0447\u0406\u0430\u043b (Lezgi \u010d\u2019al)"
- },
- "lfn": {
- "english_name": "Lingua Franca Nova",
- "name": "Lingua franca nova"
- },
- "lg": {
- "english_name": "Luganda",
- "name": "Luganda"
- },
- "li": {
- "english_name": "Limburgish",
- "name": "Limburgs"
- },
- "lij": {
- "english_name": "Ligurian",
- "name": "L\u00ecgure"
- },
- "lld": {
- "english_name": "Ladin",
- "name": "Lingaz"
- },
- "lmo": {
- "english_name": "Lombard",
- "name": "Lumbaart"
- },
- "ln": {
- "english_name": "Lingala",
- "name": "Lingala"
- },
- "lo": {
- "english_name": "Lao",
- "name": "\u0ea5\u0eb2\u0ea7"
- },
- "lt": {
- "english_name": "Lithuanian",
- "name": "Lietuvi\u0173"
- },
- "ltg": {
- "english_name": "Latgalian",
- "name": "Latga\u013cu"
- },
- "lv": {
- "english_name": "Latvian",
- "name": "Latvie\u0161u"
- },
- "mad": {
- "english_name": "Madurese",
- "name": "Madhur\u00e2"
- },
- "mai": {
- "english_name": "Maithili",
- "name": "\u092e\u0948\u0925\u093f\u0932\u0940"
- },
- "map-bms": {
- "english_name": "Banyumasan",
- "name": "Basa Banyumasan"
- },
- "mdf": {
- "english_name": "Moksha",
- "name": "\u041c\u043e\u043a\u0448\u0435\u043d\u044c (Mokshanj K\u00e4lj)"
- },
- "mg": {
- "english_name": "Malagasy",
- "name": "Malagasy"
- },
- "mhr": {
- "english_name": "Meadow Mari",
- "name": "\u041e\u043b\u044b\u043a \u041c\u0430\u0440\u0438\u0439 (Olyk Marij)"
- },
- "mi": {
- "english_name": "Maori",
- "name": "M\u0101ori"
- },
- "min": {
- "english_name": "Minangkabau",
- "name": "Minangkabau"
- },
- "mk": {
- "english_name": "Macedonian",
- "name": "\u041c\u0430\u043a\u0435\u0434\u043e\u043d\u0441\u043a\u0438"
- },
- "ml": {
- "english_name": "Malayalam",
- "name": "\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02"
- },
- "mn": {
- "english_name": "Mongolian",
- "name": "\u041c\u043e\u043d\u0433\u043e\u043b"
- },
- "mni": {
- "english_name": "Meitei",
- "name": "\uabc3\uabe4\uabc7\uabe9\uabc2\uabe3\uabdf"
- },
- "mnw": {
- "english_name": "Mon",
- "name": "\u1019\u1014\u103a"
- },
- "mr": {
- "english_name": "Marathi",
- "name": "\u092e\u0930\u093e\u0920\u0940"
- },
- "mrj": {
- "english_name": "Hill Mari",
- "name": "\u041a\u044b\u0440\u044b\u043a \u041c\u0430\u0440\u044b (Kyryk Mary)"
- },
- "ms": {
- "english_name": "Malay",
- "name": "Bahasa Melayu"
- },
- "mt": {
- "english_name": "Maltese",
- "name": "Malti"
- },
- "mwl": {
- "english_name": "Mirandese",
- "name": "Mirand\u00e9s"
- },
- "my": {
- "english_name": "Burmese",
- "name": "\u1019\u103c\u1014\u103a\u1019\u102c\u1018\u102c\u101e\u102c"
- },
- "myv": {
- "english_name": "Erzya",
- "name": "\u042d\u0440\u0437\u044f\u043d\u044c (Erzjanj Kelj)"
- },
- "mzn": {
- "english_name": "Mazandarani",
- "name": "\u0645\u064e\u0632\u0650\u0631\u0648\u0646\u064a"
- },
- "na": {
- "english_name": "Nauruan",
- "name": "dorerin Naoero"
- },
- "nah": {
- "english_name": "Nahuatl",
- "name": "N\u0101huatl"
- },
- "nap": {
- "english_name": "Neapolitan",
- "name": "Nnapulitano"
- },
- "nds": {
- "english_name": "Low Saxon",
- "name": "Plattd\u00fc\u00fctsch"
- },
- "nds-nl": {
- "english_name": "Dutch Low Saxon",
- "name": "Nedersaksisch"
- },
- "ne": {
- "english_name": "Nepali",
- "name": "\u0928\u0947\u092a\u093e\u0932\u0940"
- },
- "new": {
- "english_name": "Newar",
- "name": "\u0928\u0947\u092a\u093e\u0932 \u092d\u093e\u0937\u093e"
- },
- "nia": {
- "english_name": "Nias",
- "name": "Li Niha"
- },
- "nl": {
- "english_name": "Dutch",
- "name": "Nederlands"
- },
- "nn": {
- "english_name": "Norwegian (Nynorsk)",
- "name": "Nynorsk"
- },
- "no": {
- "english_name": "Norwegian (Bokm\u00e5l)",
- "name": "Norsk (Bokm\u00e5l)"
- },
- "nov": {
- "english_name": "Novial",
- "name": "Novial"
- },
- "nqo": {
- "english_name": "N\u2019Ko",
- "name": "\u07d2\u07de\u07cf"
- },
- "nrm": {
- "english_name": "Norman",
- "name": "Nouormand/Normaund"
- },
- "nso": {
- "english_name": "Northern Sotho",
- "name": "Sepedi"
- },
- "nv": {
- "english_name": "Navajo",
- "name": "Din\u00e9 bizaad"
- },
- "ny": {
- "english_name": "Chichewa",
- "name": "Chichewa"
- },
- "oc": {
- "english_name": "Occitan",
- "name": "Occitan"
- },
- "olo": {
- "english_name": "Livvi-Karelian",
- "name": "Karjalan"
- },
- "om": {
- "english_name": "Oromo",
- "name": "Oromoo"
- },
- "or": {
- "english_name": "Oriya",
- "name": "\u0b13\u0b21\u0b3c\u0b3f\u0b06"
- },
- "os": {
- "english_name": "Ossetian",
- "name": "\u0418\u0440\u043e\u043d\u0430\u0443"
- },
- "pa": {
- "english_name": "Punjabi",
- "name": "\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40"
- },
- "pag": {
- "english_name": "Pangasinan",
- "name": "Pangasinan"
- },
- "pam": {
- "english_name": "Kapampangan",
- "name": "Kapampangan"
- },
- "pap": {
- "english_name": "Papiamentu",
- "name": "Papiamentu"
- },
- "pcd": {
- "english_name": "Picard",
- "name": "Picard"
- },
- "pcm": {
- "english_name": "Nigerian Pidgin",
- "name": "Naij\u00e1"
- },
- "pdc": {
- "english_name": "Pennsylvania German",
- "name": "Deitsch"
- },
- "pfl": {
- "english_name": "Palatinate German",
- "name": "P\u00e4lzisch"
- },
- "pi": {
- "english_name": "Pali",
- "name": "\u092a\u093e\u0934\u093f"
- },
- "pih": {
- "english_name": "Norfolk",
- "name": "Norfuk"
- },
- "pl": {
- "english_name": "Polish",
- "name": "Polski"
- },
- "pms": {
- "english_name": "Piedmontese",
- "name": "Piemont\u00e8is"
- },
- "pnb": {
- "english_name": "Western Punjabi",
- "name": "\u0634\u0627\u06c1 \u0645\u06a9\u06be\u06cc \u067e\u0646\u062c\u0627\u0628\u06cc (Sh\u0101hmukh\u012b Pa\u00f1j\u0101b\u012b)"
- },
- "pnt": {
- "english_name": "Pontic",
- "name": "\u03a0\u03bf\u03bd\u03c4\u03b9\u03b1\u03ba\u03ac"
- },
- "ps": {
- "english_name": "Pashto",
- "name": "\u067e\u069a\u062a\u0648"
- },
- "pt": {
- "english_name": "Portuguese",
- "name": "Portugu\u00eas"
- },
- "pwn": {
- "english_name": "Paiwan",
- "name": "Paiwan"
- },
- "qu": {
- "english_name": "Quechua",
- "name": "Qichwa simi"
- },
- "rm": {
- "english_name": "Romansh",
- "name": "Rumantsch"
- },
- "rmy": {
- "english_name": "Romani",
- "name": "romani - \u0930\u094b\u092e\u093e\u0928\u0940"
- },
- "rn": {
- "english_name": "Kirundi",
- "name": "Ikirundi"
- },
- "ro": {
- "english_name": "Romanian",
- "name": "Rom\u00e2n\u0103"
- },
- "roa-rup": {
- "english_name": "Aromanian",
- "name": "Arm\u00e3neashce"
- },
- "roa-tara": {
- "english_name": "Tarantino",
- "name": "Tarand\u00edne"
- },
- "ru": {
- "english_name": "Russian",
- "name": "\u0420\u0443\u0441\u0441\u043a\u0438\u0439"
- },
- "rue": {
- "english_name": "Rusyn",
- "name": "\u0420\u0443\u0441\u0438\u043d\u044c\u0441\u043a\u044b\u0439"
- },
- "rw": {
- "english_name": "Kinyarwanda",
- "name": "Ikinyarwanda"
- },
- "sa": {
- "english_name": "Sanskrit",
- "name": "\u0938\u0902\u0938\u094d\u0915\u0943\u0924\u092e\u094d"
- },
- "sah": {
- "english_name": "Sakha",
- "name": "\u0421\u0430\u0445\u0430 \u0442\u044b\u043b\u0430 (Saxa Tyla)"
- },
- "sat": {
- "english_name": "Santali",
- "name": "\u1c65\u1c5f\u1c71\u1c5b\u1c5f\u1c72\u1c64"
- },
- "sc": {
- "english_name": "Sardinian",
- "name": "Sardu"
- },
- "scn": {
- "english_name": "Sicilian",
- "name": "Sicilianu"
- },
- "sco": {
- "english_name": "Scots",
- "name": "Scots"
- },
- "sd": {
- "english_name": "Sindhi",
- "name": "\u0633\u0646\u068c\u064a\u060c \u0633\u0646\u062f\u06be\u06cc \u060c \u0938\u093f\u0928\u094d\u0927"
- },
- "se": {
- "english_name": "Northern Sami",
- "name": "S\u00e1megiella"
- },
- "sg": {
- "english_name": "Sango",
- "name": "S\u00e4ng\u00f6"
- },
- "sh": {
- "english_name": "Serbo-Croatian",
- "name": "Srpskohrvatski / \u0421\u0440\u043f\u0441\u043a\u043e\u0445\u0440\u0432\u0430\u0442\u0441\u043a\u0438"
- },
- "shi": {
- "english_name": "Tachelhit",
- "name": "Tacl\u1e25it"
- },
- "shn": {
- "english_name": "Shan",
- "name": "\u101c\u102d\u1075\u103a\u1088\u1010\u1086\u1038"
- },
- "si": {
- "english_name": "Sinhalese",
- "name": "\u0dc3\u0dd2\u0d82\u0dc4\u0dbd"
- },
- "simple": {
- "english_name": "Simple English",
- "name": "Simple English"
- },
- "sk": {
- "english_name": "Slovak",
- "name": "Sloven\u010dina"
- },
- "skr": {
- "english_name": "Saraiki",
- "name": "\u0633\u0631\u0627\u0626\u06cc\u06a9\u06cc"
- },
- "sl": {
- "english_name": "Slovenian",
- "name": "Sloven\u0161\u010dina"
- },
- "sm": {
- "english_name": "Samoan",
- "name": "Gagana Samoa"
- },
- "smn": {
- "english_name": "Inari Sami",
- "name": "Anar\u00e2\u0161kiel\u00e2"
- },
- "sn": {
- "english_name": "Shona",
- "name": "chiShona"
- },
- "so": {
- "english_name": "Somali",
- "name": "Soomaali"
- },
- "sq": {
- "english_name": "Albanian",
- "name": "Shqip"
- },
- "sr": {
- "english_name": "Serbian",
- "name": "\u0421\u0440\u043f\u0441\u043a\u0438 / Srpski"
- },
- "srn": {
- "english_name": "Sranan",
- "name": "Sranantongo"
- },
- "ss": {
- "english_name": "Swati",
- "name": "SiSwati"
- },
- "st": {
- "english_name": "Sesotho",
- "name": "Sesotho"
- },
- "stq": {
- "english_name": "Saterland Frisian",
- "name": "Seeltersk"
- },
- "su": {
- "english_name": "Sundanese",
- "name": "Basa Sunda"
- },
- "sv": {
- "english_name": "Swedish",
- "name": "Svenska"
- },
- "sw": {
- "english_name": "Swahili",
- "name": "Kiswahili"
- },
- "szl": {
- "english_name": "Silesian",
- "name": "\u015al\u016fnski"
- },
- "szy": {
- "english_name": "Sakizaya",
- "name": "Sakizaya"
- },
- "ta": {
- "english_name": "Tamil",
- "name": "\u0ba4\u0bae\u0bbf\u0bb4\u0bcd"
- },
- "tay": {
- "english_name": "Atayal",
- "name": "Tayal"
- },
- "tcy": {
- "english_name": "Tulu",
- "name": "\u0ca4\u0cc1\u0cb3\u0cc1"
- },
- "te": {
- "english_name": "Telugu",
- "name": "\u0c24\u0c46\u0c32\u0c41\u0c17\u0c41"
- },
- "tet": {
- "english_name": "Tetum",
- "name": "Tetun"
- },
- "tg": {
- "english_name": "Tajik",
- "name": "\u0422\u043e\u04b7\u0438\u043a\u04e3"
- },
- "th": {
- "english_name": "Thai",
- "name": "\u0e44\u0e17\u0e22"
- },
- "ti": {
- "english_name": "Tigrinya",
- "name": "\u1275\u130d\u122d\u129b"
- },
- "tk": {
- "english_name": "Turkmen",
- "name": "T\u00fcrkmen"
- },
- "tl": {
- "english_name": "Tagalog",
- "name": "Tagalog"
- },
- "tn": {
- "english_name": "Tswana",
- "name": "Setswana"
- },
- "to": {
- "english_name": "Tongan",
- "name": "faka Tonga"
- },
- "tpi": {
- "english_name": "Tok Pisin",
- "name": "Tok Pisin"
- },
- "tr": {
- "english_name": "Turkish",
- "name": "T\u00fcrk\u00e7e"
- },
- "trv": {
- "english_name": "Seediq",
- "name": "Taroko"
- },
- "ts": {
- "english_name": "Tsonga",
- "name": "Xitsonga"
- },
- "tt": {
- "english_name": "Tatar",
- "name": "Tatar\u00e7a / \u0422\u0430\u0442\u0430\u0440\u0447\u0430"
- },
- "tum": {
- "english_name": "Tumbuka",
- "name": "chiTumbuka"
- },
- "tw": {
- "english_name": "Twi",
- "name": "Twi"
- },
- "ty": {
- "english_name": "Tahitian",
- "name": "Reo M\u0101`ohi"
- },
- "tyv": {
- "english_name": "Tuvan",
- "name": "\u0422\u044b\u0432\u0430"
- },
- "udm": {
- "english_name": "Udmurt",
- "name": "\u0423\u0434\u043c\u0443\u0440\u0442 \u043a\u044b\u043b"
- },
- "ug": {
- "english_name": "Uyghur",
- "name": "\u0626\u06c7\u064a\u063a\u06c7\u0631 \u062a\u0649\u0644\u0649"
- },
- "uk": {
- "english_name": "Ukrainian",
- "name": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
- },
- "ur": {
- "english_name": "Urdu",
- "name": "\u0627\u0631\u062f\u0648"
- },
- "uz": {
- "english_name": "Uzbek",
- "name": "O\u2018zbek"
- },
- "ve": {
- "english_name": "Venda",
- "name": "Tshivenda"
- },
- "vec": {
- "english_name": "Venetian",
- "name": "V\u00e8neto"
- },
- "vep": {
- "english_name": "Vepsian",
- "name": "Veps\u00e4n"
- },
- "vi": {
- "english_name": "Vietnamese",
- "name": "Ti\u1ebfng Vi\u1ec7t"
- },
- "vls": {
- "english_name": "West Flemish",
- "name": "West-Vlams"
- },
- "vo": {
- "english_name": "Volap\u00fck",
- "name": "Volap\u00fck"
- },
- "wa": {
- "english_name": "Walloon",
- "name": "Walon"
- },
- "war": {
- "english_name": "Waray-Waray",
- "name": "Winaray"
- },
- "wo": {
- "english_name": "Wolof",
- "name": "Wolof"
- },
- "wuu": {
- "english_name": "Wu",
- "name": "\u5434\u8bed"
- },
- "xal": {
- "english_name": "Kalmyk",
- "name": "\u0425\u0430\u043b\u044c\u043c\u0433"
- },
- "xh": {
- "english_name": "Xhosa",
- "name": "isiXhosa"
- },
- "xmf": {
- "english_name": "Mingrelian",
- "name": "\u10db\u10d0\u10e0\u10d2\u10d0\u10da\u10e3\u10e0\u10d8 (Margaluri)"
- },
- "yi": {
- "english_name": "Yiddish",
- "name": "\u05d9\u05d9\u05b4\u05d3\u05d9\u05e9"
- },
- "yo": {
- "english_name": "Yoruba",
- "name": "Yor\u00f9b\u00e1"
- },
- "za": {
- "english_name": "Zhuang",
- "name": "Cuengh"
- },
- "zea": {
- "english_name": "Zeelandic",
- "name": "Ze\u00eauws"
- },
- "zh": {
- "english_name": "Chinese",
- "name": "\u4e2d\u6587"
- },
- "zh-classical": {
- "english_name": "Classical Chinese",
- "name": "\u53e4\u6587 / \u6587\u8a00\u6587"
- },
- "zh-min-nan": {
- "english_name": "Min Nan",
- "name": "B\u00e2n-l\u00e2m-g\u00fa"
- },
- "zh-yue": {
- "english_name": "Cantonese",
- "name": "\u7cb5\u8a9e"
- },
- "zu": {
- "english_name": "Zulu",
- "name": "isiZulu"
- }
- },
- "wikipedia": {
- "ab": {
- "english_name": "Abkhazian",
- "name": "\u0410\u0525\u0441\u0443\u0430"
- },
- "ace": {
- "english_name": "Acehnese",
- "name": "Basa Ac\u00e8h"
- },
- "ady": {
- "english_name": "Adyghe",
- "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d"
- },
- "af": {
- "english_name": "Afrikaans",
- "name": "Afrikaans"
- },
- "ak": {
- "english_name": "Akan",
- "name": "Akana"
- },
- "als": {
- "english_name": "Alemannic",
- "name": "Alemannisch"
- },
- "alt": {
- "english_name": "Southern Altai",
- "name": "\u0410\u043b\u0442\u0430\u0439"
- },
- "am": {
- "english_name": "Amharic",
- "name": "\u12a0\u121b\u122d\u129b"
- },
- "ami": {
- "english_name": "Amis",
- "name": "Pangcah"
- },
- "an": {
- "english_name": "Aragonese",
- "name": "Aragon\u00e9s"
- },
- "ang": {
- "english_name": "Anglo-Saxon",
- "name": "\u00c6nglisc"
- },
- "ar": {
- "english_name": "Arabic",
- "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
- },
- "arc": {
- "english_name": "Aramaic",
- "name": "\u0710\u072a\u0721\u071d\u0710"
- },
- "ary": {
- "english_name": "Moroccan Arabic",
- "name": "\u062f\u0627\u0631\u064a\u062c\u0629"
- },
- "arz": {
- "english_name": "Egyptian Arabic",
- "name": "\u0645\u0635\u0631\u0649 (Ma\u1e63ri)"
- },
- "as": {
- "english_name": "Assamese",
- "name": "\u0985\u09b8\u09ae\u09c0\u09af\u09bc\u09be"
- },
- "ast": {
- "english_name": "Asturian",
- "name": "Asturianu"
- },
- "atj": {
- "english_name": "Atikamekw",
- "name": "Atikamekw"
- },
- "av": {
- "english_name": "Avar",
- "name": "\u0410\u0432\u0430\u0440"
- },
- "avk": {
- "english_name": "Kotava",
- "name": "Kotava"
- },
- "awa": {
- "english_name": "Awadhi",
- "name": "\u0905\u0935\u0927\u0940"
- },
- "ay": {
- "english_name": "Aymara",
- "name": "Aymar"
- },
- "az": {
- "english_name": "Azerbaijani",
- "name": "Az\u0259rbaycanca"
- },
- "azb": {
- "english_name": "South Azerbaijani",
- "name": "\u062a\u06c6\u0631\u06a9\u062c\u0647"
- },
- "ba": {
- "english_name": "Bashkir",
- "name": "\u0411\u0430\u0448\u04a1\u043e\u0440\u0442"
- },
- "ban": {
- "english_name": "Balinese",
- "name": "Bali"
- },
- "bar": {
- "english_name": "Bavarian",
- "name": "Boarisch"
- },
- "bat-smg": {
- "english_name": "Samogitian",
- "name": "\u017demait\u0117\u0161ka"
- },
- "bcl": {
- "english_name": "Central Bicolano",
- "name": "Bikol"
- },
- "be": {
- "english_name": "Belarusian",
- "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
- },
- "be-tarask": {
- "english_name": "Belarusian (Tara\u0161kievica)",
- "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f (\u0442\u0430\u0440\u0430\u0448\u043a\u0435\u0432\u0456\u0446\u0430)"
- },
- "bg": {
- "english_name": "Bulgarian",
- "name": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
- },
- "bh": {
- "english_name": "Bhojpuri",
- "name": "\u092d\u094b\u091c\u092a\u0941\u0930\u0940"
- },
- "bi": {
- "english_name": "Bislama",
- "name": "Bislama"
- },
- "bjn": {
- "english_name": "Banjar",
- "name": "Bahasa Banjar"
- },
- "blk": {
- "english_name": "Pa'O",
- "name": "\u1015\u1021\u102d\u102f\u101d\u103a\u108f\u1018\u102c\u108f\u101e\u102c\u108f"
- },
- "bm": {
- "english_name": "Bambara",
- "name": "Bamanankan"
- },
- "bn": {
- "english_name": "Bengali",
- "name": "\u09ac\u09be\u0982\u09b2\u09be"
- },
- "bo": {
- "english_name": "Tibetan",
- "name": "\u0f56\u0f7c\u0f51\u0f0b\u0f66\u0f90\u0f51"
- },
- "bpy": {
- "english_name": "Bishnupriya Manipuri",
- "name": "\u0987\u09ae\u09be\u09b0 \u09a0\u09be\u09b0/\u09ac\u09bf\u09b7\u09cd\u09a3\u09c1\u09aa\u09cd\u09b0\u09bf\u09af\u09bc\u09be \u09ae\u09a3\u09bf\u09aa\u09c1\u09b0\u09c0"
- },
- "br": {
- "english_name": "Breton",
- "name": "Brezhoneg"
- },
- "bs": {
- "english_name": "Bosnian",
- "name": "Bosanski"
- },
- "bug": {
- "english_name": "Buginese",
- "name": "Basa Ugi"
- },
- "bxr": {
- "english_name": "Buryat",
- "name": "\u0411\u0443\u0440\u044f\u0430\u0434"
- },
- "ca": {
- "english_name": "Catalan",
- "name": "Catal\u00e0"
- },
- "cbk-zam": {
- "english_name": "Zamboanga Chavacano",
- "name": "Chavacano de Zamboanga"
- },
- "cdo": {
- "english_name": "Min Dong",
- "name": "M\u00ecng-d\u0115\u0324ng-ng\u1e73\u0304"
- },
- "ce": {
- "english_name": "Chechen",
- "name": "\u041d\u043e\u0445\u0447\u0438\u0439\u043d"
- },
- "ceb": {
- "english_name": "Cebuano",
- "name": "Sinugboanong Binisaya"
- },
- "ch": {
- "english_name": "Chamorro",
- "name": "Chamoru"
- },
- "chr": {
- "english_name": "Cherokee",
- "name": "\u13e3\u13b3\u13a9"
- },
- "chy": {
- "english_name": "Cheyenne",
- "name": "Tsets\u00eahest\u00e2hese"
- },
- "ckb": {
- "english_name": "Sorani",
- "name": "Soran\u00ee / \u06a9\u0648\u0631\u062f\u06cc"
- },
- "co": {
- "english_name": "Corsican",
- "name": "Corsu"
- },
- "cr": {
- "english_name": "Cree",
- "name": "Nehiyaw"
- },
- "crh": {
- "english_name": "Crimean Tatar",
- "name": "Q\u0131r\u0131mtatarca"
- },
- "cs": {
- "english_name": "Czech",
- "name": "\u010ce\u0161tina"
- },
- "csb": {
- "english_name": "Kashubian",
- "name": "Kasz\u00ebbsczi"
- },
- "cu": {
- "english_name": "Old Church Slavonic",
- "name": "\u0421\u043b\u043e\u0432\u0463\u043d\u044c\u0441\u043a\u044a"
- },
- "cv": {
- "english_name": "Chuvash",
- "name": "\u0427\u0103\u0432\u0430\u0448"
- },
- "cy": {
- "english_name": "Welsh",
- "name": "Cymraeg"
- },
- "da": {
- "english_name": "Danish",
- "name": "Dansk"
- },
- "dag": {
- "english_name": "Dagbani",
- "name": "Dagbanli"
- },
- "de": {
- "english_name": "German",
- "name": "Deutsch"
- },
- "din": {
- "english_name": "Dinka",
- "name": "Thu\u0254\u014bj\u00e4\u014b"
- },
- "diq": {
- "english_name": "Zazaki",
- "name": "Zazaki"
- },
- "dsb": {
- "english_name": "Lower Sorbian",
- "name": "Dolnoserbski"
- },
- "dty": {
- "english_name": "Doteli",
- "name": "\u0921\u094b\u091f\u0947\u0932\u0940"
- },
- "dv": {
- "english_name": "Divehi",
- "name": "\u078b\u07a8\u0788\u07ac\u0780\u07a8\u0784\u07a6\u0790\u07b0"
- },
- "dz": {
- "english_name": "Dzongkha",
- "name": "\u0f47\u0f7c\u0f44\u0f0b\u0f41"
- },
- "ee": {
- "english_name": "Ewe",
- "name": "E\u028begbe"
- },
- "el": {
- "english_name": "Greek",
- "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
- },
- "eml": {
- "english_name": "Emilian-Romagnol",
- "name": "Emili\u00e0n e rumagn\u00f2l"
- },
- "en": {
- "english_name": "English",
- "name": "English"
- },
- "eo": {
- "english_name": "Esperanto",
- "name": "Esperanto"
- },
- "es": {
- "english_name": "Spanish",
- "name": "Espa\u00f1ol"
- },
- "et": {
- "english_name": "Estonian",
- "name": "Eesti"
- },
- "eu": {
- "english_name": "Basque",
- "name": "Euskara"
- },
- "ext": {
- "english_name": "Extremaduran",
- "name": "Estreme\u00f1u"
- },
- "fa": {
- "english_name": "Persian",
- "name": "\u0641\u0627\u0631\u0633\u06cc"
- },
- "ff": {
- "english_name": "Fula",
- "name": "Fulfulde"
- },
- "fi": {
- "english_name": "Finnish",
- "name": "Suomi"
- },
- "fiu-vro": {
- "english_name": "V\u00f5ro",
- "name": "V\u00f5ro"
- },
- "fj": {
- "english_name": "Fijian",
- "name": "Na Vosa Vakaviti"
- },
- "fo": {
- "english_name": "Faroese",
- "name": "F\u00f8royskt"
- },
- "fr": {
- "english_name": "French",
- "name": "Fran\u00e7ais"
- },
- "frp": {
- "english_name": "Franco-Proven\u00e7al",
- "name": "Arpetan"
- },
- "frr": {
- "english_name": "North Frisian",
- "name": "Nordfrasch"
- },
- "fur": {
- "english_name": "Friulian",
- "name": "Furlan"
- },
- "fy": {
- "english_name": "West Frisian",
- "name": "Frysk"
- },
- "ga": {
- "english_name": "Irish",
- "name": "Gaeilge"
- },
- "gag": {
- "english_name": "Gagauz",
- "name": "Gagauz"
- },
- "gan": {
- "english_name": "Gan",
- "name": "\u8d1b\u8a9e"
- },
- "gcr": {
- "english_name": "Guianan Creole",
- "name": "Kriy\u00f2l Gwiyannen"
- },
- "gd": {
- "english_name": "Scottish Gaelic",
- "name": "G\u00e0idhlig"
- },
- "gl": {
- "english_name": "Galician",
- "name": "Galego"
- },
- "glk": {
- "english_name": "Gilaki",
- "name": "\u06af\u06cc\u0644\u06a9\u06cc"
- },
- "gn": {
- "english_name": "Guarani",
- "name": "Ava\u00f1e'\u1ebd"
- },
- "gom": {
- "english_name": "Goan Konkani",
- "name": "\u0917\u094b\u0902\u092f\u091a\u0940 \u0915\u094b\u0902\u0915\u0923\u0940 / G\u00f5ychi Konknni"
- },
- "gor": {
- "english_name": "Gorontalo",
- "name": "Hulontalo"
- },
- "got": {
- "english_name": "Gothic",
- "name": "\ud800\udf32\ud800\udf3f\ud800\udf44\ud800\udf39\ud800\udf43\ud800\udf3a"
- },
- "gu": {
- "english_name": "Gujarati",
- "name": "\u0a97\u0ac1\u0a9c\u0ab0\u0abe\u0aa4\u0ac0"
- },
- "guw": {
- "english_name": "Gun",
- "name": "Gungbe"
- },
- "gv": {
- "english_name": "Manx",
- "name": "Gaelg"
- },
- "ha": {
- "english_name": "Hausa",
- "name": "Hausa / \u0647\u064e\u0648\u064f\u0633\u064e"
- },
- "hak": {
- "english_name": "Hakka",
- "name": "Hak-k\u00e2-fa / \u5ba2\u5bb6\u8a71"
- },
- "haw": {
- "english_name": "Hawaiian",
- "name": "Hawai\u02bbi"
- },
- "he": {
- "english_name": "Hebrew",
- "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
- },
- "hi": {
- "english_name": "Hindi",
- "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
- },
- "hif": {
- "english_name": "Fiji Hindi",
- "name": "Fiji Hindi"
- },
- "hr": {
- "english_name": "Croatian",
- "name": "Hrvatski"
- },
- "hsb": {
- "english_name": "Upper Sorbian",
- "name": "Hornjoserbsce"
- },
- "ht": {
- "english_name": "Haitian",
- "name": "Kr\u00e8yol ayisyen"
- },
- "hu": {
- "english_name": "Hungarian",
- "name": "Magyar"
- },
- "hy": {
- "english_name": "Armenian",
- "name": "\u0540\u0561\u0575\u0565\u0580\u0565\u0576"
- },
- "hyw": {
- "english_name": "Western Armenian",
- "name": "\u0531\u0580\u0565\u0582\u0574\u057f\u0561\u0570\u0561\u0575\u0565\u0580\u0567\u0576"
- },
- "ia": {
- "english_name": "Interlingua",
- "name": "Interlingua"
- },
- "id": {
- "english_name": "Indonesian",
- "name": "Bahasa Indonesia"
- },
- "ie": {
- "english_name": "Interlingue",
- "name": "Interlingue"
- },
- "ig": {
- "english_name": "Igbo",
- "name": "\u00ccgb\u00f2"
- },
- "ik": {
- "english_name": "Inupiak",
- "name": "I\u00f1upiatun"
- },
- "ilo": {
- "english_name": "Ilokano",
- "name": "Ilokano"
- },
- "inh": {
- "english_name": "Ingush",
- "name": "\u0413\u04c0\u0430\u043b\u0433\u04c0\u0430\u0439"
- },
- "io": {
- "english_name": "Ido",
- "name": "Ido"
- },
- "is": {
- "english_name": "Icelandic",
- "name": "\u00cdslenska"
- },
- "it": {
- "english_name": "Italian",
- "name": "Italiano"
- },
- "iu": {
- "english_name": "Inuktitut",
- "name": "\u1403\u14c4\u1483\u144e\u1450\u1466"
- },
- "ja": {
- "english_name": "Japanese",
- "name": "\u65e5\u672c\u8a9e"
- },
- "jam": {
- "english_name": "Jamaican Patois",
- "name": "Jumiekan Kryuol"
- },
- "jbo": {
- "english_name": "Lojban",
- "name": "Lojban"
- },
- "jv": {
- "english_name": "Javanese",
- "name": "Basa Jawa"
- },
- "ka": {
- "english_name": "Georgian",
- "name": "\u10e5\u10d0\u10e0\u10d7\u10e3\u10da\u10d8"
- },
- "kaa": {
- "english_name": "Karakalpak",
- "name": "Qaraqalpaqsha"
- },
- "kab": {
- "english_name": "Kabyle",
- "name": "Taqbaylit"
- },
- "kbd": {
- "english_name": "Kabardian Circassian",
- "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d (Adighabze)"
- },
- "kbp": {
- "english_name": "Kabiye",
- "name": "Kab\u0269y\u025b"
- },
- "kcg": {
- "english_name": "Tyap",
- "name": "Tyap"
- },
- "kg": {
- "english_name": "Kongo",
- "name": "Kik\u00f4ngo"
- },
- "ki": {
- "english_name": "Kikuyu",
- "name": "G\u0129k\u0169y\u0169"
- },
- "kk": {
- "english_name": "Kazakh",
- "name": "\u049a\u0430\u0437\u0430\u049b\u0448\u0430"
- },
- "kl": {
- "english_name": "Greenlandic",
- "name": "Kalaallisut"
- },
- "km": {
- "english_name": "Khmer",
- "name": "\u1797\u17b6\u179f\u17b6\u1781\u17d2\u1798\u17c2\u179a"
- },
- "kn": {
- "english_name": "Kannada",
- "name": "\u0c95\u0ca8\u0ccd\u0ca8\u0ca1"
- },
- "ko": {
- "english_name": "Korean",
- "name": "\ud55c\uad6d\uc5b4"
- },
- "koi": {
- "english_name": "Komi-Permyak",
- "name": "\u041f\u0435\u0440\u0435\u043c \u041a\u043e\u043c\u0438 (Perem Komi)"
- },
- "krc": {
- "english_name": "Karachay-Balkar",
- "name": "\u041a\u044a\u0430\u0440\u0430\u0447\u0430\u0439-\u041c\u0430\u043b\u043a\u044a\u0430\u0440 (Qarachay-Malqar)"
- },
- "ks": {
- "english_name": "Kashmiri",
- "name": "\u0915\u0936\u094d\u092e\u0940\u0930\u0940 / \u0643\u0634\u0645\u064a\u0631\u064a"
- },
- "ksh": {
- "english_name": "Ripuarian",
- "name": "Ripoarisch"
- },
- "ku": {
- "english_name": "Kurdish",
- "name": "Kurd\u00ee / \u0643\u0648\u0631\u062f\u06cc"
- },
- "kv": {
- "english_name": "Komi",
- "name": "\u041a\u043e\u043c\u0438"
- },
- "kw": {
- "english_name": "Cornish",
- "name": "Kernowek/Karnuack"
- },
- "ky": {
- "english_name": "Kyrgyz",
- "name": "\u041a\u044b\u0440\u0433\u044b\u0437\u0447\u0430"
- },
- "la": {
- "english_name": "Latin",
- "name": "Latina"
- },
- "lad": {
- "english_name": "Ladino",
- "name": "Dzhudezmo"
- },
- "lb": {
- "english_name": "Luxembourgish",
- "name": "L\u00ebtzebuergesch"
- },
- "lbe": {
- "english_name": "Lak",
- "name": "\u041b\u0430\u043a\u043a\u0443"
- },
- "lez": {
- "english_name": "Lezgian",
- "name": "\u041b\u0435\u0437\u0433\u0438 \u0447\u0406\u0430\u043b (Lezgi \u010d\u2019al)"
- },
- "lfn": {
- "english_name": "Lingua Franca Nova",
- "name": "Lingua franca nova"
- },
- "lg": {
- "english_name": "Luganda",
- "name": "Luganda"
- },
- "li": {
- "english_name": "Limburgish",
- "name": "Limburgs"
- },
- "lij": {
- "english_name": "Ligurian",
- "name": "L\u00ecgure"
- },
- "lld": {
- "english_name": "Ladin",
- "name": "Lingaz"
- },
- "lmo": {
- "english_name": "Lombard",
- "name": "Lumbaart"
- },
- "ln": {
- "english_name": "Lingala",
- "name": "Lingala"
- },
- "lo": {
- "english_name": "Lao",
- "name": "\u0ea5\u0eb2\u0ea7"
- },
- "lt": {
- "english_name": "Lithuanian",
- "name": "Lietuvi\u0173"
- },
- "ltg": {
- "english_name": "Latgalian",
- "name": "Latga\u013cu"
- },
- "lv": {
- "english_name": "Latvian",
- "name": "Latvie\u0161u"
- },
- "mad": {
- "english_name": "Madurese",
- "name": "Madhur\u00e2"
- },
- "mai": {
- "english_name": "Maithili",
- "name": "\u092e\u0948\u0925\u093f\u0932\u0940"
- },
- "map-bms": {
- "english_name": "Banyumasan",
- "name": "Basa Banyumasan"
- },
- "mdf": {
- "english_name": "Moksha",
- "name": "\u041c\u043e\u043a\u0448\u0435\u043d\u044c (Mokshanj K\u00e4lj)"
- },
- "mg": {
- "english_name": "Malagasy",
- "name": "Malagasy"
- },
- "mhr": {
- "english_name": "Meadow Mari",
- "name": "\u041e\u043b\u044b\u043a \u041c\u0430\u0440\u0438\u0439 (Olyk Marij)"
- },
- "mi": {
- "english_name": "Maori",
- "name": "M\u0101ori"
- },
- "min": {
- "english_name": "Minangkabau",
- "name": "Minangkabau"
- },
- "mk": {
- "english_name": "Macedonian",
- "name": "\u041c\u0430\u043a\u0435\u0434\u043e\u043d\u0441\u043a\u0438"
- },
- "ml": {
- "english_name": "Malayalam",
- "name": "\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02"
- },
- "mn": {
- "english_name": "Mongolian",
- "name": "\u041c\u043e\u043d\u0433\u043e\u043b"
- },
- "mni": {
- "english_name": "Meitei",
- "name": "\uabc3\uabe4\uabc7\uabe9\uabc2\uabe3\uabdf"
- },
- "mnw": {
- "english_name": "Mon",
- "name": "\u1019\u1014\u103a"
- },
- "mr": {
- "english_name": "Marathi",
- "name": "\u092e\u0930\u093e\u0920\u0940"
- },
- "mrj": {
- "english_name": "Hill Mari",
- "name": "\u041a\u044b\u0440\u044b\u043a \u041c\u0430\u0440\u044b (Kyryk Mary)"
- },
- "ms": {
- "english_name": "Malay",
- "name": "Bahasa Melayu"
- },
- "mt": {
- "english_name": "Maltese",
- "name": "Malti"
- },
- "mwl": {
- "english_name": "Mirandese",
- "name": "Mirand\u00e9s"
- },
- "my": {
- "english_name": "Burmese",
- "name": "\u1019\u103c\u1014\u103a\u1019\u102c\u1018\u102c\u101e\u102c"
- },
- "myv": {
- "english_name": "Erzya",
- "name": "\u042d\u0440\u0437\u044f\u043d\u044c (Erzjanj Kelj)"
- },
- "mzn": {
- "english_name": "Mazandarani",
- "name": "\u0645\u064e\u0632\u0650\u0631\u0648\u0646\u064a"
- },
- "na": {
- "english_name": "Nauruan",
- "name": "dorerin Naoero"
- },
- "nah": {
- "english_name": "Nahuatl",
- "name": "N\u0101huatl"
- },
- "nap": {
- "english_name": "Neapolitan",
- "name": "Nnapulitano"
- },
- "nds": {
- "english_name": "Low Saxon",
- "name": "Plattd\u00fc\u00fctsch"
- },
- "nds-nl": {
- "english_name": "Dutch Low Saxon",
- "name": "Nedersaksisch"
- },
- "ne": {
- "english_name": "Nepali",
- "name": "\u0928\u0947\u092a\u093e\u0932\u0940"
- },
- "new": {
- "english_name": "Newar",
- "name": "\u0928\u0947\u092a\u093e\u0932 \u092d\u093e\u0937\u093e"
- },
- "nia": {
- "english_name": "Nias",
- "name": "Li Niha"
- },
- "nl": {
- "english_name": "Dutch",
- "name": "Nederlands"
- },
- "nn": {
- "english_name": "Norwegian (Nynorsk)",
- "name": "Nynorsk"
- },
- "no": {
- "english_name": "Norwegian (Bokm\u00e5l)",
- "name": "Norsk (Bokm\u00e5l)"
- },
- "nov": {
- "english_name": "Novial",
- "name": "Novial"
- },
- "nqo": {
- "english_name": "N\u2019Ko",
- "name": "\u07d2\u07de\u07cf"
- },
- "nrm": {
- "english_name": "Norman",
- "name": "Nouormand/Normaund"
- },
- "nso": {
- "english_name": "Northern Sotho",
- "name": "Sepedi"
- },
- "nv": {
- "english_name": "Navajo",
- "name": "Din\u00e9 bizaad"
- },
- "ny": {
- "english_name": "Chichewa",
- "name": "Chichewa"
- },
- "oc": {
- "english_name": "Occitan",
- "name": "Occitan"
- },
- "olo": {
- "english_name": "Livvi-Karelian",
- "name": "Karjalan"
- },
- "om": {
- "english_name": "Oromo",
- "name": "Oromoo"
- },
- "or": {
- "english_name": "Oriya",
- "name": "\u0b13\u0b21\u0b3c\u0b3f\u0b06"
- },
- "os": {
- "english_name": "Ossetian",
- "name": "\u0418\u0440\u043e\u043d\u0430\u0443"
- },
- "pa": {
- "english_name": "Punjabi",
- "name": "\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40"
- },
- "pag": {
- "english_name": "Pangasinan",
- "name": "Pangasinan"
- },
- "pam": {
- "english_name": "Kapampangan",
- "name": "Kapampangan"
- },
- "pap": {
- "english_name": "Papiamentu",
- "name": "Papiamentu"
- },
- "pcd": {
- "english_name": "Picard",
- "name": "Picard"
- },
- "pcm": {
- "english_name": "Nigerian Pidgin",
- "name": "Naij\u00e1"
- },
- "pdc": {
- "english_name": "Pennsylvania German",
- "name": "Deitsch"
- },
- "pfl": {
- "english_name": "Palatinate German",
- "name": "P\u00e4lzisch"
- },
- "pi": {
- "english_name": "Pali",
- "name": "\u092a\u093e\u0934\u093f"
- },
- "pih": {
- "english_name": "Norfolk",
- "name": "Norfuk"
- },
- "pl": {
- "english_name": "Polish",
- "name": "Polski"
- },
- "pms": {
- "english_name": "Piedmontese",
- "name": "Piemont\u00e8is"
- },
- "pnb": {
- "english_name": "Western Punjabi",
- "name": "\u0634\u0627\u06c1 \u0645\u06a9\u06be\u06cc \u067e\u0646\u062c\u0627\u0628\u06cc (Sh\u0101hmukh\u012b Pa\u00f1j\u0101b\u012b)"
- },
- "pnt": {
- "english_name": "Pontic",
- "name": "\u03a0\u03bf\u03bd\u03c4\u03b9\u03b1\u03ba\u03ac"
- },
- "ps": {
- "english_name": "Pashto",
- "name": "\u067e\u069a\u062a\u0648"
- },
- "pt": {
- "english_name": "Portuguese",
- "name": "Portugu\u00eas"
- },
- "pwn": {
- "english_name": "Paiwan",
- "name": "Paiwan"
- },
- "qu": {
- "english_name": "Quechua",
- "name": "Qichwa simi"
- },
- "rm": {
- "english_name": "Romansh",
- "name": "Rumantsch"
- },
- "rmy": {
- "english_name": "Romani",
- "name": "romani - \u0930\u094b\u092e\u093e\u0928\u0940"
- },
- "rn": {
- "english_name": "Kirundi",
- "name": "Ikirundi"
- },
- "ro": {
- "english_name": "Romanian",
- "name": "Rom\u00e2n\u0103"
- },
- "roa-rup": {
- "english_name": "Aromanian",
- "name": "Arm\u00e3neashce"
- },
- "roa-tara": {
- "english_name": "Tarantino",
- "name": "Tarand\u00edne"
- },
- "ru": {
- "english_name": "Russian",
- "name": "\u0420\u0443\u0441\u0441\u043a\u0438\u0439"
- },
- "rue": {
- "english_name": "Rusyn",
- "name": "\u0420\u0443\u0441\u0438\u043d\u044c\u0441\u043a\u044b\u0439"
- },
- "rw": {
- "english_name": "Kinyarwanda",
- "name": "Ikinyarwanda"
- },
- "sa": {
- "english_name": "Sanskrit",
- "name": "\u0938\u0902\u0938\u094d\u0915\u0943\u0924\u092e\u094d"
- },
- "sah": {
- "english_name": "Sakha",
- "name": "\u0421\u0430\u0445\u0430 \u0442\u044b\u043b\u0430 (Saxa Tyla)"
- },
- "sat": {
- "english_name": "Santali",
- "name": "\u1c65\u1c5f\u1c71\u1c5b\u1c5f\u1c72\u1c64"
- },
- "sc": {
- "english_name": "Sardinian",
- "name": "Sardu"
- },
- "scn": {
- "english_name": "Sicilian",
- "name": "Sicilianu"
- },
- "sco": {
- "english_name": "Scots",
- "name": "Scots"
- },
- "sd": {
- "english_name": "Sindhi",
- "name": "\u0633\u0646\u068c\u064a\u060c \u0633\u0646\u062f\u06be\u06cc \u060c \u0938\u093f\u0928\u094d\u0927"
- },
- "se": {
- "english_name": "Northern Sami",
- "name": "S\u00e1megiella"
- },
- "sg": {
- "english_name": "Sango",
- "name": "S\u00e4ng\u00f6"
- },
- "sh": {
- "english_name": "Serbo-Croatian",
- "name": "Srpskohrvatski / \u0421\u0440\u043f\u0441\u043a\u043e\u0445\u0440\u0432\u0430\u0442\u0441\u043a\u0438"
- },
- "shi": {
- "english_name": "Tachelhit",
- "name": "Tacl\u1e25it"
- },
- "shn": {
- "english_name": "Shan",
- "name": "\u101c\u102d\u1075\u103a\u1088\u1010\u1086\u1038"
- },
- "si": {
- "english_name": "Sinhalese",
- "name": "\u0dc3\u0dd2\u0d82\u0dc4\u0dbd"
- },
- "simple": {
- "english_name": "Simple English",
- "name": "Simple English"
- },
- "sk": {
- "english_name": "Slovak",
- "name": "Sloven\u010dina"
- },
- "skr": {
- "english_name": "Saraiki",
- "name": "\u0633\u0631\u0627\u0626\u06cc\u06a9\u06cc"
- },
- "sl": {
- "english_name": "Slovenian",
- "name": "Sloven\u0161\u010dina"
- },
- "sm": {
- "english_name": "Samoan",
- "name": "Gagana Samoa"
- },
- "smn": {
- "english_name": "Inari Sami",
- "name": "Anar\u00e2\u0161kiel\u00e2"
- },
- "sn": {
- "english_name": "Shona",
- "name": "chiShona"
- },
- "so": {
- "english_name": "Somali",
- "name": "Soomaali"
- },
- "sq": {
- "english_name": "Albanian",
- "name": "Shqip"
- },
- "sr": {
- "english_name": "Serbian",
- "name": "\u0421\u0440\u043f\u0441\u043a\u0438 / Srpski"
- },
- "srn": {
- "english_name": "Sranan",
- "name": "Sranantongo"
- },
- "ss": {
- "english_name": "Swati",
- "name": "SiSwati"
- },
- "st": {
- "english_name": "Sesotho",
- "name": "Sesotho"
- },
- "stq": {
- "english_name": "Saterland Frisian",
- "name": "Seeltersk"
- },
- "su": {
- "english_name": "Sundanese",
- "name": "Basa Sunda"
- },
- "sv": {
- "english_name": "Swedish",
- "name": "Svenska"
- },
- "sw": {
- "english_name": "Swahili",
- "name": "Kiswahili"
- },
- "szl": {
- "english_name": "Silesian",
- "name": "\u015al\u016fnski"
- },
- "szy": {
- "english_name": "Sakizaya",
- "name": "Sakizaya"
- },
- "ta": {
- "english_name": "Tamil",
- "name": "\u0ba4\u0bae\u0bbf\u0bb4\u0bcd"
- },
- "tay": {
- "english_name": "Atayal",
- "name": "Tayal"
- },
- "tcy": {
- "english_name": "Tulu",
- "name": "\u0ca4\u0cc1\u0cb3\u0cc1"
- },
- "te": {
- "english_name": "Telugu",
- "name": "\u0c24\u0c46\u0c32\u0c41\u0c17\u0c41"
- },
- "tet": {
- "english_name": "Tetum",
- "name": "Tetun"
- },
- "tg": {
- "english_name": "Tajik",
- "name": "\u0422\u043e\u04b7\u0438\u043a\u04e3"
- },
- "th": {
- "english_name": "Thai",
- "name": "\u0e44\u0e17\u0e22"
- },
- "ti": {
- "english_name": "Tigrinya",
- "name": "\u1275\u130d\u122d\u129b"
- },
- "tk": {
- "english_name": "Turkmen",
- "name": "T\u00fcrkmen"
- },
- "tl": {
- "english_name": "Tagalog",
- "name": "Tagalog"
- },
- "tn": {
- "english_name": "Tswana",
- "name": "Setswana"
- },
- "to": {
- "english_name": "Tongan",
- "name": "faka Tonga"
- },
- "tpi": {
- "english_name": "Tok Pisin",
- "name": "Tok Pisin"
- },
- "tr": {
- "english_name": "Turkish",
- "name": "T\u00fcrk\u00e7e"
- },
- "trv": {
- "english_name": "Seediq",
- "name": "Taroko"
- },
- "ts": {
- "english_name": "Tsonga",
- "name": "Xitsonga"
- },
- "tt": {
- "english_name": "Tatar",
- "name": "Tatar\u00e7a / \u0422\u0430\u0442\u0430\u0440\u0447\u0430"
- },
- "tum": {
- "english_name": "Tumbuka",
- "name": "chiTumbuka"
- },
- "tw": {
- "english_name": "Twi",
- "name": "Twi"
- },
- "ty": {
- "english_name": "Tahitian",
- "name": "Reo M\u0101`ohi"
- },
- "tyv": {
- "english_name": "Tuvan",
- "name": "\u0422\u044b\u0432\u0430"
- },
- "udm": {
- "english_name": "Udmurt",
- "name": "\u0423\u0434\u043c\u0443\u0440\u0442 \u043a\u044b\u043b"
- },
- "ug": {
- "english_name": "Uyghur",
- "name": "\u0626\u06c7\u064a\u063a\u06c7\u0631 \u062a\u0649\u0644\u0649"
- },
- "uk": {
- "english_name": "Ukrainian",
- "name": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
- },
- "ur": {
- "english_name": "Urdu",
- "name": "\u0627\u0631\u062f\u0648"
- },
- "uz": {
- "english_name": "Uzbek",
- "name": "O\u2018zbek"
- },
- "ve": {
- "english_name": "Venda",
- "name": "Tshivenda"
- },
- "vec": {
- "english_name": "Venetian",
- "name": "V\u00e8neto"
- },
- "vep": {
- "english_name": "Vepsian",
- "name": "Veps\u00e4n"
- },
- "vi": {
- "english_name": "Vietnamese",
- "name": "Ti\u1ebfng Vi\u1ec7t"
- },
- "vls": {
- "english_name": "West Flemish",
- "name": "West-Vlams"
- },
- "vo": {
- "english_name": "Volap\u00fck",
- "name": "Volap\u00fck"
- },
- "wa": {
- "english_name": "Walloon",
- "name": "Walon"
- },
- "war": {
- "english_name": "Waray-Waray",
- "name": "Winaray"
- },
- "wo": {
- "english_name": "Wolof",
- "name": "Wolof"
- },
- "wuu": {
- "english_name": "Wu",
- "name": "\u5434\u8bed"
- },
- "xal": {
- "english_name": "Kalmyk",
- "name": "\u0425\u0430\u043b\u044c\u043c\u0433"
- },
- "xh": {
- "english_name": "Xhosa",
- "name": "isiXhosa"
- },
- "xmf": {
- "english_name": "Mingrelian",
- "name": "\u10db\u10d0\u10e0\u10d2\u10d0\u10da\u10e3\u10e0\u10d8 (Margaluri)"
- },
- "yi": {
- "english_name": "Yiddish",
- "name": "\u05d9\u05d9\u05b4\u05d3\u05d9\u05e9"
- },
- "yo": {
- "english_name": "Yoruba",
- "name": "Yor\u00f9b\u00e1"
- },
- "za": {
- "english_name": "Zhuang",
- "name": "Cuengh"
- },
- "zea": {
- "english_name": "Zeelandic",
- "name": "Ze\u00eauws"
- },
- "zh": {
- "english_name": "Chinese",
- "name": "\u4e2d\u6587"
- },
- "zh-classical": {
- "english_name": "Classical Chinese",
- "name": "\u53e4\u6587 / \u6587\u8a00\u6587"
- },
- "zh-min-nan": {
- "english_name": "Min Nan",
- "name": "B\u00e2n-l\u00e2m-g\u00fa"
- },
- "zh-yue": {
- "english_name": "Cantonese",
- "name": "\u7cb5\u8a9e"
- },
- "zu": {
- "english_name": "Zulu",
- "name": "isiZulu"
- }
- },
- "yahoo": [
- "ar",
- "bg",
- "cs",
- "da",
- "de",
- "el",
- "en",
- "es",
- "et",
- "fi",
- "fr",
- "he",
- "hr",
- "hu",
- "it",
- "ja",
- "ko",
- "lt",
- "lv",
- "nl",
- "no",
- "pl",
- "pt",
- "ro",
- "ru",
- "sk",
- "sl",
- "sv",
- "th",
- "tr",
- "zh_chs",
- "zh_cht"
- ]
-} \ No newline at end of file
diff --git a/searx/enginelib/__init__.py b/searx/enginelib/__init__.py
new file mode 100644
index 000000000..461791b18
--- /dev/null
+++ b/searx/enginelib/__init__.py
@@ -0,0 +1,136 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Engine related implementations
+
+.. note::
+
+ The long term goal is to modularize all relevant implementations to the
+ engines here in this Python package. In addition to improved modularization,
+ this will also be necessary in part because the probability of circular
+ imports will increase due to the increased typification of implementations in
+ the future.
+
+ ToDo:
+
+ - move :py:obj:`searx.engines.load_engine` to a new module `searx.enginelib`.
+"""
+
+
+from __future__ import annotations
+from typing import Union, Dict, List, Callable, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from searx.enginelib import traits
+
+
+class Engine: # pylint: disable=too-few-public-methods
+ """Class of engine instances built from YAML settings.
+
+ Further documentation see :ref:`general engine configuration`.
+
+ .. hint::
+
+ This class is currently never initialized and only used for type hinting.
+ """
+
+ # Common options in the engine module
+
+ engine_type: str
+ """Type of the engine (:origin:`searx/search/processors`)"""
+
+ paging: bool
+ """Engine supports multiple pages."""
+
+ time_range_support: bool
+ """Engine supports search time range."""
+
+ safesearch: bool
+ """Engine supports SafeSearch"""
+
+ language_support: bool
+ """Engine supports languages (locales) search."""
+
+ language: str
+ """For an engine, when there is ``language: ...`` in the YAML settings the engine
+ does support only this one language:
+
+ .. code:: yaml
+
+ - name: google french
+ engine: google
+ language: fr
+ """
+
+ region: str
+ """For an engine, when there is ``region: ...`` in the YAML settings the engine
+ does support only this one region:
+
+ .. code:: yaml
+
+ - name: google belgium
+ engine: google
+ region: fr-BE
+ """
+
+ fetch_traits: Callable
+ """Function to fetch engine's traits from origin."""
+
+ traits: traits.EngineTraits
+ """Traits of the engine."""
+
+ # settings.yml
+
+ categories: List[str]
+ """Tabs, in which the engine is working."""
+
+ name: str
+ """Name that will be used across SearXNG to define this engine. In settings, on
+ the result page .."""
+
+ engine: str
+ """Name of the python file used to handle requests and responses to and from
+ this search engine (file name from :origin:`searx/engines` without
+ ``.py``)."""
+
+ enable_http: bool
+ """Enable HTTP (by default only HTTPS is enabled)."""
+
+ shortcut: str
+ """Code used to execute bang requests (``!foo``)"""
+
+ timeout: float
+ """Specific timeout for search-engine."""
+
+ display_error_messages: bool
+ """Display error messages on the web UI."""
+
+ proxies: dict
+ """Set proxies for a specific engine (YAML):
+
+ .. code:: yaml
+
+ proxies :
+ http: socks5://proxy:port
+ https: socks5://proxy:port
+ """
+
+ disabled: bool
+ """To disable by default the engine, but not deleting it. It will allow the
+ user to manually activate it in the settings."""
+
+ inactive: bool
+ """Remove the engine from the settings (*disabled & removed*)."""
+
+ about: dict
+ """Additional fields describing the engine.
+
+ .. code:: yaml
+
+ about:
+ website: https://example.com
+ wikidata_id: Q306656
+ official_api_documentation: https://example.com/api-doc
+ use_official_api: true
+ require_api_key: true
+ results: HTML
+ """
diff --git a/searx/enginelib/traits.py b/searx/enginelib/traits.py
new file mode 100644
index 000000000..df7851594
--- /dev/null
+++ b/searx/enginelib/traits.py
@@ -0,0 +1,250 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Engine's traits are fetched from the origin engines and stored in a JSON file
+in the *data folder*. Most often traits are languages and region codes and
+their mapping from SearXNG's representation to the representation in the origin
+search engine. For new traits new properties can be added to the class
+:py:class:`EngineTraits`.
+
+To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be
+used.
+"""
+
+from __future__ import annotations
+import json
+import dataclasses
+from typing import Dict, Union, Callable, Optional, TYPE_CHECKING
+from typing_extensions import Literal, Self
+
+from searx import locales
+from searx.data import data_dir, ENGINE_TRAITS
+
+if TYPE_CHECKING:
+ from . import Engine
+
+
+class EngineTraitsEncoder(json.JSONEncoder):
+ """Encodes :class:`EngineTraits` to a serializable object, see
+ :class:`json.JSONEncoder`."""
+
+ def default(self, o):
+ """Return dictionary of a :class:`EngineTraits` object."""
+ if isinstance(o, EngineTraits):
+ return o.__dict__
+ return super().default(o)
+
+
+@dataclasses.dataclass
+class EngineTraits:
+ """The class is intended to be instantiated for each engine."""
+
+ regions: Dict[str, str] = dataclasses.field(default_factory=dict)
+ """Maps SearXNG's internal representation of a region to the one of the engine.
+
+ SearXNG's internal representation can be parsed by babel and the value is
+ sent to the engine:
+
+ .. code:: python
+
+ regions = {
+ 'fr-BE' : <engine's region name>,
+ }
+
+ for key, engine_region in regions.items():
+ searxng_region = babel.Locale.parse(key, sep='-')
+ ...
+ """
+
+ languages: Dict[str, str] = dataclasses.field(default_factory=dict)
+ """Maps SearXNG's internal representation of a language to the one of the engine.
+
+ SearXNG's internal representation can be parsed by babel and the value is
+ sent to the engine:
+
+ .. code:: python
+
+ languages = {
+ 'ca' : <engine's language name>,
+ }
+
+ for key, engine_lang in languages.items():
+ searxng_lang = babel.Locale.parse(key)
+ ...
+ """
+
+ all_locale: Optional[str] = None
+ """To which locale value SearXNG's ``all`` language is mapped (shown as "Default
+ language").
+ """
+
+ data_type: Literal['traits_v1'] = 'traits_v1'
+ """Data type, default is 'traits_v1'.
+ """
+
+ custom: Dict[str, Dict] = dataclasses.field(default_factory=dict)
+ """A place to store engine's custom traits, not related to the SearXNG core
+
+ """
+
+ def get_language(self, searxng_locale: str, default=None):
+ """Return engine's language string that *best fits* to SearXNG's locale.
+
+ :param searxng_locale: SearXNG's internal representation of locale
+ selected by the user.
+
+ :param default: engine's default language
+
+ The *best fits* rules are implemented in
+ :py:obj:`locales.get_engine_locale`. Except for the special value ``all``
+ which is determined from :py:obj:`EngineTraits.all_locale`.
+ """
+ if searxng_locale == 'all' and self.all_locale is not None:
+ return self.all_locale
+ return locales.get_engine_locale(searxng_locale, self.languages, default=default)
+
+ def get_region(self, searxng_locale: str, default=None):
+ """Return engine's region string that best fits to SearXNG's locale.
+
+ :param searxng_locale: SearXNG's internal representation of locale
+ selected by the user.
+
+ :param default: engine's default region
+
+ The *best fits* rules are implemented in
+ :py:obj:`locales.get_engine_locale`. Except for the special value ``all``
+ which is determined from :py:obj:`EngineTraits.all_locale`.
+ """
+ if searxng_locale == 'all' and self.all_locale is not None:
+ return self.all_locale
+ return locales.get_engine_locale(searxng_locale, self.regions, default=default)
+
+ def is_locale_supported(self, searxng_locale: str) -> bool:
+ """A *locale* (SearXNG's internal representation) is considered to be supported
+ by the engine if the *region* or the *language* is supported by the
+ engine. For verification the functions :py:func:`self.get_region` and
+ :py:func:`self.get_language` are used.
+ """
+ if self.data_type == 'traits_v1':
+ return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))
+
+ raise TypeError('engine traits of type %s is unknown' % self.data_type)
+
+ def copy(self):
+ """Create a copy of the dataclass object."""
+ return EngineTraits(**dataclasses.asdict(self))
+
+ @classmethod
+ def fetch_traits(cls, engine: Engine) -> Union[Self, None]:
+ """Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch
+ and set properties from the origin engine in the object ``engine_traits``. If
+ function does not exist, ``None`` is returned.
+ """
+
+ fetch_traits = getattr(engine, 'fetch_traits', None)
+ engine_traits = None
+
+ if fetch_traits:
+ engine_traits = cls()
+ fetch_traits(engine_traits)
+ return engine_traits
+
+ def set_traits(self, engine: Engine):
+ """Set traits from self object in a :py:obj:`.Engine` namespace.
+
+ :param engine: engine instance built by :py:func:`searx.engines.load_engine`
+ """
+
+ if self.data_type == 'traits_v1':
+ self._set_traits_v1(engine)
+ else:
+ raise TypeError('engine traits of type %s is unknown' % self.data_type)
+
+ def _set_traits_v1(self, engine: Engine):
+ # For an engine, when there is `language: ...` in the YAML settings the engine
+ # does support only this one language (region)::
+ #
+ # - name: google italian
+ # engine: google
+ # language: it
+ # region: it-IT
+
+ traits = self.copy()
+
+ _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"
+
+ languages = traits.languages
+ if hasattr(engine, 'language'):
+ if engine.language not in languages:
+ raise ValueError(_msg % (engine.name, 'language', engine.language))
+ traits.languages = {engine.language: languages[engine.language]}
+
+ regions = traits.regions
+ if hasattr(engine, 'region'):
+ if engine.region not in regions:
+ raise ValueError(_msg % (engine.name, 'region', engine.region))
+ traits.regions = {engine.region: regions[engine.region]}
+
+ engine.language_support = bool(traits.languages or traits.regions)
+
+ # set the copied & modified traits in engine's namespace
+ engine.traits = traits
+
+
+class EngineTraitsMap(Dict[str, EngineTraits]):
+ """A python dictionary to map :class:`EngineTraits` by engine name."""
+
+ ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
+ """File with persistence of the :py:obj:`EngineTraitsMap`."""
+
+ def save_data(self):
+ """Store EngineTraitsMap in file :py:obj:`self.ENGINE_TRAITS_FILE`"""
+ with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as f:
+ json.dump(self, f, indent=2, sort_keys=True, cls=EngineTraitsEncoder)
+
+ @classmethod
+ def from_data(cls) -> Self:
+ """Instantiate :class:`EngineTraitsMap` object from :py:obj:`ENGINE_TRAITS`"""
+ obj = cls()
+ for k, v in ENGINE_TRAITS.items():
+ obj[k] = EngineTraits(**v)
+ return obj
+
+ @classmethod
+ def fetch_traits(cls, log: Callable) -> Self:
+ from searx import engines # pylint: disable=cyclic-import, import-outside-toplevel
+
+ names = list(engines.engines)
+ names.sort()
+ obj = cls()
+
+ for engine_name in names:
+ engine = engines.engines[engine_name]
+
+ traits = EngineTraits.fetch_traits(engine)
+ if traits is not None:
+ log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
+ log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions)))
+ obj[engine_name] = traits
+
+ return obj
+
+ def set_traits(self, engine: Engine):
+ """Set traits in a :py:obj:`Engine` namespace.
+
+ :param engine: engine instance built by :py:func:`searx.engines.load_engine`
+ """
+
+ engine_traits = EngineTraits(data_type='traits_v1')
+ if engine.name in self.keys():
+ engine_traits = self[engine.name]
+
+ elif engine.engine in self.keys():
+ # The key of the dictionary traits_map is the *engine name*
+ # configured in settings.yml. When multiple engines are configured
+ # in settings.yml to use the same origin engine (python module)
+ # these additional engines can use the languages from the origin
+ # engine. For this use the configured ``engine: ...`` from
+ # settings.yml
+ engine_traits = self[engine.engine]
+
+ engine_traits.set_traits(engine)
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 52bb5f20d..c8e8e7241 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -11,24 +11,22 @@ usage::
"""
+from __future__ import annotations
+
import sys
import copy
-from typing import Dict, List, Optional
-
from os.path import realpath, dirname
-from babel.localedata import locale_identifiers
+
+from typing import TYPE_CHECKING, Dict, Optional
+
from searx import logger, settings
-from searx.data import ENGINES_LANGUAGES
-from searx.network import get
-from searx.utils import load_module, match_language, gen_useragent
+from searx.utils import load_module
+if TYPE_CHECKING:
+ from searx.enginelib import Engine
logger = logger.getChild('engines')
ENGINE_DIR = dirname(realpath(__file__))
-BABEL_LANGS = [
- lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
- for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
-]
ENGINE_DEFAULT_ARGS = {
"engine_type": "online",
"inactive": False,
@@ -36,8 +34,6 @@ ENGINE_DEFAULT_ARGS = {
"timeout": settings["outgoing"]["request_timeout"],
"shortcut": "-",
"categories": ["general"],
- "supported_languages": [],
- "language_aliases": {},
"paging": False,
"safesearch": False,
"time_range_support": False,
@@ -52,24 +48,6 @@ ENGINE_DEFAULT_ARGS = {
OTHER_CATEGORY = 'other'
-class Engine: # pylint: disable=too-few-public-methods
- """This class is currently never initialized and only used for type hinting."""
-
- name: str
- engine: str
- shortcut: str
- categories: List[str]
- supported_languages: List[str]
- about: dict
- inactive: bool
- disabled: bool
- language_support: bool
- paging: bool
- safesearch: bool
- time_range_support: bool
- timeout: float
-
-
# Defaults for the namespace of an engine module, see :py:func:`load_engine`
categories = {'general': []}
@@ -136,9 +114,15 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
return None
update_engine_attributes(engine, engine_data)
- set_language_attributes(engine)
update_attributes_for_tor(engine)
+ # avoid cyclic imports
+ # pylint: disable=import-outside-toplevel
+ from searx.enginelib.traits import EngineTraitsMap
+
+ trait_map = EngineTraitsMap.from_data()
+ trait_map.set_traits(engine)
+
if not is_engine_active(engine):
return None
@@ -190,60 +174,6 @@ def update_engine_attributes(engine: Engine, engine_data):
setattr(engine, arg_name, copy.deepcopy(arg_value))
-def set_language_attributes(engine: Engine):
- # assign supported languages from json file
- if engine.name in ENGINES_LANGUAGES:
- engine.supported_languages = ENGINES_LANGUAGES[engine.name]
-
- elif engine.engine in ENGINES_LANGUAGES:
- # The key of the dictionary ENGINES_LANGUAGES is the *engine name*
- # configured in settings.xml. When multiple engines are configured in
- # settings.yml to use the same origin engine (python module) these
- # additional engines can use the languages from the origin engine.
- # For this use the configured ``engine: ...`` from settings.yml
- engine.supported_languages = ENGINES_LANGUAGES[engine.engine]
-
- if hasattr(engine, 'language'):
- # For an engine, when there is `language: ...` in the YAML settings, the
- # engine supports only one language, in this case
- # engine.supported_languages should contains this value defined in
- # settings.yml
- if engine.language not in engine.supported_languages:
- raise ValueError(
- "settings.yml - engine: '%s' / language: '%s' not supported" % (engine.name, engine.language)
- )
-
- if isinstance(engine.supported_languages, dict):
- engine.supported_languages = {engine.language: engine.supported_languages[engine.language]}
- else:
- engine.supported_languages = [engine.language]
-
- # find custom aliases for non standard language codes
- for engine_lang in engine.supported_languages:
- iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None)
- if (
- iso_lang
- and iso_lang != engine_lang
- and not engine_lang.startswith(iso_lang)
- and iso_lang not in engine.supported_languages
- ):
- engine.language_aliases[iso_lang] = engine_lang
-
- # language_support
- engine.language_support = len(engine.supported_languages) > 0
-
- # assign language fetching method if auxiliary method exists
- if hasattr(engine, '_fetch_supported_languages'):
- headers = {
- 'User-Agent': gen_useragent(),
- 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
- }
- engine.fetch_supported_languages = (
- # pylint: disable=protected-access
- lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers))
- )
-
-
def update_attributes_for_tor(engine: Engine) -> bool:
if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
index b5e426107..56c3b447f 100644
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@@ -1,15 +1,32 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
"""
- Arch Linux Wiki
+Arch Linux Wiki
+~~~~~~~~~~~~~~~
+
+This implementation does not use an official API: Mediawiki provides an API, but
+Arch Wiki blocks access to it.
- API: Mediawiki provides API, but Arch Wiki blocks access to it
"""
-from urllib.parse import urlencode, urljoin
-from lxml import html
+from typing import TYPE_CHECKING
+from urllib.parse import urlencode, urljoin, urlparse
+import lxml
+import babel
+
+from searx import network
from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
+from searx.enginelib.traits import EngineTraits
+from searx.locales import language_tag
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+traits: EngineTraits
+
-# about
about = {
"website": 'https://wiki.archlinux.org/',
"wikidata_id": 'Q101445877',
@@ -22,125 +39,113 @@ about = {
# engine dependent config
categories = ['it', 'software wikis']
paging = True
-base_url = 'https://wiki.archlinux.org'
-
-# xpath queries
-xpath_results = '//ul[@class="mw-search-results"]/li'
-xpath_link = './/div[@class="mw-search-result-heading"]/a'
-
-
-# cut 'en' from 'en-US', 'de' from 'de-CH', and so on
-def locale_to_lang_code(locale):
- if locale.find('-') >= 0:
- locale = locale.split('-')[0]
- return locale
-
-
-# wikis for some languages were moved off from the main site, we need to make
-# requests to correct URLs to be able to get results in those languages
-lang_urls = {
- # fmt: off
- 'all': {
- 'base': 'https://wiki.archlinux.org',
- 'search': '/index.php?title=Special:Search&offset={offset}&{query}'
- },
- 'de': {
- 'base': 'https://wiki.archlinux.de',
- 'search': '/index.php?title=Spezial:Suche&offset={offset}&{query}'
- },
- 'fr': {
- 'base': 'https://wiki.archlinux.fr',
- 'search': '/index.php?title=Spécial:Recherche&offset={offset}&{query}'
- },
- 'ja': {
- 'base': 'https://wiki.archlinuxjp.org',
- 'search': '/index.php?title=特別:検索&offset={offset}&{query}'
- },
- 'ro': {
- 'base': 'http://wiki.archlinux.ro',
- 'search': '/index.php?title=Special:Căutare&offset={offset}&{query}'
- },
- 'tr': {
- 'base': 'http://archtr.org/wiki',
- 'search': '/index.php?title=Özel:Ara&offset={offset}&{query}'
- }
- # fmt: on
-}
-
-
-# get base & search URLs for selected language
-def get_lang_urls(language):
- if language in lang_urls:
- return lang_urls[language]
- return lang_urls['all']
-
-
-# Language names to build search requests for
-# those languages which are hosted on the main site.
-main_langs = {
- 'ar': 'العربية',
- 'bg': 'Български',
- 'cs': 'Česky',
- 'da': 'Dansk',
- 'el': 'Ελληνικά',
- 'es': 'Español',
- 'he': 'עברית',
- 'hr': 'Hrvatski',
- 'hu': 'Magyar',
- 'it': 'Italiano',
- 'ko': '한국어',
- 'lt': 'Lietuviškai',
- 'nl': 'Nederlands',
- 'pl': 'Polski',
- 'pt': 'Português',
- 'ru': 'Русский',
- 'sl': 'Slovenský',
- 'th': 'ไทย',
- 'uk': 'Українська',
- 'zh': '简体中文',
-}
-supported_languages = dict(lang_urls, **main_langs)
+main_wiki = 'wiki.archlinux.org'
-# do search-request
def request(query, params):
- # translate the locale (e.g. 'en-US') to language code ('en')
- language = locale_to_lang_code(params['language'])
-
- # if our language is hosted on the main site, we need to add its name
- # to the query in order to narrow the results to that language
- if language in main_langs:
- query += ' (' + main_langs[language] + ')'
- # prepare the request parameters
- query = urlencode({'search': query})
+ sxng_lang = params['searxng_locale'].split('-')[0]
+ netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)
+ title = traits.custom['title'].get(sxng_lang, 'Special:Search')
+ base_url = 'https://' + netloc + '/index.php?'
offset = (params['pageno'] - 1) * 20
- # get request URLs for our language of choice
- urls = get_lang_urls(language)
- search_url = urls['base'] + urls['search']
-
- params['url'] = search_url.format(query=query, offset=offset)
+ if netloc == main_wiki:
+ eng_lang: str = traits.get_language(sxng_lang, 'English')
+ query += ' (' + eng_lang + ')'
+ elif netloc == 'wiki.archlinuxcn.org':
+ base_url = 'https://' + netloc + '/wzh/index.php?'
+
+ args = {
+ 'search': query,
+ 'title': title,
+ 'limit': 20,
+ 'offset': offset,
+ 'profile': 'default',
+ }
+ params['url'] = base_url + urlencode(args)
return params
-# get response from search-request
def response(resp):
- # get the base URL for the language in which request was made
- language = locale_to_lang_code(resp.search_params['language'])
- base_url = get_lang_urls(language)['base']
results = []
+ dom = lxml.html.fromstring(resp.text)
- dom = html.fromstring(resp.text)
+ # get the base URL for the language in which request was made
+ sxng_lang = resp.search_params['searxng_locale'].split('-')[0]
+ netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)
+ base_url = 'https://' + netloc + '/index.php?'
+
+ for result in eval_xpath_list(dom, '//ul[@class="mw-search-results"]/li'):
+ link = eval_xpath_getindex(result, './/div[@class="mw-search-result-heading"]/a', 0)
+ content = extract_text(result.xpath('.//div[@class="searchresult"]'))
+ results.append(
+ {
+ 'url': urljoin(base_url, link.get('href')),
+ 'title': extract_text(link),
+ 'content': content,
+ }
+ )
- # parse results
- for result in eval_xpath_list(dom, xpath_results):
- link = eval_xpath_getindex(result, xpath_link, 0)
- href = urljoin(base_url, link.attrib.get('href'))
- title = extract_text(link)
+ return results
- results.append({'url': href, 'title': title})
- return results
+def fetch_traits(engine_traits: EngineTraits):
+ """Fetch languages from Archlinux-Wiki. The location of the Wiki address of a
+ language is mapped in a :py:obj:`custom field
+ <searx.enginelib.traits.EngineTraits.custom>` (``wiki_netloc``). Depending
+ on the location, the ``title`` argument in the request is translated.
+
+ .. code:: python
+
+ "custom": {
+ "wiki_netloc": {
+ "de": "wiki.archlinux.de",
+ # ...
+ "zh": "wiki.archlinuxcn.org"
+ }
+ "title": {
+ "de": "Spezial:Suche",
+ # ...
+ "zh": "Special:\u641c\u7d22"
+ },
+ },
+
+ """
+
+ engine_traits.custom['wiki_netloc'] = {}
+ engine_traits.custom['title'] = {}
+
+ title_map = {
+ 'de': 'Spezial:Suche',
+ 'fa': 'ویژه:جستجو',
+ 'ja': '特別:検索',
+ 'zh': 'Special:搜索',
+ }
+
+ resp = network.get('https://wiki.archlinux.org/')
+ if not resp.ok:
+ print("ERROR: response from wiki.archlinix.org is not OK.")
+
+ dom = lxml.html.fromstring(resp.text)
+ for a in eval_xpath_list(dom, "//a[@class='interlanguage-link-target']"):
+
+ sxng_tag = language_tag(babel.Locale.parse(a.get('lang'), sep='-'))
+ # zh_Hans --> zh
+ sxng_tag = sxng_tag.split('_')[0]
+
+ netloc = urlparse(a.get('href')).netloc
+ if netloc != 'wiki.archlinux.org':
+ title = title_map.get(sxng_tag)
+ if not title:
+ print("ERROR: title tag from %s (%s) is unknown" % (netloc, sxng_tag))
+ continue
+ engine_traits.custom['wiki_netloc'][sxng_tag] = netloc
+ engine_traits.custom['title'][sxng_tag] = title
+
+ eng_tag = extract_text(eval_xpath_list(a, ".//span"))
+ engine_traits.languages[sxng_tag] = eng_tag
+
+ engine_traits.languages['en'] = 'English'
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 783c0056a..0f85c7036 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -1,16 +1,53 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Bing (Web)
+"""This is the implementation of the Bing-WEB engine. Some of these
+implementations are shared by other engines:
+
+- :ref:`bing images engine`
+- :ref:`bing news engine`
+- :ref:`bing videos engine`
+
+On the `preference page`_ Bing offers a lot of languages and regions (see section
+'Search results languages' and 'Country/region'). However, the abundant choice
+does not correspond to reality, where Bing has a full-text indexer only for a
+limited number of languages. For example: you can select a language like Māori
+but you never get a result in this language.
+
+What comes a bit closer to the truth are the `search-APIs`_ but they don't seem
+to be completely correct either (if you take a closer look you will find some
+inaccuracies there too):
+
+- :py:obj:`searx.engines.bing.bing_traits_url`
+- :py:obj:`searx.engines.bing_videos.bing_traits_url`
+- :py:obj:`searx.engines.bing_images.bing_traits_url`
+- :py:obj:`searx.engines.bing_news.bing_traits_url`
+
+.. _preference page: https://www.bing.com/account/general
+.. _search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/
-- https://github.com/searx/searx/issues/2019#issuecomment-648227442
"""
-# pylint: disable=too-many-branches
+# pylint: disable=too-many-branches, invalid-name
+from typing import TYPE_CHECKING
+import datetime
import re
-from urllib.parse import urlencode, urlparse, parse_qs
+import uuid
+from urllib.parse import urlencode
from lxml import html
-from searx.utils import eval_xpath, extract_text, eval_xpath_list, match_language, eval_xpath_getindex
-from searx.network import multi_requests, Request
+import babel
+import babel.languages
+
+from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
+from searx import network
+from searx.locales import language_tag, region_tag
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+traits: EngineTraits
about = {
"website": 'https://www.bing.com',
@@ -21,56 +58,124 @@ about = {
"results": 'HTML',
}
+send_accept_language_header = True
+"""Bing tries to guess user's language and territory from the HTTP
+Accept-Language. Optionally the user can select a search-language (can be
+different to the UI language) and a region (market code)."""
+
# engine dependent config
categories = ['general', 'web']
paging = True
-time_range_support = False
-safesearch = False
-send_accept_language_header = True
-supported_languages_url = 'https://www.bing.com/account/general'
-language_aliases = {}
-
-# search-url
-base_url = 'https://www.bing.com/'
+time_range_support = True
+safesearch = True
+safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} # cookie: ADLT=STRICT
-# initial query: https://www.bing.com/search?q=foo&search=&form=QBLH
-inital_query = 'search?{query}&search=&form=QBLH'
+base_url = 'https://www.bing.com/search'
+"""Bing (Web) search URL"""
-# following queries: https://www.bing.com/search?q=foo&search=&first=11&FORM=PERE
-page_query = 'search?{query}&search=&first={offset}&FORM=PERE'
+bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/market-codes'
+"""Bing (Web) search API description"""
def _get_offset_from_pageno(pageno):
return (pageno - 1) * 10 + 1
-def request(query, params):
+def set_bing_cookies(params, engine_language, engine_region, SID):
+
+ # set cookies
+ # -----------
+
+ params['cookies']['_EDGE_V'] = '1'
+
+ # _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw
+ _EDGE_S = [
+ 'F=1',
+ 'SID=%s' % SID,
+ 'mkt=%s' % engine_region.lower(),
+ 'ui=%s' % engine_language.lower(),
+ ]
+ params['cookies']['_EDGE_S'] = '&'.join(_EDGE_S)
+ logger.debug("cookie _EDGE_S=%s", params['cookies']['_EDGE_S'])
+
+ # "_EDGE_CD": "m=zh-tw",
+
+ _EDGE_CD = [ # pylint: disable=invalid-name
+ 'm=%s' % engine_region.lower(), # search region: zh-cn
+ 'u=%s' % engine_language.lower(), # UI: en-us
+ ]
+
+ params['cookies']['_EDGE_CD'] = '&'.join(_EDGE_CD) + ';'
+ logger.debug("cookie _EDGE_CD=%s", params['cookies']['_EDGE_CD'])
- offset = _get_offset_from_pageno(params.get('pageno', 1))
+ SRCHHPGUSR = [ # pylint: disable=invalid-name
+ 'SRCHLANG=%s' % engine_language,
+ # Trying to set ADLT cookie here seems not to have any effect, I assume
+ # there is some age verification by a cookie (and/or session ID) needed,
+ # to disable the SafeSearch.
+ 'ADLT=%s' % safesearch_types.get(params['safesearch'], 'DEMOTE'),
+ ]
+ params['cookies']['SRCHHPGUSR'] = '&'.join(SRCHHPGUSR)
+ logger.debug("cookie SRCHHPGUSR=%s", params['cookies']['SRCHHPGUSR'])
+
+
+def request(query, params):
+ """Assemble a Bing-Web request."""
- # logger.debug("params['pageno'] --> %s", params.get('pageno'))
- # logger.debug(" offset --> %s", offset)
+ engine_region = traits.get_region(params['searxng_locale'], 'en-US')
+ engine_language = traits.get_language(params['searxng_locale'], 'en')
- search_string = page_query
- if offset == 1:
- search_string = inital_query
+ SID = uuid.uuid1().hex.upper()
+ CVID = uuid.uuid1().hex.upper()
- if params['language'] == 'all':
- lang = 'EN'
- else:
- lang = match_language(params['language'], supported_languages, language_aliases)
+ set_bing_cookies(params, engine_language, engine_region, SID)
- query = 'language:{} {}'.format(lang.split('-')[0].upper(), query)
+ # build URL query
+ # ---------------
- search_path = search_string.format(query=urlencode({'q': query}), offset=offset)
+ # query term
+ page = int(params.get('pageno', 1))
+ query_params = {
+ # fmt: off
+ 'q': query,
+ 'pq': query,
+ 'cvid': CVID,
+ 'qs': 'n',
+ 'sp': '-1'
+ # fmt: on
+ }
- if offset > 1:
- referer = base_url + inital_query.format(query=urlencode({'q': query}))
+ # page
+ if page > 1:
+ referer = base_url + '?' + urlencode(query_params)
params['headers']['Referer'] = referer
logger.debug("headers.Referer --> %s", referer)
- params['url'] = base_url + search_path
- params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
+ query_params['first'] = _get_offset_from_pageno(page)
+
+ if page == 2:
+ query_params['FORM'] = 'PERE'
+ elif page > 2:
+ query_params['FORM'] = 'PERE%s' % (page - 2)
+
+ filters = ''
+ if params['time_range']:
+ query_params['filt'] = 'custom'
+
+ if params['time_range'] == 'day':
+ filters = 'ex1:"ez1"'
+ elif params['time_range'] == 'week':
+ filters = 'ex1:"ez2"'
+ elif params['time_range'] == 'month':
+ filters = 'ex1:"ez3"'
+ elif params['time_range'] == 'year':
+ epoch_1970 = datetime.date(1970, 1, 1)
+ today_no = (datetime.date.today() - epoch_1970).days
+ filters = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no)
+
+ params['url'] = base_url + '?' + urlencode(query_params)
+ if filters:
+ params['url'] = params['url'] + '&filters=' + filters
return params
@@ -107,7 +212,8 @@ def response(resp):
url_cite = extract_text(eval_xpath(result, './/div[@class="b_attribution"]/cite'))
# Bing can shorten the URL either at the end or in the middle of the string
if (
- url_cite.startswith('https://')
+ url_cite
+ and url_cite.startswith('https://')
and '…' not in url_cite
and '...' not in url_cite
and '›' not in url_cite
@@ -127,9 +233,9 @@ def response(resp):
# resolve all Bing redirections in parallel
request_list = [
- Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
+ network.Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
]
- response_list = multi_requests(request_list)
+ response_list = network.multi_requests(request_list)
for i, redirect_response in enumerate(response_list):
if not isinstance(redirect_response, Exception):
results[url_to_resolve_index[i]]['url'] = redirect_response.headers['location']
@@ -157,27 +263,71 @@ def response(resp):
return results
-# get supported languages from their site
-def _fetch_supported_languages(resp):
+def fetch_traits(engine_traits: EngineTraits):
+ """Fetch languages and regions from Bing-Web."""
+
+ xpath_market_codes = '//table[1]/tbody/tr/td[3]'
+ # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
+ xpath_language_codes = '//table[3]/tbody/tr/td[2]'
+
+ _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)
+
+
+def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str):
+
+ # insert alias to map from a language (zh) to a language + script (zh_Hans)
+ engine_traits.languages['zh'] = 'zh-hans'
- lang_tags = set()
+ resp = network.get(url)
+
+ if not resp.ok:
+ print("ERROR: response from peertube is not OK.")
dom = html.fromstring(resp.text)
- lang_links = eval_xpath(dom, '//div[@id="language-section"]//li')
- for _li in lang_links:
+ map_lang = {'jp': 'ja'}
+ for td in eval_xpath(dom, xpath_language_codes):
+ eng_lang = td.text
- href = eval_xpath(_li, './/@href')[0]
- (_scheme, _netloc, _path, _params, query, _fragment) = urlparse(href)
- query = parse_qs(query, keep_blank_values=True)
+ if eng_lang in ('en-gb', 'pt-br'):
+ # language 'en' is already in the list and a language 'en-gb' can't
+ # be handled in SearXNG, same with pt-br which is covered by pt-pt.
+ continue
- # fmt: off
- setlang = query.get('setlang', [None, ])[0]
- # example: 'mn-Cyrl-MN' --> '['mn', 'Cyrl-MN']
- lang, nation = (setlang.split('-', maxsplit=1) + [None,])[:2] # fmt: skip
- # fmt: on
+ babel_lang = map_lang.get(eng_lang, eng_lang).replace('-', '_')
+ try:
+ sxng_tag = language_tag(babel.Locale.parse(babel_lang))
+ except babel.UnknownLocaleError:
+ print("ERROR: language (%s) is unknown by babel" % (eng_lang))
+ continue
+ conflict = engine_traits.languages.get(sxng_tag)
+ if conflict:
+ if conflict != eng_lang:
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
+ continue
+ engine_traits.languages[sxng_tag] = eng_lang
- tag = lang + '-' + nation if nation else lang
- lang_tags.add(tag)
+ map_region = {
+ 'en-ID': 'id_ID',
+ 'no-NO': 'nb_NO',
+ }
- return list(lang_tags)
+ for td in eval_xpath(dom, xpath_market_codes):
+ eng_region = td.text
+ babel_region = map_region.get(eng_region, eng_region).replace('-', '_')
+
+ if eng_region == 'en-WW':
+ engine_traits.all_locale = eng_region
+ continue
+
+ try:
+ sxng_tag = region_tag(babel.Locale.parse(babel_region))
+ except babel.UnknownLocaleError:
+ print("ERROR: region (%s) is unknown by babel" % (eng_region))
+ continue
+ conflict = engine_traits.regions.get(sxng_tag)
+ if conflict:
+ if conflict != eng_region:
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_region))
+ continue
+ engine_traits.regions[sxng_tag] = eng_region
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 107ce3cff..bd3a34aa5 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -1,20 +1,30 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Bing (Images)
-
+"""Bing-Images: description see :py:obj:`searx.engines.bing`.
"""
+# pylint: disable=invalid-name
+
-from json import loads
+from typing import TYPE_CHECKING
+import uuid
+import json
from urllib.parse import urlencode
from lxml import html
-from searx.utils import match_language
-from searx.engines.bing import language_aliases
-from searx.engines.bing import ( # pylint: disable=unused-import
- _fetch_supported_languages,
- supported_languages_url,
+from searx.enginelib.traits import EngineTraits
+from searx.engines.bing import (
+ set_bing_cookies,
+ _fetch_traits,
)
+from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+traits: EngineTraits
# about
about = {
@@ -31,77 +41,92 @@ categories = ['images', 'web']
paging = True
safesearch = True
time_range_support = True
-send_accept_language_header = True
-supported_languages_url = 'https://www.bing.com/account/general'
-number_of_results = 28
-# search-url
-base_url = 'https://www.bing.com/'
-search_string = (
+base_url = 'https://www.bing.com/images/async'
+"""Bing (Images) search URL"""
+
+bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-image-search/reference/market-codes'
+"""Bing (Images) search API description"""
+
+time_map = {
# fmt: off
- 'images/search'
- '?{query}'
- '&count={count}'
- '&first={first}'
- '&tsc=ImageHoverTitle'
+ 'day': 60 * 24,
+ 'week': 60 * 24 * 7,
+ 'month': 60 * 24 * 31,
+ 'year': 60 * 24 * 365,
# fmt: on
-)
-time_range_string = '&qft=+filterui:age-lt{interval}'
-time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'}
-
-# safesearch definitions
-safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}
+}
-# do search-request
def request(query, params):
- offset = ((params['pageno'] - 1) * number_of_results) + 1
+ """Assemble a Bing-Image request."""
- search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset)
+ engine_region = traits.get_region(params['searxng_locale'], 'en-US')
+ engine_language = traits.get_language(params['searxng_locale'], 'en')
- language = match_language(params['language'], supported_languages, language_aliases).lower()
+ SID = uuid.uuid1().hex.upper()
+ set_bing_cookies(params, engine_language, engine_region, SID)
- params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
+ # build URL query
+ # - example: https://www.bing.com/images/async?q=foo&first=155&count=35
- params['cookies']['_EDGE_S'] = 'mkt=' + language + '&ui=' + language + '&F=1'
+ query_params = {
+ # fmt: off
+ 'q': query,
+ 'async' : 'content',
+ # to simplify the page count let's use the default of 35 images per page
+ 'first' : (int(params.get('pageno', 1)) - 1) * 35 + 1,
+ 'count' : 35,
+ # fmt: on
+ }
- params['url'] = base_url + search_path
- if params['time_range'] in time_range_dict:
- params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
+ # time range
+ # - example: one year (525600 minutes) 'qft=+filterui:age-lt525600'
+
+ if params['time_range']:
+ query_params['qft'] = 'filterui:age-lt%s' % time_map[params['time_range']]
+
+ params['url'] = base_url + '?' + urlencode(query_params)
return params
-# get response from search-request
def response(resp):
- results = []
+ """Get response from Bing-Images"""
+ results = []
dom = html.fromstring(resp.text)
- # parse results
- for result in dom.xpath('//div[@class="imgpt"]'):
- img_format = result.xpath('./div[contains(@class, "img_info")]/span/text()')[0]
- # Microsoft seems to experiment with this code so don't make the path too specific,
- # just catch the text section for the first anchor in img_info assuming this to be
- # the originating site.
- source = result.xpath('./div[contains(@class, "img_info")]//a/text()')[0]
+ for result in dom.xpath('//ul[contains(@class, "dgControl_list")]/li'):
- m = loads(result.xpath('./a/@m')[0])
+ metadata = result.xpath('.//a[@class="iusc"]/@m')
+ if not metadata:
+ continue
- # strip 'Unicode private use area' highlighting, they render to Tux
- # the Linux penguin and a standing diamond on my machine...
- title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
+ metadata = json.loads(result.xpath('.//a[@class="iusc"]/@m')[0])
+ title = ' '.join(result.xpath('.//div[@class="infnmpt"]//a/text()')).strip()
+ img_format = ' '.join(result.xpath('.//div[@class="imgpt"]/div/span/text()')).strip()
+ source = ' '.join(result.xpath('.//div[@class="imgpt"]//div[@class="lnkw"]//a/text()')).strip()
results.append(
{
'template': 'images.html',
- 'url': m['purl'],
- 'thumbnail_src': m['turl'],
- 'img_src': m['murl'],
- 'content': '',
+ 'url': metadata['purl'],
+ 'thumbnail_src': metadata['turl'],
+ 'img_src': metadata['murl'],
+ 'content': metadata['desc'],
'title': title,
'source': source,
'img_format': img_format,
}
)
-
return results
+
+
+def fetch_traits(engine_traits: EngineTraits):
+ """Fetch languages and regions from Bing-Images."""
+
+ xpath_market_codes = '//table[1]/tbody/tr/td[3]'
+ # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
+ xpath_language_codes = '//table[3]/tbody/tr/td[2]'
+
+ _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 7eea17bb4..d8c63857a 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -1,24 +1,30 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Bing (News)
+"""Bing-News: description see :py:obj:`searx.engines.bing`.
"""
-from urllib.parse import (
- urlencode,
- urlparse,
- parse_qsl,
- quote,
-)
-from datetime import datetime
-from dateutil import parser
-from lxml import etree
-from lxml.etree import XPath
-from searx.utils import match_language, eval_xpath_getindex
-from searx.engines.bing import ( # pylint: disable=unused-import
- language_aliases,
- _fetch_supported_languages,
- supported_languages_url,
+# pylint: disable=invalid-name
+
+from typing import TYPE_CHECKING
+import uuid
+from urllib.parse import urlencode
+
+from lxml import html
+
+from searx.enginelib.traits import EngineTraits
+from searx.engines.bing import (
+ set_bing_cookies,
+ _fetch_traits,
)
+from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+traits: EngineTraits
+
# about
about = {
@@ -34,108 +40,111 @@ about = {
categories = ['news']
paging = True
time_range_support = True
-send_accept_language_header = True
-
-# search-url
-base_url = 'https://www.bing.com/'
-search_string = 'news/search?{query}&first={offset}&format=RSS'
-search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS'
-time_range_dict = {'day': '7', 'week': '8', 'month': '9'}
-
-
-def url_cleanup(url_string):
- """remove click"""
-
- parsed_url = urlparse(url_string)
- if parsed_url.netloc == 'www.bing.com' and parsed_url.path == '/news/apiclick.aspx':
- query = dict(parse_qsl(parsed_url.query))
- url_string = query.get('url', None)
- return url_string
-
-
-def image_url_cleanup(url_string):
- """replace the http://*bing.com/th?id=... by https://www.bing.com/th?id=..."""
-
- parsed_url = urlparse(url_string)
- if parsed_url.netloc.endswith('bing.com') and parsed_url.path == '/th':
- query = dict(parse_qsl(parsed_url.query))
- url_string = "https://www.bing.com/th?id=" + quote(query.get('id'))
- return url_string
-
-
-def _get_url(query, language, offset, time_range):
- if time_range in time_range_dict:
- search_path = search_string_with_time.format(
- # fmt: off
- query = urlencode({
- 'q': query,
- 'setmkt': language
- }),
- offset = offset,
- interval = time_range_dict[time_range]
- # fmt: on
- )
- else:
- # e.g. setmkt=de-de&setlang=de
- search_path = search_string.format(
- # fmt: off
- query = urlencode({
- 'q': query,
- 'setmkt': language
- }),
- offset = offset
- # fmt: on
- )
- return base_url + search_path
+time_map = {
+ 'day': '4',
+ 'week': '8',
+ 'month': '9',
+}
+"""A string '4' means *last hour*. We use *last hour* for ``day`` here since the
+difference of *last day* and *last week* in the result list is just marginal.
+"""
+
+base_url = 'https://www.bing.com/news/infinitescrollajax'
+"""Bing (News) search URL"""
+
+bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-news-search/reference/market-codes'
+"""Bing (News) search API description"""
+
+mkt_alias = {
+ 'zh': 'en-WW',
+ 'zh-CN': 'en-WW',
+}
+"""Bing News has an official market code 'zh-CN' but we won't get a result with
+this market code. For 'zh' and 'zh-CN' we better use the *Worldwide aggregate*
+market code (en-WW).
+"""
def request(query, params):
+ """Assemble a Bing-News request."""
+
+ sxng_locale = params['searxng_locale']
+ engine_region = traits.get_region(mkt_alias.get(sxng_locale, sxng_locale), traits.all_locale)
+ engine_language = traits.get_language(sxng_locale, 'en')
+
+ SID = uuid.uuid1().hex.upper()
+ set_bing_cookies(params, engine_language, engine_region, SID)
- if params['time_range'] and params['time_range'] not in time_range_dict:
- return params
+ # build URL query
+ #
+ # example: https://www.bing.com/news/infinitescrollajax?q=london&first=1
- offset = (params['pageno'] - 1) * 10 + 1
- if params['language'] == 'all':
- language = 'en-US'
- else:
- language = match_language(params['language'], supported_languages, language_aliases)
- params['url'] = _get_url(query, language, offset, params['time_range'])
+ query_params = {
+ # fmt: off
+ 'q': query,
+ 'InfiniteScroll': 1,
+ # to simplify the page count let's use the default of 10 news items per page
+ 'first' : (int(params.get('pageno', 1)) - 1) * 10 + 1,
+ # fmt: on
+ }
+
+ if params['time_range']:
+ # qft=interval:"7"
+ query_params['qft'] = 'qft=interval="%s"' % time_map.get(params['time_range'], '9')
+
+ params['url'] = base_url + '?' + urlencode(query_params)
return params
def response(resp):
-
+ """Get response from Bing-News"""
results = []
- rss = etree.fromstring(resp.content)
- namespaces = rss.nsmap
-
- for item in rss.xpath('./channel/item'):
- # url / title / content
- url = url_cleanup(eval_xpath_getindex(item, './link/text()', 0, default=None))
- title = eval_xpath_getindex(item, './title/text()', 0, default=url)
- content = eval_xpath_getindex(item, './description/text()', 0, default='')
-
- # publishedDate
- publishedDate = eval_xpath_getindex(item, './pubDate/text()', 0, default=None)
- try:
- publishedDate = parser.parse(publishedDate, dayfirst=False)
- except TypeError:
- publishedDate = datetime.now()
- except ValueError:
- publishedDate = datetime.now()
-
- # thumbnail
- thumbnail = eval_xpath_getindex(item, XPath('./News:Image/text()', namespaces=namespaces), 0, default=None)
- if thumbnail is not None:
- thumbnail = image_url_cleanup(thumbnail)
-
- # append result
- if thumbnail is not None:
- results.append(
- {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content, 'img_src': thumbnail}
- )
- else:
- results.append({'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content})
+
+ if not resp.ok or not resp.text:
+ return results
+
+ dom = html.fromstring(resp.text)
+
+ for newsitem in dom.xpath('//div[contains(@class, "newsitem")]'):
+
+ url = newsitem.xpath('./@url')[0]
+ title = ' '.join(newsitem.xpath('.//div[@class="caption"]//a[@class="title"]/text()')).strip()
+ content = ' '.join(newsitem.xpath('.//div[@class="snippet"]/text()')).strip()
+ thumbnail = None
+ author = newsitem.xpath('./@data-author')[0]
+ metadata = ' '.join(newsitem.xpath('.//div[@class="source"]/span/text()')).strip()
+
+ img_src = newsitem.xpath('.//a[@class="imagelink"]//img/@src')
+ if img_src:
+ thumbnail = 'https://www.bing.com/' + img_src[0]
+
+ results.append(
+ {
+ 'url': url,
+ 'title': title,
+ 'content': content,
+ 'img_src': thumbnail,
+ 'author': author,
+ 'metadata': metadata,
+ }
+ )
return results
+
+
+def fetch_traits(engine_traits: EngineTraits):
+ """Fetch languages and regions from Bing-News.
+
+ The :py:obj:`description <searx.engines.bing_news.bing_traits_url>` of the
+ first table says *"query parameter when calling the Video Search API."*
+ That is why the 4th table, "News Category API markets", is used for the
+ ``xpath_market_codes``.
+
+ """
+
+ xpath_market_codes = '//table[4]/tbody/tr/td[3]'
+ # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
+ xpath_language_codes = '//table[3]/tbody/tr/td[2]'
+
+ _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)
diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py
index 85071de21..8ee0bb66e 100644
--- a/searx/engines/bing_videos.py
+++ b/searx/engines/bing_videos.py
@@ -1,21 +1,30 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Bing (Videos)
-
+"""Bing-Videos: description see :py:obj:`searx.engines.bing`.
"""
+# pylint: disable=invalid-name
-from json import loads
+from typing import TYPE_CHECKING
+import uuid
+import json
from urllib.parse import urlencode
from lxml import html
-from searx.utils import match_language
-from searx.engines.bing import language_aliases
-
-from searx.engines.bing import ( # pylint: disable=unused-import
- _fetch_supported_languages,
- supported_languages_url,
+from searx.enginelib.traits import EngineTraits
+from searx.engines.bing import (
+ set_bing_cookies,
+ _fetch_traits,
)
+from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+traits: EngineTraits
+
about = {
"website": 'https://www.bing.com/videos',
@@ -26,65 +35,76 @@ about = {
"results": 'HTML',
}
+# engine dependent config
categories = ['videos', 'web']
paging = True
safesearch = True
time_range_support = True
-send_accept_language_header = True
-number_of_results = 28
-base_url = 'https://www.bing.com/'
-search_string = (
+base_url = 'https://www.bing.com/videos/asyncv2'
+"""Bing (Videos) async search URL."""
+
+bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-video-search/reference/market-codes'
+"""Bing (Video) search API description"""
+
+time_map = {
# fmt: off
- 'videos/search'
- '?{query}'
- '&count={count}'
- '&first={first}'
- '&scope=video'
- '&FORM=QBLH'
+ 'day': 60 * 24,
+ 'week': 60 * 24 * 7,
+ 'month': 60 * 24 * 31,
+ 'year': 60 * 24 * 365,
# fmt: on
-)
-time_range_string = '&qft=+filterui:videoage-lt{interval}'
-time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'}
-
-# safesearch definitions
-safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}
+}
-# do search-request
def request(query, params):
- offset = ((params['pageno'] - 1) * number_of_results) + 1
+ """Assemble a Bing-Video request."""
- search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset)
+ engine_region = traits.get_region(params['searxng_locale'], 'en-US')
+ engine_language = traits.get_language(params['searxng_locale'], 'en')
- # safesearch cookie
- params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
+ SID = uuid.uuid1().hex.upper()
+ set_bing_cookies(params, engine_language, engine_region, SID)
- # language cookie
- language = match_language(params['language'], supported_languages, language_aliases).lower()
- params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1'
+ # build URL query
+ #
+ # example: https://www.bing.com/videos/asyncv2?q=foo&async=content&first=1&count=35
- # query and paging
- params['url'] = base_url + search_path
+ query_params = {
+ # fmt: off
+ 'q': query,
+ 'async' : 'content',
+ # to simplify the page count let's use the default of 35 videos per page
+ 'first' : (int(params.get('pageno', 1)) - 1) * 35 + 1,
+ 'count' : 35,
+ # fmt: on
+ }
# time range
- if params['time_range'] in time_range_dict:
- params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
+ #
+ # example: one week (10080 minutes) '&qft= filterui:videoage-lt10080' '&form=VRFLTR'
+
+ if params['time_range']:
+ query_params['form'] = 'VRFLTR'
+ query_params['qft'] = ' filterui:videoage-lt%s' % time_map[params['time_range']]
+
+ params['url'] = base_url + '?' + urlencode(query_params)
return params
-# get response from search-request
def response(resp):
+ """Get response from Bing-Video"""
results = []
dom = html.fromstring(resp.text)
- for result in dom.xpath('//div[@class="dg_u"]/div[contains(@class, "mc_vtvc")]'):
- metadata = loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0])
+ for result in dom.xpath('//div[@class="dg_u"]//div[contains(@id, "mc_vtvc_video")]'):
+ metadata = json.loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0])
info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip()
content = '{0} - {1}'.format(metadata['du'], info)
- thumbnail = '{0}th?id={1}'.format(base_url, metadata['thid'])
+ thumbnail = result.xpath('.//div[contains(@class, "mc_vtvc_th")]//img/@src')[0]
+
results.append(
{
'url': metadata['murl'],
@@ -96,3 +116,13 @@ def response(resp):
)
return results
+
+
+def fetch_traits(engine_traits: EngineTraits):
+ """Fetch languages and regions from Bing-Videos."""
+
+ xpath_market_codes = '//table[1]/tbody/tr/td[3]'
+ # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
+ xpath_language_codes = '//table[3]/tbody/tr/td[2]'
+
+ _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 7dd84dd27..d734ec3c8 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -1,17 +1,35 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""Dailymotion (Videos)
+# lint: pylint
+"""
+Dailymotion (Videos)
+~~~~~~~~~~~~~~~~~~~~
+
+.. _REST GET: https://developers.dailymotion.com/tools/
+.. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters
+.. _Video filters API: https://developers.dailymotion.com/api/#video-filters
+.. _Fields selection: https://developers.dailymotion.com/api/#fields-selection
"""
-from typing import Set
+from typing import TYPE_CHECKING
+
from datetime import datetime, timedelta
from urllib.parse import urlencode
import time
import babel
from searx.exceptions import SearxEngineAPIException
-from searx.network import raise_for_httperror
+from searx import network
from searx.utils import html_to_text
+from searx.locales import region_tag, language_tag
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+traits: EngineTraits
# about
about = {
@@ -37,11 +55,24 @@ time_delta_dict = {
}
safesearch = True
-safesearch_params = {2: '&is_created_for_kids=true', 1: '&is_created_for_kids=true', 0: ''}
+safesearch_params = {
+ 2: {'is_created_for_kids': 'true'},
+ 1: {'is_created_for_kids': 'true'},
+ 0: {},
+}
+"""True if this video is "Created for Kids" / intends to target an audience
+under the age of 16 (``is_created_for_kids`` in `Video filters API`_ )
+"""
-# search-url
-# - https://developers.dailymotion.com/tools/
-# - https://www.dailymotion.com/doc/api/obj-video.html
+family_filter_map = {
+ 2: 'true',
+ 1: 'true',
+ 0: 'false',
+}
+"""By default, the family filter is turned on. Setting this parameter to
+``false`` will stop filtering-out explicit content from searches and global
+contexts (``family_filter`` in `Global API Parameters`_ ).
+"""
result_fields = [
'allow_embed',
@@ -53,27 +84,21 @@ result_fields = [
'thumbnail_360_url',
'id',
]
-search_url = (
- 'https://api.dailymotion.com/videos?'
- 'fields={fields}&password_protected={password_protected}&private={private}&sort={sort}&limit={limit}'
-).format(
- fields=','.join(result_fields),
- password_protected='false',
- private='false',
- sort='relevance',
- limit=number_of_results,
-)
-iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
+"""`Fields selection`_, by default, a few fields are returned. To request more
+specific fields, the ``fields`` parameter is used with the list of fields
+SearXNG needs in the response to build a video result list.
+"""
-# The request query filters by 'languages' & 'country', therefore instead of
-# fetching only languages we need to fetch locales.
-supported_languages_url = 'https://api.dailymotion.com/locales'
-supported_languages_iso639: Set[str] = set()
+search_url = 'https://api.dailymotion.com/videos?'
+"""URL to retrieve a list of videos.
+- `REST GET`_
+- `Global API Parameters`_
+- `Video filters API`_
+"""
-def init(_engine_settings):
- global supported_languages_iso639
- supported_languages_iso639 = set([language.split('_')[0] for language in supported_languages])
+iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
+"""URL template to embed video in SearXNG's result list."""
def request(query, params):
@@ -81,34 +106,42 @@ def request(query, params):
if not query:
return False
- language = params['language']
- if language == 'all':
- language = 'en-US'
- locale = babel.Locale.parse(language, sep='-')
+ eng_region = traits.get_region(params['searxng_locale'], 'en_US')
+ eng_lang = traits.get_language(params['searxng_locale'], 'en')
- language_iso639 = locale.language
- if locale.language not in supported_languages_iso639:
- language_iso639 = 'en'
-
- query_args = {
+ args = {
'search': query,
- 'languages': language_iso639,
+ 'family_filter': family_filter_map.get(params['safesearch'], 'false'),
+ 'thumbnail_ratio': 'original', # original|widescreen|square
+ # https://developers.dailymotion.com/api/#video-filters
+ 'languages': eng_lang,
'page': params['pageno'],
+ 'password_protected': 'false',
+ 'private': 'false',
+ 'sort': 'relevance',
+ 'limit': number_of_results,
+ 'fields': ','.join(result_fields),
}
- if locale.territory:
- localization = locale.language + '_' + locale.territory
- if localization in supported_languages:
- query_args['country'] = locale.territory
+ args.update(safesearch_params.get(params['safesearch'], {}))
+
+ # Don't add localization and country arguments if the user selected only a
+ # language (:de, :en, ..) without a region
+
+ if len(params['searxng_locale'].split('-')) > 1:
+ # https://developers.dailymotion.com/api/#global-parameters
+ args['localization'] = eng_region
+ args['country'] = eng_region.split('_')[1]
+ # Insufficient rights for the `ams_country' parameter of route `GET /videos'
+ # 'ams_country': eng_region.split('_')[1],
time_delta = time_delta_dict.get(params["time_range"])
if time_delta:
created_after = datetime.now() - time_delta
- query_args['created_after'] = datetime.timestamp(created_after)
+ args['created_after'] = datetime.timestamp(created_after)
- query_str = urlencode(query_args)
- params['url'] = search_url + '&' + query_str + safesearch_params.get(params['safesearch'], '')
- params['raise_for_httperror'] = False
+ query_str = urlencode(args)
+ params['url'] = search_url + query_str
return params
@@ -123,7 +156,7 @@ def response(resp):
if 'error' in search_res:
raise SearxEngineAPIException(search_res['error'].get('message'))
- raise_for_httperror(resp)
+ network.raise_for_httperror(resp)
# parse results
for res in search_res.get('list', []):
@@ -167,7 +200,53 @@ def response(resp):
return results
-# get supported languages from their site
-def _fetch_supported_languages(resp):
- response_json = resp.json()
- return [item['locale'] for item in response_json['list']]
+def fetch_traits(engine_traits: EngineTraits):
+ """Fetch locales & languages from dailymotion.
+
+ Locales fetched from `api/locales <https://api.dailymotion.com/locales>`_.
+ There are duplications in the locale codes returned from Dailymotion which
+ can be ignored::
+
+ en_EN --> en_GB, en_US
+ ar_AA --> ar_EG, ar_AE, ar_SA
+
+ The language list `api/languages <https://api.dailymotion.com/languages>`_
+ contains over 7000 *language* codes (see PR1071_). We use only those
+ language codes that are used in the locales.
+
+ .. _PR1071: https://github.com/searxng/searxng/pull/1071
+
+ """
+
+ resp = network.get('https://api.dailymotion.com/locales')
+ if not resp.ok:
+ print("ERROR: response from dailymotion/locales is not OK.")
+
+ for item in resp.json()['list']:
+ eng_tag = item['locale']
+ if eng_tag in ('en_EN', 'ar_AA'):
+ continue
+ try:
+ sxng_tag = region_tag(babel.Locale.parse(eng_tag))
+ except babel.UnknownLocaleError:
+ print("ERROR: item unknown --> %s" % item)
+ continue
+
+ conflict = engine_traits.regions.get(sxng_tag)
+ if conflict:
+ if conflict != eng_tag:
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+ continue
+ engine_traits.regions[sxng_tag] = eng_tag
+
+ locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()]
+
+ resp = network.get('https://api.dailymotion.com/languages')
+ if not resp.ok:
+ print("ERROR: response from dailymotion/languages is not OK.")
+
+ for item in resp.json()['list']:
+ eng_tag = item['code']
+ if eng_tag in locale_lang_list:
+ sxng_tag = language_tag(babel.Locale.parse(eng_tag))
+ engine_traits.languages[sxng_tag] = eng_tag
diff --git a/searx/engines/demo_offline.py b/searx/engines/demo_offline.py
index aeb74f443..9d6e3b52d 100644
--- a/searx/engines/demo_offline.py
+++ b/searx/engines/demo_offline.py
@@ -63,7 +63,7 @@ def search(query, request_params):
for row in result_list:
entry = {
'query': query,
- 'language': request_params['language'],
+ 'language': request_params['searxng_locale'],
'value': row.get("value"),
# choose a result template or comment out to use the *default*
'template': 'key-value.html',
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 2a7956ca8..85e977bdb 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -1,71 +1,207 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""DuckDuckGo Lite
+"""
+DuckDuckGo Lite
+~~~~~~~~~~~~~~~
"""
-from json import loads
-
-from lxml.html import fromstring
+from typing import TYPE_CHECKING
+from urllib.parse import urlencode
+import json
+import babel
+import lxml.html
+from searx import (
+ network,
+ locales,
+ redislib,
+)
+from searx import redisdb
from searx.utils import (
- dict_subset,
eval_xpath,
eval_xpath_getindex,
extract_text,
- match_language,
)
-from searx.network import get
+from searx.enginelib.traits import EngineTraits
+from searx.exceptions import SearxEngineAPIException
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+traits: EngineTraits
-# about
about = {
"website": 'https://lite.duckduckgo.com/lite/',
"wikidata_id": 'Q12805',
- "official_api_documentation": 'https://duckduckgo.com/api',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
+send_accept_language_header = True
+"""DuckDuckGo-Lite tries to guess the user's preferred language from the HTTP
+``Accept-Language``. Optionally the user can select a region filter (but not a
+language).
+"""
+
# engine dependent config
categories = ['general', 'web']
paging = True
-supported_languages_url = 'https://duckduckgo.com/util/u588.js'
time_range_support = True
-send_accept_language_header = True
+safesearch = True # user can't select but the results are filtered
-language_aliases = {
- 'ar-SA': 'ar-XA',
- 'es-419': 'es-XL',
- 'ja': 'jp-JP',
- 'ko': 'kr-KR',
- 'sl-SI': 'sl-SL',
- 'zh-TW': 'tzh-TW',
- 'zh-HK': 'tzh-HK',
-}
+url = 'https://lite.duckduckgo.com/lite/'
+# url_ping = 'https://duckduckgo.com/t/sl_l'
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
+form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
-# search-url
-url = 'https://lite.duckduckgo.com/lite/'
-url_ping = 'https://duckduckgo.com/t/sl_l'
-# match query's language to a region code that duckduckgo will accept
-def get_region_code(lang, lang_list=None):
- if lang == 'all':
- return None
+def cache_vqd(query, value):
+ """Caches a ``vqd`` value from a query.
+
+ The vqd value depends on the query string and is needed for the follow up
+ pages or the images loaded by a XMLHttpRequest:
+
+ - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
+ - DuckDuckGo Images: `https://duckduckgo.com/i.js?q=...&vqd=...`
+
+ """
+ c = redisdb.client()
+ if c:
+ logger.debug("cache vqd value: %s", value)
+ key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
+ c.set(key, value, ex=600)
+
+
+def get_vqd(query, headers):
+ """Returns the ``vqd`` that fits to the *query*. If there is no ``vqd`` cached
+ (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
+ response.
+
+ """
+ value = None
+ c = redisdb.client()
+ if c:
+ key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
+ value = c.get(key)
+ if value:
+ value = value.decode('utf-8')
+ logger.debug("re-use cached vqd value: %s", value)
+ return value
- lang_code = match_language(lang, lang_list or [], language_aliases, 'wt-WT')
- lang_parts = lang_code.split('-')
+ query_url = 'https://duckduckgo.com/?{query}&iar=images'.format(query=urlencode({'q': query}))
+ res = network.get(query_url, headers=headers)
+ content = res.text
+ if content.find('vqd=\'') == -1:
+ raise SearxEngineAPIException('Request failed')
+ value = content[content.find('vqd=\'') + 5 :]
+ value = value[: value.find('\'')]
+ logger.debug("new vqd value: %s", value)
+ cache_vqd(query, value)
+ return value
- # country code goes first
- return lang_parts[1].lower() + '-' + lang_parts[0].lower()
+
+def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
+ """Get DuckDuckGo's language identifier from SearXNG's locale.
+
+ DuckDuckGo defines its languages by region codes (see
+ :py:obj:`fetch_traits`).
+
+ To get region and language of a DDG service use:
+
+ .. code:: python
+
+ eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
+ eng_lang = get_ddg_lang(traits, params['searxng_locale'])
+
+ It might be confusing, but the ``l`` value of the cookie is what SearXNG calls
+ the *region*:
+
+ .. code:: python
+
+ # !ddi paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
+ params['cookies']['ad'] = eng_lang
+ params['cookies']['ah'] = eng_region
+ params['cookies']['l'] = eng_region
+
+ .. hint::
+
+ `DDG-lite <https://lite.duckduckgo.com/lite>`__ does not offer a language
+ selection to the user, only a region can be selected by the user
+ (``eng_region`` from the example above). DDG-lite stores the selected
+ region in a cookie::
+
+ params['cookies']['kl'] = eng_region # 'ar-es'
+
+ """
+ return eng_traits.custom['lang_region'].get(sxng_locale, eng_traits.get_language(sxng_locale, default))
+
+
+ddg_reg_map = {
+ 'tw-tzh': 'zh_TW',
+ 'hk-tzh': 'zh_HK',
+ 'ct-ca': 'skip', # ct-ca and es-ca both map to ca_ES
+ 'es-ca': 'ca_ES',
+ 'id-en': 'id_ID',
+ 'no-no': 'nb_NO',
+ 'jp-jp': 'ja_JP',
+ 'kr-kr': 'ko_KR',
+ 'xa-ar': 'ar_SA',
+ 'sl-sl': 'sl_SI',
+ 'th-en': 'th_TH',
+ 'vn-en': 'vi_VN',
+}
+
+ddg_lang_map = {
+ # use ar --> ar_EG (Egypt's arabic)
+ "ar_DZ": 'lang_region',
+ "ar_JO": 'lang_region',
+ "ar_SA": 'lang_region',
+ # use bn --> bn_BD
+ 'bn_IN': 'lang_region',
+ # use de --> de_DE
+ 'de_CH': 'lang_region',
+ # use en --> en_US,
+ 'en_AU': 'lang_region',
+ 'en_CA': 'lang_region',
+ 'en_GB': 'lang_region',
+ # Esperanto
+ 'eo_XX': 'eo',
+ # use es --> es_ES,
+ 'es_AR': 'lang_region',
+ 'es_CL': 'lang_region',
+ 'es_CO': 'lang_region',
+ 'es_CR': 'lang_region',
+ 'es_EC': 'lang_region',
+ 'es_MX': 'lang_region',
+ 'es_PE': 'lang_region',
+ 'es_UY': 'lang_region',
+ 'es_VE': 'lang_region',
+ # use fr --> fr_FR
+ 'fr_CA': 'lang_region',
+ 'fr_CH': 'lang_region',
+ 'fr_BE': 'lang_region',
+ # use nl --> nl_NL
+ 'nl_BE': 'lang_region',
+ # use pt --> pt_PT
+ 'pt_BR': 'lang_region',
+ # skip these languages
+ 'od_IN': 'skip',
+ 'io_XX': 'skip',
+ 'tokipona_XX': 'skip',
+}
def request(query, params):
+ eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
+ # eng_lang = get_ddg_lang(traits, params['searxng_locale'])
+
params['url'] = url
params['method'] = 'POST'
-
params['data']['q'] = query
# The API is not documented, so we do some reverse engineering and emulate
@@ -88,23 +224,19 @@ def request(query, params):
params['data']['s'] = offset
params['data']['dc'] = offset + 1
+ # request needs a vqd argument
+ params['data']['vqd'] = get_vqd(query, params["headers"])
+
# initial page does not have additional data in the input form
if params['pageno'] > 1:
- # request the second page (and more pages) needs 'o' and 'api' arguments
- params['data']['o'] = 'json'
- params['data']['api'] = 'd.js'
- # initial page does not have additional data in the input form
- if params['pageno'] > 2:
- # request the third page (and more pages) some more arguments
- params['data']['nextParams'] = ''
- params['data']['v'] = ''
- params['data']['vqd'] = ''
+ params['data']['o'] = form_data.get('o', 'json')
+ params['data']['api'] = form_data.get('api', 'd.js')
+ params['data']['nextParams'] = form_data.get('nextParams', '')
+ params['data']['v'] = form_data.get('v', 'l')
- region_code = get_region_code(params['language'], supported_languages)
- if region_code:
- params['data']['kl'] = region_code
- params['cookies']['kl'] = region_code
+ params['data']['kl'] = eng_region
+ params['cookies']['kl'] = eng_region
params['data']['df'] = ''
if params['time_range'] in time_range_dict:
@@ -116,26 +248,40 @@ def request(query, params):
return params
-# get response from search-request
def response(resp):
- headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
- get(url_ping, headers=headers_ping)
-
if resp.status_code == 303:
return []
results = []
- doc = fromstring(resp.text)
+ doc = lxml.html.fromstring(resp.text)
result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
- if not len(result_table) >= 3:
+
+ if len(result_table) == 2:
+ # some locales (at least China) do not have a "next page" button and
+ # the layout of the HTML tables is different.
+ result_table = result_table[1]
+ elif not len(result_table) >= 3:
# no more results
return []
- result_table = result_table[2]
+ else:
+ result_table = result_table[2]
+ # update form data from response
+ form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..')
+ if len(form):
+
+ form = form[0]
+ form_data['v'] = eval_xpath(form, '//input[@name="v"]/@value')[0]
+ form_data['api'] = eval_xpath(form, '//input[@name="api"]/@value')[0]
+ form_data['o'] = eval_xpath(form, '//input[@name="o"]/@value')[0]
+ logger.debug('form_data: %s', form_data)
+
+ value = eval_xpath(form, '//input[@name="vqd"]/@value')[0]
+ query = resp.search_params['data']['q']
+ cache_vqd(query, value)
tr_rows = eval_xpath(result_table, './/tr')
-
# In the last <tr> is the form of the 'previous/next page' links
tr_rows = tr_rows[:-1]
@@ -172,15 +318,105 @@ def response(resp):
return results
-# get supported languages from their site
-def _fetch_supported_languages(resp):
+def fetch_traits(engine_traits: EngineTraits):
+ """Fetch languages & regions from DuckDuckGo.
+
+ SearXNG's ``all`` locale maps to DuckDuckGo's "All regions" (``wt-wt``).
+ DuckDuckGo's language "Browsers preferred language" (``wt_WT``) makes no
+ sense in a SearXNG request since SearXNG's ``all`` will not add an
+ ``Accept-Language`` HTTP header. The value in ``engine_traits.all_locale``
+ is ``wt-wt`` (the region).
+
+ Besides regions, DuckDuckGo also defines its languages by region codes. For
+ example, these are the English languages in DuckDuckGo:
+
+ - en_US
+ - en_AU
+ - en_CA
+ - en_GB
+
+ The function :py:obj:`get_ddg_lang` evaluates DuckDuckGo's language from
+ SearXNG's locale.
- # response is a js file with regions as an embedded object
- response_page = resp.text
- response_page = response_page[response_page.find('regions:{') + 8 :]
- response_page = response_page[: response_page.find('}') + 1]
+ """
+ # pylint: disable=too-many-branches, too-many-statements
+ # fetch regions
+
+ engine_traits.all_locale = 'wt-wt'
+
+ # updated from u588 to u661 / should be updated automatically?
+ resp = network.get('https://duckduckgo.com/util/u661.js')
+
+ if not resp.ok:
+ print("ERROR: response from DuckDuckGo is not OK.")
+
+ pos = resp.text.find('regions:{') + 8
+ js_code = resp.text[pos:]
+ pos = js_code.find('}') + 1
+ regions = json.loads(js_code[:pos])
+
+ for eng_tag, name in regions.items():
+
+ if eng_tag == 'wt-wt':
+ engine_traits.all_locale = 'wt-wt'
+ continue
+
+ region = ddg_reg_map.get(eng_tag)
+ if region == 'skip':
+ continue
- regions_json = loads(response_page)
- supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
+ if not region:
+ eng_territory, eng_lang = eng_tag.split('-')
+ region = eng_lang + '_' + eng_territory.upper()
- return list(supported_languages)
+ try:
+ sxng_tag = locales.region_tag(babel.Locale.parse(region))
+ except babel.UnknownLocaleError:
+ print("ERROR: %s (%s) -> %s is unknown by babel" % (name, eng_tag, region))
+ continue
+
+ conflict = engine_traits.regions.get(sxng_tag)
+ if conflict:
+ if conflict != eng_tag:
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+ continue
+ engine_traits.regions[sxng_tag] = eng_tag
+
+ # fetch languages
+
+ engine_traits.custom['lang_region'] = {}
+
+ pos = resp.text.find('languages:{') + 10
+ js_code = resp.text[pos:]
+ pos = js_code.find('}') + 1
+ js_code = '{"' + js_code[1:pos].replace(':', '":').replace(',', ',"')
+ languages = json.loads(js_code)
+
+ for eng_lang, name in languages.items():
+
+ if eng_lang == 'wt_WT':
+ continue
+
+ babel_tag = ddg_lang_map.get(eng_lang, eng_lang)
+ if babel_tag == 'skip':
+ continue
+
+ try:
+
+ if babel_tag == 'lang_region':
+ sxng_tag = locales.region_tag(babel.Locale.parse(eng_lang))
+ engine_traits.custom['lang_region'][sxng_tag] = eng_lang
+ continue
+
+ sxng_tag = locales.language_tag(babel.Locale.parse(babel_tag))
+
+ except babel.UnknownLocaleError:
+ print("ERROR: language %s (%s) is unknown by babel" % (name, eng_lang))
+ continue
+
+ conflict = engine_traits.languages.get(sxng_tag)
+ if conflict:
+ if conflict != eng_lang:
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
+ continue
+ engine_traits.languages[sxng_tag] = eng_lang
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index 7ed0de35c..39fed87e7 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -1,22 +1,33 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""DuckDuckGo (Instant Answer API)
+"""
+DuckDuckGo Instant Answer API
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The `DDG-API <https://duckduckgo.com/api>`__ is no longer documented but from
+reverse engineering we can see that some services (e.g. instant answers) are
+still in use by the DDG search engine.
+
+As far as we can tell, the *instant answers* API does not support languages, or at
+least we could not find out how language support should work. It seems that
+most of the features are based on English terms.
"""
-import json
+from typing import TYPE_CHECKING
+
from urllib.parse import urlencode, urlparse, urljoin
from lxml import html
from searx.data import WIKIDATA_UNITS
-from searx.engines.duckduckgo import language_aliases
-from searx.engines.duckduckgo import ( # pylint: disable=unused-import
- _fetch_supported_languages,
- supported_languages_url,
-)
-from searx.utils import extract_text, html_to_text, match_language, get_string_replaces_function
+from searx.utils import extract_text, html_to_text, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
# about
about = {
"website": 'https://duckduckgo.com/',
@@ -37,7 +48,7 @@ replace_http_by_https = get_string_replaces_function({'http:': 'https:'})
def is_broken_text(text):
- """duckduckgo may return something like "<a href="xxxx">http://somewhere Related website<a/>"
+ """duckduckgo may return something like ``<a href="xxxx">http://somewhere Related website<a/>``
The href URL is broken, the "Related website" may contains some HTML.
@@ -62,8 +73,6 @@ def result_to_text(text, htmlResult):
def request(query, params):
params['url'] = URL.format(query=urlencode({'q': query}))
- language = match_language(params['language'], supported_languages, language_aliases)
- language = language.split('-')[0]
return params
@@ -71,7 +80,7 @@ def response(resp):
# pylint: disable=too-many-locals, too-many-branches, too-many-statements
results = []
- search_res = json.loads(resp.text)
+ search_res = resp.json()
# search_res.get('Entity') possible values (not exhaustive) :
# * continent / country / department / location / waterfall
@@ -235,7 +244,7 @@ def unit_to_str(unit):
def area_to_str(area):
- """parse {'unit': 'http://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}"""
+ """parse ``{'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}``"""
unit = unit_to_str(area.get('unit'))
if unit is not None:
try:
diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py
index 19f649ef4..d8a6f1340 100644
--- a/searx/engines/duckduckgo_images.py
+++ b/searx/engines/duckduckgo_images.py
@@ -1,26 +1,30 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
- DuckDuckGo (Images)
+DuckDuckGo Images
+~~~~~~~~~~~~~~~~~
"""
-from json import loads
+from typing import TYPE_CHECKING
from urllib.parse import urlencode
-from searx.exceptions import SearxEngineAPIException
-from searx.engines.duckduckgo import get_region_code
-from searx.engines.duckduckgo import ( # pylint: disable=unused-import
- _fetch_supported_languages,
- supported_languages_url,
+
+from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import
+from searx.engines.duckduckgo import (
+ get_ddg_lang,
+ get_vqd,
)
-from searx.network import get
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+traits: EngineTraits
# about
about = {
"website": 'https://duckduckgo.com/',
"wikidata_id": 'Q12805',
- "official_api_documentation": {
- 'url': 'https://duckduckgo.com/api',
- 'comment': 'but images are not supported',
- },
"use_official_api": False,
"require_api_key": False,
"results": 'JSON (site requires js to get images)',
@@ -32,70 +36,64 @@ paging = True
safesearch = True
send_accept_language_header = True
-# search-url
-images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}'
-site_url = 'https://duckduckgo.com/?{query}&iar=images&iax=1&ia=images'
+safesearch_cookies = {0: '-2', 1: None, 2: '1'}
+safesearch_args = {0: '1', 1: None, 2: '1'}
-# run query in site to get vqd number needed for requesting images
-# TODO: find a way to get this number without an extra request (is it a hash of the query?)
-def get_vqd(query, headers):
- query_url = site_url.format(query=urlencode({'q': query}))
- res = get(query_url, headers=headers)
- content = res.text
- if content.find('vqd=\'') == -1:
- raise SearxEngineAPIException('Request failed')
- vqd = content[content.find('vqd=\'') + 5 :]
- vqd = vqd[: vqd.find('\'')]
- return vqd
+def request(query, params):
+ eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
+ eng_lang = get_ddg_lang(traits, params['searxng_locale'])
-# do search-request
-def request(query, params):
- # to avoid running actual external requests when testing
- if 'is_test' not in params:
- vqd = get_vqd(query, params['headers'])
- else:
- vqd = '12345'
+ args = {
+ 'q': query,
+ 'o': 'json',
+ # 'u': 'bing',
+ 'l': eng_region,
+ 'vqd': get_vqd(query, params["headers"]),
+ }
- offset = (params['pageno'] - 1) * 50
+ if params['pageno'] > 1:
+ args['s'] = (params['pageno'] - 1) * 100
- safesearch = params['safesearch'] - 1
+ params['cookies']['ad'] = eng_lang # zh_CN
+ params['cookies']['ah'] = eng_region # "us-en,de-de"
+ params['cookies']['l'] = eng_region # "hk-tzh"
+ logger.debug("cookies: %s", params['cookies'])
- region_code = get_region_code(params['language'], lang_list=supported_languages)
- if region_code:
- params['url'] = images_url.format(
- query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd
- )
- else:
- params['url'] = images_url.format(query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd)
+ safe_search = safesearch_cookies.get(params['safesearch'])
+ if safe_search is not None:
+ params['cookies']['p'] = safe_search # "-2", "1"
+ safe_search = safesearch_args.get(params['safesearch'])
+ if safe_search is not None:
+ args['p'] = safe_search # "-1", "1"
+
+ args = urlencode(args)
+ params['url'] = 'https://duckduckgo.com/i.js?{args}&f={f}'.format(args=args, f=',,,,,')
+
+ params['headers']['Accept'] = 'application/json, text/javascript, */*; q=0.01'
+ params['headers']['Referer'] = 'https://duckduckgo.com/'
+ params['headers']['X-Requested-With'] = 'XMLHttpRequest'
+ logger.debug("headers: %s", params['headers'])
return params
-# get response from search-request
def response(resp):
results = []
+ res_json = resp.json()
- content = resp.text
- res_json = loads(content)
-
- # parse results
for result in res_json['results']:
- title = result['title']
- url = result['url']
- thumbnail = result['thumbnail']
- image = result['image']
-
- # append result
results.append(
{
'template': 'images.html',
- 'title': title,
+ 'title': result['title'],
'content': '',
- 'thumbnail_src': thumbnail,
- 'img_src': image,
- 'url': url,
+ 'thumbnail_src': result['thumbnail'],
+ 'img_src': result['image'],
+ 'url': result['url'],
+ 'img_format': '%s x %s' % (result['width'], result['height']),
+ 'source': result['source'],
}
)
diff --git a/searx/engines/duckduckgo_weather.py b/searx/engines/duckduckgo_weather.py
index 0540cbcb5..4f0ce1b49 100644
--- a/searx/engines/duckduckgo_weather.py
+++ b/searx/engines/duckduckgo_weather.py
@@ -1,13 +1,29 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""DuckDuckGo Weather"""
+"""
+DuckDuckGo Weather
+~~~~~~~~~~~~~~~~~~
+"""
+from typing import TYPE_CHECKING
from json import loads
from urllib.parse import quote
from datetime import datetime
from flask_babel import gettext
+from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import
+from searx.engines.duckduckgo import get_ddg_lang
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+traits: EngineTraits
+
+
about = {
"website": 'https://duckduckgo.com/',
"wikidata_id": 'Q12805',
@@ -17,9 +33,11 @@ about = {
"results": "JSON",
}
-categories = ["others"]
+send_accept_language_header = True
-url = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}"
+# engine dependent config
+categories = ["others"]
+URL = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}"
def generate_condition_table(condition):
@@ -72,8 +90,17 @@ def generate_day_table(day):
def request(query, params):
- params["url"] = url.format(query=quote(query), lang=params['language'].split('-')[0])
+ eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
+ eng_lang = get_ddg_lang(traits, params['searxng_locale'])
+
+ # !ddw paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
+ params['cookies']['ad'] = eng_lang
+ params['cookies']['ah'] = eng_region
+ params['cookies']['l'] = eng_region
+ logger.debug("cookies: %s", params['cookies'])
+
+ params["url"] = URL.format(query=quote(query), lang=eng_lang.split('_')[0])
return params
diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py
index 856c93710..f0cb6a794 100644
--- a/searx/engines/gentoo.py
+++ b/searx/engines/gentoo.py
@@ -25,6 +25,7 @@ base_url = 'https://wiki.gentoo.org'
# xpath queries
xpath_results = '//ul[@class="mw-search-results"]/li'
xpath_link = './/div[@class="mw-search-result-heading"]/a'
+xpath_content = './/div[@class="searchresult"]'
# cut 'en' from 'en-US', 'de' from 'de-CH', and so on
@@ -77,8 +78,6 @@ main_langs = {
'uk': 'Українська',
'zh': '简体中文',
}
-supported_languages = dict(lang_urls, **main_langs)
-
# do search-request
def request(query, params):
@@ -118,7 +117,8 @@ def response(resp):
link = result.xpath(xpath_link)[0]
href = urljoin(base_url, link.attrib.get('href'))
title = extract_text(link)
+ content = extract_text(result.xpath(xpath_content))
- results.append({'url': href, 'title': title})
+ results.append({'url': href, 'title': title, 'content': content})
return results
diff --git a/searx/engines/google.py b/searx/engines/google.py
index bdb351432..708068f3a 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -1,34 +1,39 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""This is the implementation of the google WEB engine. Some of this
-implementations are shared by other engines:
+"""This is the implementation of the Google WEB engine. Some of this
+implementations (mainly the :py:obj:`get_google_info`) are shared by other
+engines:
- :ref:`google images engine`
- :ref:`google news engine`
- :ref:`google videos engine`
-
-The google WEB engine itself has a special setup option:
-
-.. code:: yaml
-
- - name: google
- ...
- use_mobile_ui: false
-
-``use_mobile_ui``: (default: ``false``)
- Enables to use *mobile endpoint* to bypass the google blocking (see
- :issue:`159`). On the mobile UI of Google Search, the button :guilabel:`More
- results` is not affected by Google rate limiting and we can still do requests
- while actively blocked by the original Google search. By activate
- ``use_mobile_ui`` this behavior is simulated by adding the parameter
- ``async=use_ac:true,_fmt:pc`` to the :py:func:`request`.
+- :ref:`google scholar engine`
+- :ref:`google autocomplete`
"""
+from typing import TYPE_CHECKING
+
+import re
from urllib.parse import urlencode
from lxml import html
-from searx.utils import match_language, extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
+import babel
+import babel.core
+import babel.languages
+
+from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
+from searx.locales import language_tag, region_tag, get_offical_locales
+from searx import network
from searx.exceptions import SearxEngineCaptchaException
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+traits: EngineTraits
+
# about
about = {
@@ -45,64 +50,6 @@ categories = ['general', 'web']
paging = True
time_range_support = True
safesearch = True
-send_accept_language_header = True
-use_mobile_ui = False
-supported_languages_url = 'https://www.google.com/preferences?#languages'
-
-# based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests
-google_domains = {
- 'BG': 'google.bg', # Bulgaria
- 'CZ': 'google.cz', # Czech Republic
- 'DE': 'google.de', # Germany
- 'DK': 'google.dk', # Denmark
- 'AT': 'google.at', # Austria
- 'CH': 'google.ch', # Switzerland
- 'GR': 'google.gr', # Greece
- 'AU': 'google.com.au', # Australia
- 'CA': 'google.ca', # Canada
- 'GB': 'google.co.uk', # United Kingdom
- 'ID': 'google.co.id', # Indonesia
- 'IE': 'google.ie', # Ireland
- 'IN': 'google.co.in', # India
- 'MY': 'google.com.my', # Malaysia
- 'NZ': 'google.co.nz', # New Zealand
- 'PH': 'google.com.ph', # Philippines
- 'SG': 'google.com.sg', # Singapore
- 'US': 'google.com', # United States (google.us) redirects to .com
- 'ZA': 'google.co.za', # South Africa
- 'AR': 'google.com.ar', # Argentina
- 'CL': 'google.cl', # Chile
- 'ES': 'google.es', # Spain
- 'MX': 'google.com.mx', # Mexico
- 'EE': 'google.ee', # Estonia
- 'FI': 'google.fi', # Finland
- 'BE': 'google.be', # Belgium
- 'FR': 'google.fr', # France
- 'IL': 'google.co.il', # Israel
- 'HR': 'google.hr', # Croatia
- 'HU': 'google.hu', # Hungary
- 'IT': 'google.it', # Italy
- 'JP': 'google.co.jp', # Japan
- 'KR': 'google.co.kr', # South Korea
- 'LT': 'google.lt', # Lithuania
- 'LV': 'google.lv', # Latvia
- 'NO': 'google.no', # Norway
- 'NL': 'google.nl', # Netherlands
- 'PL': 'google.pl', # Poland
- 'BR': 'google.com.br', # Brazil
- 'PT': 'google.pt', # Portugal
- 'RO': 'google.ro', # Romania
- 'RU': 'google.ru', # Russia
- 'SK': 'google.sk', # Slovakia
- 'SI': 'google.si', # Slovenia
- 'SE': 'google.se', # Sweden
- 'TH': 'google.co.th', # Thailand
- 'TR': 'google.com.tr', # Turkey
- 'UA': 'google.com.ua', # Ukraine
- 'CN': 'google.com.hk', # There is no google.cn, we use .com.hk for zh-CN
- 'HK': 'google.com.hk', # Hong Kong
- 'TW': 'google.com.tw', # Taiwan
-}
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
@@ -112,50 +59,50 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'}
# specific xpath variables
# ------------------------
-results_xpath = './/div[@data-sokoban-container]'
+results_xpath = './/div[contains(@jscontroller, "SC7lYd")]'
title_xpath = './/a/h3[1]'
href_xpath = './/a[h3]/@href'
-content_xpath = './/div[@data-content-feature=1]'
-
-# google *sections* are no usual *results*, we ignore them
-g_section_with_header = './g-section-with-header'
-
+content_xpath = './/div[@data-sncf]'
# Suggestions are links placed in a *card-section*, we extract only the text
# from the links not the links itself.
suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a'
+# UI_ASYNC = 'use_ac:true,_fmt:html' # returns a HTTP 500 when user search for
+# # celebrities like '!google natasha allegri'
+# # or '!google chris evans'
+UI_ASYNC = 'use_ac:true,_fmt:prog'
+"""Format of the response from UI's async request."""
+
-def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
- """Composing various language properties for the google engines.
+def get_google_info(params, eng_traits):
+ """Composing various (language) properties for the google engines (:ref:`google
+ API`).
This function is called by the various google engines (:ref:`google web
engine`, :ref:`google images engine`, :ref:`google news engine` and
:ref:`google videos engine`).
- :param dict param: request parameters of the engine
-
- :param list lang_list: list of supported languages of the engine
- :py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>`
-
- :param dict lang_list: custom aliases for non standard language codes
- (used when calling :py:func:`searx.utils.match_language`)
+ :param dict params: Request parameters of the engine. At least
+ a ``searxng_locale`` key should be in the dictionary.
- :param bool supported_any_language: When a language is not specified, the
- language interpretation is left up to Google to decide how the search
- results should be delivered. This argument is ``True`` for the google
- engine and ``False`` for the other engines (google-images, -news,
- -scholar, -videos).
+ :param eng_traits: Engine's traits fetched from google preferences
+ (:py:obj:`searx.enginelib.traits.EngineTraits`)
:rtype: dict
:returns:
Py-Dictionary with the key/value pairs:
language:
- Return value from :py:func:`searx.utils.match_language`
+ The language code that is used by google (e.g. ``lang_en`` or
+ ``lang_zh-TW``)
country:
- The country code (e.g. US, AT, CA, FR, DE ..)
+ The country code that is used by google (e.g. ``US`` or ``TW``)
+
+ locale:
+ An instance of :py:obj:`babel.core.Locale` built from the
+ ``searxng_locale`` value.
subdomain:
Google subdomain :py:obj:`google_domains` that fits to the country
@@ -165,52 +112,67 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
Py-Dictionary with additional request arguments (can be passed to
:py:func:`urllib.parse.urlencode`).
+ - ``hl`` parameter: specifies the interface language of user interface.
+ - ``lr`` parameter: restricts search results to documents written in
+ a particular language.
+ - ``cr`` parameter: restricts search results to documents
+ originating in a particular country.
+ - ``ie`` parameter: sets the character encoding scheme that should
+ be used to interpret the query string ('utf8').
+ - ``oe`` parameter: sets the character encoding scheme that should
+ be used to decode the XML result ('utf8').
+
headers:
Py-Dictionary with additional HTTP headers (can be passed to
request's headers)
+
+ - ``Accept: '*/*'``
+
"""
+
ret_val = {
'language': None,
'country': None,
'subdomain': None,
'params': {},
'headers': {},
+ 'cookies': {},
+ 'locale': None,
}
- # language ...
+ sxng_locale = params.get('searxng_locale', 'all')
+ try:
+ locale = babel.Locale.parse(sxng_locale, sep='-')
+ except babel.core.UnknownLocaleError:
+ locale = None
- _lang = params['language']
- _any_language = _lang.lower() == 'all'
- if _any_language:
- _lang = 'en-US'
- language = match_language(_lang, lang_list, custom_aliases)
- ret_val['language'] = language
+ eng_lang = eng_traits.get_language(sxng_locale, 'lang_en')
+ lang_code = eng_lang.split('_')[-1] # lang_zh-TW --> zh-TW / lang_en --> en
+ country = eng_traits.get_region(sxng_locale, eng_traits.all_locale)
- # country ...
+ # Test zh_hans & zh_hant --> in the topmost links of the result lists for
+ # TW and HK you should find a wiktionary.org zh_hant link. In the result
+ # list of zh-CN there should be no hant link; instead you should find
+ # zh.m.wikipedia.org/zh somewhere near the top.
- _l = _lang.split('-')
- if len(_l) == 2:
- country = _l[1]
- else:
- country = _l[0].upper()
- if country == 'EN':
- country = 'US'
- ret_val['country'] = country
-
- # subdomain ...
-
- ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com')
-
- # params & headers
+ # '!go 日 :zh-TW' --> https://zh.m.wiktionary.org/zh-hant/%E6%97%A5
+ # '!go 日 :zh-CN' --> https://zh.m.wikipedia.org/zh/%E6%97%A5
- lang_country = '%s-%s' % (language, country) # (en-US, en-EN, de-DE, de-AU, fr-FR ..)
+ ret_val['language'] = eng_lang
+ ret_val['country'] = country
+ ret_val['locale'] = locale
+ ret_val['subdomain'] = eng_traits.custom['supported_domains'].get(country.upper(), 'www.google.com')
# hl parameter:
- # https://developers.google.com/custom-search/docs/xml_results#hlsp The
- # Interface Language:
+ # The hl parameter specifies the interface language (host language) of
+ # your user interface. To improve the performance and the quality of your
+ # search results, you are strongly encouraged to set this parameter
+ # explicitly.
+ # https://developers.google.com/custom-search/docs/xml_results#hlsp
+ # The Interface Language:
# https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages
- ret_val['params']['hl'] = lang_list.get(lang_country, language)
+ ret_val['params']['hl'] = lang_code
# lr parameter:
# The lr (language restrict) parameter restricts search results to
@@ -218,22 +180,72 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
# https://developers.google.com/custom-search/docs/xml_results#lrsp
# Language Collection Values:
# https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections
+ #
+ # To select 'all' languages an empty 'lr' value is used.
+ #
+ # Unlike other Google services, Google Scholar supports selecting more
+ # than one language. The languages are separated by a pipe '|' (logical OR).
+ # For example: &lr=lang_zh-TW%7Clang_de selects articles written in
+ # traditional Chinese OR German language.
- if _any_language and supported_any_language:
+ ret_val['params']['lr'] = eng_lang
+ if sxng_locale == 'all':
+ ret_val['params']['lr'] = ''
- # interpretation is left up to Google (based on whoogle)
- #
- # - add parameter ``source=lnt``
- # - don't use parameter ``lr``
- # - don't add a ``Accept-Language`` HTTP header.
+ # cr parameter:
+ # The cr parameter restricts search results to documents originating in a
+ # particular country.
+ # https://developers.google.com/custom-search/docs/xml_results#crsp
- ret_val['params']['source'] = 'lnt'
+ ret_val['params']['cr'] = 'country' + country
+ if sxng_locale == 'all':
+ ret_val['params']['cr'] = ''
- else:
+ # gl parameter: (mandatory for Google News)
+ # The gl parameter value is a two-letter country code. For WebSearch
+ # results, the gl parameter boosts search results whose country of origin
+ # matches the parameter value. See the Country Codes section for a list of
+ # valid values.
+ # Specifying a gl parameter value in WebSearch requests should improve the
+ # relevance of results. This is particularly true for international
+ # customers and, even more specifically, for customers in English-speaking
+ # countries other than the United States.
+ # https://developers.google.com/custom-search/docs/xml_results#glsp
+
+ ret_val['params']['gl'] = country
+
+ # ie parameter:
+ # The ie parameter sets the character encoding scheme that should be used
+ # to interpret the query string. The default ie value is latin1.
+ # https://developers.google.com/custom-search/docs/xml_results#iesp
+
+ ret_val['params']['ie'] = 'utf8'
- # restricts search results to documents written in a particular
- # language.
- ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language)
+ # oe parameter:
+ # The oe parameter sets the character encoding scheme that should be used
+ # to decode the XML result. The default oe value is latin1.
+ # https://developers.google.com/custom-search/docs/xml_results#oesp
+
+ ret_val['params']['oe'] = 'utf8'
+
+ # num parameter:
+ # The num parameter identifies the number of search results to return.
+ # The default num value is 10, and the maximum value is 20. If you request
+ # more than 20 results, only 20 results will be returned.
+ # https://developers.google.com/custom-search/docs/xml_results#numsp
+
+ # HINT: seems to have no effect (tested in google WEB & Images)
+ # ret_val['params']['num'] = 20
+
+ # HTTP headers
+
+ ret_val['headers']['Accept'] = '*/*'
+
+ # Cookies
+
+ # - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746
+ # - https://github.com/searxng/searxng/issues/1555
+ ret_val['cookies']['CONSENT'] = "YES+"
return ret_val
@@ -245,33 +257,34 @@ def detect_google_sorry(resp):
def request(query, params):
"""Google search request"""
-
+ # pylint: disable=line-too-long
offset = (params['pageno'] - 1) * 10
-
- lang_info = get_lang_info(params, supported_languages, language_aliases, True)
-
- additional_parameters = {}
- if use_mobile_ui:
- additional_parameters = {
- 'asearch': 'arc',
- 'async': 'use_ac:true,_fmt:prog',
- }
+ google_info = get_google_info(params, traits)
# https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
query_url = (
'https://'
- + lang_info['subdomain']
+ + google_info['subdomain']
+ '/search'
+ "?"
+ urlencode(
{
'q': query,
- **lang_info['params'],
- 'ie': "utf8",
- 'oe': "utf8",
- 'start': offset,
+ **google_info['params'],
'filter': '0',
- **additional_parameters,
+ 'start': offset,
+ # 'vet': '12ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0QxK8CegQIARAC..i',
+ # 'ved': '2ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0Q_skCegQIARAG',
+ # 'cs' : 1,
+ # 'sa': 'N',
+ # 'yv': 3,
+ # 'prmd': 'vin',
+ # 'ei': 'GASaY6TxOcy_xc8PtYeY6AE',
+ # 'sa': 'N',
+ # 'sstk': 'AcOHfVkD7sWCSAheZi-0tx_09XDO55gTWY0JNq3_V26cNN-c8lfD45aZYPI8s_Bqp8s57AHz5pxchDtAGCA_cikAWSjy9kw3kgg'
+ # formerly known as use_mobile_ui
+ 'asearch': 'arc',
+ 'async': UI_ASYNC,
}
)
)
@@ -282,25 +295,38 @@ def request(query, params):
query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
params['url'] = query_url
- params['cookies']['CONSENT'] = "YES+"
- params['headers'].update(lang_info['headers'])
- if use_mobile_ui:
- params['headers']['Accept'] = '*/*'
- else:
- params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
-
+ params['cookies'] = google_info['cookies']
+ params['headers'].update(google_info['headers'])
return params
+# =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;data:image/jpeg;base64,/9j/4AAQSkZJRgABA
+# ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26;
+RE_DATA_IMAGE = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);')
+
+
+def _parse_data_images(dom):
+ data_image_map = {}
+ for img_id, data_image in RE_DATA_IMAGE.findall(dom.text_content()):
+ end_pos = data_image.rfind('=')
+ if end_pos > 0:
+ data_image = data_image[: end_pos + 1]
+ data_image_map[img_id] = data_image
+ logger.debug('data:image objects --> %s', list(data_image_map.keys()))
+ return data_image_map
+
+
def response(resp):
"""Get response from google's search request"""
-
+ # pylint: disable=too-many-branches, too-many-statements
detect_google_sorry(resp)
results = []
# convert the text to dom
dom = html.fromstring(resp.text)
+ data_image_map = _parse_data_images(dom)
+
# results --> answer
answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]')
if answer_list:
@@ -309,25 +335,9 @@ def response(resp):
else:
logger.debug("did not find 'answer'")
- # results --> number_of_results
- if not use_mobile_ui:
- try:
- _txt = eval_xpath_getindex(dom, '//div[@id="result-stats"]//text()', 0)
- _digit = ''.join([n for n in _txt if n.isdigit()])
- number_of_results = int(_digit)
- results.append({'number_of_results': number_of_results})
- except Exception as e: # pylint: disable=broad-except
- logger.debug("did not 'number_of_results'")
- logger.error(e, exc_info=True)
-
# parse results
- for result in eval_xpath_list(dom, results_xpath):
-
- # google *sections*
- if extract_text(eval_xpath(result, g_section_with_header)):
- logger.debug("ignoring <g-section-with-header>")
- continue
+ for result in eval_xpath_list(dom, results_xpath): # pylint: disable=too-many-nested-blocks
try:
title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
@@ -336,16 +346,30 @@ def response(resp):
logger.debug('ignoring item from the result_xpath list: missing title')
continue
title = extract_text(title_tag)
+
url = eval_xpath_getindex(result, href_xpath, 0, None)
if url is None:
+ logger.debug('ignoring item from the result_xpath list: missing url of title "%s"', title)
continue
- content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
- if content is None:
+
+ content_nodes = eval_xpath(result, content_xpath)
+ content = extract_text(content_nodes)
+
+ if not content:
logger.debug('ignoring item from the result_xpath list: missing content of title "%s"', title)
continue
- logger.debug('add link to results: %s', title)
- results.append({'url': url, 'title': title, 'content': content})
+ img_src = content_nodes[0].xpath('.//img/@src')
+ if img_src:
+ img_src = img_src[0]
+ if img_src.startswith('data:image'):
+ img_id = content_nodes[0].xpath('.//img/@id')
+ if img_id:
+ img_src = data_image_map.get(img_id[0])
+ else:
+ img_src = None
+
+ results.append({'url': url, 'title': title, 'content': content, 'img_src': img_src})
except Exception as e: # pylint: disable=broad-except
logger.error(e, exc_info=True)
@@ -361,15 +385,107 @@ def response(resp):
# get supported languages from their site
-def _fetch_supported_languages(resp):
- ret_val = {}
+
+
+skip_countries = [
+ # official language of google-country not in google-languages
+ 'AL', # Albanien (sq)
+ 'AZ', # Aserbaidschan (az)
+ 'BD', # Bangladesch (bn)
+ 'BN', # Brunei Darussalam (ms)
+ 'BT', # Bhutan (dz)
+ 'ET', # Äthiopien (am)
+ 'GE', # Georgien (ka, os)
+ 'GL', # Grönland (kl)
+ 'KH', # Kambodscha (km)
+ 'LA', # Laos (lo)
+ 'LK', # Sri Lanka (si, ta)
+ 'ME', # Montenegro (sr)
+ 'MK', # Nordmazedonien (mk, sq)
+ 'MM', # Myanmar (my)
+ 'MN', # Mongolei (mn)
+ 'MV', # Malediven (dv) // dv_MV is unknown by babel
+ 'MY', # Malaysia (ms)
+ 'NP', # Nepal (ne)
+ 'TJ', # Tadschikistan (tg)
+ 'TM', # Turkmenistan (tk)
+ 'UZ', # Usbekistan (uz)
+]
+
+
+def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):
+ """Fetch languages from Google."""
+ # pylint: disable=import-outside-toplevel, too-many-branches
+
+ engine_traits.custom['supported_domains'] = {}
+
+ resp = network.get('https://www.google.com/preferences')
+ if not resp.ok:
+ raise RuntimeError("Response from Google's preferences is not OK.")
+
dom = html.fromstring(resp.text)
- radio_buttons = eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]')
+ # supported language codes
- for x in radio_buttons:
- name = x.get("data-name")
- code = x.get("value").split('_')[-1]
- ret_val[code] = {"name": name}
+ lang_map = {'no': 'nb'}
+ for x in eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]'):
- return ret_val
+ eng_lang = x.get("value").split('_')[-1]
+ try:
+ locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
+ except babel.UnknownLocaleError:
+ print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang))
+ continue
+ sxng_lang = language_tag(locale)
+
+ conflict = engine_traits.languages.get(sxng_lang)
+ if conflict:
+ if conflict != eng_lang:
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang))
+ continue
+ engine_traits.languages[sxng_lang] = 'lang_' + eng_lang
+
+ # alias languages
+ engine_traits.languages['zh'] = 'lang_zh-CN'
+
+ # supported region codes
+
+ for x in eval_xpath_list(dom, '//*[@name="region"]/..//input[@name="region"]'):
+ eng_country = x.get("value")
+
+ if eng_country in skip_countries:
+ continue
+ if eng_country == 'ZZ':
+ engine_traits.all_locale = 'ZZ'
+ continue
+
+ sxng_locales = get_offical_locales(eng_country, engine_traits.languages.keys(), regional=True)
+
+ if not sxng_locales:
+ print("ERROR: can't map from google country %s (%s) to a babel region." % (x.get('data-name'), eng_country))
+ continue
+
+ for sxng_locale in sxng_locales:
+ engine_traits.regions[region_tag(sxng_locale)] = eng_country
+
+ # alias regions
+ engine_traits.regions['zh-CN'] = 'HK'
+
+ # supported domains
+
+ if add_domains:
+ resp = network.get('https://www.google.com/supported_domains')
+ if not resp.ok:
+ raise RuntimeError("Response from https://www.google.com/supported_domains is not OK.")
+
+ for domain in resp.text.split():
+ domain = domain.strip()
+ if not domain or domain in [
+ '.google.com',
+ ]:
+ continue
+ region = domain.split('.')[-1].upper()
+ engine_traits.custom['supported_domains'][region] = 'www' + domain
+ if region == 'HK':
+ # There is no google.cn, we use .com.hk for zh-CN
+ engine_traits.custom['supported_domains']['CN'] = 'www' + domain
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index 528f8d21d..e6445b1c4 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -1,31 +1,38 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""This is the implementation of the google images engine using the google
-internal API used the Google Go Android app.
+"""This is the implementation of the Google Images engine using the internal
+Google API used by the Google Go Android app.
This internal API offer results in
-- JSON (_fmt:json)
-- Protobuf (_fmt:pb)
-- Protobuf compressed? (_fmt:pc)
-- HTML (_fmt:html)
-- Protobuf encoded in JSON (_fmt:jspb).
+- JSON (``_fmt:json``)
+- Protobuf_ (``_fmt:pb``)
+- Protobuf_ compressed? (``_fmt:pc``)
+- HTML (``_fmt:html``)
+- Protobuf_ encoded in JSON (``_fmt:jspb``).
+.. _Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers
"""
+from typing import TYPE_CHECKING
+
from urllib.parse import urlencode
from json import loads
+from searx.engines.google import fetch_traits # pylint: disable=unused-import
from searx.engines.google import (
- get_lang_info,
+ get_google_info,
time_range_dict,
detect_google_sorry,
)
-# pylint: disable=unused-import
-from searx.engines.google import supported_languages_url, _fetch_supported_languages
+if TYPE_CHECKING:
+ import logging
+ from searx.enginelib.traits import EngineTraits
+
+ logger: logging.Logger
+ traits: EngineTraits
-# pylint: enable=unused-import
# about
about = {
@@ -40,7 +47,6 @@ about = {
# engine dependent config
categories = ['images', 'web']
paging = True
-use_locale_domain = True
time_range_support = True
safesearch = True
send_accept_language_header = True
@@ -51,20 +57,18 @@ filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
def request(query, params):
"""Google-Image search request"""
- lang_info = get_lang_info(params, supported_languages, language_aliases, False)
+ google_info = get_google_info(params, traits)
query_url = (
'https://'
- + lang_info['subdomain']
+ + google_info['subdomain']
+ '/search'
+ "?"
+ urlencode(
{
'q': query,
'tbm': "isch",
- **lang_info['params'],
- 'ie': "utf8",
- 'oe': "utf8",
+ **google_info['params'],
'asearch': 'isch',
'async': '_fmt:json,p:1,ijn:' + str(params['pageno']),
}
@@ -77,9 +81,8 @@ def request(query, params):
query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
params['url'] = query_url
- params['headers'].update(lang_info['headers'])
- params['headers']['User-Agent'] = 'NSTN/3.60.474802233.release Dalvik/2.1.0 (Linux; U; Android 12; US) gzip'
- params['headers']['Accept'] = '*/*'
+ params['cookies'] = google_info['cookies']
+ params['headers'].update(google_info['headers'])
return params
@@ -111,7 +114,11 @@ def response(resp):
copyright_notice = item["result"].get('iptc', {}).get('copyright_notice')
if copyright_notice:
- result_item['source'] += ' / ' + copyright_notice
+ result_item['source'] += ' | ' + copyright_notice
+
+ freshness_date = item["result"].get("freshness_date")
+ if freshness_date:
+ result_item['source'] += ' | ' + freshness_date
file_size = item.get('gsa', {}).get('file_size')
if file_size:
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 1ada2d64d..ae55ca9cb 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -1,24 +1,40 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""This is the implementation of the google news engine. The google news API
-ignores some parameters from the common :ref:`google API`:
+"""This is the implementation of the Google News engine.
-- num_ : the number of search results is ignored
+Google News has a different region handling compared to Google WEB.
+
+- the ``ceid`` argument has to be set (:py:obj:`ceid_list`)
+- the hl_ argument has to be set correctly (and different to Google WEB)
+- the gl_ argument is mandatory
+
+If one of these arguments is not set correctly, the request is redirected to
+the CONSENT dialog::
+
+ https://consent.google.com/m?continue=
+
+The google news API ignores some parameters from the common :ref:`google API`:
+
+- num_ : the number of search results is ignored / there is no paging; all
+ results for a query term are in the first response.
- save_ : is ignored / Google-News results are always *SafeSearch*
+.. _hl: https://developers.google.com/custom-search/docs/xml_results#hlsp
+.. _gl: https://developers.google.com/custom-search/docs/xml_results#glsp
.. _num: https://developers.google.com/custom-search/docs/xml_results#numsp
.. _save: https://developers.google.com/custom-search/docs/xml_results#safesp
-
"""
-# pylint: disable=invalid-name
+from typing import TYPE_CHECKING
import binascii
import re
from urllib.parse import urlencode
from base64 import b64decode
from lxml import html
+import babel
+from searx import locales
from searx.utils import (
eval_xpath,
eval_xpath_list,
@@ -26,18 +42,19 @@ from searx.utils import (
extract_text,
)
-# pylint: disable=unused-import
+from searx.engines.google import fetch_traits as _fetch_traits # pylint: disable=unused-import
from searx.engines.google import (
- supported_languages_url,
- _fetch_supported_languages,
+ get_google_info,
+ detect_google_sorry,
)
+from searx.enginelib.traits import EngineTraits
-# pylint: enable=unused-import
+if TYPE_CHECKING:
+ import logging
-from searx.engines.google import (
- get_lang_info,
- detect_google_sorry,
-)
+ logger: logging.Logger
+
+traits: EngineTraits
# about
about = {
@@ -49,70 +66,77 @@ about = {
"results": 'HTML',
}
-# compared to other google engines google-news has a different time range
-# support. The time range is included in the search term.
-time_range_dict = {
- 'day': 'when:1d',
- 'week': 'when:7d',
- 'month': 'when:1m',
- 'year': 'when:1y',
-}
-
# engine dependent config
-
categories = ['news']
paging = False
-use_locale_domain = True
-time_range_support = True
+time_range_support = False
# Google-News results are always *SafeSearch*. Option 'safesearch' is set to
# False here, otherwise checker will report safesearch-errors::
#
# safesearch : results are identitical for safesearch=0 and safesearch=2
-safesearch = False
-send_accept_language_header = True
+safesearch = True
+# send_accept_language_header = True
def request(query, params):
"""Google-News search request"""
- lang_info = get_lang_info(params, supported_languages, language_aliases, False)
+ sxng_locale = params.get('searxng_locale', 'en-US')
+ ceid = locales.get_engine_locale(sxng_locale, traits.custom['ceid'], default='US:en')
+ google_info = get_google_info(params, traits)
+ google_info['subdomain'] = 'news.google.com' # google news has only one domain
- # google news has only one domain
- lang_info['subdomain'] = 'news.google.com'
+ ceid_region, ceid_lang = ceid.split(':')
+ ceid_lang, ceid_suffix = (
+ ceid_lang.split('-')
+ + [
+ None,
+ ]
+ )[:2]
- ceid = "%s:%s" % (lang_info['country'], lang_info['language'])
+ google_info['params']['hl'] = ceid_lang
- # google news redirects en to en-US
- if lang_info['params']['hl'] == 'en':
- lang_info['params']['hl'] = 'en-US'
+ if ceid_suffix and ceid_suffix not in ['Hans', 'Hant']:
- # Very special to google-news compared to other google engines, the time
- # range is included in the search term.
- if params['time_range']:
- query += ' ' + time_range_dict[params['time_range']]
+ if ceid_region.lower() == ceid_lang:
+ google_info['params']['hl'] = ceid_lang + '-' + ceid_region
+ else:
+ google_info['params']['hl'] = ceid_lang + '-' + ceid_suffix
+
+ elif ceid_region.lower() != ceid_lang:
+
+ if ceid_region in ['AT', 'BE', 'CH', 'IL', 'SA', 'IN', 'BD', 'PT']:
+ google_info['params']['hl'] = ceid_lang
+ else:
+ google_info['params']['hl'] = ceid_lang + '-' + ceid_region
+
+ google_info['params']['lr'] = 'lang_' + ceid_lang.split('-')[0]
+ google_info['params']['gl'] = ceid_region
query_url = (
'https://'
- + lang_info['subdomain']
- + '/search'
- + "?"
- + urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'gl': lang_info['country']})
+ + google_info['subdomain']
+ + "/search?"
+ + urlencode(
+ {
+ 'q': query,
+ **google_info['params'],
+ }
+ )
+ # ceid includes a ':' character which must not be urlencoded
+ ('&ceid=%s' % ceid)
- ) # ceid includes a ':' character which must not be urlencoded
- params['url'] = query_url
-
- params['cookies']['CONSENT'] = "YES+"
- params['headers'].update(lang_info['headers'])
- params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
+ )
+ params['url'] = query_url
+ params['cookies'] = google_info['cookies']
+ params['headers'].update(google_info['headers'])
return params
def response(resp):
"""Get response from google's search request"""
results = []
-
detect_google_sorry(resp)
# convert the text to dom
@@ -152,8 +176,8 @@ def response(resp):
# The pub_date is mostly a string like 'yesterday', not a real
# timezone date or time. Therefore we can't use publishedDate.
- pub_date = extract_text(eval_xpath(result, './article/div[1]/div[1]/time'))
- pub_origin = extract_text(eval_xpath(result, './article/div[1]/div[1]/a'))
+ pub_date = extract_text(eval_xpath(result, './article//time'))
+ pub_origin = extract_text(eval_xpath(result, './article//a[@data-n-tid]'))
content = ' / '.join([x for x in [pub_origin, pub_date] if x])
@@ -174,3 +198,127 @@ def response(resp):
# return results
return results
+
+
+ceid_list = [
+ 'AE:ar',
+ 'AR:es-419',
+ 'AT:de',
+ 'AU:en',
+ 'BD:bn',
+ 'BE:fr',
+ 'BE:nl',
+ 'BG:bg',
+ 'BR:pt-419',
+ 'BW:en',
+ 'CA:en',
+ 'CA:fr',
+ 'CH:de',
+ 'CH:fr',
+ 'CL:es-419',
+ 'CN:zh-Hans',
+ 'CO:es-419',
+ 'CU:es-419',
+ 'CZ:cs',
+ 'DE:de',
+ 'EG:ar',
+ 'ES:es',
+ 'ET:en',
+ 'FR:fr',
+ 'GB:en',
+ 'GH:en',
+ 'GR:el',
+ 'HK:zh-Hant',
+ 'HU:hu',
+ 'ID:en',
+ 'ID:id',
+ 'IE:en',
+ 'IL:en',
+ 'IL:he',
+ 'IN:bn',
+ 'IN:en',
+ 'IN:hi',
+ 'IN:ml',
+ 'IN:mr',
+ 'IN:ta',
+ 'IN:te',
+ 'IT:it',
+ 'JP:ja',
+ 'KE:en',
+ 'KR:ko',
+ 'LB:ar',
+ 'LT:lt',
+ 'LV:en',
+ 'LV:lv',
+ 'MA:fr',
+ 'MX:es-419',
+ 'MY:en',
+ 'NA:en',
+ 'NG:en',
+ 'NL:nl',
+ 'NO:no',
+ 'NZ:en',
+ 'PE:es-419',
+ 'PH:en',
+ 'PK:en',
+ 'PL:pl',
+ 'PT:pt-150',
+ 'RO:ro',
+ 'RS:sr',
+ 'RU:ru',
+ 'SA:ar',
+ 'SE:sv',
+ 'SG:en',
+ 'SI:sl',
+ 'SK:sk',
+ 'SN:fr',
+ 'TH:th',
+ 'TR:tr',
+ 'TW:zh-Hant',
+ 'TZ:en',
+ 'UA:ru',
+ 'UA:uk',
+ 'UG:en',
+ 'US:en',
+ 'US:es-419',
+ 'VE:es-419',
+ 'VN:vi',
+ 'ZA:en',
+ 'ZW:en',
+]
+"""List of region/language combinations supported by Google News. Values of the
+``ceid`` argument of the Google News REST API."""
+
+
+_skip_values = [
+ 'ET:en', # english (ethiopia)
+ 'ID:en', # english (indonesia)
+ 'LV:en', # english (latvia)
+]
+
+_ceid_locale_map = {'NO:no': 'nb-NO'}
+
+
+def fetch_traits(engine_traits: EngineTraits):
+ _fetch_traits(engine_traits, add_domains=False)
+
+ engine_traits.custom['ceid'] = {}
+
+ for ceid in ceid_list:
+ if ceid in _skip_values:
+ continue
+
+ region, lang = ceid.split(':')
+ x = lang.split('-')
+ if len(x) > 1:
+ if x[1] not in ['Hant', 'Hans']:
+ lang = x[0]
+
+ sxng_locale = _ceid_locale_map.get(ceid, lang + '-' + region)
+ try:
+ locale = babel.Locale.parse(sxng_locale, sep='-')
+ except babel.UnknownLocaleError:
+ print("ERROR: %s -> %s is unknown by babel" % (ceid, sxng_locale))
+ continue
+
+ engine_traits.custom['ceid'][locales.region_tag(locale)] = ceid
diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py
index c07cd4cea..6f33d1e1a 100644
--- a/searx/engines/google_scholar.py
+++ b/searx/engines/google_scholar.py
@@ -1,19 +1,18 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Google (Scholar)
+"""This is the implementation of the Google Scholar engine.
-For detailed description of the *REST-full* API see: `Query Parameter
-Definitions`_.
-
-.. _Query Parameter Definitions:
- https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
+Compared to other Google services the Scholar engine has a simple GET REST-API
+and there does not exist an `async` API. Even though the API is slightly vintage,
+we can make use of the :ref:`google API` to assemble the arguments of the GET
+request.
"""
-# pylint: disable=invalid-name
+from typing import TYPE_CHECKING
+from typing import Optional
from urllib.parse import urlencode
from datetime import datetime
-from typing import Optional
from lxml import html
from searx.utils import (
@@ -23,19 +22,21 @@ from searx.utils import (
extract_text,
)
+from searx.exceptions import SearxEngineCaptchaException
+
+from searx.engines.google import fetch_traits # pylint: disable=unused-import
from searx.engines.google import (
- get_lang_info,
+ get_google_info,
time_range_dict,
- detect_google_sorry,
)
+from searx.enginelib.traits import EngineTraits
-# pylint: disable=unused-import
-from searx.engines.google import (
- supported_languages_url,
- _fetch_supported_languages,
-)
+if TYPE_CHECKING:
+ import logging
-# pylint: enable=unused-import
+ logger: logging.Logger
+
+traits: EngineTraits
# about
about = {
@@ -51,53 +52,62 @@ about = {
categories = ['science', 'scientific publications']
paging = True
language_support = True
-use_locale_domain = True
time_range_support = True
safesearch = False
send_accept_language_header = True
-def time_range_url(params):
- """Returns a URL query component for a google-Scholar time range based on
- ``params['time_range']``. Google-Scholar does only support ranges in years.
- To have any effect, all the Searx ranges (*day*, *week*, *month*, *year*)
- are mapped to *year*. If no range is set, an empty string is returned.
- Example::
+def time_range_args(params):
+ """Returns a dictionary with time range arguments based on
+ ``params['time_range']``.
- &as_ylo=2019
- """
- # as_ylo=2016&as_yhi=2019
- ret_val = ''
- if params['time_range'] in time_range_dict:
- ret_val = urlencode({'as_ylo': datetime.now().year - 1})
- return '&' + ret_val
+ Google Scholar supports a detailed search by year. Searching by *last
+ month* or *last week* (as offered by SearXNG) is uncommon for scientific
+ publications and is not supported by Google Scholar.
+ To limit the result list when the user selects a range, all the SearXNG
+ ranges (*day*, *week*, *month*, *year*) are mapped to *year*. If no range
+ is set, an empty dictionary of arguments is returned. Example: when the
+ user selects a time range (current year minus one in 2022):
-def request(query, params):
- """Google-Scholar search request"""
+ .. code:: python
- offset = (params['pageno'] - 1) * 10
- lang_info = get_lang_info(params, supported_languages, language_aliases, False)
+ { 'as_ylo' : 2021 }
- # subdomain is: scholar.google.xy
- lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.")
+ """
+ ret_val = {}
+ if params['time_range'] in time_range_dict:
+ ret_val['as_ylo'] = datetime.now().year - 1
+ return ret_val
- query_url = (
- 'https://'
- + lang_info['subdomain']
- + '/scholar'
- + "?"
- + urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'start': offset})
- )
- query_url += time_range_url(params)
- params['url'] = query_url
+def detect_google_captcha(dom):
+ """In case of CAPTCHA Google Scholar opens its own *not a Robot* dialog and is
+ not redirected to ``sorry.google.com``.
+ """
+ if eval_xpath(dom, "//form[@id='gs_captcha_f']"):
+ raise SearxEngineCaptchaException()
+
- params['cookies']['CONSENT'] = "YES+"
- params['headers'].update(lang_info['headers'])
- params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
+def request(query, params):
+ """Google-Scholar search request"""
- # params['google_subdomain'] = subdomain
+ google_info = get_google_info(params, traits)
+ # subdomain is: scholar.google.xy
+ google_info['subdomain'] = google_info['subdomain'].replace("www.", "scholar.")
+
+ args = {
+ 'q': query,
+ **google_info['params'],
+ 'start': (params['pageno'] - 1) * 10,
+ 'as_sdt': '2007', # include patents / to disable set '0,5'
+ 'as_vis': '0', # include citations / to disable set '1'
+ }
+ args.update(time_range_args(params))
+
+ params['url'] = 'https://' + google_info['subdomain'] + '/scholar?' + urlencode(args)
+ params['cookies'] = google_info['cookies']
+ params['headers'].update(google_info['headers'])
return params
@@ -138,19 +148,15 @@ def parse_gs_a(text: Optional[str]):
def response(resp): # pylint: disable=too-many-locals
- """Get response from google's search request"""
+ """Parse response from Google Scholar"""
results = []
- detect_google_sorry(resp)
-
- # which subdomain ?
- # subdomain = resp.search_params.get('google_subdomain')
-
# convert the text to dom
dom = html.fromstring(resp.text)
+ detect_google_captcha(dom)
# parse results
- for result in eval_xpath_list(dom, '//div[@data-cid]'):
+ for result in eval_xpath_list(dom, '//div[@data-rp]'):
title = extract_text(eval_xpath(result, './/h3[1]//a'))
@@ -158,7 +164,7 @@ def response(resp): # pylint: disable=too-many-locals
# this is a [ZITATION] block
continue
- pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]'))
+ pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]'))
if pub_type:
pub_type = pub_type[1:-1].lower()
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index fc574bd48..985189df5 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""This is the implementation of the google videos engine.
+"""This is the implementation of the Google Videos engine.
.. admonition:: Content-Security-Policy (CSP)
@@ -14,9 +14,8 @@
"""
-# pylint: disable=invalid-name
+from typing import TYPE_CHECKING
-import re
from urllib.parse import urlencode
from lxml import html
@@ -27,20 +26,22 @@ from searx.utils import (
extract_text,
)
+from searx.engines.google import fetch_traits # pylint: disable=unused-import
from searx.engines.google import (
- get_lang_info,
+ get_google_info,
time_range_dict,
filter_mapping,
- g_section_with_header,
- title_xpath,
suggestion_xpath,
detect_google_sorry,
)
+from searx.enginelib.traits import EngineTraits
-# pylint: disable=unused-import
-from searx.engines.google import supported_languages_url, _fetch_supported_languages
+if TYPE_CHECKING:
+ import logging
-# pylint: enable=unused-import
+ logger: logging.Logger
+
+traits: EngineTraits
# about
about = {
@@ -55,70 +56,32 @@ about = {
# engine dependent config
categories = ['videos', 'web']
-paging = False
+paging = True
language_support = True
-use_locale_domain = True
time_range_support = True
safesearch = True
-send_accept_language_header = True
-
-RE_CACHE = {}
-
-
-def _re(regexpr):
- """returns compiled regular expression"""
- RE_CACHE[regexpr] = RE_CACHE.get(regexpr, re.compile(regexpr))
- return RE_CACHE[regexpr]
-
-
-def scrap_out_thumbs_src(dom):
- ret_val = {}
- thumb_name = 'dimg_'
- for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'):
- _script = script.text
- # "dimg_35":"https://i.ytimg.c....",
- _dimurl = _re("s='([^']*)").findall(_script)
- for k, v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)').findall(_script):
- v = v.replace(r'\u003d', '=')
- v = v.replace(r'\u0026', '&')
- ret_val[k] = v
- logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys())
- return ret_val
-
-
-def scrap_out_thumbs(dom):
- """Scrap out thumbnail data from <script> tags."""
- ret_val = {}
- thumb_name = 'dimg_'
-
- for script in eval_xpath_list(dom, '//script[contains(., "_setImagesSrc")]'):
- _script = script.text
-
- # var s='data:image/jpeg;base64, ...'
- _imgdata = _re("s='([^']*)").findall(_script)
- if not _imgdata:
- continue
-
- # var ii=['dimg_17']
- for _vidthumb in _re(r"(%s\d+)" % thumb_name).findall(_script):
- # At least the equal sign in the URL needs to be decoded
- ret_val[_vidthumb] = _imgdata[0].replace(r"\x3d", "=")
-
- logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys())
- return ret_val
def request(query, params):
"""Google-Video search request"""
- lang_info = get_lang_info(params, supported_languages, language_aliases, False)
+ google_info = get_google_info(params, traits)
query_url = (
'https://'
- + lang_info['subdomain']
+ + google_info['subdomain']
+ '/search'
+ "?"
- + urlencode({'q': query, 'tbm': "vid", **lang_info['params'], 'ie': "utf8", 'oe': "utf8"})
+ + urlencode(
+ {
+ 'q': query,
+ 'tbm': "vid",
+ 'start': 10 * params['pageno'],
+ **google_info['params'],
+ 'asearch': 'arc',
+ 'async': 'use_ac:true,_fmt:html',
+ }
+ )
)
if params['time_range'] in time_range_dict:
@@ -127,9 +90,8 @@ def request(query, params):
query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
params['url'] = query_url
- params['cookies']['CONSENT'] = "YES+"
- params['headers'].update(lang_info['headers'])
- params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
+ params['cookies'] = google_info['cookies']
+ params['headers'].update(google_info['headers'])
return params
@@ -141,43 +103,30 @@ def response(resp):
# convert the text to dom
dom = html.fromstring(resp.text)
- vidthumb_imgdata = scrap_out_thumbs(dom)
- thumbs_src = scrap_out_thumbs_src(dom)
- logger.debug(str(thumbs_src))
# parse results
for result in eval_xpath_list(dom, '//div[contains(@class, "g ")]'):
- # ignore google *sections*
- if extract_text(eval_xpath(result, g_section_with_header)):
- logger.debug("ignoring <g-section-with-header>")
- continue
-
- # ingnore articles without an image id / e.g. news articles
- img_id = eval_xpath_getindex(result, './/g-img/img/@id', 0, default=None)
- if img_id is None:
- logger.error("no img_id found in item %s (news article?)", len(results) + 1)
+ img_src = eval_xpath_getindex(result, './/img/@src', 0, None)
+ if img_src is None:
continue
- img_src = vidthumb_imgdata.get(img_id, None)
- if not img_src:
- img_src = thumbs_src.get(img_id, "")
+ title = extract_text(eval_xpath_getindex(result, './/a/h3[1]', 0))
+ url = eval_xpath_getindex(result, './/a/h3[1]/../@href', 0)
- title = extract_text(eval_xpath_getindex(result, title_xpath, 0))
- url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0)
- length = extract_text(eval_xpath(result, './/div[contains(@class, "P7xzyf")]/span/span'))
c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0)
content = extract_text(c_node)
- pub_info = extract_text(eval_xpath(result, './/div[@class="Zg1NU"]'))
+ pub_info = extract_text(eval_xpath(result, './/div[@class="P7xzyf"]'))
+ length = extract_text(eval_xpath(result, './/div[@class="J1mWY"]'))
results.append(
{
'url': url,
'title': title,
'content': content,
- 'length': length,
'author': pub_info,
'thumbnail': img_src,
+ 'length': length,
'template': 'videos.html',
}
)
diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py
index 345c2f991..87b386d7a 100644
--- a/searx/engines/peertube.py
+++ b/searx/engines/peertube.py
@@ -1,18 +1,30 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- peertube (Videos)
+# lint: pylint
+"""Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share
+(more or less) the same REST API and the schema of the JSON result is identical.
+
"""
-from json import loads
-from datetime import datetime
+import re
from urllib.parse import urlencode
+from datetime import datetime
+from dateutil.parser import parse
+from dateutil.relativedelta import relativedelta
+
+import babel
+
+from searx import network
+from searx.locales import language_tag
from searx.utils import html_to_text
+from searx.enginelib.traits import EngineTraits
+
+traits: EngineTraits
-# about
about = {
+ # pylint: disable=line-too-long
"website": 'https://joinpeertube.org',
"wikidata_id": 'Q50938515',
- "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html',
+ "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
@@ -22,66 +34,155 @@ about = {
categories = ["videos"]
paging = True
base_url = "https://peer.tube"
-supported_languages_url = 'https://peer.tube/api/v1/videos/languages'
+"""Base URL of the Peertube instance. A list of instances is available at:
+
+- https://instances.joinpeertube.org/instances
+"""
+
+time_range_support = True
+time_range_table = {
+ 'day': relativedelta(),
+ 'week': relativedelta(weeks=-1),
+ 'month': relativedelta(months=-1),
+ 'year': relativedelta(years=-1),
+}
+
+safesearch = True
+safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
+
+
+def minute_to_hm(minute):
+ if isinstance(minute, int):
+ return "%d:%02d" % (divmod(minute, 60))
+ return None
-# do search-request
def request(query, params):
- sanitized_url = base_url.rstrip("/")
- pageno = (params["pageno"] - 1) * 15
- search_url = sanitized_url + "/api/v1/search/videos/?pageno={pageno}&{query}"
- query_dict = {"search": query}
- language = params["language"].split("-")[0]
- if "all" != language and language in supported_languages:
- query_dict["languageOneOf"] = language
- params["url"] = search_url.format(query=urlencode(query_dict), pageno=pageno)
- return params
+ """Assemble request for the Peertube API"""
+
+ if not query:
+ return False
+
+ # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
+ eng_lang = traits.get_language(params['searxng_locale'], None)
+
+ params['url'] = (
+ base_url.rstrip("/")
+ + "/api/v1/search/videos?"
+ + urlencode(
+ {
+ 'search': query,
+ 'searchTarget': 'search-index', # Vidiversum
+ 'resultType': 'videos',
+ 'start': (params['pageno'] - 1) * 10,
+ 'count': 10,
+ # -createdAt: sort by date descending / createdAt: date ascending
+ 'sort': '-match', # sort by *match descending*
+ 'nsfw': safesearch_table[params['safesearch']],
+ }
+ )
+ )
+
+ if eng_lang is not None:
+ params['url'] += '&languageOneOf[]=' + eng_lang
+ params['url'] += '&boostLanguages[]=' + eng_lang
+ if params['time_range'] in time_range_table:
+ time = datetime.now().date() + time_range_table[params['time_range']]
+ params['url'] += '&startDate=' + time.isoformat()
-def _get_offset_from_pageno(pageno):
- return (pageno - 1) * 15 + 1
+ return params
-# get response from search-request
def response(resp):
- sanitized_url = base_url.rstrip("/")
+ return video_response(resp)
+
+
+def video_response(resp):
+ """Parse video response from SepiaSearch and Peertube instances."""
results = []
- search_res = loads(resp.text)
+ json_data = resp.json()
- # return empty array if there are no results
- if "data" not in search_res:
+ if 'data' not in json_data:
return []
- # parse results
- for res in search_res["data"]:
- title = res["name"]
- url = sanitized_url + "/videos/watch/" + res["uuid"]
- description = res["description"]
- if description:
- content = html_to_text(res["description"])
- else:
- content = ""
- thumbnail = sanitized_url + res["thumbnailPath"]
- publishedDate = datetime.strptime(res["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ for result in json_data['data']:
+ metadata = [
+ x
+ for x in [
+ result.get('channel', {}).get('displayName'),
+ result.get('channel', {}).get('name') + '@' + result.get('channel', {}).get('host'),
+ ', '.join(result.get('tags', [])),
+ ]
+ if x
+ ]
results.append(
{
- "template": "videos.html",
- "url": url,
- "title": title,
- "content": content,
- "publishedDate": publishedDate,
- "iframe_src": sanitized_url + res["embedPath"],
- "thumbnail": thumbnail,
+ 'url': result['url'],
+ 'title': result['name'],
+ 'content': html_to_text(result.get('description') or ''),
+ 'author': result.get('account', {}).get('displayName'),
+ 'length': minute_to_hm(result.get('duration')),
+ 'template': 'videos.html',
+ 'publishedDate': parse(result['publishedAt']),
+ 'iframe_src': result.get('embedUrl'),
+ 'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'),
+ 'metadata': ' | '.join(metadata),
}
)
- # return results
return results
-def _fetch_supported_languages(resp):
- videolanguages = resp.json()
- peertube_languages = list(videolanguages.keys())
- return peertube_languages
+def fetch_traits(engine_traits: EngineTraits):
+ """Fetch languages from peertube's search-index source code.
+
+ See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_
+
+ .. _8ed5c729 - Refactor and redesign client:
+ https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729
+ .. _videoLanguages:
+ https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
+ """
+
+ resp = network.get(
+ 'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue',
+ # the response from search-index repository is very slow
+ timeout=60,
+ )
+
+ if not resp.ok:
+ print("ERROR: response from peertube is not OK.")
+ return
+
+ js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL)
+ if not js_lang:
+ print("ERROR: can't determine languages from peertube")
+ return
+
+ for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)):
+ try:
+ eng_tag = lang.group(1)
+ if eng_tag == 'oc':
+ # Occitan is not known by babel; its closest relative is Catalan
+ # but 'ca' is already in the list of engine_traits.languages -->
+ # 'oc' will be ignored.
+ continue
+
+ sxng_tag = language_tag(babel.Locale.parse(eng_tag))
+
+ except babel.UnknownLocaleError:
+ print("ERROR: %s is unknown by babel" % eng_tag)
+ continue
+
+ conflict = engine_traits.languages.get(sxng_tag)
+ if conflict:
+ if conflict != eng_tag:
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+ continue
+ engine_traits.languages[sxng_tag] = eng_tag
+
+ engine_traits.languages['zh_Hans'] = 'zh'
+ engine_traits.languages['zh_Hant'] = 'zh'
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index 6de2176d0..4a41676c5 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -34,7 +34,9 @@ import babel
from searx.exceptions import SearxEngineAPIException
from searx.network import raise_for_httperror
-from searx.locales import get_engine_locale
+from searx.enginelib.traits import EngineTraits
+
+traits: EngineTraits
# about
about = {
@@ -49,7 +51,6 @@ about = {
# engine dependent config
categories = []
paging = True
-supported_languages_url = about['website']
qwant_categ = None # web|news|images|videos
safesearch = True
@@ -95,7 +96,7 @@ def request(query, params):
)
# add qwant's locale
- q_locale = get_engine_locale(params['language'], supported_languages, default='en_US')
+ q_locale = traits.get_region(params["searxng_locale"], default='en_US')
params['url'] += '&locale=' + q_locale
# add safesearch option
@@ -243,15 +244,20 @@ def response(resp):
return results
-def _fetch_supported_languages(resp):
+def fetch_traits(engine_traits: EngineTraits):
+
+ # pylint: disable=import-outside-toplevel
+ from searx import network
+ from searx.locales import region_tag
+ resp = network.get(about['website'])
text = resp.text
text = text[text.find('INITIAL_PROPS') :]
text = text[text.find('{') : text.find('</script>')]
q_initial_props = loads(text)
q_locales = q_initial_props.get('locales')
- q_valid_locales = []
+ eng_tag_list = set()
for country, v in q_locales.items():
for lang in v['langs']:
@@ -261,25 +267,18 @@ def _fetch_supported_languages(resp):
# qwant-news does not support all locales from qwant-web:
continue
- q_valid_locales.append(_locale)
-
- supported_languages = {}
+ eng_tag_list.add(_locale)
- for q_locale in q_valid_locales:
+ for eng_tag in eng_tag_list:
try:
- locale = babel.Locale.parse(q_locale, sep='_')
- except babel.core.UnknownLocaleError:
- print("ERROR: can't determine babel locale of quant's locale %s" % q_locale)
+ sxng_tag = region_tag(babel.Locale.parse(eng_tag, sep='_'))
+ except babel.UnknownLocaleError:
+ print("ERROR: can't determine babel locale of quant's locale %s" % eng_tag)
continue
- # note: supported_languages (dict)
- #
- # dict's key is a string build up from a babel.Locale object / the
- # notation 'xx-XX' (and 'xx') conforms to SearXNG's locale (and
- # language) notation and dict's values are the locale strings used by
- # the engine.
-
- searxng_locale = locale.language + '-' + locale.territory # --> params['language']
- supported_languages[searxng_locale] = q_locale
-
- return supported_languages
+ conflict = engine_traits.regions.get(sxng_tag)
+ if conflict:
+ if conflict != eng_tag:
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+ continue
+ engine_traits.regions[sxng_tag] = eng_tag
diff --git a/searx/engines/sepiasearch.py b/searx/engines/sepiasearch.py
index 9c45d6c43..72157b253 100644
--- a/searx/engines/sepiasearch.py
+++ b/searx/engines/sepiasearch.py
@@ -1,70 +1,80 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""SepiaSearch uses the same languages as :py:obj:`Peertube
+<searx.engines.peertube>` and the response is identical to the response from the
+peertube engines.
+
"""
- SepiaSearch (Videos)
-"""
-from json import loads
-from dateutil import parser, relativedelta
+from typing import TYPE_CHECKING
+
from urllib.parse import urlencode
from datetime import datetime
-# about
+from searx.engines.peertube import fetch_traits # pylint: disable=unused-import
+from searx.engines.peertube import (
+ # pylint: disable=unused-import
+ video_response,
+ safesearch_table,
+ time_range_table,
+)
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+traits: EngineTraits
+
about = {
+ # pylint: disable=line-too-long
"website": 'https://sepiasearch.org',
"wikidata_id": None,
- "official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api", # NOQA
+ "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
+# engine dependent config
categories = ['videos']
paging = True
+
+base_url = 'https://sepiasearch.org'
+
time_range_support = True
safesearch = True
-supported_languages = [
- # fmt: off
- 'en', 'fr', 'ja', 'eu', 'ca', 'cs', 'eo', 'el',
- 'de', 'it', 'nl', 'es', 'oc', 'gd', 'zh', 'pt',
- 'sv', 'pl', 'fi', 'ru'
- # fmt: on
-]
-base_url = 'https://sepiasearch.org/api/v1/search/videos'
-
-safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
-
-time_range_table = {
- 'day': relativedelta.relativedelta(),
- 'week': relativedelta.relativedelta(weeks=-1),
- 'month': relativedelta.relativedelta(months=-1),
- 'year': relativedelta.relativedelta(years=-1),
-}
-def minute_to_hm(minute):
- if isinstance(minute, int):
- return "%d:%02d" % (divmod(minute, 60))
- return None
+def request(query, params):
+ """Assemble request for the SepiaSearch API"""
+
+ if not query:
+ return False
+ # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
+ eng_lang = traits.get_language(params['searxng_locale'], None)
-def request(query, params):
params['url'] = (
- base_url
- + '?'
+ base_url.rstrip("/")
+ + "/api/v1/search/videos?"
+ urlencode(
{
'search': query,
'start': (params['pageno'] - 1) * 10,
'count': 10,
- 'sort': '-match',
+ # -createdAt: sort by date descending / createdAt: date ascending
+ 'sort': '-match', # sort by *match descending*
'nsfw': safesearch_table[params['safesearch']],
}
)
)
- language = params['language'].split('-')[0]
- if language in supported_languages:
- params['url'] += '&languageOneOf[]=' + language
+ if eng_lang is not None:
+ params['url'] += '&languageOneOf[]=' + eng_lang
+ params['url'] += '&boostLanguages[]=' + eng_lang
+
if params['time_range'] in time_range_table:
time = datetime.now().date() + time_range_table[params['time_range']]
params['url'] += '&startDate=' + time.isoformat()
@@ -73,34 +83,4 @@ def request(query, params):
def response(resp):
- results = []
-
- search_results = loads(resp.text)
-
- if 'data' not in search_results:
- return []
-
- for result in search_results['data']:
- title = result['name']
- content = result['description']
- thumbnail = result['thumbnailUrl']
- publishedDate = parser.parse(result['publishedAt'])
- author = result.get('account', {}).get('displayName')
- length = minute_to_hm(result.get('duration'))
- url = result['url']
-
- results.append(
- {
- 'url': url,
- 'title': title,
- 'content': content,
- 'author': author,
- 'length': length,
- 'template': 'videos.html',
- 'publishedDate': publishedDate,
- 'iframe_src': result.get('embedUrl'),
- 'thumbnail': thumbnail,
- }
- )
-
- return results
+ return video_response(resp)
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index f857f7b6d..2813d0bf3 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -1,28 +1,108 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Startpage (Web)
+"""Startpage's language & region selectors are a mess ..
+
+.. _startpage regions:
+
+Startpage regions
+=================
+
+In the list of regions there are tags we need to map to common region tags::
+
+ pt-BR_BR --> pt_BR
+ zh-CN_CN --> zh_Hans_CN
+ zh-TW_TW --> zh_Hant_TW
+ zh-TW_HK --> zh_Hant_HK
+ en-GB_GB --> en_GB
+
+and there is at least one tag with a three letter language tag (ISO 639-2)::
+
+ fil_PH --> fil_PH
+
+The locale code ``no_NO`` from Startpage does not exist and is mapped to
+``nb-NO``::
+
+ babel.core.UnknownLocaleError: unknown locale 'no_NO'
+
+For reference see languages-subtag at iana; ``no`` is the macrolanguage [1]_ and
+W3C recommends subtag over macrolanguage [2]_.
+
+.. [1] `iana: language-subtag-registry
+ <https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry>`_ ::
+
+ type: language
+ Subtag: nb
+ Description: Norwegian Bokmål
+ Added: 2005-10-16
+ Suppress-Script: Latn
+ Macrolanguage: no
+
+.. [2]
+ Use macrolanguages with care. Some language subtags have a Scope field set to
+ macrolanguage, i.e. this primary language subtag encompasses a number of more
+ specific primary language subtags in the registry. ... As we recommended for
+ the collection subtags mentioned above, in most cases you should try to use
+ the more specific subtags ... `W3: The primary language subtag
+ <https://www.w3.org/International/questions/qa-choosing-language-tags#langsubtag>`_
+
+.. _startpage languages:
+
+Startpage languages
+===================
+
+:py:obj:`send_accept_language_header`:
+ The displayed names in Startpage's settings page depend on the location of the
+ IP when ``Accept-Language`` HTTP header is unset. In :py:obj:`fetch_traits`
+ we use::
+
+ 'Accept-Language': "en-US,en;q=0.5",
+ ..
+
+ to get uniform names independent from the IP.
+
+.. _startpage categories:
+
+Startpage categories
+====================
+
+Startpage's category (for Web-search, News, Videos, ..) is set by
+:py:obj:`startpage_categ` in settings.yml::
+
+ - name: startpage
+ engine: startpage
+ startpage_categ: web
+ ...
+
+.. hint::
+
+ The default category is ``web`` .. and other categories than ``web`` are not
+ yet implemented.
"""
+from typing import TYPE_CHECKING
+from collections import OrderedDict
import re
-from time import time
-
-from urllib.parse import urlencode
from unicodedata import normalize, combining
+from time import time
from datetime import datetime, timedelta
-from dateutil import parser
-from lxml import html
-from babel import Locale
-from babel.localedata import locale_identifiers
+import dateutil.parser
+import lxml.html
+import babel
+
+from searx import network
+from searx.utils import extract_text, eval_xpath, gen_useragent
+from searx.exceptions import SearxEngineCaptchaException
+from searx.locales import region_tag
+from searx.enginelib.traits import EngineTraits
-from searx.network import get
-from searx.utils import extract_text, eval_xpath, match_language
-from searx.exceptions import (
- SearxEngineResponseException,
- SearxEngineCaptchaException,
-)
+if TYPE_CHECKING:
+ import logging
+ logger: logging.Logger
+
+traits: EngineTraits
# about
about = {
@@ -34,18 +114,28 @@ about = {
"results": 'HTML',
}
+startpage_categ = 'web'
+"""Startpage's category, visit :ref:`startpage categories`.
+"""
+
+send_accept_language_header = True
+"""Startpage tries to guess user's language and territory from the HTTP
+``Accept-Language``. Optionally the user can select a search-language (can be
+different to the UI language) and a region filter.
+"""
+
# engine dependent config
categories = ['general', 'web']
-# there is a mechanism to block "bot" search
-# (probably the parameter qid), require
-# storing of qid's between mulitble search-calls
-
paging = True
-supported_languages_url = 'https://www.startpage.com/do/settings'
+time_range_support = True
+safesearch = True
+
+time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
+safesearch_dict = {0: '0', 1: '1', 2: '1'}
# search-url
-base_url = 'https://startpage.com/'
-search_url = base_url + 'sp/search?'
+base_url = 'https://www.startpage.com'
+search_url = base_url + '/sp/search'
# specific xpath variables
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
@@ -53,92 +143,193 @@ search_url = base_url + 'sp/search?'
results_xpath = '//div[@class="w-gl__result__main"]'
link_xpath = './/a[@class="w-gl__result-title result-link"]'
content_xpath = './/p[@class="w-gl__description"]'
+search_form_xpath = '//form[@id="search"]'
+"""XPath of Startpage's origin search form
+
+.. code:: html
+
+ <form action="/sp/search" method="post">
+ <input type="text" name="query" value="" ..>
+ <input type="hidden" name="t" value="device">
+ <input type="hidden" name="lui" value="english">
+ <input type="hidden" name="sc" value="Q7Mt5TRqowKB00">
+ <input type="hidden" name="cat" value="web">
+ <input type="hidden" class="abp" id="abp-input" name="abp" value="1">
+ </form>
+"""
# timestamp of the last fetch of 'sc' code
sc_code_ts = 0
sc_code = ''
+sc_code_cache_sec = 30
+"""Time in seconds the sc-code is cached in memory :py:obj:`get_sc_code`."""
-def raise_captcha(resp):
+def get_sc_code(searxng_locale, params):
+ """Get an actual ``sc`` argument from Startpage's search form (HTML page).
- if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
- raise SearxEngineCaptchaException()
+ Startpage puts a ``sc`` argument on every HTML :py:obj:`search form
+ <search_form_xpath>`. Without this argument Startpage considers the request
+ is from a bot. We do not know what is encoded in the value of the ``sc``
+ argument, but it seems to be a kind of a *time-stamp*.
+ Startpage's search form generates a new sc-code on each request. This
+ function scrapes a new sc-code from Startpage's home page every
+ :py:obj:`sc_code_cache_sec` seconds.
-def get_sc_code(headers):
- """Get an actual `sc` argument from startpage's home page.
+ """
- Startpage puts a `sc` argument on every link. Without this argument
- startpage considers the request is from a bot. We do not know what is
- encoded in the value of the `sc` argument, but it seems to be a kind of a
- *time-stamp*. This *time-stamp* is valid for a few hours.
+ global sc_code_ts, sc_code # pylint: disable=global-statement
- This function scrap a new *time-stamp* from startpage's home page every hour
- (3000 sec).
+ if sc_code and (time() < (sc_code_ts + sc_code_cache_sec)):
+ logger.debug("get_sc_code: reuse '%s'", sc_code)
+ return sc_code
+
+ headers = {**params['headers']}
+ headers['Origin'] = base_url
+ headers['Referer'] = base_url + '/'
+ # headers['Connection'] = 'keep-alive'
+ # headers['Accept-Encoding'] = 'gzip, deflate, br'
+ # headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8'
+ # headers['User-Agent'] = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0'
+
+ # add Accept-Language header
+ if searxng_locale == 'all':
+ searxng_locale = 'en-US'
+ locale = babel.Locale.parse(searxng_locale, sep='-')
+
+ if send_accept_language_header:
+ ac_lang = locale.language
+ if locale.territory:
+ ac_lang = "%s-%s,%s;q=0.9,*;q=0.5" % (
+ locale.language,
+ locale.territory,
+ locale.language,
+ )
+ headers['Accept-Language'] = ac_lang
+
+ get_sc_url = base_url + '/?sc=%s' % (sc_code)
+ logger.debug("query new sc time-stamp ... %s", get_sc_url)
+ logger.debug("headers: %s", headers)
+ resp = network.get(get_sc_url, headers=headers)
+
+ # ?? x = network.get('https://www.startpage.com/sp/cdn/images/filter-chevron.svg', headers=headers)
+ # ?? https://www.startpage.com/sp/cdn/images/filter-chevron.svg
+ # ?? ping-back URL: https://www.startpage.com/sp/pb?sc=TLsB0oITjZ8F21
- """
+ if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
+ raise SearxEngineCaptchaException(
+ message="get_sc_code: got redirected to https://www.startpage.com/sp/captcha",
+ )
+
+ dom = lxml.html.fromstring(resp.text)
+
+ try:
+ sc_code = eval_xpath(dom, search_form_xpath + '//input[@name="sc"]/@value')[0]
+ except IndexError as exc:
+ logger.debug("suspend startpage API --> https://github.com/searxng/searxng/pull/695")
+ raise SearxEngineCaptchaException(
+ message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url,
+ ) from exc
+
+ sc_code_ts = time()
+ logger.debug("get_sc_code: new value is: %s", sc_code)
+ return sc_code
- global sc_code_ts, sc_code # pylint: disable=global-statement
- if time() > (sc_code_ts + 3000):
- logger.debug("query new sc time-stamp ...")
+def request(query, params):
+ """Assemble a Startpage request.
- resp = get(base_url, headers=headers)
- raise_captcha(resp)
- dom = html.fromstring(resp.text)
+ To avoid CAPTCHA we need to send a well formed HTTP POST request with a
+ cookie. We need to form a request that is identical to the request built by
+ Startpage's search form:
- try:
- # <input type="hidden" name="sc" value="...">
- sc_code = eval_xpath(dom, '//input[@name="sc"]/@value')[0]
- except IndexError as exc:
- # suspend startpage API --> https://github.com/searxng/searxng/pull/695
- raise SearxEngineResponseException(
- suspended_time=7 * 24 * 3600, message="PR-695: query new sc time-stamp failed!"
- ) from exc
+ - in the cookie the **region** is selected
+ - in the HTTP POST data the **language** is selected
- sc_code_ts = time()
- logger.debug("new value is: %s", sc_code)
+ Additionally the arguments from Startpage's search form need to be set in
+ HTML POST data / compare ``<input>`` elements: :py:obj:`search_form_xpath`.
+ """
+ if startpage_categ == 'web':
+ return _request_cat_web(query, params)
- return sc_code
+ logger.error("Startpages's category '%' is not yet implemented.", startpage_categ)
+ return params
-# do search-request
-def request(query, params):
+def _request_cat_web(query, params):
- # pylint: disable=line-too-long
- # The format string from Startpage's FFox add-on [1]::
- #
- # https://www.startpage.com/do/dsearch?query={searchTerms}&cat=web&pl=ext-ff&language=__MSG_extensionUrlLanguage__&extVersion=1.3.0
- #
- # [1] https://addons.mozilla.org/en-US/firefox/addon/startpage-private-search/
+ engine_region = traits.get_region(params['searxng_locale'], 'en-US')
+ engine_language = traits.get_language(params['searxng_locale'], 'en')
+ # build arguments
args = {
'query': query,
- 'page': params['pageno'],
'cat': 'web',
- # 'pl': 'ext-ff',
- # 'extVersion': '1.3.0',
- # 'abp': "-1",
- 'sc': get_sc_code(params['headers']),
+ 't': 'device',
+ 'sc': get_sc_code(params['searxng_locale'], params), # hint: this func needs HTTP headers,
+ 'with_date': time_range_dict.get(params['time_range'], ''),
}
- # set language if specified
- if params['language'] != 'all':
- lang_code = match_language(params['language'], supported_languages, fallback=None)
- if lang_code:
- language_name = supported_languages[lang_code]['alias']
- args['language'] = language_name
- args['lui'] = language_name
+ if engine_language:
+ args['language'] = engine_language
+ args['lui'] = engine_language
+
+ args['abp'] = '1'
+ if params['pageno'] > 1:
+ args['page'] = params['pageno']
+
+ # build cookie
+ lang_homepage = 'en'
+ cookie = OrderedDict()
+ cookie['date_time'] = 'world'
+ cookie['disable_family_filter'] = safesearch_dict[params['safesearch']]
+ cookie['disable_open_in_new_window'] = '0'
+ cookie['enable_post_method'] = '1' # hint: POST
+ cookie['enable_proxy_safety_suggest'] = '1'
+ cookie['enable_stay_control'] = '1'
+ cookie['instant_answers'] = '1'
+ cookie['lang_homepage'] = 's/device/%s/' % lang_homepage
+ cookie['num_of_results'] = '10'
+ cookie['suggestions'] = '1'
+ cookie['wt_unit'] = 'celsius'
+
+ if engine_language:
+ cookie['language'] = engine_language
+ cookie['language_ui'] = engine_language
+
+ if engine_region:
+ cookie['search_results_region'] = engine_region
+
+ params['cookies']['preferences'] = 'N1N'.join(["%sEEE%s" % x for x in cookie.items()])
+ logger.debug('cookie preferences: %s', params['cookies']['preferences'])
+
+ # POST request
+ logger.debug("data: %s", args)
+ params['data'] = args
+ params['method'] = 'POST'
+ params['url'] = search_url
+ params['headers']['Origin'] = base_url
+ params['headers']['Referer'] = base_url + '/'
+ # is the Accept header needed?
+ # params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
- params['url'] = search_url + urlencode(args)
return params
# get response from search-request
def response(resp):
- results = []
+ dom = lxml.html.fromstring(resp.text)
- dom = html.fromstring(resp.text)
+ if startpage_categ == 'web':
+ return _response_cat_web(dom)
+
+ logger.error("Startpages's category '%' is not yet implemented.", startpage_categ)
+ return []
+
+
+def _response_cat_web(dom):
+ results = []
# parse results
for result in eval_xpath(dom, results_xpath):
@@ -173,7 +364,7 @@ def response(resp):
content = content[date_pos:]
try:
- published_date = parser.parse(date_string, dayfirst=True)
+ published_date = dateutil.parser.parse(date_string, dayfirst=True)
except ValueError:
pass
@@ -199,62 +390,103 @@ def response(resp):
return results
-# get supported languages from their site
-def _fetch_supported_languages(resp):
- # startpage's language selector is a mess each option has a displayed name
- # and a value, either of which may represent the language name in the native
- # script, the language name in English, an English transliteration of the
- # native name, the English name of the writing script used by the language,
- # or occasionally something else entirely.
-
- # this cases are so special they need to be hardcoded, a couple of them are misspellings
- language_names = {
- 'english_uk': 'en-GB',
- 'fantizhengwen': ['zh-TW', 'zh-HK'],
- 'hangul': 'ko',
- 'malayam': 'ml',
- 'norsk': 'nb',
- 'sinhalese': 'si',
- 'sudanese': 'su',
+def fetch_traits(engine_traits: EngineTraits):
+ """Fetch :ref:`languages <startpage languages>` and :ref:`regions <startpage
+ regions>` from Startpage."""
+ # pylint: disable=too-many-branches
+
+ headers = {
+ 'User-Agent': gen_useragent(),
+ 'Accept-Language': "en-US,en;q=0.5", # request English names, independent of the IP's location
}
+ resp = network.get('https://www.startpage.com/do/settings', headers=headers)
- # get the English name of every language known by babel
- language_names.update(
- {
- # fmt: off
- name.lower(): lang_code
- # pylint: disable=protected-access
- for lang_code, name in Locale('en')._data['languages'].items()
- # fmt: on
- }
- )
+ if not resp.ok:
+ print("ERROR: response from Startpage is not OK.")
+
+ dom = lxml.html.fromstring(resp.text)
+
+ # regions
+
+ sp_region_names = []
+ for option in dom.xpath('//form[@name="settings"]//select[@name="search_results_region"]/option'):
+ sp_region_names.append(option.get('value'))
+
+ for eng_tag in sp_region_names:
+ if eng_tag == 'all':
+ continue
+ babel_region_tag = {'no_NO': 'nb_NO'}.get(eng_tag, eng_tag) # norway
+
+ if '-' in babel_region_tag:
+ l, r = babel_region_tag.split('-')
+ r = r.split('_')[-1]
+ sxng_tag = region_tag(babel.Locale.parse(l + '_' + r, sep='_'))
+
+ else:
+ try:
+ sxng_tag = region_tag(babel.Locale.parse(babel_region_tag, sep='_'))
+
+ except babel.UnknownLocaleError:
+ print("ERROR: can't determine babel locale of startpage's locale %s" % eng_tag)
+ continue
+
+ conflict = engine_traits.regions.get(sxng_tag)
+ if conflict:
+ if conflict != eng_tag:
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+ continue
+ engine_traits.regions[sxng_tag] = eng_tag
+
+ # languages
+
+ catalog_engine2code = {name.lower(): lang_code for lang_code, name in babel.Locale('en').languages.items()}
# get the native name of every language known by babel
- for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, locale_identifiers()):
- native_name = Locale(lang_code).get_language_name().lower()
+
+ for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()):
+ native_name = babel.Locale(lang_code).get_language_name().lower()
# add native name exactly as it is
- language_names[native_name] = lang_code
+ catalog_engine2code[native_name] = lang_code
# add "normalized" language name (i.e. français becomes francais and español becomes espanol)
unaccented_name = ''.join(filter(lambda c: not combining(c), normalize('NFKD', native_name)))
if len(unaccented_name) == len(unaccented_name.encode()):
# add only if result is ascii (otherwise "normalization" didn't work)
- language_names[unaccented_name] = lang_code
+ catalog_engine2code[unaccented_name] = lang_code
+
+ # values that can't be determined by babel's languages names
+
+ catalog_engine2code.update(
+ {
+ # traditional chinese used in ..
+ 'fantizhengwen': 'zh_Hant',
+ # Korean alphabet
+ 'hangul': 'ko',
+ # Malayalam is one of 22 scheduled languages of India.
+ 'malayam': 'ml',
+ 'norsk': 'nb',
+ 'sinhalese': 'si',
+ }
+ )
+
+ skip_eng_tags = {
+ 'english_uk', # SearXNG lang 'en' already maps to 'english'
+ }
- dom = html.fromstring(resp.text)
- sp_lang_names = []
for option in dom.xpath('//form[@name="settings"]//select[@name="language"]/option'):
- sp_lang_names.append((option.get('value'), extract_text(option).lower()))
-
- supported_languages = {}
- for sp_option_value, sp_option_text in sp_lang_names:
- lang_code = language_names.get(sp_option_value) or language_names.get(sp_option_text)
- if isinstance(lang_code, str):
- supported_languages[lang_code] = {'alias': sp_option_value}
- elif isinstance(lang_code, list):
- for _lc in lang_code:
- supported_languages[_lc] = {'alias': sp_option_value}
- else:
- print('Unknown language option in Startpage: {} ({})'.format(sp_option_value, sp_option_text))
- return supported_languages
+ eng_tag = option.get('value')
+ if eng_tag in skip_eng_tags:
+ continue
+ name = extract_text(option).lower()
+
+ sxng_tag = catalog_engine2code.get(eng_tag)
+ if sxng_tag is None:
+ sxng_tag = catalog_engine2code[name]
+
+ conflict = engine_traits.languages.get(sxng_tag)
+ if conflict:
+ if conflict != eng_tag:
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+ continue
+ engine_traits.languages[sxng_tag] = eng_tag
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index 8d3b0839a..6ea77f092 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -1,9 +1,12 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Wikidata
+"""This module implements the Wikidata engine. Some implementations are shared
+from :ref:`wikipedia engine`.
+
"""
# pylint: disable=missing-class-docstring
+from typing import TYPE_CHECKING
from hashlib import md5
from urllib.parse import urlencode, unquote
from json import loads
@@ -13,12 +16,17 @@ from babel.dates import format_datetime, format_date, format_time, get_datetime_
from searx.data import WIKIDATA_UNITS
from searx.network import post, get
-from searx.utils import match_language, searx_useragent, get_string_replaces_function
+from searx.utils import searx_useragent, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
-from searx.engines.wikipedia import ( # pylint: disable=unused-import
- _fetch_supported_languages,
- supported_languages_url,
-)
+from searx.engines.wikipedia import fetch_traits as _fetch_traits
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+traits: EngineTraits
# about
about = {
@@ -154,33 +162,35 @@ def send_wikidata_query(query, method='GET'):
def request(query, params):
- language = params['language'].split('-')[0]
- if language == 'all':
- language = 'en'
- else:
- language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
+
+ # wikidata does not support zh-classical (zh_Hans) / zh-TW, zh-HK and zh-CN
+ # mapped to zh
+ sxng_lang = params['searxng_locale'].split('-')[0]
+ language = traits.get_language(sxng_lang, 'en')
query, attributes = get_query(query, language)
+ logger.debug("request --> language %s // len(attributes): %s", language, len(attributes))
params['method'] = 'POST'
params['url'] = SPARQL_ENDPOINT_URL
params['data'] = {'query': query}
params['headers'] = get_headers()
-
params['language'] = language
params['attributes'] = attributes
+
return params
def response(resp):
+
results = []
jsonresponse = loads(resp.content.decode())
- language = resp.search_params['language'].lower()
+ language = resp.search_params['language']
attributes = resp.search_params['attributes']
+ logger.debug("request --> language %s // len(attributes): %s", language, len(attributes))
seen_entities = set()
-
for result in jsonresponse.get('results', {}).get('bindings', []):
attribute_result = {key: value['value'] for key, value in result.items()}
entity_url = attribute_result['item']
@@ -756,3 +766,15 @@ def init(engine_settings=None): # pylint: disable=unused-argument
lang = result['name']['xml:lang']
entity_id = result['item']['value'].replace('http://www.wikidata.org/entity/', '')
WIKIDATA_PROPERTIES[(entity_id, lang)] = name.capitalize()
+
+
+def fetch_traits(engine_traits: EngineTraits):
+ """Use languages evaluated from :py:obj:`wikipedia.fetch_traits
+ <searx.engines.wikipedia.fetch_traits>` except zh-classical (zh_Hans) what
+ is not supported by wikidata."""
+
+ _fetch_traits(engine_traits)
+ # wikidata does not support zh-classical (zh_Hans)
+ engine_traits.languages.pop('zh_Hans')
+ # wikidata does not have net-locations for the languages
+ engine_traits.custom['wiki_netloc'] = {}
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index ca841e8b3..9d2d30afa 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -1,13 +1,26 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""This module implements the Wikipedia engine. Some of this implementations
+are shared by other engines:
+
+- :ref:`wikidata engine`
+
+The list of supported languages is fetched from the article linked by
+:py:obj:`wikipedia_article_depth`. Unlike traditional search engines, wikipedia
+does not support one Wikipedia for all the languages, but there is one Wikipedia
+for every language (:py:obj:`fetch_traits`).
"""
- Wikipedia (Web)
-"""
-from urllib.parse import quote
-from json import loads
-from lxml.html import fromstring
-from searx.utils import match_language, searx_useragent
-from searx.network import raise_for_httperror
+import urllib.parse
+import babel
+
+from lxml import html
+
+from searx import network
+from searx.locales import language_tag
+from searx.enginelib.traits import EngineTraits
+
+traits: EngineTraits
# about
about = {
@@ -19,32 +32,40 @@ about = {
"results": 'JSON',
}
-
send_accept_language_header = True
-# search-url
-search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
-supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
-language_variants = {"zh": ("zh-cn", "zh-hk", "zh-mo", "zh-my", "zh-sg", "zh-tw")}
+wikipedia_article_depth = 'https://meta.wikimedia.org/wiki/Wikipedia_article_depth'
+"""The *editing depth* of Wikipedia is one of several possible rough indicators
+of the encyclopedia's collaborative quality, showing how frequently its articles
+are updated. The measurement of depth was introduced after some limitations of
+the classic measurement of article count were realized.
+"""
+
+# example: https://zh-classical.wikipedia.org/api/rest_v1/page/summary/日
+rest_v1_summary_url = 'https://{wiki_netloc}/api/rest_v1/page/summary/{title}'
+"""`wikipedia rest_v1 summary API`_: The summary response includes an extract of
+the first paragraph of the page in plain text and HTML as well as the type of
+page. This is useful for page previews (fka. Hovercards, aka. Popups) on the web
+and link previews in the apps.
+.. _wikipedia rest_v1 summary API: https://en.wikipedia.org/api/rest_v1/#/Page%20content/get_page_summary__title_
-# set language in base_url
-def url_lang(lang):
- lang_pre = lang.split('-')[0]
- if lang_pre == 'all' or lang_pre not in supported_languages and lang_pre not in language_aliases:
- return 'en'
- return match_language(lang, supported_languages, language_aliases).split('-')[0]
+"""
-# do search-request
def request(query, params):
+ """Assemble a request (`wikipedia rest_v1 summary API`_)."""
if query.islower():
query = query.title()
- language = url_lang(params['language'])
- params['url'] = search_url.format(title=quote(query), language=language)
+ engine_language = traits.get_language(params['searxng_locale'], 'en')
+ wiki_netloc = traits.custom['wiki_netloc'].get(engine_language, 'https://en.wikipedia.org/wiki/')
+ title = urllib.parse.quote(query)
+
+ # '!wikipedia 日 :zh-TW' --> https://zh-classical.wikipedia.org/
+ # '!wikipedia 日 :zh' --> https://zh.wikipedia.org/
+ params['url'] = rest_v1_summary_url.format(wiki_netloc=wiki_netloc, title=title)
- params['headers']['User-Agent'] = searx_useragent()
params['raise_for_httperror'] = False
params['soft_max_redirects'] = 2
@@ -53,13 +74,14 @@ def request(query, params):
# get response from search-request
def response(resp):
+
+ results = []
if resp.status_code == 404:
return []
-
if resp.status_code == 400:
try:
- api_result = loads(resp.text)
- except:
+ api_result = resp.json()
+ except Exception: # pylint: disable=broad-except
pass
else:
if (
@@ -68,49 +90,135 @@ def response(resp):
):
return []
- raise_for_httperror(resp)
-
- results = []
- api_result = loads(resp.text)
-
- # skip disambiguation pages
- if api_result.get('type') != 'standard':
- return []
+ network.raise_for_httperror(resp)
+ api_result = resp.json()
title = api_result['title']
wikipedia_link = api_result['content_urls']['desktop']['page']
-
- results.append({'url': wikipedia_link, 'title': title})
-
- results.append(
- {
- 'infobox': title,
- 'id': wikipedia_link,
- 'content': api_result.get('extract', ''),
- 'img_src': api_result.get('thumbnail', {}).get('source'),
- 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}],
- }
- )
+ results.append({'url': wikipedia_link, 'title': title, 'content': api_result.get('description', '')})
+
+ if api_result.get('type') == 'standard':
+ results.append(
+ {
+ 'infobox': title,
+ 'id': wikipedia_link,
+ 'content': api_result.get('extract', ''),
+ 'img_src': api_result.get('thumbnail', {}).get('source'),
+ 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}],
+ }
+ )
return results
-# get supported languages from their site
-def _fetch_supported_languages(resp):
- supported_languages = {}
- dom = fromstring(resp.text)
- tables = dom.xpath('//table[contains(@class,"sortable")]')
- for table in tables:
- # exclude header row
- trs = table.xpath('.//tr')[1:]
- for tr in trs:
- td = tr.xpath('./td')
- code = td[3].xpath('./a')[0].text
- name = td[1].xpath('./a')[0].text
- english_name = td[1].xpath('./a')[0].text
- articles = int(td[4].xpath('./a')[0].text.replace(',', ''))
+# Nonstandard language codes
+#
+# These Wikipedias use language codes that do not conform to the ISO 639
+# standard (which is how wiki subdomains are chosen nowadays).
+
+lang_map = {
+ 'be-tarask': 'bel',
+ 'ak': 'aka',
+ 'als': 'gsw',
+ 'bat-smg': 'sgs',
+ 'cbk-zam': 'cbk',
+ 'fiu-vro': 'vro',
+ 'map-bms': 'map',
+ 'nrm': 'nrf',
+ 'roa-rup': 'rup',
+ 'nds-nl': 'nds',
+ #'simple: – invented code used for the Simple English Wikipedia (not the official IETF code en-simple)
+ 'zh-min-nan': 'nan',
+ 'zh-yue': 'yue',
+ 'an': 'arg',
+ 'zh-classical': 'zh-Hant', # babel maps classical to zh-Hans (for whatever reason)
+}
+
+unknown_langs = [
+ 'an', # Aragonese
+ 'ba', # Bashkir
+ 'bar', # Bavarian
+ 'bcl', # Central Bicolano
+ 'be-tarask', # Belarusian variant / Belarusian is already covered by 'be'
+ 'bpy', # Bishnupriya Manipuri is unknown by babel
+ 'hif', # Fiji Hindi
+ 'ilo', # Ilokano
+ 'li', # Limburgish
+ 'sco', # Scots (sco) is not known by babel, Scottish Gaelic (gd) is known by babel
+ 'sh', # Serbo-Croatian
+ 'simple', # simple english is not known as a natural language different to english (babel)
+ 'vo', # Volapük
+ 'wa', # Walloon
+]
+
+
+def fetch_traits(engine_traits: EngineTraits):
+ """Fetch languages from Wikipedia.
+
+ The location of the Wikipedia address of a language is mapped in a
+ :py:obj:`custom field <searx.enginelib.traits.EngineTraits.custom>`
+ (``wiki_netloc``). Here is a reduced example:
+
+ .. code:: python
+
+ traits.custom['wiki_netloc'] = {
+ "en": "en.wikipedia.org",
+ ..
+ "gsw": "als.wikipedia.org",
+ ..
+ "zh": "zh.wikipedia.org",
+ "zh-classical": "zh-classical.wikipedia.org"
+ }
+
+ """
+
+ engine_traits.custom['wiki_netloc'] = {}
+
+ # insert alias to map from a region like zh-CN to a language zh_Hans
+ engine_traits.languages['zh_Hans'] = 'zh'
+
+ resp = network.get(wikipedia_article_depth)
+ if not resp.ok:
+ print("ERROR: response from Wikipedia is not OK.")
+
+ dom = html.fromstring(resp.text)
+ for row in dom.xpath('//table[contains(@class,"sortable")]//tbody/tr'):
+
+ cols = row.xpath('./td')
+ if not cols:
+ continue
+ cols = [c.text_content().strip() for c in cols]
+
+ depth = float(cols[3].replace('-', '0').replace(',', ''))
+ articles = int(cols[4].replace(',', '').replace(',', ''))
+
+ if articles < 10000:
# exclude languages with too few articles
- if articles >= 100:
- supported_languages[code] = {"name": name, "english_name": english_name}
+ continue
+
+ if int(depth) < 20:
+ # Rough indicator of a Wikipedia’s quality, showing how frequently
+ # its articles are updated.
+ continue
- return supported_languages
+ eng_tag = cols[2]
+ wiki_url = row.xpath('./td[3]/a/@href')[0]
+ wiki_url = urllib.parse.urlparse(wiki_url)
+
+ if eng_tag in unknown_langs:
+ continue
+
+ try:
+ sxng_tag = language_tag(babel.Locale.parse(lang_map.get(eng_tag, eng_tag), sep='-'))
+ except babel.UnknownLocaleError:
+ print("ERROR: %s [%s] is unknown by babel" % (cols[0], eng_tag))
+ continue
+
+ conflict = engine_traits.languages.get(sxng_tag)
+ if conflict:
+ if conflict != eng_tag:
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+ continue
+
+ engine_traits.languages[sxng_tag] = eng_tag
+ engine_traits.custom['wiki_netloc'][eng_tag] = wiki_url.netloc
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index c13ce6d78..0fdeacec2 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -17,8 +17,10 @@ from searx.utils import (
eval_xpath_getindex,
eval_xpath_list,
extract_text,
- match_language,
)
+from searx.enginelib.traits import EngineTraits
+
+traits: EngineTraits
# about
about = {
@@ -34,8 +36,7 @@ about = {
categories = ['general', 'web']
paging = True
time_range_support = True
-supported_languages_url = 'https://search.yahoo.com/preferences/languages'
-"""Supported languages are read from Yahoo preference page."""
+# send_accept_language_header = True
time_range_dict = {
'day': ('1d', 'd'),
@@ -43,15 +44,10 @@ time_range_dict = {
'month': ('1m', 'm'),
}
-language_aliases = {
- 'zh-HK': 'zh_chs',
- 'zh-CN': 'zh_chs', # dead since 2015 / routed to hk.search.yahoo.com
- 'zh-TW': 'zh_cht',
-}
-
lang2domain = {
'zh_chs': 'hk.search.yahoo.com',
'zh_cht': 'tw.search.yahoo.com',
+ 'any': 'search.yahoo.com',
'en': 'search.yahoo.com',
'bg': 'search.yahoo.com',
'cs': 'search.yahoo.com',
@@ -67,21 +63,23 @@ lang2domain = {
}
"""Map language to domain"""
-
-def _get_language(params):
-
- lang = language_aliases.get(params['language'])
- if lang is None:
- lang = match_language(params['language'], supported_languages, language_aliases)
- lang = lang.split('-')[0]
- logger.debug("params['language']: %s --> %s", params['language'], lang)
- return lang
+locale_aliases = {
+ 'zh': 'zh_Hans',
+ 'zh-HK': 'zh_Hans',
+ 'zh-CN': 'zh_Hans', # dead since 2015 / routed to hk.search.yahoo.com
+ 'zh-TW': 'zh_Hant',
+}
def request(query, params):
"""build request"""
+
+ lang = locale_aliases.get(params['language'], None)
+ if not lang:
+ lang = params['language'].split('-')[0]
+ lang = traits.get_language(lang, traits.all_locale)
+
offset = (params['pageno'] - 1) * 7 + 1
- lang = _get_language(params)
age, btf = time_range_dict.get(params['time_range'], ('', ''))
args = urlencode(
@@ -154,13 +152,37 @@ def response(resp):
return results
-# get supported languages from their site
-def _fetch_supported_languages(resp):
- supported_languages = []
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch languages from yahoo.
+
+    The language tags offered on Yahoo's language preference page are read
+    and stored in ``engine_traits.languages`` (SearXNG language tag -->
+    Yahoo's tag).  ``engine_traits.all_locale`` is set to ``'any'``.
+    """
+
+    # pylint: disable=import-outside-toplevel
+    import babel
+    from searx import network
+    from searx.locales import language_tag
+
+    engine_traits.all_locale = 'any'
+
+    resp = network.get('https://search.yahoo.com/preferences/languages')
+    if not resp.ok:
+        # the failure is only reported, the response body is parsed nevertheless
+        print("ERROR: response from yahoo is not OK.")
+
     dom = html.fromstring(resp.text)
     offset = len('lang_')
+    # Yahoo's tags for Chinese are translated before they are parsed by babel
+    eng2sxng = {'zh_chs': 'zh_Hans', 'zh_cht': 'zh_Hant'}
+
     for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'):
-        supported_languages.append(val[offset:])
+        # cut off the first len('lang_') characters of the input value
+        eng_tag = val[offset:]
+
+        try:
+            sxng_tag = language_tag(babel.Locale.parse(eng2sxng.get(eng_tag, eng_tag)))
+        except babel.UnknownLocaleError:
+            print('ERROR: unknown language --> %s' % eng_tag)
+            continue
-    return supported_languages
+        conflict = engine_traits.languages.get(sxng_tag)
+        if conflict:
+            # first tag wins, a differing second one is reported and dropped
+            if conflict != eng_tag:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+            continue
+        engine_traits.languages[sxng_tag] = eng_tag
diff --git a/searx/locales.py b/searx/locales.py
index 9e06bf39d..ffa5e731c 100644
--- a/searx/locales.py
+++ b/searx/locales.py
@@ -4,11 +4,11 @@
"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
"""
-from typing import Set
+from typing import Set, Optional, List
import os
import pathlib
-from babel import Locale
+import babel
from babel.support import Translations
import babel.languages
import babel.core
@@ -134,7 +134,7 @@ def locales_initialize(directory=None):
flask_babel.get_translations = get_translations
for tag, descr in ADDITIONAL_TRANSLATIONS.items():
- locale = Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
+ locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
LOCALE_NAMES[tag] = descr
if locale.text_direction == 'rtl':
RTL_LOCALES.add(tag)
@@ -142,7 +142,7 @@ def locales_initialize(directory=None):
for tag in LOCALE_BEST_MATCH:
descr = LOCALE_NAMES.get(tag)
if not descr:
- locale = Locale.parse(tag, sep='-')
+ locale = babel.Locale.parse(tag, sep='-')
LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
if locale.text_direction == 'rtl':
RTL_LOCALES.add(tag)
@@ -154,12 +154,77 @@ def locales_initialize(directory=None):
tag = dirname.replace('_', '-')
descr = LOCALE_NAMES.get(tag)
if not descr:
- locale = Locale.parse(dirname)
+ locale = babel.Locale.parse(dirname)
LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
if locale.text_direction == 'rtl':
RTL_LOCALES.add(tag)
+def region_tag(locale: babel.Locale) -> str:
+    """Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US).
+
+    :param locale: A parsed locale, it has to carry a territory.
+    :raises ValueError: If ``locale`` has no territory.
+    """
+    if not locale.territory:
+        # name the offending locale in the error message
+        raise ValueError('%s missed a territory' % (locale,))
+    return locale.language + '-' + locale.territory
+
+
+def language_tag(locale: babel.Locale) -> str:
+    """Returns SearXNG's language tag from the locale.
+
+    The tag is the language code; if the locale carries a script, the script
+    name is appended with an underscore (e.g. en, zh_Hant).
+    """
+    if not locale.script:
+        return locale.language
+    return locale.language + '_' + locale.script
+
+
+def get_locale(locale_tag: str) -> Optional[babel.Locale]:
+    """Parses argument ``locale_tag`` (parts separated by ``-``) and returns
+    the :py:obj:`babel.Locale` object.  ``None`` is returned if the tag is
+    unknown by babel."""
+    try:
+        return babel.Locale.parse(locale_tag, sep='-')
+    except babel.core.UnknownLocaleError:
+        return None
+
+
+def get_offical_locales(
+    territory: str, languages=None, regional: bool = False, de_facto: bool = True
+) -> Set[babel.Locale]:
+    """Returns a set of :py:obj:`babel.Locale` with languages from
+    :py:obj:`babel.languages.get_official_languages`.
+
+    :param territory: The territory (country or region) code.
+
+    :param languages: A list of language codes the languages from
+      :py:obj:`babel.languages.get_official_languages` should be in
+      (intersection, compared case-insensitively).  If this argument is
+      ``None`` (or empty), all official languages in this territory are used.
+
+    :param regional: If the regional flag is set, then languages which are
+      regionally official are also returned.
+
+    :param de_facto: If the de_facto flag is set to `False`, then languages
+      which are “de facto” official are not returned.
+
+    :return: The ``<language>_<territory>`` locales babel was able to parse,
+      combinations unknown by babel are skipped.
+    """
+    ret_val = set()
+    o_languages = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto)
+
+    if languages:
+        # a set gives a cheap membership test for the intersection below
+        wanted = {lang.lower() for lang in languages}
+        o_languages = {lang for lang in o_languages if lang.lower() in wanted}
+
+    for lang in o_languages:
+        try:
+            ret_val.add(babel.Locale.parse(lang + '_' + territory))
+        except babel.UnknownLocaleError:
+            continue
+
+    return ret_val
+
+
def get_engine_locale(searxng_locale, engine_locales, default=None):
"""Return engine's language (aka locale) string that best fits to argument
``searxng_locale``.
@@ -177,6 +242,10 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
...
'pl-PL' : 'pl_PL',
'pt-PT' : 'pt_PT'
+ ..
+ 'zh' : 'zh'
+ 'zh_Hans' : 'zh'
+ 'zh_Hant' : 'zh-classical'
}
.. hint::
@@ -210,13 +279,13 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
engine.
"""
- # pylint: disable=too-many-branches
+ # pylint: disable=too-many-branches, too-many-return-statements
engine_locale = engine_locales.get(searxng_locale)
if engine_locale is not None:
- # There was a 1:1 mapping (e.g. "fr-BE --> fr_BE" or "fr --> fr_FR"), no
- # need to narrow language nor territory.
+ # There was a 1:1 mapping (e.g. a region "fr-BE --> fr_BE" or a language
+ # "zh --> zh"), no need to narrow language-script nor territory.
return engine_locale
try:
@@ -227,6 +296,12 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
except babel.core.UnknownLocaleError:
return default
+ searxng_lang = language_tag(locale)
+ engine_locale = engine_locales.get(searxng_lang)
+ if engine_locale is not None:
+ # There was a 1:1 mapping (e.g. "zh-HK --> zh_Hant" or "zh-CN --> zh_Hans")
+ return engine_locale
+
# SearXNG's selected locale is not supported by the engine ..
if locale.territory:
@@ -247,10 +322,6 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
if locale.language:
- searxng_lang = locale.language
- if locale.script:
- searxng_lang += '_' + locale.script
-
terr_lang_dict = {}
for territory, langs in babel.core.get_global("territory_languages").items():
if not langs.get(searxng_lang, {}).get('official_status'):
@@ -303,3 +374,98 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
engine_locale = default
return default
+
+
+def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]:
+    """Return the tag from ``locale_tag_list`` that best fits ``searxng_locale``.
+
+    :param str searxng_locale: SearXNG's internal representation of a locale
+      (de, de-DE, fr-BE, zh, zh-CN, zh-TW ..).
+
+    :param list locale_tag_list: The list of locale tags to select from.
+
+    :param str fallback: Returned if ``searxng_locale`` is empty or unknown by
+      babel, and passed as ``default`` to :py:obj:`get_engine_locale` (if
+      unset --> ``None``).
+
+    The rules to find a match are implemented in :py:obj:`get_engine_locale`,
+    the ``engine_locales`` are built by :py:obj:`build_engine_locales`.
+
+    .. hint::
+
+       The *SearXNG locale* string and the members of ``locale_tag_list`` have
+       to be known by babel!  The :py:obj:`ADDITIONAL_TRANSLATIONS` are used in
+       the UI and are not known by babel --> they are ignored.
+    """
+
+    # e.g. searxng_locale = 'es' and locale_tag_list = ['es-AR', 'es-ES', 'es-MX']
+
+    if not searxng_locale:
+        return fallback
+
+    locale = get_locale(searxng_locale)
+    if locale is None:
+        return fallback
+
+    # normalize to a SearXNG locale that can be passed to get_engine_locale
+    searxng_locale = region_tag(locale) if locale.territory else language_tag(locale)
+
+    # clean up locale_tag_list: drop the pseudo tags and the UI-only translations
+    tag_list = [
+        tag for tag in locale_tag_list if tag not in ('all', 'auto') and tag not in ADDITIONAL_TRANSLATIONS
+    ]
+
+    # emulate fetch_traits
+    return get_engine_locale(searxng_locale, build_engine_locales(tag_list), default=fallback)
+
+
+def build_engine_locales(tag_list: List[str]):
+    """From a list of locale tags a dictionary is built that can be passed by
+    argument ``engine_locales`` to :py:obj:`get_engine_locale`.  This function
+    is mainly used by :py:obj:`match_locale` and is similar to what the
+    ``fetch_traits(..)`` function of engines do.
+
+    If there are territory codes in the ``tag_list`` that have a *script code*
+    additional keys are added to the returned dictionary.
+
+    .. code:: python
+
+       >>> import locales
+       >>> engine_locales = locales.build_engine_locales(['en', 'en-US', 'zh', 'zh-CN', 'zh-TW'])
+       >>> engine_locales
+       {
+           'en': 'en', 'en-US': 'en-US',
+           'zh': 'zh', 'zh-CN': 'zh-CN', 'zh_Hans': 'zh-CN',
+           'zh-TW': 'zh-TW', 'zh_Hant': 'zh-TW'
+       }
+       >>> get_engine_locale('zh-Hans', engine_locales)
+       'zh-CN'
+
+    This function is a good example to understand the language/region model
+    of SearXNG:
+
+      SearXNG only distinguishes between **search languages** and **search
+      regions**, by adding the *script-tags*, languages with *script-tags* can
+      be assigned to the **regions** that SearXNG supports.
+
+    :param tag_list: Locale tags, tags unknown by babel are skipped (a warning
+      is logged).
+    :return: Dictionary that maps SearXNG's region / language tags to the tag
+      from ``tag_list`` they were derived from.
+    """
+    engine_locales = {}
+
+    for tag in tag_list:
+        locale = get_locale(tag)
+        if locale is None:
+            logger.warning("build_engine_locales: skip locale tag %s / unknown by babel", tag)
+            continue
+        if locale.territory:
+            engine_locales[region_tag(locale)] = tag
+            # a region with a script also serves the language-script tag
+            if locale.script:
+                engine_locales[language_tag(locale)] = tag
+        else:
+            engine_locales[language_tag(locale)] = tag
+    return engine_locales
diff --git a/searx/preferences.py b/searx/preferences.py
index 0eac8441c..5cee83a03 100644
--- a/searx/preferences.py
+++ b/searx/preferences.py
@@ -13,7 +13,7 @@ from typing import Iterable, Dict, List
import flask
from searx import settings, autocomplete
-from searx.engines import Engine
+from searx.enginelib import Engine
from searx.plugins import Plugin
from searx.locales import LOCALE_NAMES
from searx.webutils import VALID_LANGUAGE_CODE
diff --git a/searx/query.py b/searx/query.py
index dbc52ec75..751308baa 100644
--- a/searx/query.py
+++ b/searx/query.py
@@ -4,7 +4,7 @@ from abc import abstractmethod, ABC
import re
from searx import settings
-from searx.languages import language_codes
+from searx.sxng_locales import sxng_locales
from searx.engines import categories, engines, engine_shortcuts
from searx.external_bang import get_bang_definition_and_autocomplete
from searx.search import EngineRef
@@ -84,7 +84,7 @@ class LanguageParser(QueryPartParser):
found = False
# check if any language-code is equal with
# declared language-codes
- for lc in language_codes:
+ for lc in sxng_locales:
lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
# if correct language-code is found
@@ -125,7 +125,7 @@ class LanguageParser(QueryPartParser):
self.raw_text_query.autocomplete_list.append(lang)
return
- for lc in language_codes:
+ for lc in sxng_locales:
if lc[0] not in settings['search']['languages']:
continue
lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py
index a270b4ef5..1390de456 100644
--- a/searx/search/processors/__init__.py
+++ b/searx/search/processors/__init__.py
@@ -30,7 +30,10 @@ from .abstract import EngineProcessor
logger = logger.getChild('search.processors')
PROCESSORS: Dict[str, EngineProcessor] = {}
-"""Cache request processores, stored by *engine-name* (:py:func:`initialize`)"""
+"""Cache request processors, stored by *engine-name* (:py:func:`initialize`)
+
+:meta hide-value:
+"""
def get_processor_class(engine_type):
diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py
index d74616db0..5f1882ca4 100644
--- a/searx/search/processors/abstract.py
+++ b/searx/search/processors/abstract.py
@@ -138,7 +138,8 @@ class EngineProcessor(ABC):
return False
def get_params(self, search_query, engine_category):
- """Returns a set of *request params* or ``None`` if request is not supported.
+ """Returns a set of (see :ref:`request params <engine request arguments>`) or
+ ``None`` if request is not supported.
Not supported conditions (``None`` is returned):
@@ -159,11 +160,20 @@ class EngineProcessor(ABC):
params['safesearch'] = search_query.safesearch
params['time_range'] = search_query.time_range
params['engine_data'] = search_query.engine_data.get(self.engine_name, {})
+ params['searxng_locale'] = search_query.lang
+
+ # deprecated / vintage --> use params['searxng_locale']
+ #
+ # Conditions related to engine's traits are implemented in engine.traits
+ # module. Don't do 'locale' decisions here in the abstract layer of the
+ # search processor, just pass the value from user's choice unchanged to
+ # the engine request.
if hasattr(self.engine, 'language') and self.engine.language:
params['language'] = self.engine.language
else:
params['language'] = search_query.lang
+
return params
@abstractmethod
diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py
index 242718416..697533d8c 100644
--- a/searx/search/processors/online.py
+++ b/searx/search/processors/online.py
@@ -51,6 +51,9 @@ class OnlineProcessor(EngineProcessor):
super().initialize()
def get_params(self, search_query, engine_category):
+ """Returns a set of :ref:`request params <engine request online>` or ``None``
+ if request is not supported.
+ """
params = super().get_params(search_query, engine_category)
if params is None:
return None
@@ -184,11 +187,6 @@ class OnlineProcessor(EngineProcessor):
self.handle_exception(result_container, e, suspend=True)
self.logger.exception('CAPTCHA')
except SearxEngineTooManyRequestsException as e:
- if "google" in self.engine_name:
- self.logger.warn(
- "Set to 'true' the use_mobile_ui parameter in the 'engines:'"
- " section of your settings.yml file if google is blocked for you."
- )
self.handle_exception(result_container, e, suspend=True)
self.logger.exception('Too many requests')
except SearxEngineAccessDeniedException as e:
@@ -223,7 +221,7 @@ class OnlineProcessor(EngineProcessor):
'test': ['unique_results'],
}
- if getattr(self.engine, 'supported_languages', []):
+ if getattr(self.engine, 'traits', False):
tests['lang_fr'] = {
'matrix': {'query': 'paris', 'lang': 'fr'},
'result_container': ['not_empty', ('has_language', 'fr')],
diff --git a/searx/search/processors/online_currency.py b/searx/search/processors/online_currency.py
index 92398239f..7cb4205c9 100644
--- a/searx/search/processors/online_currency.py
+++ b/searx/search/processors/online_currency.py
@@ -38,8 +38,8 @@ class OnlineCurrencyProcessor(OnlineProcessor):
engine_type = 'online_currency'
def get_params(self, search_query, engine_category):
- """Returns a set of *request params* or ``None`` if search query does not match
- to :py:obj:`parser_re`."""
+ """Returns a set of :ref:`request params <engine request online_currency>`
+ or ``None`` if search query does not match to :py:obj:`parser_re`."""
params = super().get_params(search_query, engine_category)
if params is None:
diff --git a/searx/search/processors/online_dictionary.py b/searx/search/processors/online_dictionary.py
index fbfc9df8e..6145a47d1 100644
--- a/searx/search/processors/online_dictionary.py
+++ b/searx/search/processors/online_dictionary.py
@@ -18,8 +18,9 @@ class OnlineDictionaryProcessor(OnlineProcessor):
engine_type = 'online_dictionary'
def get_params(self, search_query, engine_category):
- """Returns a set of *request params* or ``None`` if search query does not match
- to :py:obj:`parser_re`."""
+ """Returns a set of :ref:`request params <engine request online_dictionary>` or
+ ``None`` if search query does not match to :py:obj:`parser_re`.
+ """
params = super().get_params(search_query, engine_category)
if params is None:
return None
diff --git a/searx/search/processors/online_url_search.py b/searx/search/processors/online_url_search.py
index 6383fa37f..a1dd6a018 100644
--- a/searx/search/processors/online_url_search.py
+++ b/searx/search/processors/online_url_search.py
@@ -20,9 +20,10 @@ class OnlineUrlSearchProcessor(OnlineProcessor):
engine_type = 'online_url_search'
def get_params(self, search_query, engine_category):
- """Returns a set of *request params* or ``None`` if search query does not match
- to at least one of :py:obj:`re_search_urls`.
+ """Returns a set of :ref:`request params <engine request online>` or ``None`` if
+ search query does not match to :py:obj:`re_search_urls`.
"""
+
params = super().get_params(search_query, engine_category)
if params is None:
return None
diff --git a/searx/settings.yml b/searx/settings.yml
index 841457b5e..fabd87bad 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -731,22 +731,9 @@ engines:
- name: google
engine: google
shortcut: go
- # see https://docs.searxng.org/src/searx.engines.google.html#module-searx.engines.google
- use_mobile_ui: false
# additional_tests:
# android: *test_android
- # - name: google italian
- # engine: google
- # shortcut: goit
- # use_mobile_ui: false
- # language: it
-
- # - name: google mobile ui
- # engine: google
- # shortcut: gomui
- # use_mobile_ui: true
-
- name: google images
engine: google_images
shortcut: goi
@@ -1758,9 +1745,8 @@ engines:
engine: peertube
shortcut: ptb
paging: true
- # https://instances.joinpeertube.org/instances
- base_url: https://peertube.biz/
- # base_url: https://tube.tardis.world/
+ # alternatives see: https://instances.joinpeertube.org/instances
+ # base_url: https://tube.4aem.com
categories: videos
disabled: true
timeout: 6.0
diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py
index 6e98076ff..7f657aa54 100644
--- a/searx/settings_defaults.py
+++ b/searx/settings_defaults.py
@@ -12,13 +12,13 @@ import logging
from base64 import b64decode
from os.path import dirname, abspath
-from searx.languages import language_codes as languages
+from .sxng_locales import sxng_locales
searx_dir = abspath(dirname(__file__))
logger = logging.getLogger('searx')
OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss']
-LANGUAGE_CODES = ['all', 'auto'] + list(l[0] for l in languages)
+SXNG_LOCALE_TAGS = ['all', 'auto'] + list(l[0] for l in sxng_locales)
SIMPLE_STYLE = ('auto', 'light', 'dark')
CATEGORIES_AS_TABS = {
'general': {},
@@ -156,8 +156,8 @@ SCHEMA = {
'safe_search': SettingsValue((0, 1, 2), 0),
'autocomplete': SettingsValue(str, ''),
'autocomplete_min': SettingsValue(int, 4),
- 'default_lang': SettingsValue(tuple(LANGUAGE_CODES + ['']), ''),
- 'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES),
+ 'default_lang': SettingsValue(tuple(SXNG_LOCALE_TAGS + ['']), ''),
+ 'languages': SettingSublistValue(SXNG_LOCALE_TAGS, SXNG_LOCALE_TAGS),
'ban_time_on_fail': SettingsValue(numbers.Real, 5),
'max_ban_time_on_fail': SettingsValue(numbers.Real, 120),
'suspended_times': {
diff --git a/searx/languages.py b/searx/sxng_locales.py
index 377e7495b..0600a9c91 100644
--- a/searx/languages.py
+++ b/searx/sxng_locales.py
@@ -1,73 +1,120 @@
# -*- coding: utf-8 -*-
-# list of language codes
-# this file is generated automatically by utils/fetch_languages.py
-language_codes = (
- ('af-ZA', 'Afrikaans', 'Suid-Afrika', 'Afrikaans', '\U0001f1ff\U0001f1e6'),
- ('ar-EG', 'العربية', 'مصر', 'Arabic', '\U0001f1ea\U0001f1ec'),
- ('be-BY', 'Беларуская', 'Беларусь', 'Belarusian', '\U0001f1e7\U0001f1fe'),
+'''List of SearXNG's locale codes.
+
+This file is generated automatically by::
+
+ ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
+'''
+
+sxng_locales = (
+ ('ar', 'العربية', '', 'Arabic', '\U0001f310'),
+ ('bg', 'Български', '', 'Bulgarian', '\U0001f310'),
('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
+ ('ca', 'Català', '', 'Catalan', '\U0001f310'),
('ca-ES', 'Català', 'Espanya', 'Catalan', '\U0001f1ea\U0001f1f8'),
+ ('cs', 'Čeština', '', 'Czech', '\U0001f310'),
('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
+ ('da', 'Dansk', '', 'Danish', '\U0001f310'),
('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'),
('de', 'Deutsch', '', 'German', '\U0001f310'),
('de-AT', 'Deutsch', 'Österreich', 'German', '\U0001f1e6\U0001f1f9'),
('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
+ ('el', 'Ελληνικά', '', 'Greek', '\U0001f310'),
('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'),
('en', 'English', '', 'English', '\U0001f310'),
('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
('en-GB', 'English', 'United Kingdom', 'English', '\U0001f1ec\U0001f1e7'),
('en-IE', 'English', 'Ireland', 'English', '\U0001f1ee\U0001f1ea'),
+ ('en-IN', 'English', 'India', 'English', '\U0001f1ee\U0001f1f3'),
('en-MY', 'English', 'Malaysia', 'English', '\U0001f1f2\U0001f1fe'),
('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
+ ('en-PH', 'English', 'Philippines', 'English', '\U0001f1f5\U0001f1ed'),
('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
+ ('en-ZA', 'English', 'South Africa', 'English', '\U0001f1ff\U0001f1e6'),
('es', 'Español', '', 'Spanish', '\U0001f310'),
('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
+ ('es-US', 'Español', 'Estados Unidos', 'Spanish', '\U0001f1fa\U0001f1f8'),
+ ('et', 'Eesti', '', 'Estonian', '\U0001f310'),
('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
- ('fa-IR', 'فارسی', 'ایران', 'Persian', '\U0001f1ee\U0001f1f7'),
+ ('fi', 'Suomi', '', 'Finnish', '\U0001f310'),
('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
- ('fil-PH', 'Filipino', 'Pilipinas', 'Filipino', '\U0001f1f5\U0001f1ed'),
('fr', 'Français', '', 'French', '\U0001f310'),
('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
- ('he-IL', 'עברית', 'ישראל', 'Hebrew', '\U0001f1ee\U0001f1f1'),
- ('hi-IN', 'हिन्दी', 'भारत', 'Hindi', '\U0001f1ee\U0001f1f3'),
- ('hr-HR', 'Hrvatski', 'Hrvatska', 'Croatian', '\U0001f1ed\U0001f1f7'),
+ ('he', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f7'),
+ ('hi', 'हिन्दी', '', 'Hindi', '\U0001f310'),
+ ('hr', 'Hrvatski', '', 'Croatian', '\U0001f310'),
+ ('hu', 'Magyar', '', 'Hungarian', '\U0001f310'),
('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
+ ('id', 'Indonesia', '', 'Indonesian', '\U0001f310'),
('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'),
- ('is-IS', 'Íslenska', 'Ísland', 'Icelandic', '\U0001f1ee\U0001f1f8'),
+ ('is', 'Íslenska', '', 'Icelandic', '\U0001f310'),
+ ('it', 'Italiano', '', 'Italian', '\U0001f310'),
+ ('it-CH', 'Italiano', 'Svizzera', 'Italian', '\U0001f1e8\U0001f1ed'),
('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
+ ('ja', '日本語', '', 'Japanese', '\U0001f310'),
('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'),
+ ('ko', '한국어', '', 'Korean', '\U0001f310'),
('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'),
- ('lt-LT', 'Lietuvių', 'Lietuva', 'Lithuanian', '\U0001f1f1\U0001f1f9'),
- ('lv-LV', 'Latviešu', 'Latvija', 'Latvian', '\U0001f1f1\U0001f1fb'),
+ ('lt', 'Lietuvių', '', 'Lithuanian', '\U0001f310'),
+ ('lv', 'Latviešu', '', 'Latvian', '\U0001f310'),
+ ('nb', 'Norsk Bokmål', '', 'Norwegian Bokmål', '\U0001f310'),
+ ('nb-NO', 'Norsk Bokmål', 'Norge', 'Norwegian Bokmål', '\U0001f1f3\U0001f1f4'),
('nl', 'Nederlands', '', 'Dutch', '\U0001f310'),
('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'),
('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'),
- ('no-NO', 'Norsk', '', 'Norwegian (Bokmål)', '\U0001f1f3\U0001f1f4'),
+ ('pl', 'Polski', '', 'Polish', '\U0001f310'),
('pl-PL', 'Polski', 'Polska', 'Polish', '\U0001f1f5\U0001f1f1'),
('pt', 'Português', '', 'Portuguese', '\U0001f310'),
('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
+ ('ro', 'Română', '', 'Romanian', '\U0001f310'),
('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'),
+ ('ru', 'Русский', '', 'Russian', '\U0001f310'),
('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
- ('sk-SK', 'Slovenčina', 'Slovensko', 'Slovak', '\U0001f1f8\U0001f1f0'),
- ('sl-SI', 'Slovenščina', 'Slovenija', 'Slovenian', '\U0001f1f8\U0001f1ee'),
- ('sr-RS', 'Српски', 'Србија', 'Serbian', '\U0001f1f7\U0001f1f8'),
+ ('sk', 'Slovenčina', '', 'Slovak', '\U0001f310'),
+ ('sl', 'Slovenščina', '', 'Slovenian', '\U0001f310'),
+ ('sr', 'Српски', '', 'Serbian', '\U0001f310'),
+ ('sv', 'Svenska', '', 'Swedish', '\U0001f310'),
('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
- ('sw-TZ', 'Kiswahili', 'Tanzania', 'Swahili', '\U0001f1f9\U0001f1ff'),
+ ('th', 'ไทย', '', 'Thai', '\U0001f310'),
('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
+ ('tr', 'Türkçe', '', 'Turkish', '\U0001f310'),
('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
- ('uk-UA', 'Українська', 'Україна', 'Ukrainian', '\U0001f1fa\U0001f1e6'),
- ('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'),
+ ('uk', 'Українська', '', 'Ukrainian', '\U0001f310'),
+ ('vi', 'Tiếng Việt', '', 'Vietnamese', '\U0001f310'),
('zh', '中文', '', 'Chinese', '\U0001f310'),
('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
- ('zh-HK', '中文', '中國香港', 'Chinese', '\U0001f1ed\U0001f1f0'),
+ ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),
('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'),
)
+'''
+A list of five-element tuples:
+
+0. SearXNG's internal locale tag (a language or region tag)
+1. Name of the language (:py:obj:`babel.core.Locale.get_language_name`)
+2. For region tags the name of the region (:py:obj:`babel.core.Locale.get_territory_name`).
+ Empty string for language tags.
+3. English language name (from :py:obj:`babel.core.Locale.english_name`)
+4. Unicode flag (emoji) that fits to SearXNG's internal region tag. Languages
+ are represented by a globe (🌐)
+
+.. code:: python
+
+ ('en', 'English', '', 'English', '🌐'),
+ ('en-CA', 'English', 'Canada', 'English', '🇨🇦'),
+ ('en-US', 'English', 'United States', 'English', '🇺🇸'),
+ ..
+ ('fr', 'Français', '', 'French', '🌐'),
+ ('fr-BE', 'Français', 'Belgique', 'French', '🇧🇪'),
+ ('fr-CA', 'Français', 'Canada', 'French', '🇨🇦'),
+
+:meta hide-value:
+'''
diff --git a/searx/templates/simple/filters/languages.html b/searx/templates/simple/filters/languages.html
index 54e07e209..a42a304a5 100644
--- a/searx/templates/simple/filters/languages.html
+++ b/searx/templates/simple/filters/languages.html
@@ -1,12 +1,12 @@
<select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}}
- <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+ <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }} [all]</option>
<option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>
{{- _('Auto-detect') -}}
{%- if current_language == 'auto' %} ({{ search_language }}){%- endif -%}
</option>
- {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
- <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
- {% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}
+ {%- for sxng_tag,lang_name,country_name,english_name,flag in sxng_locales | sort(attribute=1) -%}
+ <option value="{{ sxng_tag }}" {% if sxng_tag == current_language %}selected="selected"{% endif %}>
+ {% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %} - {{ country_name }} {% endif %} [{{sxng_tag}}]
</option>
{%- endfor -%}
</select>
diff --git a/searx/templates/simple/preferences.html b/searx/templates/simple/preferences.html
index 9626b04d4..a0cc8efc2 100644
--- a/searx/templates/simple/preferences.html
+++ b/searx/templates/simple/preferences.html
@@ -115,10 +115,10 @@
<legend id="pref_language">{{ _('Search language') }}</legend>
<p class="value">{{- '' -}}
<select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}}
- <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
- <option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>{{ _('Auto-detect') }}</option>
- {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
- <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}</option>
+ <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }} [all]</option>
+ <option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>{{ _('Auto-detect') }} [auto]</option>
+ {%- for sxng_tag,lang_name,country_name,english_name,flag in sxng_locales | sort(attribute=1) -%}
+ <option value="{{ sxng_tag }}" {% if sxng_tag == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %} - {{ country_name }} {% endif %} [{{sxng_tag}}]</option>
{%- endfor -%}
</select>{{- '' -}}
</p>
diff --git a/searx/utils.py b/searx/utils.py
index e6180906b..161983011 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -18,13 +18,11 @@ from urllib.parse import urljoin, urlparse
from lxml import html
from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
-from babel.core import get_global
-
from searx import settings
from searx.data import USER_AGENTS, data_dir
from searx.version import VERSION_TAG
-from searx.languages import language_codes
+from searx.sxng_locales import sxng_locales
from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException
from searx import logger
@@ -53,8 +51,8 @@ _LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {}
_FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None
"""fasttext model to predict laguage of a search term"""
-SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in language_codes])
-"""Languages supported by most searxng engines (:py:obj:`searx.languages.language_codes`)."""
+SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in sxng_locales])
+"""Languages supported by most searxng engines (:py:obj:`searx.sxng_locales.sxng_locales`)."""
class _NotSetClass: # pylint: disable=too-few-public-methods
@@ -355,102 +353,16 @@ def is_valid_lang(lang) -> Optional[Tuple[bool, str, str]]:
is_abbr = len(lang) == 2
lang = lang.lower()
if is_abbr:
- for l in language_codes:
+ for l in sxng_locales:
if l[0][:2] == lang:
return (True, l[0][:2], l[3].lower())
return None
- for l in language_codes:
+ for l in sxng_locales:
if l[1].lower() == lang or l[3].lower() == lang:
return (True, l[0][:2], l[3].lower())
return None
-def _get_lang_to_lc_dict(lang_list: List[str]) -> Dict[str, str]:
- key = str(lang_list)
- value = _LANG_TO_LC_CACHE.get(key, None)
- if value is None:
- value = {}
- for lang in lang_list:
- value.setdefault(lang.split('-')[0], lang)
- _LANG_TO_LC_CACHE[key] = value
- return value
-
-
-# babel's get_global contains all sorts of miscellaneous locale and territory related data
-# see get_global in: https://github.com/python-babel/babel/blob/master/babel/core.py
-def _get_from_babel(lang_code: str, key):
- match = get_global(key).get(lang_code.replace('-', '_'))
- # for some keys, such as territory_aliases, match may be a list
- if isinstance(match, str):
- return match.replace('_', '-')
- return match
-
-
-def _match_language(lang_code: str, lang_list=[], custom_aliases={}) -> Optional[str]: # pylint: disable=W0102
- """auxiliary function to match lang_code in lang_list"""
- # replace language code with a custom alias if necessary
- if lang_code in custom_aliases:
- lang_code = custom_aliases[lang_code]
-
- if lang_code in lang_list:
- return lang_code
-
- # try to get the most likely country for this language
- subtags = _get_from_babel(lang_code, 'likely_subtags')
- if subtags:
- if subtags in lang_list:
- return subtags
- subtag_parts = subtags.split('-')
- new_code = subtag_parts[0] + '-' + subtag_parts[-1]
- if new_code in custom_aliases:
- new_code = custom_aliases[new_code]
- if new_code in lang_list:
- return new_code
-
- # try to get the any supported country for this language
- return _get_lang_to_lc_dict(lang_list).get(lang_code)
-
-
-def match_language( # pylint: disable=W0102
- locale_code, lang_list=[], custom_aliases={}, fallback: Optional[str] = 'en-US'
-) -> Optional[str]:
- """get the language code from lang_list that best matches locale_code"""
- # try to get language from given locale_code
- language = _match_language(locale_code, lang_list, custom_aliases)
- if language:
- return language
-
- locale_parts = locale_code.split('-')
- lang_code = locale_parts[0]
-
- # if locale_code has script, try matching without it
- if len(locale_parts) > 2:
- language = _match_language(lang_code + '-' + locale_parts[-1], lang_list, custom_aliases)
- if language:
- return language
-
- # try to get language using an equivalent country code
- if len(locale_parts) > 1:
- country_alias = _get_from_babel(locale_parts[-1], 'territory_aliases')
- if country_alias:
- language = _match_language(lang_code + '-' + country_alias[0], lang_list, custom_aliases)
- if language:
- return language
-
- # try to get language using an equivalent language code
- alias = _get_from_babel(lang_code, 'language_aliases')
- if alias:
- language = _match_language(alias, lang_list, custom_aliases)
- if language:
- return language
-
- if lang_code != locale_code:
- # try to get language from given language without giving the country
- language = _match_language(lang_code, lang_list, custom_aliases)
-
- return language or fallback
-
-
def load_module(filename: str, module_dir: str) -> types.ModuleType:
modname = splitext(filename)[0]
modpath = join(module_dir, filename)
diff --git a/searx/webapp.py b/searx/webapp.py
index 95c33f704..4ed6c2eb7 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -89,7 +89,6 @@ from searx.utils import (
html_to_text,
gen_useragent,
dict_subset,
- match_language,
)
from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
from searx.query import RawTextQuery
@@ -117,12 +116,13 @@ from searx.locales import (
RTL_LOCALES,
localeselector,
locales_initialize,
+ match_locale,
)
# renaming names from searx imports ...
from searx.autocomplete import search_autocomplete, backends as autocomplete_backends
-from searx.languages import language_codes as languages
from searx.redisdb import initialize as redis_initialize
+from searx.sxng_locales import sxng_locales
from searx.search import SearchWithPlugins, initialize as search_initialize
from searx.network import stream as http_stream, set_context_network_name
from searx.search.checker import get_result as checker_get_result
@@ -227,7 +227,7 @@ def _get_browser_language(req, lang_list):
if '-' in lang:
lang_parts = lang.split('-')
lang = "{}-{}".format(lang_parts[0], lang_parts[-1].upper())
- locale = match_language(lang, lang_list, fallback=None)
+ locale = match_locale(lang, lang_list, fallback=None)
if locale is not None:
return locale
return 'en'
@@ -407,7 +407,7 @@ def get_client_settings():
def render(template_name: str, **kwargs):
-
+ # pylint: disable=too-many-statements
kwargs['client_settings'] = str(
base64.b64encode(
bytes(
@@ -438,17 +438,20 @@ def render(template_name: str, **kwargs):
kwargs['OTHER_CATEGORY'] = OTHER_CATEGORY
# i18n
- kwargs['language_codes'] = [l for l in languages if l[0] in settings['search']['languages']]
+ kwargs['sxng_locales'] = [l for l in sxng_locales if l[0] in settings['search']['languages']]
locale = request.preferences.get_value('locale')
kwargs['locale_rfc5646'] = _get_locale_rfc5646(locale)
if locale in RTL_LOCALES and 'rtl' not in kwargs:
kwargs['rtl'] = True
+
if 'current_language' not in kwargs:
- kwargs['current_language'] = match_language(
- request.preferences.get_value('language'), settings['search']['languages']
- )
+ _locale = request.preferences.get_value('language')
+ if _locale in ('auto', 'all'):
+ kwargs['current_language'] = _locale
+ else:
+ kwargs['current_language'] = match_locale(_locale, settings['search']['languages'])
# values from settings
kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html']
@@ -810,6 +813,13 @@ def search():
)
)
+ if search_query.lang in ('auto', 'all'):
+ current_language = search_query.lang
+ else:
+ current_language = match_locale(
+ search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language")
+ )
+
# search_query.lang contains the user choice (all, auto, en, ...)
# when the user choice is "auto", search.search_query.lang contains the detected language
# otherwise it is equals to search_query.lang
@@ -832,12 +842,8 @@ def search():
result_container.unresponsive_engines
),
current_locale = request.preferences.get_value("locale"),
- current_language = match_language(
- search_query.lang,
- settings['search']['languages'],
- fallback=request.preferences.get_value("language")
- ),
- search_language = match_language(
+ current_language = current_language,
+ search_language = match_locale(
search.search_query.lang,
settings['search']['languages'],
fallback=request.preferences.get_value("language")
@@ -907,16 +913,11 @@ def autocompleter():
# and there is a query part
if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0:
- # get language from cookie
- language = request.preferences.get_value('language')
- if not language or language == 'all':
- language = 'en'
- else:
- language = language.split('-')[0]
+ # get SearXNG's locale and autocomplete backend from cookie
+ sxng_locale = request.preferences.get_value('language')
+ backend_name = request.preferences.get_value('autocomplete')
- # run autocompletion
- raw_results = search_autocomplete(request.preferences.get_value('autocomplete'), sug_prefix, language)
- for result in raw_results:
+ for result in search_autocomplete(backend_name, sug_prefix, sxng_locale):
# attention: this loop will change raw_text_query object and this is
# the reason why the sug_prefix was stored before (see above)
if result != sug_prefix:
@@ -1001,7 +1002,9 @@ def preferences():
'rate80': rate80,
'rate95': rate95,
'warn_timeout': e.timeout > settings['outgoing']['request_timeout'],
- 'supports_selected_language': _is_selected_language_supported(e, request.preferences),
+ 'supports_selected_language': e.traits.is_locale_supported(
+ str(request.preferences.get_value('language') or 'all')
+ ),
'result_count': result_count,
}
# end of stats
@@ -1052,7 +1055,9 @@ def preferences():
# supports
supports = {}
for _, e in filtered_engines.items():
- supports_selected_language = _is_selected_language_supported(e, request.preferences)
+ supports_selected_language = e.traits.is_locale_supported(
+ str(request.preferences.get_value('language') or 'all')
+ )
safesearch = e.safesearch
time_range_support = e.time_range_support
for checker_test_name in checker_results.get(e.name, {}).get('errors', {}):
@@ -1099,16 +1104,6 @@ def preferences():
)
-def _is_selected_language_supported(engine, preferences: Preferences): # pylint: disable=redefined-outer-name
- language = preferences.get_value('language')
- if language == 'all':
- return True
- x = match_language(
- language, getattr(engine, 'supported_languages', []), getattr(engine, 'language_aliases', {}), None
- )
- return bool(x)
-
-
@app.route('/image_proxy', methods=['GET'])
def image_proxy():
# pylint: disable=too-many-return-statements, too-many-branches
@@ -1327,10 +1322,7 @@ def config():
if not request.preferences.validate_token(engine):
continue
- supported_languages = engine.supported_languages
- if isinstance(engine.supported_languages, dict):
- supported_languages = list(engine.supported_languages.keys())
-
+ _languages = engine.traits.languages.keys()
_engines.append(
{
'name': name,
@@ -1339,7 +1331,8 @@ def config():
'enabled': not engine.disabled,
'paging': engine.paging,
'language_support': engine.language_support,
- 'supported_languages': supported_languages,
+ 'languages': list(_languages),
+ 'regions': list(engine.traits.regions.keys()),
'safesearch': engine.safesearch,
'time_range_support': engine.time_range_support,
'timeout': engine.timeout,
diff --git a/searx/webutils.py b/searx/webutils.py
index 6c023ebc3..e62b0d695 100644
--- a/searx/webutils.py
+++ b/searx/webutils.py
@@ -1,4 +1,6 @@
# -*- coding: utf-8 -*-
+from __future__ import annotations
+
import os
import pathlib
import csv
@@ -8,7 +10,7 @@ import re
import inspect
import itertools
from datetime import datetime, timedelta
-from typing import Iterable, List, Tuple, Dict
+from typing import Iterable, List, Tuple, Dict, TYPE_CHECKING
from io import StringIO
from codecs import getincrementalencoder
@@ -16,7 +18,10 @@ from codecs import getincrementalencoder
from flask_babel import gettext, format_date
from searx import logger, settings
-from searx.engines import Engine, OTHER_CATEGORY
+from searx.engines import OTHER_CATEGORY
+
+if TYPE_CHECKING:
+ from searx.enginelib import Engine
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
diff --git a/searxng_extra/update/update_engine_descriptions.py b/searxng_extra/update/update_engine_descriptions.py
index 6052bf084..66bc303db 100755
--- a/searxng_extra/update/update_engine_descriptions.py
+++ b/searxng_extra/update/update_engine_descriptions.py
@@ -18,8 +18,8 @@ from os.path import join
from lxml.html import fromstring
from searx.engines import wikidata, set_loggers
-from searx.utils import extract_text, match_language
-from searx.locales import LOCALE_NAMES, locales_initialize
+from searx.utils import extract_text
+from searx.locales import LOCALE_NAMES, locales_initialize, match_locale
from searx import searx_dir
from searx.utils import gen_useragent, detect_language
import searx.search
@@ -225,9 +225,9 @@ def fetch_website_description(engine_name, website):
fetched_lang, desc = get_website_description(website, lang, WIKIPEDIA_LANGUAGES[lang])
if fetched_lang is None or desc is None:
continue
- matched_lang = match_language(fetched_lang, LANGUAGES, fallback=None)
+ matched_lang = match_locale(fetched_lang, LANGUAGES, fallback=None)
if matched_lang is None:
- fetched_wikipedia_lang = match_language(fetched_lang, WIKIPEDIA_LANGUAGES.values(), fallback=None)
+ fetched_wikipedia_lang = match_locale(fetched_lang, WIKIPEDIA_LANGUAGES.values(), fallback=None)
matched_lang = wikipedia_languages_r.get(fetched_wikipedia_lang)
if matched_lang is not None:
update_description(engine_name, matched_lang, desc, website, replace=False)
diff --git a/searxng_extra/update/update_engine_traits.py b/searxng_extra/update/update_engine_traits.py
new file mode 100755
index 000000000..7449912dc
--- /dev/null
+++ b/searxng_extra/update/update_engine_traits.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python
+# lint: pylint
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Update :py:obj:`searx.enginelib.traits.EngineTraitsMap` and :origin:`searx/sxng_locales.py`
+
+:py:obj:`searx.enginelib.traits.EngineTraitsMap.ENGINE_TRAITS_FILE`:
+ Persistence of engines traits, fetched from the engines.
+
+:origin:`searx/sxng_locales.py`
+ Is generated from intersecting each engine's supported traits.
+
+The script :origin:`searxng_extra/update/update_engine_traits.py` is called in
+the :origin:`CI Update data ... <.github/workflows/data-update.yml>`
+
+"""
+
+# pylint: disable=invalid-name
+from unicodedata import lookup
+from pathlib import Path
+from pprint import pformat
+import babel
+
+from searx import settings, searx_dir
+from searx import network
+from searx.engines import load_engines
+from searx.enginelib.traits import EngineTraitsMap
+
+# Output files.
+languages_file = Path(searx_dir) / 'sxng_locales.py'
+languages_file_header = """\
+# -*- coding: utf-8 -*-
+'''List of SearXNG's locale codes.
+
+This file is generated automatically by::
+
+ ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
+'''
+
+sxng_locales = (
+"""
+languages_file_footer = """,
+)
+'''
+A list of five-digit tuples:
+
+0. SearXNG's internal locale tag (a language or region tag)
+1. Name of the language (:py:obj:`babel.core.Locale.get_language_name`)
+2. For region tags the name of the region (:py:obj:`babel.core.Locale.get_territory_name`).
+ Empty string for language tags.
+3. English language name (from :py:obj:`babel.core.Locale.english_name`)
+4. Unicode flag (emoji) that fits to SearXNG's internal region tag. Languages
+ are represented by a globe (\U0001F310)
+
+.. code:: python
+
+ ('en', 'English', '', 'English', '\U0001f310'),
+ ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
+ ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
+ ..
+ ('fr', 'Français', '', 'French', '\U0001f310'),
+ ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
+ ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
+
+:meta hide-value:
+'''
+"""
+
+
+lang2emoji = {
+ 'ha': '\U0001F1F3\U0001F1EA', # Hausa / Niger
+ 'bs': '\U0001F1E7\U0001F1E6', # Bosnian / Bosnia & Herzegovina
+ 'jp': '\U0001F1EF\U0001F1F5', # Japanese
+ 'ua': '\U0001F1FA\U0001F1E6', # Ukrainian
+ 'he': '\U0001F1EE\U0001F1F7', # Hebrew
+}
+
+
+def main():
+ load_engines(settings['engines'])
+ # traits_map = EngineTraitsMap.from_data()
+ traits_map = fetch_traits_map()
+ sxng_tag_list = filter_locales(traits_map)
+ write_languages_file(sxng_tag_list)
+
+
+def fetch_traits_map():
+ """Fetches supported languages for each engine and writes a JSON file with those."""
+ network.set_timeout_for_thread(10.0)
+
+ def log(msg):
+ print(msg)
+
+ traits_map = EngineTraitsMap.fetch_traits(log=log)
+ print("fetched properties from %s engines" % len(traits_map))
+ print("write json file: %s" % traits_map.ENGINE_TRAITS_FILE)
+ traits_map.save_data()
+ return traits_map
+
+
+def filter_locales(traits_map: EngineTraitsMap):
+ """Filter language & region tags by a threshold."""
+
+ min_eng_per_region = 11
+ min_eng_per_lang = 13
+
+ _ = {}
+ for eng in traits_map.values():
+ for reg in eng.regions.keys():
+ _[reg] = _.get(reg, 0) + 1
+
+ regions = set(k for k, v in _.items() if v >= min_eng_per_region)
+ lang_from_region = set(k.split('-')[0] for k in regions)
+
+ _ = {}
+ for eng in traits_map.values():
+ for lang in eng.languages.keys():
+ # ignore script variants like zh_Hant, zh_Hans, sr_Latn or pa_Arab (they
+ # are already counted by the existence of 'zh', 'sr' or 'pa')
+ if '_' in lang:
+ # print("ignore %s" % lang)
+ continue
+ _[lang] = _.get(lang, 0) + 1
+
+ languages = set(k for k, v in _.items() if v >= min_eng_per_lang)
+
+ sxng_tag_list = set()
+ sxng_tag_list.update(regions)
+ sxng_tag_list.update(lang_from_region)
+ sxng_tag_list.update(languages)
+
+ return sxng_tag_list
+
+
+def write_languages_file(sxng_tag_list):
+
+ language_codes = []
+
+ for sxng_tag in sorted(sxng_tag_list):
+ sxng_locale: babel.Locale = babel.Locale.parse(sxng_tag, sep='-')
+
+ flag = get_unicode_flag(sxng_locale) or ''
+
+ item = (
+ sxng_tag,
+ sxng_locale.get_language_name().title(),
+ sxng_locale.get_territory_name() or '',
+ sxng_locale.english_name.split(' (')[0],
+ UnicodeEscape(flag),
+ )
+
+ language_codes.append(item)
+
+ language_codes = tuple(language_codes)
+
+ with open(languages_file, 'w', encoding='utf-8') as new_file:
+ file_content = "{header} {language_codes}{footer}".format(
+ header=languages_file_header,
+ language_codes=pformat(language_codes, width=120, indent=4)[1:-1],
+ footer=languages_file_footer,
+ )
+ new_file.write(file_content)
+ new_file.close()
+
+
+class UnicodeEscape(str):
+ """Escape unicode string in :py:obj:`pprint.pformat`"""
+
+ def __repr__(self):
+ return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'"
+
+
+def get_unicode_flag(locale: babel.Locale):
+ """Determine a unicode flag (emoji) that fits to the ``locale``"""
+
+ emoji = lang2emoji.get(locale.language)
+ if emoji:
+ return emoji
+
+ if not locale.territory:
+ return '\U0001F310'
+
+ emoji = lang2emoji.get(locale.territory.lower())
+ if emoji:
+ return emoji
+
+ try:
+ c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + locale.territory[0])
+ c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + locale.territory[1])
+ # print("OK : %s --> %s%s" % (locale, c1, c2))
+ except KeyError as exc:
+ print("ERROR: %s --> %s" % (locale, exc))
+ return None
+
+ return c1 + c2
+
+
+if __name__ == "__main__":
+ main()
diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py
deleted file mode 100755
index 87b13b276..000000000
--- a/searxng_extra/update/update_languages.py
+++ /dev/null
@@ -1,313 +0,0 @@
-#!/usr/bin/env python
-# lint: pylint
-
-# SPDX-License-Identifier: AGPL-3.0-or-later
-"""This script generates languages.py from intersecting each engine's supported
-languages.
-
-Output files: :origin:`searx/data/engines_languages.json` and
-:origin:`searx/languages.py` (:origin:`CI Update data ...
-<.github/workflows/data-update.yml>`).
-
-"""
-
-# pylint: disable=invalid-name
-from unicodedata import lookup
-import json
-from pathlib import Path
-from pprint import pformat
-from babel import Locale, UnknownLocaleError
-from babel.languages import get_global
-from babel.core import parse_locale
-
-from searx import settings, searx_dir
-from searx.engines import load_engines, engines
-from searx.network import set_timeout_for_thread
-
-# Output files.
-engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json'
-languages_file = Path(searx_dir) / 'languages.py'
-
-
-# Fetches supported languages for each engine and writes json file with those.
-def fetch_supported_languages():
- set_timeout_for_thread(10.0)
-
- engines_languages = {}
- names = list(engines)
- names.sort()
-
- for engine_name in names:
- if hasattr(engines[engine_name], 'fetch_supported_languages'):
- engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
- print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name))
- if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck
- engines_languages[engine_name] = sorted(engines_languages[engine_name])
-
- print("fetched languages from %s engines" % len(engines_languages))
-
- # write json file
- with open(engines_languages_file, 'w', encoding='utf-8') as f:
- json.dump(engines_languages, f, indent=2, sort_keys=True)
-
- return engines_languages
-
-
-# Get babel Locale object from lang_code if possible.
-def get_locale(lang_code):
- try:
- locale = Locale.parse(lang_code, sep='-')
- return locale
- except (UnknownLocaleError, ValueError):
- return None
-
-
-lang2emoji = {
- 'ha': '\U0001F1F3\U0001F1EA', # Hausa / Niger
- 'bs': '\U0001F1E7\U0001F1E6', # Bosnian / Bosnia & Herzegovina
- 'jp': '\U0001F1EF\U0001F1F5', # Japanese
- 'ua': '\U0001F1FA\U0001F1E6', # Ukrainian
- 'he': '\U0001F1EE\U0001F1F7', # Hebrew
-}
-
-
-def get_unicode_flag(lang_code):
- """Determine a unicode flag (emoji) that fits to the ``lang_code``"""
-
- emoji = lang2emoji.get(lang_code.lower())
- if emoji:
- return emoji
-
- if len(lang_code) == 2:
- return '\U0001F310'
-
- language = territory = script = variant = ''
- try:
- language, territory, script, variant = parse_locale(lang_code, '-')
- except ValueError as exc:
- print(exc)
-
- # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
- if not territory:
- # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
- emoji = lang2emoji.get(language)
- if not emoji:
- print(
- "%s --> language: %s / territory: %s / script: %s / variant: %s"
- % (lang_code, language, territory, script, variant)
- )
- return emoji
-
- emoji = lang2emoji.get(territory.lower())
- if emoji:
- return emoji
-
- try:
- c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0])
- c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1])
- # print("%s --> territory: %s --> %s%s" %(lang_code, territory, c1, c2 ))
- except KeyError as exc:
- print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
- return None
-
- return c1 + c2
-
-
-def get_territory_name(lang_code):
- country_name = None
- locale = get_locale(lang_code)
- try:
- if locale is not None:
- country_name = locale.get_territory_name()
- except FileNotFoundError as exc:
- print("ERROR: %s --> %s" % (locale, exc))
- return country_name
-
-
-# Join all language lists.
-def join_language_lists(engines_languages):
- language_list = {}
- for engine_name in engines_languages:
- for lang_code in engines_languages[engine_name]:
-
- # apply custom fixes if necessary
- if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values():
- lang_code = next(
- lc for lc, alias in engines[engine_name].language_aliases.items() if lang_code == alias
- )
-
- locale = get_locale(lang_code)
-
- # ensure that lang_code uses standard language and country codes
- if locale and locale.territory:
- lang_code = "{lang}-{country}".format(lang=locale.language, country=locale.territory)
- short_code = lang_code.split('-')[0]
-
- # add language without country if not in list
- if short_code not in language_list:
- if locale:
- # get language's data from babel's Locale object
- language_name = locale.get_language_name().title()
- english_name = locale.english_name.split(' (')[0]
- elif short_code in engines_languages['wikipedia']:
- # get language's data from wikipedia if not known by babel
- language_name = engines_languages['wikipedia'][short_code]['name']
- english_name = engines_languages['wikipedia'][short_code]['english_name']
- else:
- language_name = None
- english_name = None
-
- # add language to list
- language_list[short_code] = {
- 'name': language_name,
- 'english_name': english_name,
- 'counter': set(),
- 'countries': {},
- }
-
- # add language with country if not in list
- if lang_code != short_code and lang_code not in language_list[short_code]['countries']:
- country_name = ''
- if locale:
- # get country name from babel's Locale object
- try:
- country_name = locale.get_territory_name()
- except FileNotFoundError as exc:
- print("ERROR: %s --> %s" % (locale, exc))
- locale = None
-
- language_list[short_code]['countries'][lang_code] = {
- 'country_name': country_name,
- 'counter': set(),
- }
-
- # count engine for both language_country combination and language alone
- language_list[short_code]['counter'].add(engine_name)
- if lang_code != short_code:
- language_list[short_code]['countries'][lang_code]['counter'].add(engine_name)
-
- return language_list
-
-
-# Filter language list so it only includes the most supported languages and countries
-def filter_language_list(all_languages):
- min_engines_per_lang = 12
- min_engines_per_country = 7
- # pylint: disable=consider-using-dict-items, consider-iterating-dictionary
- main_engines = [
- engine_name
- for engine_name in engines.keys()
- if 'general' in engines[engine_name].categories
- and engines[engine_name].supported_languages
- and not engines[engine_name].disabled
- ]
-
- # filter list to include only languages supported by most engines or all default general engines
- filtered_languages = {
- code: lang
- for code, lang in all_languages.items()
- if (
- len(lang['counter']) >= min_engines_per_lang
- or all(main_engine in lang['counter'] for main_engine in main_engines)
- )
- }
-
- def _copy_lang_data(lang, country_name=None):
- new_dict = {}
- new_dict['name'] = all_languages[lang]['name']
- new_dict['english_name'] = all_languages[lang]['english_name']
- if country_name:
- new_dict['country_name'] = country_name
- return new_dict
-
- # for each language get country codes supported by most engines or at least one country code
- filtered_languages_with_countries = {}
- for lang, lang_data in filtered_languages.items():
- countries = lang_data['countries']
- filtered_countries = {}
-
- # get language's country codes with enough supported engines
- for lang_country, country_data in countries.items():
- if len(country_data['counter']) >= min_engines_per_country:
- filtered_countries[lang_country] = _copy_lang_data(lang, country_data['country_name'])
-
- # add language without countries too if there's more than one country to choose from
- if len(filtered_countries) > 1:
- filtered_countries[lang] = _copy_lang_data(lang, None)
- elif len(filtered_countries) == 1:
- lang_country = next(iter(filtered_countries))
-
- # if no country has enough engines try to get most likely country code from babel
- if not filtered_countries:
- lang_country = None
- subtags = get_global('likely_subtags').get(lang)
- if subtags:
- country_code = subtags.split('_')[-1]
- if len(country_code) == 2:
- lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
-
- if lang_country:
- filtered_countries[lang_country] = _copy_lang_data(lang, None)
- else:
- filtered_countries[lang] = _copy_lang_data(lang, None)
-
- filtered_languages_with_countries.update(filtered_countries)
-
- return filtered_languages_with_countries
-
-
-class UnicodeEscape(str):
- """Escape unicode string in :py:obj:`pprint.pformat`"""
-
- def __repr__(self):
- return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'"
-
-
-# Write languages.py.
-def write_languages_file(languages):
- file_headers = (
- "# -*- coding: utf-8 -*-",
- "# list of language codes",
- "# this file is generated automatically by utils/fetch_languages.py",
- "language_codes = (\n",
- )
-
- language_codes = []
-
- for code in sorted(languages):
-
- name = languages[code]['name']
- if name is None:
- print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
- continue
-
- flag = get_unicode_flag(code) or ''
- item = (
- code,
- languages[code]['name'].split(' (')[0],
- get_territory_name(code) or '',
- languages[code].get('english_name') or '',
- UnicodeEscape(flag),
- )
-
- language_codes.append(item)
-
- language_codes = tuple(language_codes)
-
- with open(languages_file, 'w', encoding='utf-8') as new_file:
- file_content = "{file_headers} {language_codes},\n)\n".format(
- # fmt: off
- file_headers = '\n'.join(file_headers),
- language_codes = pformat(language_codes, indent=4)[1:-1]
- # fmt: on
- )
- new_file.write(file_content)
- new_file.close()
-
-
-if __name__ == "__main__":
- load_engines(settings['engines'])
- _engines_languages = fetch_supported_languages()
- _all_languages = join_language_lists(_engines_languages)
- _filtered_languages = filter_language_list(_all_languages)
- write_languages_file(_filtered_languages)
diff --git a/searxng_extra/update/update_osm_keys_tags.py b/searxng_extra/update/update_osm_keys_tags.py
index 72197498d..72f3d61c5 100755
--- a/searxng_extra/update/update_osm_keys_tags.py
+++ b/searxng_extra/update/update_osm_keys_tags.py
@@ -50,7 +50,7 @@ from pathlib import Path
from searx import searx_dir
from searx.network import set_timeout_for_thread
from searx.engines import wikidata, set_loggers
-from searx.languages import language_codes
+from searx.sxng_locales import sxng_locales
from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
set_loggers(wikidata, 'wikidata')
@@ -76,7 +76,7 @@ GROUP BY ?key ?item ?itemLabel
ORDER BY ?key ?item ?itemLabel
"""
-LANGUAGES = [l[0].lower() for l in language_codes]
+LANGUAGES = [l[0].lower() for l in sxng_locales]
PRESET_KEYS = {
('wikidata',): {'en': 'Wikidata'},
diff --git a/tests/unit/test_locales.py b/tests/unit/test_locales.py
new file mode 100644
index 000000000..61561c17b
--- /dev/null
+++ b/tests/unit/test_locales.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Test some code from module :py:obj:`searx.locales`"""
+
+from searx import locales
+from searx.sxng_locales import sxng_locales
+from tests import SearxTestCase
+
+
+class TestLocales(SearxTestCase):
+ """Implemented tests:
+
+ - :py:obj:`searx.locales.match_locale`
+ """
+
+ def test_match_locale(self):
+
+ locale_tag_list = [x[0] for x in sxng_locales]
+
+ # Test SearXNG search languages
+
+ self.assertEqual(locales.match_locale('de', locale_tag_list), 'de')
+ self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr')
+ self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh')
+
+ # Test SearXNG search regions
+
+ self.assertEqual(locales.match_locale('ca-es', locale_tag_list), 'ca-ES')
+ self.assertEqual(locales.match_locale('de-at', locale_tag_list), 'de-AT')
+ self.assertEqual(locales.match_locale('de-de', locale_tag_list), 'de-DE')
+ self.assertEqual(locales.match_locale('en-UK', locale_tag_list), 'en-GB')
+ self.assertEqual(locales.match_locale('fr-be', locale_tag_list), 'fr-BE')
+ self.assertEqual(locales.match_locale('fr-be', locale_tag_list), 'fr-BE')
+ self.assertEqual(locales.match_locale('fr-ca', locale_tag_list), 'fr-CA')
+ self.assertEqual(locales.match_locale('fr-ch', locale_tag_list), 'fr-CH')
+ self.assertEqual(locales.match_locale('zh-cn', locale_tag_list), 'zh-CN')
+ self.assertEqual(locales.match_locale('zh-tw', locale_tag_list), 'zh-TW')
+ self.assertEqual(locales.match_locale('zh-hk', locale_tag_list), 'zh-HK')
+
+ # Test language script code
+
+ self.assertEqual(locales.match_locale('zh-hans', locale_tag_list), 'zh-CN')
+ self.assertEqual(locales.match_locale('zh-hans-cn', locale_tag_list), 'zh-CN')
+ self.assertEqual(locales.match_locale('zh-hant', locale_tag_list), 'zh-TW')
+ self.assertEqual(locales.match_locale('zh-hant-tw', locale_tag_list), 'zh-TW')
+
+ # Test individual locale lists
+
+ self.assertEqual(locales.match_locale('es', [], fallback='fallback'), 'fallback')
+
+ self.assertEqual(locales.match_locale('de', ['de-CH', 'de-DE']), 'de-DE')
+ self.assertEqual(locales.match_locale('de', ['de-CH', 'de-DE']), 'de-DE')
+ self.assertEqual(locales.match_locale('es', ['ES']), 'ES')
+ self.assertEqual(locales.match_locale('es', ['es-AR', 'es-ES', 'es-MX']), 'es-ES')
+ self.assertEqual(locales.match_locale('es-AR', ['es-AR', 'es-ES', 'es-MX']), 'es-AR')
+ self.assertEqual(locales.match_locale('es-CO', ['es-AR', 'es-ES']), 'es-ES')
+ self.assertEqual(locales.match_locale('es-CO', ['es-AR']), 'es-AR')
+
+ # Tests from the commit message of 9ae409a05a
+
+ # Assumption:
+ # A. When a user selects a language the results should be optimized according to
+ # the selected language.
+ #
+ # B. When user selects a language and a territory the results should be
+ # optimized with first priority on territory and second on language.
+
+ # Assume we have an engine that supports the following locales:
+ locale_tag_list = ['zh-CN', 'zh-HK', 'nl-BE', 'fr-CA']
+
+ # Examples (Assumption A.)
+ # ------------------------
+
+ # A user selects region 'zh-TW' which should end in zh_HK.
+ # hint: CN is 'Hans' and HK ('Hant') fits better to TW ('Hant')
+ self.assertEqual(locales.match_locale('zh-TW', locale_tag_list), 'zh-HK')
+
+ # A user selects only the language 'zh' which should end in CN
+ self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh-CN')
+
+ # A user selects only the language 'fr' which should end in fr_CA
+ self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-CA')
+
+ # The difference in priority on the territory is best shown with an
+ # engine that supports the following locales:
+ locale_tag_list = ['fr-FR', 'fr-CA', 'en-GB', 'nl-BE']
+
+ # A user selects only a language
+ self.assertEqual(locales.match_locale('en', locale_tag_list), 'en-GB')
+
+ # hint: the engine supports fr_FR and fr_CA; since no territory is given,
+ # fr_FR takes priority.
+ self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-FR')
+
+ # Examples (Assumption B.)
+ # ------------------------
+
+ # A user selects region 'fr-BE' which should end in nl-BE
+ self.assertEqual(locales.match_locale('fr-BE', locale_tag_list), 'nl-BE')
+
+ # If the user selects a language and there are two locales like the
+ # following:
+
+ locale_tag_list = ['fr-BE', 'fr-CH']
+
+ # The get_engine_locale selects the locale by looking at the "population
+ # percent", and this percentage is higher in BE (68%)
+ # compared to CH (21%).
+
+ self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-BE')
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 6f51f1ee3..2ad4593a1 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -87,39 +87,6 @@ class TestUtils(SearxTestCase):
html = '<p><b>Lorem ipsum</i>dolor sit amet</p>'
self.assertEqual(utils.html_to_text(html), "Lorem ipsum")
- def test_match_language(self):
- self.assertEqual(utils.match_language('es', ['es']), 'es')
- self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback')
- self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp')
-
- # handle script tags
- self.assertEqual(utils.match_language('zh-CN', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hans-CN')
- self.assertEqual(utils.match_language('zh-TW', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hant-TW')
- self.assertEqual(utils.match_language('zh-Hans-CN', ['zh-CN', 'zh-TW']), 'zh-CN')
- self.assertEqual(utils.match_language('zh-Hant-TW', ['zh-CN', 'zh-TW']), 'zh-TW')
- self.assertEqual(utils.match_language('zh-Hans', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-CN')
- self.assertEqual(utils.match_language('zh-Hant', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-TW')
-
- aliases = {'en-GB': 'en-UK', 'he': 'iw'}
-
- # guess country
- self.assertEqual(utils.match_language('de-DE', ['de']), 'de')
- self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE')
- self.assertEqual(utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']), 'es-ES')
- self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX')
- self.assertEqual(utils.match_language('en-UK', ['en-AU', 'en-GB', 'en-US']), 'en-GB')
- self.assertEqual(utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), 'en-UK')
-
- # language aliases
- self.assertEqual(utils.match_language('iw', ['he']), 'he')
- self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw')
- self.assertEqual(utils.match_language('iw-IL', ['he']), 'he')
- self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw')
- self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL')
- self.assertEqual(utils.match_language('he', ['iw-IL'], aliases), 'iw-IL')
- self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL')
- self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL')
-
def test_ecma_unscape(self):
self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space')
self.assertEqual(utils.ecma_unescape('text using %xx: %F3'), 'text using %xx: ó')
diff --git a/utils/templates/etc/searxng/settings.yml b/utils/templates/etc/searxng/settings.yml
index d8b659b1a..04d25b9d3 100644
--- a/utils/templates/etc/searxng/settings.yml
+++ b/utils/templates/etc/searxng/settings.yml
@@ -52,9 +52,6 @@ enabled_plugins:
engines:
- - name: google
- use_mobile_ui: true
-
# - name: fdroid
# disabled: false
#