summary | refs | log | tree | commit | diff
path: root/searx
diff options
context:
space:
mode:
Diffstat (limited to 'searx')
-rw-r--r-- searx/data/engine_traits.json 198
-rw-r--r-- searx/engines/duckduckgo.py 79
-rw-r--r-- searx/engines/duckduckgo_definitions.py 1
-rw-r--r-- searx/engines/duckduckgo_images.py 1
4 files changed, 266 insertions, 13 deletions
diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json
index 27b665cbb..251b7295a 100644
--- a/searx/data/engine_traits.json
+++ b/searx/data/engine_traits.json
@@ -2124,11 +2124,73 @@
]
},
"ddg definitions": {
- "all_locale": null,
+ "all_locale": "wt-wt",
"custom": {},
"data_type": "supported_languages",
"languages": {},
- "regions": {},
+ "regions": {
+ "ar-SA": "xa-ar",
+ "bg-BG": "bg-bg",
+ "ca-ES": "es-ca",
+ "cs-CZ": "cz-cs",
+ "da-DK": "dk-da",
+ "de-AT": "at-de",
+ "de-CH": "ch-de",
+ "de-DE": "de-de",
+ "el-GR": "gr-el",
+ "en-AU": "au-en",
+ "en-CA": "ca-en",
+ "en-GB": "uk-en",
+ "en-IE": "ie-en",
+ "en-IL": "il-en",
+ "en-IN": "in-en",
+ "en-MY": "my-en",
+ "en-NZ": "nz-en",
+ "en-PH": "ph-en",
+ "en-PK": "pk-en",
+ "en-SG": "sg-en",
+ "en-US": "us-en",
+ "en-ZA": "za-en",
+ "es-AR": "ar-es",
+ "es-CL": "cl-es",
+ "es-CO": "co-es",
+ "es-ES": "es-es",
+ "es-MX": "mx-es",
+ "es-PE": "pe-es",
+ "es-US": "us-es",
+ "et-EE": "ee-et",
+ "fi-FI": "fi-fi",
+ "fr-BE": "be-fr",
+ "fr-CA": "ca-fr",
+ "fr-CH": "ch-fr",
+ "fr-FR": "fr-fr",
+ "hr-HR": "hr-hr",
+ "hu-HU": "hu-hu",
+ "id-ID": "id-en",
+ "it-IT": "it-it",
+ "ja-JP": "jp-jp",
+ "ko-KR": "kr-kr",
+ "lt-LT": "lt-lt",
+ "lv-LV": "lv-lv",
+ "nb-NO": "no-no",
+ "nl-BE": "be-nl",
+ "nl-NL": "nl-nl",
+ "pl-PL": "pl-pl",
+ "pt-BR": "br-pt",
+ "pt-PT": "pt-pt",
+ "ro-RO": "ro-ro",
+ "ru-RU": "ru-ru",
+ "sk-SK": "sk-sk",
+ "sl-SI": "sl-sl",
+ "sv-SE": "se-sv",
+ "th-TH": "th-en",
+ "tr-TR": "tr-tr",
+ "uk-UA": "ua-uk",
+ "vi-VN": "vn-en",
+ "zh-CN": "cn-zh",
+ "zh-HK": "hk-tzh",
+ "zh-TW": "tw-tzh"
+ },
"supported_languages": [
"ar-XA",
"bg-BG",
@@ -2196,11 +2258,73 @@
]
},
"duckduckgo": {
- "all_locale": null,
+ "all_locale": "wt-wt",
"custom": {},
"data_type": "supported_languages",
"languages": {},
- "regions": {},
+ "regions": {
+ "ar-SA": "xa-ar",
+ "bg-BG": "bg-bg",
+ "ca-ES": "es-ca",
+ "cs-CZ": "cz-cs",
+ "da-DK": "dk-da",
+ "de-AT": "at-de",
+ "de-CH": "ch-de",
+ "de-DE": "de-de",
+ "el-GR": "gr-el",
+ "en-AU": "au-en",
+ "en-CA": "ca-en",
+ "en-GB": "uk-en",
+ "en-IE": "ie-en",
+ "en-IL": "il-en",
+ "en-IN": "in-en",
+ "en-MY": "my-en",
+ "en-NZ": "nz-en",
+ "en-PH": "ph-en",
+ "en-PK": "pk-en",
+ "en-SG": "sg-en",
+ "en-US": "us-en",
+ "en-ZA": "za-en",
+ "es-AR": "ar-es",
+ "es-CL": "cl-es",
+ "es-CO": "co-es",
+ "es-ES": "es-es",
+ "es-MX": "mx-es",
+ "es-PE": "pe-es",
+ "es-US": "us-es",
+ "et-EE": "ee-et",
+ "fi-FI": "fi-fi",
+ "fr-BE": "be-fr",
+ "fr-CA": "ca-fr",
+ "fr-CH": "ch-fr",
+ "fr-FR": "fr-fr",
+ "hr-HR": "hr-hr",
+ "hu-HU": "hu-hu",
+ "id-ID": "id-en",
+ "it-IT": "it-it",
+ "ja-JP": "jp-jp",
+ "ko-KR": "kr-kr",
+ "lt-LT": "lt-lt",
+ "lv-LV": "lv-lv",
+ "nb-NO": "no-no",
+ "nl-BE": "be-nl",
+ "nl-NL": "nl-nl",
+ "pl-PL": "pl-pl",
+ "pt-BR": "br-pt",
+ "pt-PT": "pt-pt",
+ "ro-RO": "ro-ro",
+ "ru-RU": "ru-ru",
+ "sk-SK": "sk-sk",
+ "sl-SI": "sl-sl",
+ "sv-SE": "se-sv",
+ "th-TH": "th-en",
+ "tr-TR": "tr-tr",
+ "uk-UA": "ua-uk",
+ "vi-VN": "vn-en",
+ "zh-CN": "cn-zh",
+ "zh-HK": "hk-tzh",
+ "zh-TW": "tw-tzh"
+ },
"supported_languages": [
"ar-XA",
"bg-BG",
@@ -2268,11 +2392,73 @@
]
},
"duckduckgo images": {
- "all_locale": null,
+ "all_locale": "wt-wt",
"custom": {},
"data_type": "supported_languages",
"languages": {},
- "regions": {},
+ "regions": {
+ "ar-SA": "xa-ar",
+ "bg-BG": "bg-bg",
+ "ca-ES": "es-ca",
+ "cs-CZ": "cz-cs",
+ "da-DK": "dk-da",
+ "de-AT": "at-de",
+ "de-CH": "ch-de",
+ "de-DE": "de-de",
+ "el-GR": "gr-el",
+ "en-AU": "au-en",
+ "en-CA": "ca-en",
+ "en-GB": "uk-en",
+ "en-IE": "ie-en",
+ "en-IL": "il-en",
+ "en-IN": "in-en",
+ "en-MY": "my-en",
+ "en-NZ": "nz-en",
+ "en-PH": "ph-en",
+ "en-PK": "pk-en",
+ "en-SG": "sg-en",
+ "en-US": "us-en",
+ "en-ZA": "za-en",
+ "es-AR": "ar-es",
+ "es-CL": "cl-es",
+ "es-CO": "co-es",
+ "es-ES": "es-es",
+ "es-MX": "mx-es",
+ "es-PE": "pe-es",
+ "es-US": "us-es",
+ "et-EE": "ee-et",
+ "fi-FI": "fi-fi",
+ "fr-BE": "be-fr",
+ "fr-CA": "ca-fr",
+ "fr-CH": "ch-fr",
+ "fr-FR": "fr-fr",
+ "hr-HR": "hr-hr",
+ "hu-HU": "hu-hu",
+ "id-ID": "id-en",
+ "it-IT": "it-it",
+ "ja-JP": "jp-jp",
+ "ko-KR": "kr-kr",
+ "lt-LT": "lt-lt",
+ "lv-LV": "lv-lv",
+ "nb-NO": "no-no",
+ "nl-BE": "be-nl",
+ "nl-NL": "nl-nl",
+ "pl-PL": "pl-pl",
+ "pt-BR": "br-pt",
+ "pt-PT": "pt-pt",
+ "ro-RO": "ro-ro",
+ "ru-RU": "ru-ru",
+ "sk-SK": "sk-sk",
+ "sl-SI": "sl-sl",
+ "sv-SE": "se-sv",
+ "th-TH": "th-en",
+ "tr-TR": "tr-tr",
+ "uk-UA": "ua-uk",
+ "vi-VN": "vn-en",
+ "zh-CN": "cn-zh",
+ "zh-HK": "hk-tzh",
+ "zh-TW": "tw-tzh"
+ },
"supported_languages": [
"ar-XA",
"bg-BG",
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 2a7956ca8..cb47122ae 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -3,9 +3,8 @@
"""DuckDuckGo Lite
"""
-from json import loads
-
-from lxml.html import fromstring
+import json
+from lxml import html
from searx.utils import (
dict_subset,
@@ -14,7 +13,10 @@ from searx.utils import (
extract_text,
match_language,
)
-from searx.network import get
+from searx import network
+from searx.enginelib.traits import EngineTraits
+
+traits: EngineTraits
# about
about = {
@@ -120,13 +122,13 @@ def request(query, params):
def response(resp):
headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
- get(url_ping, headers=headers_ping)
+ network.get(url_ping, headers=headers_ping)
if resp.status_code == 303:
return []
results = []
- doc = fromstring(resp.text)
+ doc = html.fromstring(resp.text)
result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
if not len(result_table) >= 3:
@@ -180,7 +182,70 @@ def _fetch_supported_languages(resp):
response_page = response_page[response_page.find('regions:{') + 8 :]
response_page = response_page[: response_page.find('}') + 1]
- regions_json = loads(response_page)
+ regions_json = json.loads(response_page)
supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
return list(supported_languages)
+
+
def fetch_traits(engine_traits: EngineTraits):
    """Fetch regions from DuckDuckGo and store them in ``engine_traits``.

    Downloads DuckDuckGo's ``u588.js`` script, extracts the ``regions:{...}``
    object literal from it and parses that literal as JSON.  Every DuckDuckGo
    region tag (``<territory>-<language>``, e.g. ``us-en``) is turned into a
    babel locale identifier -- either through the explicit overrides in
    ``reg_map`` or by swapping the two parts (``us-en`` -> ``en_US``) -- and
    the tag returned by ``region_tag`` for that locale is used as the key in
    ``engine_traits.regions``; the value is the original DuckDuckGo tag.

    ``engine_traits.all_locale`` is always set to ``'wt-wt'``.

    Problems are reported with ``print`` and the affected item is skipped; if
    the HTTP response is not OK or the regions object can not be located, the
    function returns without touching ``engine_traits.regions``.

    :param engine_traits: traits object that is updated in place.
    """
    # pylint: disable=import-outside-toplevel

    engine_traits.data_type = 'supported_languages'  # deprecated

    import babel
    from searx.locales import region_tag

    engine_traits.all_locale = 'wt-wt'

    resp = network.get('https://duckduckgo.com/util/u588.js')
    if not resp.ok:
        print("ERROR: response from DuckDuckGo is not OK.")
        # do not try to parse the body of a failed response
        return

    # Locate the object literal that follows 'regions:' -- it spans from the
    # opening brace of the marker up to (and including) the first '}'.
    marker = 'regions:{'
    start = resp.text.find(marker)
    end = resp.text.find('}', start)
    if start == -1 or end == -1:
        print("ERROR: can't find the regions object in the response from DuckDuckGo.")
        return
    start += len(marker) - 1  # keep the opening brace
    regions = json.loads(resp.text[start : end + 1])

    # DuckDuckGo tags whose locale can not be derived by swapping the parts.
    reg_map = {
        'tw-tzh': 'zh_TW',
        'hk-tzh': 'zh_HK',
        'ct-ca': 'skip',  # ct-ca and es-ca both map to ca_ES
        'es-ca': 'ca_ES',
        'id-en': 'id_ID',
        'no-no': 'nb_NO',
        'jp-jp': 'ja_JP',
        'kr-kr': 'ko_KR',
        'xa-ar': 'ar_SA',
        'sl-sl': 'sl_SI',
        'th-en': 'th_TH',
        'vn-en': 'vi_VN',
    }

    for eng_tag, name in regions.items():

        if eng_tag == 'wt-wt':
            # the "all regions" tag, already stored in all_locale above
            continue

        region = reg_map.get(eng_tag)
        if region == 'skip':
            continue

        if not region:
            parts = eng_tag.split('-')
            if len(parts) != 2:
                print("ERROR: %s (%s) is not a <territory>-<language> tag" % (name, eng_tag))
                continue
            eng_territory, eng_lang = parts
            region = eng_lang + '_' + eng_territory.upper()

        try:
            sxng_tag = region_tag(babel.Locale.parse(region))
        except (ValueError, babel.UnknownLocaleError):
            # ValueError: malformed identifier, UnknownLocaleError: no locale data
            print("ERROR: %s (%s) -> %s is unknown by babel" % (name, eng_tag, region))
            continue

        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            # first mapping wins; only report when the tags really differ
            if conflict != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
            continue
        engine_traits.regions[sxng_tag] = eng_tag
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index 7ed0de35c..8b42799be 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -11,6 +11,7 @@ from lxml import html
from searx.data import WIKIDATA_UNITS
from searx.engines.duckduckgo import language_aliases
from searx.engines.duckduckgo import ( # pylint: disable=unused-import
+ fetch_traits,
_fetch_supported_languages,
supported_languages_url,
)
diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py
index 19f649ef4..927bc6cff 100644
--- a/searx/engines/duckduckgo_images.py
+++ b/searx/engines/duckduckgo_images.py
@@ -8,6 +8,7 @@ from urllib.parse import urlencode
from searx.exceptions import SearxEngineAPIException
from searx.engines.duckduckgo import get_region_code
from searx.engines.duckduckgo import ( # pylint: disable=unused-import
+ fetch_traits,
_fetch_supported_languages,
supported_languages_url,
)