3 files changed, 50 insertions, 42 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index fa9749e9d..a3dd7a95a 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -193,7 +193,7 @@ def set_language_attributes(engine):
     if hasattr(engine, '_fetch_supported_languages'):
         headers = {
             'User-Agent': gen_useragent(),
-            'Accept-Language': 'ja-JP,ja;q=0.8,en-US;q=0.5,en;q=0.3',  # bing needs a non-English language
+            'Accept-Language': "en-US,en;q=0.5",  # bing needs the language set to English
         }
         engine.fetch_supported_languages = (
             # pylint: disable=protected-access
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 59fc22be4..1170227ad 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -6,7 +6,7 @@
 """
 
 import re
-from urllib.parse import urlencode
+from urllib.parse import urlencode, urlparse, parse_qs
 from lxml import html
 from searx.utils import eval_xpath, extract_text, match_language
 
@@ -25,7 +25,7 @@ paging = True
 time_range_support = False
 safesearch = False
 supported_languages_url = 'https://www.bing.com/account/general'
-language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'}
+language_aliases = {}
 
 # search-url
 base_url = 'https://www.bing.com/'
@@ -127,18 +127,27 @@ def response(resp):
 
 # get supported languages from their site
 def _fetch_supported_languages(resp):
+
     lang_tags = set()
 
-    setmkt = re.compile('setmkt=([^&]*)')
     dom = html.fromstring(resp.text)
-    lang_links = eval_xpath(dom, "//li/a[contains(@href, 'setmkt')]")
-
-    for a in lang_links:
-        href = eval_xpath(a, './@href')[0]
-        match = setmkt.search(href)
-        l_tag = match.groups()[0]
-        _lang, _nation = l_tag.split('-', 1)
-        l_tag = _lang.lower() + '-' + _nation.upper()
-        lang_tags.add(l_tag)
+    lang_links = eval_xpath(dom, '//div[@id="language-section"]//li')
+
+    for _li in lang_links:
+
+        href = eval_xpath(_li, './/@href')[0]
+        (_scheme, _netloc, _path, _params, query, _fragment) = urlparse(href)
+        query = parse_qs(query, keep_blank_values=True)
+
+        # fmt: off
+        setlang = query.get('setlang', [None, ])[0]
+        # example: 'mn-Cyrl-MN' --> ['mn', 'Cyrl-MN']
+        lang, nation = (setlang.split('-', maxsplit=1) + [None,])[:2]  # fmt: skip
+        # fmt: on
+
+        if not nation:
+            nation = lang.upper()
+        tag = lang + '-' + nation
+        lang_tags.add(tag)
 
     return list(lang_tags)
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 73b61b896..f07d07144 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -1,11 +1,13 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Bing (Images)
+# lint: pylint
+"""Bing (Images)
+
 """
 
+from json import loads
 from urllib.parse import urlencode
+
 from lxml import html
-from json import loads
 
 from searx.utils import match_language
 from searx.engines.bing import language_aliases
@@ -77,31 +79,28 @@ def response(resp):
 
     # parse results
     for result in dom.xpath('//div[@class="imgpt"]'):
-        try:
-            img_format = result.xpath('./div[contains(@class, "img_info")]/span/text()')[0]
-            # Microsoft seems to experiment with this code so don't make the path too specific,
-            # just catch the text section for the first anchor in img_info assuming this to be
-            # the originating site.
-            source = result.xpath('./div[contains(@class, "img_info")]//a/text()')[0]
-
-            m = loads(result.xpath('./a/@m')[0])
-
-            # strip 'Unicode private use area' highlighting, they render to Tux
-            # the Linux penguin and a standing diamond on my machine...
-            title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
-            results.append(
-                {
-                    'template': 'images.html',
-                    'url': m['purl'],
-                    'thumbnail_src': m['turl'],
-                    'img_src': m['murl'],
-                    'content': '',
-                    'title': title,
-                    'source': source,
-                    'img_format': img_format,
-                }
-            )
-        except:
-            continue
+        img_format = result.xpath('./div[contains(@class, "img_info")]/span/text()')[0]
+        # Microsoft seems to experiment with this code so don't make the path too specific,
+        # just catch the text section for the first anchor in img_info assuming this to be
+        # the originating site.
+        source = result.xpath('./div[contains(@class, "img_info")]//a/text()')[0]
+
+        m = loads(result.xpath('./a/@m')[0])
+
+        # strip 'Unicode private use area' highlighting, they render to Tux
+        # the Linux penguin and a standing diamond on my machine...
+        title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
+        results.append(
+            {
+                'template': 'images.html',
+                'url': m['purl'],
+                'thumbnail_src': m['turl'],
+                'img_src': m['murl'],
+                'content': '',
+                'title': title,
+                'source': source,
+                'img_format': img_format,
+            }
+        )
 
     return results