summary | refs | log | tree | commit | diff
path: root/searx/engines
diff options
context:
space:
mode:
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/__init__.py | 2
-rw-r--r-- searx/engines/bing.py | 33
-rw-r--r-- searx/engines/bing_videos.py | 38
3 files changed, 40 insertions, 33 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index fa9749e9d..a3dd7a95a 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -193,7 +193,7 @@ def set_language_attributes(engine):
if hasattr(engine, '_fetch_supported_languages'):
headers = {
'User-Agent': gen_useragent(),
- 'Accept-Language': 'ja-JP,ja;q=0.8,en-US;q=0.5,en;q=0.3', # bing needs a non-English language
+ 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
}
engine.fetch_supported_languages = (
# pylint: disable=protected-access
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 59fc22be4..1170227ad 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -6,7 +6,7 @@
"""
import re
-from urllib.parse import urlencode
+from urllib.parse import urlencode, urlparse, parse_qs
from lxml import html
from searx.utils import eval_xpath, extract_text, match_language
@@ -25,7 +25,7 @@ paging = True
time_range_support = False
safesearch = False
supported_languages_url = 'https://www.bing.com/account/general'
-language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'}
+language_aliases = {}
# search-url
base_url = 'https://www.bing.com/'
@@ -127,18 +127,27 @@ def response(resp):
# get supported languages from their site
def _fetch_supported_languages(resp):
+
lang_tags = set()
- setmkt = re.compile('setmkt=([^&]*)')
dom = html.fromstring(resp.text)
- lang_links = eval_xpath(dom, "//li/a[contains(@href, 'setmkt')]")
-
- for a in lang_links:
- href = eval_xpath(a, './@href')[0]
- match = setmkt.search(href)
- l_tag = match.groups()[0]
- _lang, _nation = l_tag.split('-', 1)
- l_tag = _lang.lower() + '-' + _nation.upper()
- lang_tags.add(l_tag)
+ lang_links = eval_xpath(dom, '//div[@id="language-section"]//li')
+
+ for _li in lang_links:
+
+ href = eval_xpath(_li, './/@href')[0]
+ (_scheme, _netloc, _path, _params, query, _fragment) = urlparse(href)
+ query = parse_qs(query, keep_blank_values=True)
+
+ # fmt: off
+ setlang = query.get('setlang', [None, ])[0]
+ # example: 'mn-Cyrl-MN' --> ['mn', 'Cyrl-MN']
+ lang, nation = (setlang.split('-', maxsplit=1) + [None,])[:2] # fmt: skip
+ # fmt: on
+
+ if not nation:
+ nation = lang.upper()
+ tag = lang + '-' + nation
+ lang_tags.add(tag)
return list(lang_tags)
diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py
index 7f8820546..184f564df 100644
--- a/searx/engines/bing_videos.py
+++ b/searx/engines/bing_videos.py
@@ -1,12 +1,14 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Bing (Videos)
+# lint: pylint
+"""Bing (Videos)
+
"""
from json import loads
-from lxml import html
from urllib.parse import urlencode
+from lxml import html
+
from searx.utils import match_language
from searx.engines.bing import language_aliases
@@ -82,22 +84,18 @@ def response(resp):
dom = html.fromstring(resp.text)
for result in dom.xpath('//div[@class="dg_u"]'):
- try:
- metadata = loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0])
- info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip()
- content = '{0} - {1}'.format(metadata['du'], info)
- thumbnail = '{0}th?id={1}'.format(base_url, metadata['thid'])
- results.append(
- {
- 'url': metadata['murl'],
- 'thumbnail': thumbnail,
- 'title': metadata.get('vt', ''),
- 'content': content,
- 'template': 'videos.html',
- }
- )
-
- except:
- continue
+ metadata = loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0])
+ info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip()
+ content = '{0} - {1}'.format(metadata['du'], info)
+ thumbnail = '{0}th?id={1}'.format(base_url, metadata['thid'])
+ results.append(
+ {
+ 'url': metadata['murl'],
+ 'thumbnail': thumbnail,
+ 'title': metadata.get('vt', ''),
+ 'content': content,
+ 'template': 'videos.html',
+ }
+ )
return results