diff options
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/__init__.py | 2 |
| -rw-r--r-- | searx/engines/bing.py | 33 |
| -rw-r--r-- | searx/engines/bing_videos.py | 38 |
3 files changed, 40 insertions, 33 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index fa9749e9d..a3dd7a95a 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -193,7 +193,7 @@ def set_language_attributes(engine): if hasattr(engine, '_fetch_supported_languages'): headers = { 'User-Agent': gen_useragent(), - 'Accept-Language': 'ja-JP,ja;q=0.8,en-US;q=0.5,en;q=0.3', # bing needs a non-English language + 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language } engine.fetch_supported_languages = ( # pylint: disable=protected-access diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 59fc22be4..1170227ad 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -6,7 +6,7 @@ """ import re -from urllib.parse import urlencode +from urllib.parse import urlencode, urlparse, parse_qs from lxml import html from searx.utils import eval_xpath, extract_text, match_language @@ -25,7 +25,7 @@ paging = True time_range_support = False safesearch = False supported_languages_url = 'https://www.bing.com/account/general' -language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'} +language_aliases = {} # search-url base_url = 'https://www.bing.com/' @@ -127,18 +127,27 @@ def response(resp): # get supported languages from their site def _fetch_supported_languages(resp): + lang_tags = set() - setmkt = re.compile('setmkt=([^&]*)') dom = html.fromstring(resp.text) - lang_links = eval_xpath(dom, "//li/a[contains(@href, 'setmkt')]") - - for a in lang_links: - href = eval_xpath(a, './@href')[0] - match = setmkt.search(href) - l_tag = match.groups()[0] - _lang, _nation = l_tag.split('-', 1) - l_tag = _lang.lower() + '-' + _nation.upper() - lang_tags.add(l_tag) + lang_links = eval_xpath(dom, '//div[@id="language-section"]//li') + + for _li in lang_links: + + href = eval_xpath(_li, './/@href')[0] + (_scheme, _netloc, _path, _params, query, _fragment) = urlparse(href) + query = parse_qs(query, keep_blank_values=True) + + # fmt: 
off + setlang = query.get('setlang', [None, ])[0] + # example: 'mn-Cyrl-MN' --> '['mn', 'Cyrl-MN'] + lang, nation = (setlang.split('-', maxsplit=1) + [None,])[:2] # fmt: skip + # fmt: on + + if not nation: + nation = lang.upper() + tag = lang + '-' + nation + lang_tags.add(tag) return list(lang_tags) diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index 7f8820546..184f564df 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -1,12 +1,14 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - Bing (Videos) +# lint: pylint +"""Bing (Videos) + """ from json import loads -from lxml import html from urllib.parse import urlencode +from lxml import html + from searx.utils import match_language from searx.engines.bing import language_aliases @@ -82,22 +84,18 @@ def response(resp): dom = html.fromstring(resp.text) for result in dom.xpath('//div[@class="dg_u"]'): - try: - metadata = loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0]) - info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip() - content = '{0} - {1}'.format(metadata['du'], info) - thumbnail = '{0}th?id={1}'.format(base_url, metadata['thid']) - results.append( - { - 'url': metadata['murl'], - 'thumbnail': thumbnail, - 'title': metadata.get('vt', ''), - 'content': content, - 'template': 'videos.html', - } - ) - - except: - continue + metadata = loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0]) + info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip() + content = '{0} - {1}'.format(metadata['du'], info) + thumbnail = '{0}th?id={1}'.format(base_url, metadata['thid']) + results.append( + { + 'url': metadata['murl'], + 'thumbnail': thumbnail, + 'title': metadata.get('vt', ''), + 'content': content, + 'template': 'videos.html', + } + ) return results |