diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2017-10-13 21:36:21 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-10-13 21:36:21 +0200 |
| commit | c8a66a090a7bde3c8003b9dc6cb1d13359b5d667 (patch) | |
| tree | 314c503175756afe642e17302dcbd8b7be18de7b /searx/engines | |
| parent | 1adc8d6e2604be1a159c936b0fd77efdd09c555e (diff) | |
| parent | db27c6fa5f2845c0ff533d324a51400f4a625cea (diff) | |
Merge pull request #1061 from a01200356/bing
[fix] Language support for Bing Images and Videos
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/bing_images.py | 56 | ||||
| -rw-r--r-- | searx/engines/bing_videos.py | 5 | ||||
| -rw-r--r-- | searx/engines/duckduckgo.py | 2 | ||||
| -rw-r--r-- | searx/engines/swisscows.py | 2 |
4 files changed, 53 insertions, 12 deletions
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 6300c94e4..15679056c 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -18,7 +18,6 @@ from lxml import html from json import loads import re -from searx.engines.bing import _fetch_supported_languages, supported_languages_url from searx.url_utils import urlencode # engine dependent config @@ -26,6 +25,8 @@ categories = ['images'] paging = True safesearch = True time_range_support = True +language_support = True +supported_languages_url = 'https://www.bing.com/account/general' # search-url base_url = 'https://www.bing.com/' @@ -45,23 +46,41 @@ safesearch_types = {2: 'STRICT', _quote_keys_regex = re.compile('({|,)([a-z][a-z0-9]*):(")', re.I | re.U) +# get supported region code +def get_region_code(lang, lang_list=None): + region = None + if lang in (lang_list or supported_languages): + region = lang + elif lang.startswith('no'): + region = 'nb-NO' + else: + # try to get a supported country code with language + lang = lang.split('-')[0] + for lc in (lang_list or supported_languages): + if lang == lc.split('-')[0]: + region = lc + break + if region: + return region.lower() + else: + return 'en-us' + + # do search-request def request(query, params): offset = (params['pageno'] - 1) * 10 + 1 - # required for cookie - if params['language'] == 'all': - language = 'en-US' - else: - language = params['language'] - search_path = search_string.format( query=urlencode({'q': query}), offset=offset) + language = get_region_code(params['language']) + params['cookies']['SRCHHPGUSR'] = \ - 'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0] +\ - '&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + + params['cookies']['_EDGE_S'] = 'mkt=' + language +\ + '&ui=' + language + '&F=1' params['url'] = base_url + search_path if params['time_range'] in time_range_dict: @@ -106,3 +125,22 @@ def response(resp): # return results return results + + +# get supported languages from their site +def _fetch_supported_languages(resp): + supported_languages = [] + dom = html.fromstring(resp.text) + + regions_xpath = '//div[@id="region-section-content"]' \ + + '//ul[@class="b_vList"]/li/a/@href' + + regions = dom.xpath(regions_xpath) + for region in regions: + code = re.search('setmkt=[^\&]+', region).group()[7:] + if code == 'nb-NO': + code = 'no-NO' + + supported_languages.append(code) + + return supported_languages diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index 918064c9b..bd91bce37 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -12,6 +12,7 @@ from json import loads from lxml import html +from searx.engines.bing_images import _fetch_supported_languages, supported_languages_url, get_region_code from searx.engines.xpath import extract_text from searx.url_utils import urlencode @@ -21,6 +22,7 @@ paging = True safesearch = True time_range_support = True number_of_results = 10 +language_support = True search_url = 'https://www.bing.com/videos/asyncv2?{query}&async=content&'\ 'first={offset}&count={number_of_results}&CW=1366&CH=25&FORM=R5VR5' @@ -45,7 +47,8 @@ def request(query, params): 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') # language cookie - params['cookies']['_EDGE_S'] = 'mkt=' + params['language'].lower() + '&F=1' + region = get_region_code(params['language'], lang_list=supported_languages) + params['cookies']['_EDGE_S'] = 'mkt=' + region + '&F=1' # query and paging params['url'] = search_url.format(query=urlencode({'q': query}), diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 407d731f0..921e29f8b 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -134,4 +134,4 @@ def _fetch_supported_languages(resp): regions_json = loads(response_page) supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys()) - return supported_languages + return list(supported_languages) diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py index e9c13ca24..00346a7d0 100644 --- a/searx/engines/swisscows.py +++ b/searx/engines/swisscows.py @@ -118,7 +118,7 @@ def _fetch_supported_languages(resp): dom = fromstring(resp.text) options = dom.xpath('//div[@id="regions-popup"]//ul/li/a') for option in options: - code = option.xpath('./@data-val')[0] + code = option.xpath('./@data-search-language')[0] if code.startswith('nb-'): code = code.replace('nb', 'no', 1) supported_languages.append(code) |