diff options
Diffstat (limited to 'searx/engines/bing_images.py')
| -rw-r--r-- | searx/engines/bing_images.py | 56 |
1 files changed, 47 insertions, 9 deletions
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 6300c94e4..15679056c 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -18,7 +18,6 @@ from lxml import html from json import loads import re -from searx.engines.bing import _fetch_supported_languages, supported_languages_url from searx.url_utils import urlencode # engine dependent config @@ -26,6 +25,8 @@ categories = ['images'] paging = True safesearch = True time_range_support = True +language_support = True +supported_languages_url = 'https://www.bing.com/account/general' # search-url base_url = 'https://www.bing.com/' @@ -45,23 +46,41 @@ safesearch_types = {2: 'STRICT', _quote_keys_regex = re.compile('({|,)([a-z][a-z0-9]*):(")', re.I | re.U) +# get supported region code +def get_region_code(lang, lang_list=None): + region = None + if lang in (lang_list or supported_languages): + region = lang + elif lang.startswith('no'): + region = 'nb-NO' + else: + # try to get a supported country code with language + lang = lang.split('-')[0] + for lc in (lang_list or supported_languages): + if lang == lc.split('-')[0]: + region = lc + break + if region: + return region.lower() + else: + return 'en-us' + + # do search-request def request(query, params): offset = (params['pageno'] - 1) * 10 + 1 - # required for cookie - if params['language'] == 'all': - language = 'en-US' - else: - language = params['language'] - search_path = search_string.format( query=urlencode({'q': query}), offset=offset) + language = get_region_code(params['language']) + params['cookies']['SRCHHPGUSR'] = \ - 'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0] +\ - '&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + + params['cookies']['_EDGE_S'] = 'mkt=' + language +\ + '&ui=' + language + '&F=1' params['url'] = base_url + search_path if params['time_range'] in time_range_dict: @@ -106,3 +125,22 @@ def response(resp): # return results return results + + +# get supported languages from their site +def _fetch_supported_languages(resp): + supported_languages = [] + dom = html.fromstring(resp.text) + + regions_xpath = '//div[@id="region-section-content"]' \ + + '//ul[@class="b_vList"]/li/a/@href' + + regions = dom.xpath(regions_xpath) + for region in regions: + code = re.search('setmkt=[^\&]+', region).group()[7:] + if code == 'nb-NO': + code = 'no-NO' + + supported_languages.append(code) + + return supported_languages |