diff options
| author | Markus Heiser <markus.heiser@darmarIT.de> | 2020-03-04 11:00:30 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-03-04 11:00:30 +0000 |
| commit | a5d3585a0c47b736b0845fcf6786f0f5b57d4215 (patch) | |
| tree | e61e7ad412e84fe28570edad8edc13e6551af4ad /searx/engines | |
| parent | 6a3ef5561ba48e287f0b9c03a0b6d2f13b703077 (diff) | |
| parent | ad7a6e6e1022923850343b2c19e47bbd9fbad050 (diff) | |
Merge pull request #1866 from return42/fix-news
bugfix: google-news and bing-news have changed the language parameter
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/bing.py | 23 | ||||
| -rw-r--r-- | searx/engines/bing_images.py | 21 | ||||
| -rw-r--r-- | searx/engines/bing_news.py | 4 | ||||
| -rw-r--r-- | searx/engines/bing_videos.py | 6 | ||||
| -rw-r--r-- | searx/engines/google_news.py | 2 |
5 files changed, 25 insertions, 31 deletions
diff --git a/searx/engines/bing.py b/searx/engines/bing.py index b193f7c60..afb776acd 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -110,13 +110,18 @@ def response(resp): # get supported languages from their site def _fetch_supported_languages(resp): - supported_languages = [] + lang_tags = set() + + setmkt = re.compile('setmkt=([^&]*)') dom = html.fromstring(resp.text) - options = eval_xpath(dom, '//div[@id="limit-languages"]//input') - for option in options: - code = eval_xpath(option, './@id')[0].replace('_', '-') - if code == 'nb': - code = 'no' - supported_languages.append(code) - - return supported_languages + lang_links = eval_xpath(dom, "//li/a[contains(@href, 'setmkt')]") + + for a in lang_links: + href = eval_xpath(a, './@href')[0] + match = setmkt.search(href) + l_tag = match.groups()[0] + _lang, _nation = l_tag.split('-', 1) + l_tag = _lang.lower() + '-' + _nation.upper() + lang_tags.add(l_tag) + + return list(lang_tags) diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 44e2c3bbc..138ed11c6 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -18,6 +18,8 @@ import re from searx.url_utils import urlencode from searx.utils import match_language +from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases + # engine dependent config categories = ['images'] paging = True @@ -103,22 +105,3 @@ def response(resp): continue return results - - -# get supported languages from their site -def _fetch_supported_languages(resp): - supported_languages = [] - dom = html.fromstring(resp.text) - - regions_xpath = '//div[@id="region-section-content"]' \ - + '//ul[@class="b_vList"]/li/a/@href' - - regions = dom.xpath(regions_xpath) - for region in regions: - code = re.search('setmkt=[^\&]+', region).group()[7:] - if code == 'nb-NO': - code = 'no-NO' - - supported_languages.append(code) - - return supported_languages diff --git a/searx/engines/bing_news.py 
b/searx/engines/bing_news.py index 669130c42..d13be777c 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -15,9 +15,10 @@ from datetime import datetime from dateutil import parser from lxml import etree from searx.utils import list_get, match_language -from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases from searx.url_utils import urlencode, urlparse, parse_qsl +from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases + # engine dependent config categories = ['news'] paging = True @@ -58,6 +59,7 @@ def _get_url(query, language, offset, time_range): offset=offset, interval=time_range_dict[time_range]) else: + # e.g. setmkt=de-de&setlang=de search_path = search_string.format( query=urlencode({'q': query, 'setmkt': language}), offset=offset) diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index f1e636819..f048f0d8e 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -12,10 +12,10 @@ from json import loads from lxml import html -from searx.engines.bing_images import _fetch_supported_languages, supported_languages_url from searx.url_utils import urlencode from searx.utils import match_language +from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases categories = ['videos'] paging = True @@ -67,6 +67,10 @@ def request(query, params): if params['time_range'] in time_range_dict: params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']]) + # bing videos did not like "older" versions < 70.0.1 when selectin other + # languages then 'en' .. very strange ?!?! 
+ params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0.1) Gecko/20100101 Firefox/73.0.1' + return params diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 9c837b45b..c9cc75435 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -54,7 +54,7 @@ def request(query, params): if params['language'] != 'all': language = match_language(params['language'], supported_languages, language_aliases).split('-')[0] if language: - params['url'] += '&lr=lang_' + language + params['url'] += '&hl=' + language return params |