summary | refs | log | tree | commit | diff
path: root/searx
diff options
context:
space:
mode:
author: Markus Heiser <markus.heiser@darmarit.de> 2020-03-01 08:01:36 +0100
committer: Markus Heiser <markus.heiser@darmarit.de> 2020-03-01 08:01:36 +0100
commit: e0c99d9dcbe4c2eee0a7c6f4a7326a8376467640 (patch)
tree: fb5397b6e1c12469533b0cf40d0ab3e657b7e7a8 /searx
parent: 40843fe95ada52bc8791b4706392bda5afecf32e (diff)
bugfix: fetch_supported_languages bing, -news, -videos, -images
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx')
-rw-r--r-- searx/engines/bing.py 23
-rw-r--r-- searx/engines/bing_images.py 21
-rw-r--r-- searx/engines/bing_news.py 3
-rw-r--r-- searx/engines/bing_videos.py 2
4 files changed, 19 insertions, 30 deletions
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index b193f7c60..c8fc4fa2e 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -110,13 +110,18 @@ def response(resp):
# get supported languages from their site
def _fetch_supported_languages(resp):
- supported_languages = []
+ lang_tags = set()
+
+ setmkt = re.compile('setmkt=([^&]*)')
dom = html.fromstring(resp.text)
- options = eval_xpath(dom, '//div[@id="limit-languages"]//input')
- for option in options:
- code = eval_xpath(option, './@id')[0].replace('_', '-')
- if code == 'nb':
- code = 'no'
- supported_languages.append(code)
-
- return supported_languages
+ lang_links = eval_xpath(dom, "//li/a[contains(@href, 'setmkt')]")
+
+ for a in lang_links:
+ href = eval_xpath(a, './@href')[0]
+ match = setmkt.search(href)
+ l_tag = match.groups()[0]
+ _lang, _nation = l_tag.split('-',1)
+ l_tag = _lang.lower() + '-' + _nation.upper()
+ lang_tags.add(l_tag)
+
+ return list(lang_tags)
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 44e2c3bbc..138ed11c6 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -18,6 +18,8 @@ import re
from searx.url_utils import urlencode
from searx.utils import match_language
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
+
# engine dependent config
categories = ['images']
paging = True
@@ -103,22 +105,3 @@ def response(resp):
continue
return results
-
-
-# get supported languages from their site
-def _fetch_supported_languages(resp):
- supported_languages = []
- dom = html.fromstring(resp.text)
-
- regions_xpath = '//div[@id="region-section-content"]' \
- + '//ul[@class="b_vList"]/li/a/@href'
-
- regions = dom.xpath(regions_xpath)
- for region in regions:
- code = re.search('setmkt=[^\&]+', region).group()[7:]
- if code == 'nb-NO':
- code = 'no-NO'
-
- supported_languages.append(code)
-
- return supported_languages
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 827555bd7..14fd7b99a 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -15,9 +15,10 @@ from datetime import datetime
from dateutil import parser
from lxml import etree
from searx.utils import list_get, match_language
-from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
from searx.url_utils import urlencode, urlparse, parse_qsl
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
+
# engine dependent config
categories = ['news']
paging = True
diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py
index f1e636819..6e7b6d3aa 100644
--- a/searx/engines/bing_videos.py
+++ b/searx/engines/bing_videos.py
@@ -12,10 +12,10 @@
from json import loads
from lxml import html
-from searx.engines.bing_images import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
from searx.utils import match_language
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
categories = ['videos']
paging = True