summary | refs | log | tree | commit | diff
path: root/searx/engines
diff options
context:
space:
mode:
author Adam Tauber <asciimoo@gmail.com> 2016-12-28 20:09:57 +0100
committer GitHub <noreply@github.com> 2016-12-28 20:09:57 +0100
commit 9743bde25ef2ce6b765b8192aafcdc0a15739b17 (patch)
tree 00fd6b0b14773c0e20425d4a6478d67f244d64ed /searx/engines
parent ea034fafa994227ea89662710901e73cb901e28c (diff)
parent 8bff42f049dcac77559beaf2932a47921feb1d49 (diff)
Merge pull request #748 from a01200356/languages
[mod] Allow users to search in most engine supported languages
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/__init__.py 14
-rw-r--r-- searx/engines/archlinux.py 5
-rw-r--r-- searx/engines/bing.py 15
-rw-r--r-- searx/engines/bing_images.py 3
-rw-r--r-- searx/engines/bing_news.py 3
-rw-r--r-- searx/engines/dailymotion.py 22
-rw-r--r-- searx/engines/duckduckgo.py 38
-rw-r--r-- searx/engines/duckduckgo_definitions.py 3
-rw-r--r-- searx/engines/gigablast.py 20
-rw-r--r-- searx/engines/google.py 20
-rw-r--r-- searx/engines/google_news.py 4
-rw-r--r-- searx/engines/mediawiki.py 2
-rw-r--r-- searx/engines/photon.py 4
-rw-r--r-- searx/engines/qwant.py 2
-rw-r--r-- searx/engines/startpage.py 2
-rw-r--r-- searx/engines/subtitleseeker.py 14
-rw-r--r-- searx/engines/swisscows.py 21
-rw-r--r-- searx/engines/twitter.py 2
-rw-r--r-- searx/engines/wikidata.py 6
-rw-r--r-- searx/engines/wikipedia.py 29
-rw-r--r-- searx/engines/yacy.py 2
-rw-r--r-- searx/engines/yahoo.py 22
-rw-r--r-- searx/engines/yahoo_news.py 2
-rw-r--r-- searx/engines/yandex.py 6
-rw-r--r-- searx/engines/youtube_api.py 2
25 files changed, 227 insertions, 36 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 3a1db2760..5275351f1 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -20,6 +20,8 @@ from os.path import realpath, dirname
import sys
from flask_babel import gettext
from operator import itemgetter
+from json import loads
+from requests import get
from searx import settings
from searx import logger
from searx.utils import load_module
@@ -33,10 +35,13 @@ engines = {}
categories = {'general': []}
+languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
+
engine_shortcuts = {}
engine_default_args = {'paging': False,
'categories': ['general'],
'language_support': True,
+ 'supported_languages': [],
'safesearch': False,
'timeout': settings['outgoing']['request_timeout'],
'shortcut': '-',
@@ -85,6 +90,15 @@ def load_engine(engine_data):
.format(engine.name, engine_attr))
sys.exit(1)
+ # assign supported languages from json file
+ if engine_data['name'] in languages:
+ setattr(engine, 'supported_languages', languages[engine_data['name']])
+
+ # assign language fetching method if auxiliary method exists
+ if hasattr(engine, '_fetch_supported_languages'):
+ setattr(engine, 'fetch_supported_languages',
+ lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
+
engine.stats = {
'result_count': 0,
'search_count': 0,
diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
index 5ba512766..dca825790 100644
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@@ -29,8 +29,8 @@ xpath_link = './/div[@class="mw-search-result-heading"]/a'
# cut 'en' from 'en_US', 'de' from 'de_CH', and so on
def locale_to_lang_code(locale):
- if locale.find('_') >= 0:
- locale = locale.split('_')[0]
+ if locale.find('-') >= 0:
+ locale = locale.split('-')[0]
return locale
@@ -95,6 +95,7 @@ main_langs = {
'uk': 'Українська',
'zh': '简体中文'
}
+supported_languages = dict(lang_urls, **main_langs)
# do search-request
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 58db61251..b2ad7b6cf 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -21,6 +21,7 @@ from searx.engines.xpath import extract_text
categories = ['general']
paging = True
language_support = True
+supported_languages_url = 'https://www.bing.com/account/general'
# search-url
base_url = 'https://www.bing.com/'
@@ -32,7 +33,7 @@ def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
if params['language'] != 'all':
- query = u'language:{} {}'.format(params['language'].split('_')[0].upper(),
+ query = u'language:{} {}'.format(params['language'].split('-')[0].upper(),
query.decode('utf-8')).encode('utf-8')
search_path = search_string.format(
@@ -81,3 +82,15 @@ def response(resp):
# return results
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = []
+ dom = html.fromstring(resp.text)
+ options = dom.xpath('//div[@id="limit-languages"]//input')
+ for option in options:
+ code = option.xpath('./@id')[0].replace('_', '-')
+ supported_languages.append(code)
+
+ return supported_languages
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 4dd362cb3..97f6dca37 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -19,6 +19,7 @@ from urllib import urlencode
from lxml import html
from json import loads
import re
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['images']
@@ -53,7 +54,7 @@ def request(query, params):
if params['language'] == 'all':
language = 'en-US'
else:
- language = params['language'].replace('_', '-')
+ language = params['language']
search_path = search_string.format(
query=urlencode({'q': query}),
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 4e7c33129..765bcd38e 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -17,6 +17,7 @@ from datetime import datetime
from dateutil import parser
from lxml import etree
from searx.utils import list_get
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['news']
@@ -74,7 +75,7 @@ def request(query, params):
if params['language'] == 'all':
language = 'en-US'
else:
- language = params['language'].replace('_', '-')
+ language = params['language']
params['url'] = _get_url(query, language, offset, params['time_range'])
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 317f34f59..8c69aafe0 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -15,6 +15,7 @@
from urllib import urlencode
from json import loads
from datetime import datetime
+from requests import get
# engine dependent config
categories = ['videos']
@@ -27,6 +28,8 @@ search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,descr
embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
+supported_languages_url = 'https://api.dailymotion.com/languages'
+
# do search-request
def request(query, params):
@@ -74,3 +77,22 @@ def response(resp):
# return results
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = {}
+
+ response_json = loads(resp.text)
+
+ for language in response_json['list']:
+ supported_languages[language['code']] = {}
+
+ name = language['native_name']
+ if name:
+ supported_languages[language['code']]['name'] = name
+ english_name = language['name']
+ if english_name:
+ supported_languages[language['code']]['english_name'] = english_name
+
+ return supported_languages
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 9959a52e6..df230222d 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -15,13 +15,15 @@
from urllib import urlencode
from lxml.html import fromstring
+from requests import get
+from json import loads
from searx.engines.xpath import extract_text
-from searx.languages import language_codes
# engine dependent config
categories = ['general']
paging = True
language_support = True
+supported_languages_url = 'https://duckduckgo.com/d2030.js'
time_range_support = True
# search-url
@@ -46,19 +48,31 @@ def request(query, params):
offset = (params['pageno'] - 1) * 30
+ # custom fixes for languages
if params['language'] == 'all':
locale = None
+ elif params['language'][:2] == 'ja':
+ locale = 'jp-jp'
+ elif params['language'][:2] == 'sl':
+ locale = 'sl-sl'
+ elif params['language'] == 'zh-TW':
+ locale = 'tw-tzh'
+ elif params['language'] == 'zh-HK':
+ locale = 'hk-tzh'
+ elif params['language'][-2:] == 'SA':
+ locale = 'xa-' + params['language'].split('-')[0]
+ elif params['language'][-2:] == 'GB':
+ locale = 'uk-' + params['language'].split('-')[0]
else:
- locale = params['language'].split('_')
+ locale = params['language'].split('-')
if len(locale) == 2:
# country code goes first
locale = locale[1].lower() + '-' + locale[0].lower()
else:
# tries to get a country code from language
locale = locale[0].lower()
- lang_codes = [x[0] for x in language_codes]
- for lc in lang_codes:
- lc = lc.split('_')
+ for lc in supported_languages:
+ lc = lc.split('-')
if locale == lc[0]:
locale = lc[1].lower() + '-' + lc[0].lower()
break
@@ -102,3 +116,17 @@ def response(resp):
# return results
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+
+ # response is a js file with regions as an embedded object
+ response_page = resp.text
+ response_page = response_page[response_page.find('regions:{') + 8:]
+ response_page = response_page[:response_page.find('}') + 1]
+
+ regions_json = loads(response_page)
+ supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
+
+ return supported_languages
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index 208ccca28..dd3f12e1e 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -4,6 +4,7 @@ from re import compile, sub
from lxml import html
from searx.utils import html_to_text
from searx.engines.xpath import extract_text
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'
@@ -23,7 +24,7 @@ def result_to_text(url, text, htmlResult):
def request(query, params):
params['url'] = url.format(query=urlencode({'q': query}))
- params['headers']['Accept-Language'] = params['language']
+ params['headers']['Accept-Language'] = params['language'].split('-')[0]
return params
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index 5430eb3ba..827b9cd03 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -14,6 +14,7 @@ from json import loads
from random import randint
from time import time
from urllib import urlencode
+from lxml.html import fromstring
# engine dependent config
categories = ['general']
@@ -40,6 +41,8 @@ url_xpath = './/url'
title_xpath = './/title'
content_xpath = './/sum'
+supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
+
# do search-request
def request(query, params):
@@ -48,7 +51,9 @@ def request(query, params):
if params['language'] == 'all':
language = 'xx'
else:
- language = params['language'][0:2]
+ language = params['language'].replace('-', '_').lower()
+ if language.split('-')[0] != 'zh':
+ language = language.split('-')[0]
if params['safesearch'] >= 1:
safesearch = 1
@@ -82,3 +87,16 @@ def response(resp):
# return results
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = []
+ dom = fromstring(resp.text)
+ links = dom.xpath('//span[@id="menu2"]/a')
+ for link in links:
+ code = link.xpath('./@href')[0][-2:]
+ if code != 'xx' and code not in supported_languages:
+ supported_languages.append(code)
+
+ return supported_languages
diff --git a/searx/engines/google.py b/searx/engines/google.py
index a02b6940e..803cd307e 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -103,6 +103,7 @@ map_hostname_start = 'maps.google.'
maps_path = '/maps'
redirect_path = '/url'
images_path = '/images'
+supported_languages_url = 'https://www.google.com/preferences?#languages'
# specific xpath variables
results_xpath = '//div[@class="g"]'
@@ -167,8 +168,12 @@ def request(query, params):
language = 'en'
country = 'US'
url_lang = ''
+ elif params['language'][:2] == 'jv':
+ language = 'jw'
+ country = 'ID'
+ url_lang = 'lang_jw'
else:
- language_array = params['language'].lower().split('_')
+ language_array = params['language'].lower().split('-')
if len(language_array) == 2:
country = language_array[1]
else:
@@ -355,3 +360,16 @@ def attributes_to_html(attributes):
retval = retval + '<tr><th>' + a.get('label') + '</th><td>' + value + '</td></tr>'
retval = retval + '</table>'
return retval
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = {}
+ dom = html.fromstring(resp.text)
+ options = dom.xpath('//table//td/font/label/span')
+ for option in options:
+ code = option.xpath('./@id')[0][1:]
+ name = option.text.title()
+ supported_languages[code] = {"name": name}
+
+ return supported_languages
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 11357f3e6..49c6a5d50 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -12,6 +12,8 @@
from lxml import html
from urllib import urlencode
+from json import loads
+from searx.engines.google import _fetch_supported_languages, supported_languages_url
# search-url
categories = ['news']
@@ -50,7 +52,7 @@ def request(query, params):
search_options=urlencode(search_options))
if params['language'] != 'all':
- language_array = params['language'].lower().split('_')
+ language_array = params['language'].lower().split('-')
params['url'] += '&lr=lang_' + language_array[0]
return params
diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py
index 26d3720d9..93d98d3aa 100644
--- a/searx/engines/mediawiki.py
+++ b/searx/engines/mediawiki.py
@@ -46,7 +46,7 @@ def request(query, params):
if params['language'] == 'all':
language = 'en'
else:
- language = params['language'].split('_')[0]
+ language = params['language'].split('-')[0]
# format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
if any(x[1] == 'language' for x in format_strings):
diff --git a/searx/engines/photon.py b/searx/engines/photon.py
index 2197005e5..a029bbfef 100644
--- a/searx/engines/photon.py
+++ b/searx/engines/photon.py
@@ -26,7 +26,7 @@ search_string = 'api/?{query}&limit={limit}'
result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
# list of supported languages
-allowed_languages = ['de', 'en', 'fr', 'it']
+supported_languages = ['de', 'en', 'fr', 'it']
# do search-request
@@ -37,7 +37,7 @@ def request(query, params):
if params['language'] != 'all':
language = params['language'].split('_')[0]
- if language in allowed_languages:
+ if language in supported_languages:
params['url'] = params['url'] + "&lang=" + language
# using searx User-Agent
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index d8b084292..67803fa94 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -46,7 +46,7 @@ def request(query, params):
# add language tag if specified
if params['language'] != 'all':
- params['url'] += '&locale=' + params['language'].lower()
+ params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
return params
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 6f6eae1cf..54aafdee5 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -47,7 +47,7 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
- params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
+ params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
return params
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index daba68be7..77b010c3f 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -22,7 +22,7 @@ language = ""
# search-url
url = 'http://www.subtitleseeker.com/'
-search_url = url + 'search/TITLES/{query}&p={pageno}'
+search_url = url + 'search/TITLES/{query}?p={pageno}'
# specific xpath variables
results_xpath = '//div[@class="boxRows"]'
@@ -43,10 +43,16 @@ def response(resp):
search_lang = ""
- if resp.search_params['language'] != 'all':
- search_lang = [lc[1]
+ # dirty fix for languages named differenly in their site
+ if resp.search_params['language'][:2] == 'fa':
+ search_lang = 'Farsi'
+ elif resp.search_params['language'] == 'pt-BR':
+ search_lang = 'Brazilian'
+ elif resp.search_params['language'] != 'all':
+ search_lang = [lc[3]
for lc in language_codes
- if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
+ if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
+ search_lang = search_lang[0].split(' (')[0]
# parse results
for result in dom.xpath(results_xpath):
diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py
index 72184e428..d8a454039 100644
--- a/searx/engines/swisscows.py
+++ b/searx/engines/swisscows.py
@@ -13,6 +13,7 @@
from json import loads
from urllib import urlencode, unquote
import re
+from lxml.html import fromstring
# engine dependent config
categories = ['general', 'images']
@@ -23,6 +24,8 @@ language_support = True
base_url = 'https://swisscows.ch/'
search_string = '?{query}&page={page}'
+supported_languages_url = base_url
+
# regex
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
regex_json_remove_start = re.compile(r'^initialData:\s*')
@@ -35,9 +38,11 @@ def request(query, params):
if params['language'] == 'all':
ui_language = 'browser'
region = 'browser'
+ elif params['language'].split('-')[0] == 'no':
+ region = 'nb-NO'
else:
- region = params['language'].replace('_', '-')
- ui_language = params['language'].split('_')[0]
+ region = params['language']
+ ui_language = params['language'].split('-')[0]
search_path = search_string.format(
query=urlencode({'query': query,
@@ -106,3 +111,15 @@ def response(resp):
# return results
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = []
+ dom = fromstring(resp.text)
+ options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
+ for option in options:
+ code = option.xpath('./@data-val')[0]
+ supported_languages.append(code)
+
+ return supported_languages
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 36efac186..6cca05f70 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -40,7 +40,7 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
- params['cookies']['lang'] = params['language'].split('_')[0]
+ params['cookies']['lang'] = params['language'].split('-')[0]
else:
params['cookies']['lang'] = 'en'
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index 91040e218..3f849bc7d 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -14,6 +14,8 @@
from searx import logger
from searx.poolrequests import get
from searx.engines.xpath import extract_text
+from searx.utils import format_date_by_locale
+from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from json import loads
from lxml.html import fromstring
@@ -55,7 +57,7 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
def request(query, params):
- language = params['language'].split('_')[0]
+ language = params['language'].split('-')[0]
if language == 'all':
language = 'en'
@@ -70,7 +72,7 @@ def response(resp):
html = fromstring(resp.content)
wikidata_ids = html.xpath(wikidata_ids_xpath)
- language = resp.search_params['language'].split('_')[0]
+ language = resp.search_params['language'].split('-')[0]
if language == 'all':
language = 'en'
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index 70191d22b..78acd349d 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -12,6 +12,8 @@
from json import loads
from urllib import urlencode, quote
+from lxml.html import fromstring
+
# search-url
base_url = 'https://{language}.wikipedia.org/'
@@ -24,14 +26,16 @@ search_postfix = 'w/api.php?'\
'&explaintext'\
'&pithumbsize=300'\
'&redirects'
+supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
# set language in base_url
def url_lang(lang):
- if lang == 'all':
+ lang = lang.split('-')[0]
+ if lang == 'all' or lang not in supported_languages:
language = 'en'
else:
- language = lang.split('_')[0]
+ language = lang
return base_url.format(language=language)
@@ -111,3 +115,24 @@ def response(resp):
'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]})
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = {}
+ dom = fromstring(resp.text)
+ tables = dom.xpath('//table[contains(@class,"sortable")]')
+ for table in tables:
+ # exclude header row
+ trs = table.xpath('.//tr')[1:]
+ for tr in trs:
+ td = tr.xpath('./td')
+ code = td[3].xpath('./a')[0].text
+ name = td[2].xpath('./a')[0].text
+ english_name = td[1].xpath('./a')[0].text
+ articles = int(td[4].xpath('./a/b')[0].text.replace(',', ''))
+ # exclude languages with too few articles
+ if articles >= 100000:
+ supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles}
+
+ return supported_languages
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
index 92cf881c0..7b1b6b35d 100644
--- a/searx/engines/yacy.py
+++ b/searx/engines/yacy.py
@@ -53,7 +53,7 @@ def request(query, params):
# add language tag if specified
if params['language'] != 'all':
- params['url'] += '&lr=lang_' + params['language'].split('_')[0]
+ params['url'] += '&lr=lang_' + params['language'].split('-')[0]
return params
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index 2bb34b83d..5c62c2ed8 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -27,6 +27,8 @@ base_url = 'https://search.yahoo.com/'
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
+supported_languages_url = 'https://search.yahoo.com/web/advanced'
+
# specific xpath variables
results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
url_xpath = './/h3/a/@href'
@@ -72,7 +74,13 @@ def _get_url(query, offset, language, time_range):
def _get_language(params):
if params['language'] == 'all':
return 'en'
- return params['language'].split('_')[0]
+ elif params['language'][:2] == 'zh':
+ if params['language'] == 'zh' or params['language'] == 'zh-CH':
+ return 'szh'
+ else:
+ return 'tzh'
+ else:
+ return params['language'].split('-')[0]
# do search-request
@@ -132,3 +140,15 @@ def response(resp):
# return results
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = []
+ dom = html.fromstring(resp.text)
+ options = dom.xpath('//div[@id="yschlang"]/span/label/input')
+ for option in options:
+ code = option.xpath('./@value')[0][5:].replace('_', '-')
+ supported_languages.append(code)
+
+ return supported_languages
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index e91c1d34e..3e4cf02eb 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -12,7 +12,7 @@
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text, extract_url
-from searx.engines.yahoo import parse_url
+from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
from datetime import datetime, timedelta
import re
from dateutil import parser
diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py
index b83a747f9..65aee28b8 100644
--- a/searx/engines/yandex.py
+++ b/searx/engines/yandex.py
@@ -22,7 +22,9 @@ language_support = True # TODO
default_tld = 'com'
language_map = {'ru': 'ru',
- 'ua': 'uk',
+ 'ua': 'ua',
+ 'be': 'by',
+ 'kk': 'kz',
'tr': 'com.tr'}
# search-url
@@ -36,7 +38,7 @@ content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m orga
def request(query, params):
- lang = params['language'].split('_')[0]
+ lang = params['language'].split('-')[0]
host = base_url.format(tld=language_map.get(lang) or default_tld)
params['url'] = host + search_url.format(page=params['pageno'] - 1,
query=urlencode({'text': query}))
diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py
index 8fd939a25..1dfca5166 100644
--- a/searx/engines/youtube_api.py
+++ b/searx/engines/youtube_api.py
@@ -36,7 +36,7 @@ def request(query, params):
# add language tag if specified
if params['language'] != 'all':
- params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0]
+ params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
return params