| field | value | date |
|---|---|---|
| author | Adam Tauber <asciimoo@gmail.com> | 2016-12-28 20:09:57 +0100 |
| committer | GitHub <noreply@github.com> | 2016-12-28 20:09:57 +0100 |
| commit | 9743bde25ef2ce6b765b8192aafcdc0a15739b17 | |
| tree | 00fd6b0b14773c0e20425d4a6478d67f244d64ed /searx/engines | |
| parent | ea034fafa994227ea89662710901e73cb901e28c | |
| parent | 8bff42f049dcac77559beaf2932a47921feb1d49 | |
Merge pull request #748 from a01200356/languages
[mod] Allow users to search in most engine supported languages
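The mechanism this merge introduces is two-sided: an engine module may declare a `supported_languages` list (preloaded at startup from `searx/data/engines_languages.json`) and, where the list can be scraped from the provider, a `supported_languages_url` together with a `_fetch_supported_languages(resp)` parser. The loader in `searx/engines/__init__.py` then binds a parameterless `fetch_supported_languages()` onto the engine that performs the HTTP GET and hands the response to the parser. Below is a minimal sketch of an engine module following that contract; the `example.com` URL and the XPath are hypothetical, only the attribute and function names come from this diff.

```python
# hypothetical engine module illustrating the contract added by this merge
from lxml import html

# page the loader fetches when fetch_supported_languages() is called
supported_languages_url = 'https://search.example.com/preferences'

# overwritten at startup from searx/data/engines_languages.json
supported_languages = []


def _fetch_supported_languages(resp):
    # parse locale codes such as 'de' or 'pt-BR' out of the preferences page
    dom = html.fromstring(resp.text)
    return [option.xpath('./@value')[0]
            for option in dom.xpath('//select[@id="lang"]/option')]
```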
Diffstat (limited to 'searx/engines')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | searx/engines/__init__.py | 14 |
| -rw-r--r-- | searx/engines/archlinux.py | 5 |
| -rw-r--r-- | searx/engines/bing.py | 15 |
| -rw-r--r-- | searx/engines/bing_images.py | 3 |
| -rw-r--r-- | searx/engines/bing_news.py | 3 |
| -rw-r--r-- | searx/engines/dailymotion.py | 22 |
| -rw-r--r-- | searx/engines/duckduckgo.py | 38 |
| -rw-r--r-- | searx/engines/duckduckgo_definitions.py | 3 |
| -rw-r--r-- | searx/engines/gigablast.py | 20 |
| -rw-r--r-- | searx/engines/google.py | 20 |
| -rw-r--r-- | searx/engines/google_news.py | 4 |
| -rw-r--r-- | searx/engines/mediawiki.py | 2 |
| -rw-r--r-- | searx/engines/photon.py | 4 |
| -rw-r--r-- | searx/engines/qwant.py | 2 |
| -rw-r--r-- | searx/engines/startpage.py | 2 |
| -rw-r--r-- | searx/engines/subtitleseeker.py | 14 |
| -rw-r--r-- | searx/engines/swisscows.py | 21 |
| -rw-r--r-- | searx/engines/twitter.py | 2 |
| -rw-r--r-- | searx/engines/wikidata.py | 6 |
| -rw-r--r-- | searx/engines/wikipedia.py | 29 |
| -rw-r--r-- | searx/engines/yacy.py | 2 |
| -rw-r--r-- | searx/engines/yahoo.py | 22 |
| -rw-r--r-- | searx/engines/yahoo_news.py | 2 |
| -rw-r--r-- | searx/engines/yandex.py | 6 |
| -rw-r--r-- | searx/engines/youtube_api.py | 2 |
25 files changed, 227 insertions, 36 deletions
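Beyond the new fetchers, the bulk of the per-engine churn below is a convention change: searx now passes locales with a hyphen (`en-US`) instead of an underscore (`en_US`), so engines needing a bare language code split on `-`, and engines whose upstream still wants underscores convert back. A toy illustration of the three patterns (not itself part of the diff):

```python
locale = 'pt-BR'                                # format searx now uses internally

language = locale.split('-')[0]                 # 'pt'    - bare language code (twitter, yacy, ...)
region = locale                                 # 'pt-BR' - engines accepting full locales (bing_news)
underscored = locale.replace('-', '_').lower()  # 'pt_br' - engines expecting underscores (qwant)
```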
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 3a1db2760..5275351f1 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -20,6 +20,8 @@ from os.path import realpath, dirname
 import sys
 from flask_babel import gettext
 from operator import itemgetter
+from json import loads
+from requests import get
 from searx import settings
 from searx import logger
 from searx.utils import load_module
@@ -33,10 +35,13 @@ engines = {}
 
 categories = {'general': []}
 
+languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
+
 engine_shortcuts = {}
 engine_default_args = {'paging': False,
                        'categories': ['general'],
                        'language_support': True,
+                       'supported_languages': [],
                        'safesearch': False,
                        'timeout': settings['outgoing']['request_timeout'],
                        'shortcut': '-',
@@ -85,6 +90,15 @@ def load_engine(engine_data):
                          .format(engine.name, engine_attr))
             sys.exit(1)
 
+    # assign supported languages from json file
+    if engine_data['name'] in languages:
+        setattr(engine, 'supported_languages', languages[engine_data['name']])
+
+    # assign language fetching method if auxiliary method exists
+    if hasattr(engine, '_fetch_supported_languages'):
+        setattr(engine, 'fetch_supported_languages',
+                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
+
     engine.stats = {
         'result_count': 0,
         'search_count': 0,
diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
index 5ba512766..dca825790 100644
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@@ -29,8 +29,8 @@ xpath_link = './/div[@class="mw-search-result-heading"]/a'
 
 # cut 'en' from 'en_US', 'de' from 'de_CH', and so on
 def locale_to_lang_code(locale):
-    if locale.find('_') >= 0:
-        locale = locale.split('_')[0]
+    if locale.find('-') >= 0:
+        locale = locale.split('-')[0]
     return locale
 
 
@@ -95,6 +95,7 @@ main_langs = {
    'uk': 'Українська',
    'zh': '简体中文'
 }
+supported_languages = dict(lang_urls, **main_langs)
 
 
 # do search-request
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 58db61251..b2ad7b6cf 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -21,6 +21,7 @@ from searx.engines.xpath import extract_text
 categories = ['general']
 paging = True
 language_support = True
+supported_languages_url = 'https://www.bing.com/account/general'
 
 # search-url
 base_url = 'https://www.bing.com/'
@@ -32,7 +33,7 @@ def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1
 
     if params['language'] != 'all':
-        query = u'language:{} {}'.format(params['language'].split('_')[0].upper(),
+        query = u'language:{} {}'.format(params['language'].split('-')[0].upper(),
                                          query.decode('utf-8')).encode('utf-8')
 
     search_path = search_string.format(
@@ -81,3 +82,15 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = []
+    dom = html.fromstring(resp.text)
+    options = dom.xpath('//div[@id="limit-languages"]//input')
+    for option in options:
+        code = option.xpath('./@id')[0].replace('_', '-')
+        supported_languages.append(code)
+
+    return supported_languages
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 4dd362cb3..97f6dca37 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -19,6 +19,7 @@ from urllib import urlencode
 from lxml import html
 from json import loads
 import re
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
 
 # engine dependent config
 categories = ['images']
@@ -53,7 +54,7 @@ def request(query, params):
     if params['language'] == 'all':
         language = 'en-US'
     else:
-        language = params['language'].replace('_', '-')
+        language = params['language']
 
     search_path = search_string.format(
         query=urlencode({'q': query}),
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 4e7c33129..765bcd38e 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -17,6 +17,7 @@ from datetime import datetime
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
 
 # engine dependent config
 categories = ['news']
@@ -74,7 +75,7 @@ def request(query, params):
     if params['language'] == 'all':
         language = 'en-US'
     else:
-        language = params['language'].replace('_', '-')
+        language = params['language']
 
     params['url'] = _get_url(query, language, offset, params['time_range'])
 
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 317f34f59..8c69aafe0 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -15,6 +15,7 @@
 from urllib import urlencode
 from json import loads
 from datetime import datetime
+from requests import get
 
 # engine dependent config
 categories = ['videos']
@@ -27,6 +28,8 @@ search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,descr
 
 embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
     'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
 
+supported_languages_url = 'https://api.dailymotion.com/languages'
+
 
 # do search-request
 def request(query, params):
@@ -74,3 +77,22 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = {}
+
+    response_json = loads(resp.text)
+
+    for language in response_json['list']:
+        supported_languages[language['code']] = {}
+
+        name = language['native_name']
+        if name:
+            supported_languages[language['code']]['name'] = name
+        english_name = language['name']
+        if english_name:
+            supported_languages[language['code']]['english_name'] = english_name
+
+    return supported_languages
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 9959a52e6..df230222d 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -15,13 +15,15 @@
 
 from urllib import urlencode
 from lxml.html import fromstring
+from requests import get
+from json import loads
 from searx.engines.xpath import extract_text
-from searx.languages import language_codes
 
 # engine dependent config
 categories = ['general']
 paging = True
 language_support = True
+supported_languages_url = 'https://duckduckgo.com/d2030.js'
 time_range_support = True
 
 # search-url
@@ -46,19 +48,31 @@ def request(query, params):
 
     offset = (params['pageno'] - 1) * 30
 
+    # custom fixes for languages
     if params['language'] == 'all':
         locale = None
+    elif params['language'][:2] == 'ja':
+        locale = 'jp-jp'
+    elif params['language'][:2] == 'sl':
+        locale = 'sl-sl'
+    elif params['language'] == 'zh-TW':
+        locale = 'tw-tzh'
+    elif params['language'] == 'zh-HK':
+        locale = 'hk-tzh'
+    elif params['language'][-2:] == 'SA':
+        locale = 'xa-' + params['language'].split('-')[0]
+    elif params['language'][-2:] == 'GB':
+        locale = 'uk-' + params['language'].split('-')[0]
     else:
-        locale = params['language'].split('_')
+        locale = params['language'].split('-')
         if len(locale) == 2:
             # country code goes first
             locale = locale[1].lower() + '-' + locale[0].lower()
         else:
             # tries to get a country code from language
             locale = locale[0].lower()
-            lang_codes = [x[0] for x in language_codes]
-            for lc in lang_codes:
-                lc = lc.split('_')
+            for lc in supported_languages:
+                lc = lc.split('-')
                 if locale == lc[0]:
                     locale = lc[1].lower() + '-' + lc[0].lower()
                     break
@@ -102,3 +116,17 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+
+    # response is a js file with regions as an embedded object
+    response_page = resp.text
+    response_page = response_page[response_page.find('regions:{') + 8:]
+    response_page = response_page[:response_page.find('}') + 1]
+
+    regions_json = loads(response_page)
+    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
+
+    return supported_languages
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index 208ccca28..dd3f12e1e 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -4,6 +4,7 @@ from re import compile, sub
 from lxml import html
 from searx.utils import html_to_text
 from searx.engines.xpath import extract_text
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
 
 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
@@ -23,7 +24,7 @@ def result_to_text(url, text, htmlResult):
 
 def request(query, params):
     params['url'] = url.format(query=urlencode({'q': query}))
-    params['headers']['Accept-Language'] = params['language']
+    params['headers']['Accept-Language'] = params['language'].split('-')[0]
     return params
 
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index 5430eb3ba..827b9cd03 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -14,6 +14,7 @@ from json import loads
 from random import randint
 from time import time
 from urllib import urlencode
+from lxml.html import fromstring
 
 # engine dependent config
 categories = ['general']
@@ -40,6 +41,8 @@ url_xpath = './/url'
 title_xpath = './/title'
 content_xpath = './/sum'
 
+supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
+
 
 # do search-request
 def request(query, params):
@@ -48,7 +51,9 @@ def request(query, params):
     if params['language'] == 'all':
         language = 'xx'
     else:
-        language = params['language'][0:2]
+        language = params['language'].replace('-', '_').lower()
+        if language.split('-')[0] != 'zh':
+            language = language.split('-')[0]
 
     if params['safesearch'] >= 1:
         safesearch = 1
@@ -82,3 +87,16 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = []
+    dom = fromstring(resp.text)
+    links = dom.xpath('//span[@id="menu2"]/a')
+    for link in links:
+        code = link.xpath('./@href')[0][-2:]
+        if code != 'xx' and code not in supported_languages:
+            supported_languages.append(code)
+
+    return supported_languages
diff --git a/searx/engines/google.py b/searx/engines/google.py
index a02b6940e..803cd307e 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -103,6 +103,7 @@ map_hostname_start = 'maps.google.'
 maps_path = '/maps'
 redirect_path = '/url'
 images_path = '/images'
+supported_languages_url = 'https://www.google.com/preferences?#languages'
 
 # specific xpath variables
 results_xpath = '//div[@class="g"]'
@@ -167,8 +168,12 @@ def request(query, params):
         language = 'en'
         country = 'US'
         url_lang = ''
+    elif params['language'][:2] == 'jv':
+        language = 'jw'
+        country = 'ID'
+        url_lang = 'lang_jw'
     else:
-        language_array = params['language'].lower().split('_')
+        language_array = params['language'].lower().split('-')
         if len(language_array) == 2:
             country = language_array[1]
         else:
@@ -355,3 +360,16 @@ def attributes_to_html(attributes):
             retval = retval + '<tr><th>' + a.get('label') + '</th><td>' + value + '</td></tr>'
     retval = retval + '</table>'
     return retval
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = {}
+    dom = html.fromstring(resp.text)
+    options = dom.xpath('//table//td/font/label/span')
+    for option in options:
+        code = option.xpath('./@id')[0][1:]
+        name = option.text.title()
+        supported_languages[code] = {"name": name}
+
+    return supported_languages
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 11357f3e6..49c6a5d50 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -12,6 +12,8 @@
 
 from lxml import html
 from urllib import urlencode
+from json import loads
+from searx.engines.google import _fetch_supported_languages, supported_languages_url
 
 # search-url
 categories = ['news']
@@ -50,7 +52,7 @@ def request(query, params):
         search_options=urlencode(search_options))
 
     if params['language'] != 'all':
-        language_array = params['language'].lower().split('_')
+        language_array = params['language'].lower().split('-')
         params['url'] += '&lr=lang_' + language_array[0]
 
     return params
diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py
index 26d3720d9..93d98d3aa 100644
--- a/searx/engines/mediawiki.py
+++ b/searx/engines/mediawiki.py
@@ -46,7 +46,7 @@ def request(query, params):
     if params['language'] == 'all':
         language = 'en'
     else:
-        language = params['language'].split('_')[0]
+        language = params['language'].split('-')[0]
 
     # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
     if any(x[1] == 'language' for x in format_strings):
diff --git a/searx/engines/photon.py b/searx/engines/photon.py
index 2197005e5..a029bbfef 100644
--- a/searx/engines/photon.py
+++ b/searx/engines/photon.py
@@ -26,7 +26,7 @@ search_string = 'api/?{query}&limit={limit}'
 result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
 
 # list of supported languages
-allowed_languages = ['de', 'en', 'fr', 'it']
+supported_languages = ['de', 'en', 'fr', 'it']
 
 
 # do search-request
@@ -37,7 +37,7 @@ def request(query, params):
 
     if params['language'] != 'all':
         language = params['language'].split('_')[0]
-        if language in allowed_languages:
+        if language in supported_languages:
             params['url'] = params['url'] + "&lang=" + language
 
     # using searx User-Agent
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index d8b084292..67803fa94 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -46,7 +46,7 @@ def request(query, params):
 
     # add language tag if specified
     if params['language'] != 'all':
-        params['url'] += '&locale=' + params['language'].lower()
+        params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
 
     return params
 
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 6f6eae1cf..54aafdee5 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -47,7 +47,7 @@ def request(query, params):
 
     # set language if specified
     if params['language'] != 'all':
-        params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
+        params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
 
     return params
 
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index daba68be7..77b010c3f 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -22,7 +22,7 @@ language = ""
 
 # search-url
 url = 'http://www.subtitleseeker.com/'
-search_url = url + 'search/TITLES/{query}&p={pageno}'
+search_url = url + 'search/TITLES/{query}?p={pageno}'
 
 # specific xpath variables
 results_xpath = '//div[@class="boxRows"]'
@@ -43,10 +43,16 @@ def response(resp):
 
     search_lang = ""
 
-    if resp.search_params['language'] != 'all':
-        search_lang = [lc[1]
+    # dirty fix for languages named differenly in their site
+    if resp.search_params['language'][:2] == 'fa':
+        search_lang = 'Farsi'
+    elif resp.search_params['language'] == 'pt-BR':
+        search_lang = 'Brazilian'
+    elif resp.search_params['language'] != 'all':
+        search_lang = [lc[3]
                        for lc in language_codes
-                       if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
+                       if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
+        search_lang = search_lang[0].split(' (')[0]
 
     # parse results
     for result in dom.xpath(results_xpath):
diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py
index 72184e428..d8a454039 100644
--- a/searx/engines/swisscows.py
+++ b/searx/engines/swisscows.py
@@ -13,6 +13,7 @@
 from json import loads
 from urllib import urlencode, unquote
 import re
+from lxml.html import fromstring
 
 # engine dependent config
 categories = ['general', 'images']
@@ -23,6 +24,8 @@ language_support = True
 base_url = 'https://swisscows.ch/'
 search_string = '?{query}&page={page}'
 
+supported_languages_url = base_url
+
 # regex
 regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
 regex_json_remove_start = re.compile(r'^initialData:\s*')
@@ -35,9 +38,11 @@ def request(query, params):
     if params['language'] == 'all':
         ui_language = 'browser'
         region = 'browser'
+    elif params['language'].split('-')[0] == 'no':
+        region = 'nb-NO'
     else:
-        region = params['language'].replace('_', '-')
-        ui_language = params['language'].split('_')[0]
+        region = params['language']
+        ui_language = params['language'].split('-')[0]
 
     search_path = search_string.format(
         query=urlencode({'query': query,
@@ -106,3 +111,15 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = []
+    dom = fromstring(resp.text)
+    options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
+    for option in options:
+        code = option.xpath('./@data-val')[0]
+        supported_languages.append(code)
+
+    return supported_languages
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 36efac186..6cca05f70 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -40,7 +40,7 @@ def request(query, params):
 
     # set language if specified
     if params['language'] != 'all':
-        params['cookies']['lang'] = params['language'].split('_')[0]
+        params['cookies']['lang'] = params['language'].split('-')[0]
     else:
         params['cookies']['lang'] = 'en'
 
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index 91040e218..3f849bc7d 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -14,6 +14,8 @@
 from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
+from searx.utils import format_date_by_locale
+from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
 
 from json import loads
 from lxml.html import fromstring
@@ -55,7 +57,7 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
 
 
 def request(query, params):
-    language = params['language'].split('_')[0]
+    language = params['language'].split('-')[0]
     if language == 'all':
         language = 'en'
 
@@ -70,7 +72,7 @@ def response(resp):
     html = fromstring(resp.content)
     wikidata_ids = html.xpath(wikidata_ids_xpath)
 
-    language = resp.search_params['language'].split('_')[0]
+    language = resp.search_params['language'].split('-')[0]
     if language == 'all':
         language = 'en'
 
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index 70191d22b..78acd349d 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -12,6 +12,8 @@
 
 from json import loads
 from urllib import urlencode, quote
+from lxml.html import fromstring
+
 
 # search-url
 base_url = 'https://{language}.wikipedia.org/'
@@ -24,14 +26,16 @@ search_postfix = 'w/api.php?'\
     '&explaintext'\
     '&pithumbsize=300'\
     '&redirects'
+supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
 
 
 # set language in base_url
 def url_lang(lang):
-    if lang == 'all':
+    lang = lang.split('-')[0]
+    if lang == 'all' or lang not in supported_languages:
         language = 'en'
     else:
-        language = lang.split('_')[0]
+        language = lang
 
     return base_url.format(language=language)
 
@@ -111,3 +115,24 @@ def response(resp):
                           'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]})
 
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = {}
+    dom = fromstring(resp.text)
+    tables = dom.xpath('//table[contains(@class,"sortable")]')
+    for table in tables:
+        # exclude header row
+        trs = table.xpath('.//tr')[1:]
+        for tr in trs:
+            td = tr.xpath('./td')
+            code = td[3].xpath('./a')[0].text
+            name = td[2].xpath('./a')[0].text
+            english_name = td[1].xpath('./a')[0].text
+            articles = int(td[4].xpath('./a/b')[0].text.replace(',', ''))
+            # exclude languages with too few articles
+            if articles >= 100000:
+                supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles}
+
+    return supported_languages
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
index 92cf881c0..7b1b6b35d 100644
--- a/searx/engines/yacy.py
+++ b/searx/engines/yacy.py
@@ -53,7 +53,7 @@ def request(query, params):
 
     # add language tag if specified
     if params['language'] != 'all':
-        params['url'] += '&lr=lang_' + params['language'].split('_')[0]
+        params['url'] += '&lr=lang_' + params['language'].split('-')[0]
 
     return params
 
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index 2bb34b83d..5c62c2ed8 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -27,6 +27,8 @@ base_url = 'https://search.yahoo.com/'
 search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
 search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
 
+supported_languages_url = 'https://search.yahoo.com/web/advanced'
+
 # specific xpath variables
 results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
 url_xpath = './/h3/a/@href'
@@ -72,7 +74,13 @@ def _get_url(query, offset, language, time_range):
 def _get_language(params):
     if params['language'] == 'all':
         return 'en'
-    return params['language'].split('_')[0]
+    elif params['language'][:2] == 'zh':
+        if params['language'] == 'zh' or params['language'] == 'zh-CH':
+            return 'szh'
+        else:
+            return 'tzh'
+    else:
+        return params['language'].split('-')[0]
 
 
 # do search-request
@@ -132,3 +140,15 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = []
+    dom = html.fromstring(resp.text)
+    options = dom.xpath('//div[@id="yschlang"]/span/label/input')
+    for option in options:
+        code = option.xpath('./@value')[0][5:].replace('_', '-')
+        supported_languages.append(code)
+
+    return supported_languages
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index e91c1d34e..3e4cf02eb 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -12,7 +12,7 @@
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
-from searx.engines.yahoo import parse_url
+from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
 from datetime import datetime, timedelta
 import re
 from dateutil import parser
diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py
index b83a747f9..65aee28b8 100644
--- a/searx/engines/yandex.py
+++ b/searx/engines/yandex.py
@@ -22,7 +22,9 @@ language_support = True  # TODO
 
 default_tld = 'com'
 language_map = {'ru': 'ru',
-                'ua': 'uk',
+                'ua': 'ua',
+                'be': 'by',
+                'kk': 'kz',
                 'tr': 'com.tr'}
 
 # search-url
@@ -36,7 +38,7 @@ content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m orga
 
 
 def request(query, params):
-    lang = params['language'].split('_')[0]
+    lang = params['language'].split('-')[0]
     host = base_url.format(tld=language_map.get(lang) or default_tld)
     params['url'] = host + search_url.format(page=params['pageno'] - 1,
                                              query=urlencode({'text': query}))
diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py
index 8fd939a25..1dfca5166 100644
--- a/searx/engines/youtube_api.py
+++ b/searx/engines/youtube_api.py
@@ -36,7 +36,7 @@ def request(query, params):
 
     # add language tag if specified
     if params['language'] != 'all':
-        params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0]
+        params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
 
     return params
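The diff wires up the fetchers but does not include a script that regenerates `searx/data/engines_languages.json`. A refresh driven through the new `fetch_supported_languages()` binding might look roughly like the sketch below; the script itself, the output path handling, and the JSON formatting flags are assumptions, not part of this commit.

```python
# hypothetical maintenance script: dump each engine's live language list into
# the JSON file that searx/engines/__init__.py reads at startup
from json import dump

# importing searx.engines loads every engine configured in settings.yml
from searx.engines import engines

engines_languages = {}
for name, engine in engines.items():
    if hasattr(engine, 'fetch_supported_languages'):
        # GETs engine.supported_languages_url and runs _fetch_supported_languages
        engines_languages[name] = engine.fetch_supported_languages()

with open('searx/data/engines_languages.json', 'w') as f:
    dump(engines_languages, f, indent=2, sort_keys=True)
```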