From 149802c56926bf48520c98932c4c36b8152b3d2d Mon Sep 17 00:00:00 2001 From: marc Date: Fri, 5 Aug 2016 23:34:56 -0500 Subject: [enh] add supported_languages on engines and auto-generate languages.py --- searx/engines/yahoo.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'searx/engines/yahoo.py') diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 2bb34b83d..c00e42368 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -20,6 +20,10 @@ from searx.engines.xpath import extract_text, extract_url categories = ['general'] paging = True language_support = True +supported_languages = ["ar", "bg", "ca", "szh", "tzh", "hr", "cs", "da", "nl", "en", + "et", "fi", "fr", "de", "el", "he", "hu", "is", "id", "it", "ja", + "ko", "lv", "lt", "no", "fa", "pl", "pt", "ro", "ru", "sk", "sr", + "sl", "es", "sv", "th", "tr"] time_range_support = True # search-url @@ -72,7 +76,13 @@ def _get_url(query, offset, language, time_range): def _get_language(params): if params['language'] == 'all': return 'en' - return params['language'].split('_')[0] + elif params['language'][:2] == 'zh': + if params['language'] == 'zh' or params['language'] == 'zh-CH': + return 'szh' + else: + return 'tzh' + else: + return params['language'].split('-')[0] # do search-request -- cgit v1.2.3 From f62ce21f50b540315a708ebfbf36878ddec9d1c4 Mon Sep 17 00:00:00 2001 From: marc Date: Sat, 5 Nov 2016 20:51:38 -0600 Subject: [mod] fetch supported languages for several engines utils/fetch_languages.py gets languages supported by each engine and generates engines_languages.json with each engine's supported languages. 
--- searx/engines/yahoo.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'searx/engines/yahoo.py') diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index c00e42368..db10c8939 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -14,16 +14,13 @@ from urllib import urlencode from urlparse import unquote from lxml import html +from requests import get from searx.engines.xpath import extract_text, extract_url # engine dependent config categories = ['general'] paging = True language_support = True -supported_languages = ["ar", "bg", "ca", "szh", "tzh", "hr", "cs", "da", "nl", "en", - "et", "fi", "fr", "de", "el", "he", "hu", "is", "id", "it", "ja", - "ko", "lv", "lt", "no", "fa", "pl", "pt", "ro", "ru", "sk", "sr", - "sl", "es", "sv", "th", "tr"] time_range_support = True # search-url @@ -31,6 +28,8 @@ base_url = 'https://search.yahoo.com/' search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}' search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time' +supported_languages_url = 'https://search.yahoo.com/web/advanced' + # specific xpath variables results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]" url_xpath = './/h3/a/@href' @@ -142,3 +141,16 @@ def response(resp): # return results return results + + +# get supported languages from their site +def fetch_supported_languages(): + supported_languages = [] + response = get(supported_languages_url) + dom = html.fromstring(response.text) + options = dom.xpath('//div[@id="yschlang"]/span/label/input') + for option in options: + code = option.xpath('./@value')[0][5:] + supported_languages.append(code) + + return supported_languages -- cgit v1.2.3 From af35eee10b98940c51c6e5e18629de514b4bd48d Mon Sep 17 00:00:00 2001 From: marc Date: Thu, 15 Dec 2016 00:34:43 -0600 Subject: tests for _fetch_supported_languages in engines and refactor method to make it testable without making requests 
--- searx/engines/yahoo.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'searx/engines/yahoo.py') diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index db10c8939..5c62c2ed8 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -14,7 +14,6 @@ from urllib import urlencode from urlparse import unquote from lxml import html -from requests import get from searx.engines.xpath import extract_text, extract_url # engine dependent config @@ -144,13 +143,12 @@ def response(resp): # get supported languages from their site -def fetch_supported_languages(): +def _fetch_supported_languages(resp): supported_languages = [] - response = get(supported_languages_url) - dom = html.fromstring(response.text) + dom = html.fromstring(resp.text) options = dom.xpath('//div[@id="yschlang"]/span/label/input') for option in options: - code = option.xpath('./@value')[0][5:] + code = option.xpath('./@value')[0][5:].replace('_', '-') supported_languages.append(code) return supported_languages -- cgit v1.2.3