From 149802c56926bf48520c98932c4c36b8152b3d2d Mon Sep 17 00:00:00 2001 From: marc Date: Fri, 5 Aug 2016 23:34:56 -0500 Subject: [enh] add supported_languages on engines and auto-generate languages.py --- searx/engines/duckduckgo.py | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'searx/engines/duckduckgo.py') diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 9959a52e6..a1cb5882c 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -22,6 +22,13 @@ from searx.languages import language_codes categories = ['general'] paging = True language_support = True +supported_languages = ["es-AR", "en-AU", "de-AT", "fr-BE", "nl-BE", "pt-BR", "bg-BG", "en-CA", "fr-CA", "ca-CT", + "es-CL", "zh-CN", "es-CO", "hr-HR", "cs-CZ", "da-DK", "et-EE", "fi-FI", "fr-FR", "de-DE", + "el-GR", "tzh-HK", "hu-HU", "en-IN", "id-ID", "en-ID", "en-IE", "he-IL", "it-IT", "jp-JP", + "kr-KR", "es-XL", "lv-LV", "lt-LT", "ms-MY", "en-MY", "es-MX", "nl-NL", "en-NZ", "no-NO", + "es-PE", "en-PH", "tl-PH", "pl-PL", "pt-PT", "ro-RO", "ru-RU", "ar-XA", "en-XA", "en-SG", + "sk-SK", "sl-SL", "en-ZA", "es-ES", "ca-ES", "sv-SE", "de-CH", "fr-CH", "it-CH", "tzh-TW", + "th-TH", "tr-TR", "uk-UA", "en-UK", "en-US", "es-US", "vi-VN"] time_range_support = True # search-url @@ -46,10 +53,23 @@ def request(query, params): offset = (params['pageno'] - 1) * 30 + # custom fixes for languages if params['language'] == 'all': locale = None + elif params['language'][:2] == 'ja': + locale = 'jp-jp' + elif params['language'] == 'zh-TW': + locale = 'tw-tzh' + elif params['language'] == 'zh-HK': + locale = 'hk-tzh' + elif params['language'][-2:] == 'SA': + locale = 'xa' + params['language'].split('-')[0] + elif params['language'][-2:] == 'GB': + locale = 'uk' + params['language'].split('-')[0] + elif params['language'] == 'es-419': + locale = 'xl-es' else: - locale = params['language'].split('_') + locale = params['language'].split('-') if len(locale) == 2: # country code goes first locale = locale[1].lower() + '-' + locale[0].lower() @@ -58,7 +78,25 @@ def request(query, params): locale = locale[0].lower() lang_codes = [x[0] for x in language_codes] for lc in lang_codes: - lc = lc.split('_') + lc = lc.split('-') + if locale == lc[0] and len(lc) == 2: + locale = lc[1].lower() + '-' + lc[0].lower() + break + + if locale: + params['url'] = url.format( + query=urlencode({'q': query, 'kl': locale}), offset=offset) + else: + locale = params['language'].split('-') + if len(locale) == 2: + # country code goes first + locale = locale[1].lower() + '-' + locale[0].lower() + else: + # tries to get a country code from language + locale = locale[0].lower() + lang_codes = [x[0] for x in language_codes] + for lc in lang_codes: + lc = lc.split('-') if locale == lc[0]: locale = lc[1].lower() + '-' + lc[0].lower() break -- cgit v1.2.3 From a11948c71bfe7b2aac6e50e7634874d5073c7d84 Mon Sep 17 00:00:00 2001 From: marc Date: Sat, 29 Oct 2016 21:04:01 -0500 Subject: Add language support for more engines. --- searx/engines/duckduckgo.py | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'searx/engines/duckduckgo.py') diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index a1cb5882c..3e1752dd0 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -16,7 +16,6 @@ from urllib import urlencode from lxml.html import fromstring from searx.engines.xpath import extract_text -from searx.languages import language_codes # engine dependent config categories = ['general'] @@ -76,26 +75,7 @@ def request(query, params): else: # tries to get a country code from language locale = locale[0].lower() - lang_codes = [x[0] for x in language_codes] - for lc in lang_codes: - lc = lc.split('-') - if locale == lc[0] and len(lc) == 2: - locale = lc[1].lower() + '-' + lc[0].lower() - break - - if locale: - params['url'] = url.format( - query=urlencode({'q': query, 'kl': locale}), offset=offset) - else: - locale = params['language'].split('-') - if len(locale) == 2: - # country code goes first - locale = locale[1].lower() + '-' + locale[0].lower() - else: - # tries to get a country code from language - locale = locale[0].lower() - lang_codes = [x[0] for x in language_codes] - for lc in lang_codes: + for lc in supported_languages: lc = lc.split('-') if locale == lc[0]: locale = lc[1].lower() + '-' + lc[0].lower() -- cgit v1.2.3 From f62ce21f50b540315a708ebfbf36878ddec9d1c4 Mon Sep 17 00:00:00 2001 From: marc Date: Sat, 5 Nov 2016 20:51:38 -0600 Subject: [mod] fetch supported languages for several engines utils/fetch_languages.py gets languages supported by each engine and generates engines_languages.json with each engine's supported language. --- searx/engines/duckduckgo.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'searx/engines/duckduckgo.py') diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 3e1752dd0..d37d2778b 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -15,19 +15,15 @@ from urllib import urlencode from lxml.html import fromstring +from requests import get +from json import loads from searx.engines.xpath import extract_text # engine dependent config categories = ['general'] paging = True language_support = True -supported_languages = ["es-AR", "en-AU", "de-AT", "fr-BE", "nl-BE", "pt-BR", "bg-BG", "en-CA", "fr-CA", "ca-CT", - "es-CL", "zh-CN", "es-CO", "hr-HR", "cs-CZ", "da-DK", "et-EE", "fi-FI", "fr-FR", "de-DE", - "el-GR", "tzh-HK", "hu-HU", "en-IN", "id-ID", "en-ID", "en-IE", "he-IL", "it-IT", "jp-JP", - "kr-KR", "es-XL", "lv-LV", "lt-LT", "ms-MY", "en-MY", "es-MX", "nl-NL", "en-NZ", "no-NO", - "es-PE", "en-PH", "tl-PH", "pl-PL", "pt-PT", "ro-RO", "ru-RU", "ar-XA", "en-XA", "en-SG", - "sk-SK", "sl-SL", "en-ZA", "es-ES", "ca-ES", "sv-SE", "de-CH", "fr-CH", "it-CH", "tzh-TW", - "th-TH", "tr-TR", "uk-UA", "en-UK", "en-US", "es-US", "vi-VN"] +supported_languages_url = 'https://duckduckgo.com/d2030.js' time_range_support = True # search-url @@ -65,8 +61,6 @@ def request(query, params): locale = 'xa' + params['language'].split('-')[0] elif params['language'][-2:] == 'GB': locale = 'uk' + params['language'].split('-')[0] - elif params['language'] == 'es-419': - locale = 'xl-es' else: locale = params['language'].split('-') if len(locale) == 2: @@ -120,3 +114,18 @@ def response(resp): # return results return results + + +# get supported languages from their site +def fetch_supported_languages(): + response = get(supported_languages_url) + + # response is a js file with regions as an embedded object + response_page = response.text + response_page = response_page[response_page.find('regions:{') + 8:] + response_page = response_page[:response_page.find('}') + 1] + + regions_json = loads(response_page) + supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys()) + + return supported_languages -- cgit v1.2.3 From e0c270bd72f7b2a40222e3ed264e25d36cb0fc30 Mon Sep 17 00:00:00 2001 From: marc Date: Tue, 13 Dec 2016 23:51:15 -0600 Subject: tests for language support in engines --- searx/engines/duckduckgo.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'searx/engines/duckduckgo.py') diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index d37d2778b..9cf5fb339 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -53,14 +53,16 @@ def request(query, params): locale = None elif params['language'][:2] == 'ja': locale = 'jp-jp' + elif params['language'][:2] == 'sl': + locale = 'sl-sl' elif params['language'] == 'zh-TW': locale = 'tw-tzh' elif params['language'] == 'zh-HK': locale = 'hk-tzh' elif params['language'][-2:] == 'SA': - locale = 'xa' + params['language'].split('-')[0] + locale = 'xa-' + params['language'].split('-')[0] elif params['language'][-2:] == 'GB': - locale = 'uk' + params['language'].split('-')[0] + locale = 'uk-' + params['language'].split('-')[0] else: locale = params['language'].split('-') if len(locale) == 2: -- cgit v1.2.3 From af35eee10b98940c51c6e5e18629de514b4bd48d Mon Sep 17 00:00:00 2001 From: marc Date: Thu, 15 Dec 2016 00:34:43 -0600 Subject: tests for _fetch_supported_languages in engines and refactor method to make it testable without making requests --- searx/engines/duckduckgo.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'searx/engines/duckduckgo.py') diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 9cf5fb339..df230222d 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -119,11 +119,10 @@ def response(resp): # get supported languages from their site -def fetch_supported_languages(): - response = get(supported_languages_url) +def _fetch_supported_languages(resp): # response is a js file with regions as an embedded object - response_page = response.text + response_page = resp.text response_page = response_page[response_page.find('regions:{') + 8:] response_page = response_page[:response_page.find('}') + 1] -- cgit v1.2.3