From d1eae9359f8c5920632a730744ea2208070f06da Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Wed, 14 Feb 2018 16:17:46 -0600 Subject: fix fetch_languages to be more accurate Add languages supported by either all default general engines or 10 engines. --- searx/engines/google.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'searx/engines/google.py') diff --git a/searx/engines/google.py b/searx/engines/google.py index 0a8678362..99c0d2b45 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -72,7 +72,7 @@ country_to_hostname = { 'RO': 'www.google.ro', # Romania 'RU': 'www.google.ru', # Russia 'SK': 'www.google.sk', # Slovakia - 'SL': 'www.google.si', # Slovenia (SL -> si) + 'SI': 'www.google.si', # Slovenia 'SE': 'www.google.se', # Sweden 'TH': 'www.google.co.th', # Thailand 'TR': 'www.google.com.tr', # Turkey -- cgit v1.2.3 From 772c048d01c7585fd60afca1ce30a1914e6e5b4a Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Wed, 28 Feb 2018 22:30:48 -0600 Subject: refactor engine's search language handling Add match_language function in utils to match any user given language code with a list of engine's supported languages. Also add language_aliases dict on each engine to translate standard language codes into the custom codes used by the engine. 
--- searx/engines/google.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'searx/engines/google.py') diff --git a/searx/engines/google.py b/searx/engines/google.py index 99c0d2b45..93075e2dc 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -14,6 +14,7 @@ from lxml import html, etree from searx.engines.xpath import extract_text, extract_url from searx import logger from searx.url_utils import urlencode, urlparse, parse_qsl +from searx.utils import match_language logger = logger.getChild('google engine') @@ -165,22 +166,20 @@ def extract_text_from_dom(result, xpath): def request(query, params): offset = (params['pageno'] - 1) * 10 + language = match_language(params['language'], supported_languages) + language_array = language.split('-') + if params['language'].find('-') > 0: + country = params['language'].split('-')[1] + elif len(language_array) == 2: + country = language_array[1] + else: + country = 'US' + # temporary fix until a way of supporting en-US is found - if params['language'] == 'en-US': - params['language'] = 'en-GB' + if language == 'en-US': + country = 'GB' - if params['language'][:2] == 'jv': - language = 'jw' - country = 'ID' - url_lang = 'lang_jw' - else: - language_array = params['language'].lower().split('-') - if len(language_array) == 2: - country = language_array[1] - else: - country = 'US' - language = language_array[0] + ',' + language_array[0] + '-' + country - url_lang = 'lang_' + language_array[0] + url_lang = 'lang_' + language if use_locale_domain: google_hostname = country_to_hostname.get(country.upper(), default_hostname) @@ -196,7 +195,7 @@ def request(query, params): if params['time_range'] in time_range_dict: params['url'] += time_range_search.format(range=time_range_dict[params['time_range']]) - params['headers']['Accept-Language'] = language + params['headers']['Accept-Language'] = language + ',' + language + '-' + country params['headers']['Accept'] = 
'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' params['google_hostname'] = google_hostname -- cgit v1.2.3 From f7f9c50393785c8d6ad6c40a4c507b292ea438b1 Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Wed, 18 Apr 2018 22:55:37 -0500 Subject: [fix] force English results in Google when using en-US --- searx/engines/google.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'searx/engines/google.py') diff --git a/searx/engines/google.py b/searx/engines/google.py index 93075e2dc..62e7d1170 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -91,7 +91,7 @@ url_map = 'https://www.openstreetmap.org/'\ search_path = '/search' search_url = ('https://{hostname}' + search_path + - '?{query}&start={offset}&gws_rd=cr&gbv=1&lr={lang}&ei=x') + '?{query}&start={offset}&gws_rd=cr&gbv=1&lr={lang}&hl={lang_short}&ei=x') time_range_search = "&tbs=qdr:{range}" time_range_dict = {'day': 'd', @@ -175,10 +175,6 @@ def request(query, params): else: country = 'US' - # temporary fix until a way of supporting en-US is found - if language == 'en-US': - country = 'GB' - url_lang = 'lang_' + language if use_locale_domain: @@ -191,7 +187,8 @@ def request(query, params): params['url'] = search_url.format(offset=offset, query=urlencode({'q': query}), hostname=google_hostname, - lang=url_lang) + lang=url_lang, + lang_short=language) if params['time_range'] in time_range_dict: params['url'] += time_range_search.format(range=time_range_dict[params['time_range']]) -- cgit v1.2.3