From 772c048d01c7585fd60afca1ce30a1914e6e5b4a Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Wed, 28 Feb 2018 22:30:48 -0600 Subject: refactor engine's search language handling Add match_language function in utils to match any user given language code with a list of engine's supported languages. Also add language_aliases dict on each engine to translate standard language codes into the custom codes used by the engine. --- searx/utils.py | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'searx/utils.py') diff --git a/searx/utils.py b/searx/utils.py index 8f095f3b0..77c392909 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -4,6 +4,7 @@ import hmac import os import re +from babel.core import get_global from babel.dates import format_date from codecs import getincrementalencoder from imp import load_source @@ -12,6 +13,7 @@ from os.path import splitext, join from random import choice import sys +from searx import settings from searx.version import VERSION_STRING from searx.languages import language_codes from searx import settings @@ -322,6 +324,65 @@ def is_valid_lang(lang): return False +# auxiliary function to match lang_code in lang_list +def _match_language(lang_code, lang_list=[], custom_aliases={}): + # replace language code with a custom alias if necessary + if lang_code in custom_aliases: + lang_code = custom_aliases[lang_code] + + if lang_code in lang_list: + return lang_code + + # try to get the most likely country for this language + subtags = get_global('likely_subtags').get(lang_code) + if subtags: + subtag_parts = subtags.split('_') + new_code = subtag_parts[0] + '-' + subtag_parts[-1] + if new_code in custom_aliases: + new_code = custom_aliases[new_code] + if new_code in lang_list: + return new_code + + # try to get the any supported country for this language + for lc in lang_list: + if lang_code == lc.split('-')[0]: + return lc + + return None + + +# get the language code from lang_list that best matches locale_code +def match_language(locale_code, lang_list=[], custom_aliases={}, fallback='en-US'): + # try to get language from given locale_code + language = _match_language(locale_code, lang_list, custom_aliases) + if language: + return language + + locale_parts = locale_code.split('-') + lang_code = locale_parts[0] + + # try to get language using an equivalent country code + if len(locale_parts) > 1: + country_alias = get_global('territory_aliases').get(locale_parts[-1]) + if country_alias: + language = _match_language(lang_code + '-' + country_alias[0], lang_list, custom_aliases) + if language: + return language + + # try to get language using an equivalent language code + alias = get_global('language_aliases').get(lang_code) + if alias: + language = _match_language(alias, lang_list, custom_aliases) + if language: + return language + + if lang_code != locale_code: + # try to get language from given language without giving the country + language = _match_language(lang_code, lang_list, custom_aliases) + + return language or fallback + + def load_module(filename, module_dir): modname = splitext(filename)[0] if modname in sys.modules: -- cgit v1.2.3 From 75b276f408487db8fecc6eab7abd6126323a7efe Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Sun, 20 May 2018 18:10:22 -0500 Subject: fix bing "garbage" results (issue #1275) --- searx/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'searx/utils.py') diff --git a/searx/utils.py b/searx/utils.py index 77c392909..eccbaaf19 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -57,9 +57,9 @@ blocked_tags = ('script', 'style') -def gen_useragent(): +def gen_useragent(os=None): # TODO - return ua.format(os=choice(ua_os), version=choice(ua_versions)) + return ua.format(os=os or choice(ua_os), version=choice(ua_versions)) def searx_useragent(): -- cgit v1.2.3 From aef2b07969bddb1697c340604d1e22cea8a696cc Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Thu, 14 Jun 2018 11:48:31 +0200 Subject: [fix] add basestring for py3 --- searx/utils.py | 1 + 1 file changed, 1 insertion(+) (limited to 'searx/utils.py') diff --git a/searx/utils.py b/searx/utils.py index 77c392909..1ec45ed6e 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -33,6 +33,7 @@ if sys.version_info[0] == 3: unichr = chr unicode = str IS_PY2 = False + basestring = str else: IS_PY2 = True -- cgit v1.2.3 From c7000cd1df6d8f9aaa787515a6eca16f8a083715 Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Sat, 23 Jun 2018 16:24:06 -0500 Subject: [fix] update user agent versions this fixes duckduckgo error response --- searx/utils.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'searx/utils.py') diff --git a/searx/utils.py b/searx/utils.py index 1ec45ed6e..3b2e39919 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -39,14 +39,15 @@ else: logger = logger.getChild('utils') -ua_versions = ('40.0', - '41.0', - '42.0', - '43.0', - '44.0', - '45.0', - '46.0', - '47.0') +ua_versions = ('52.8.1', + '53.0', + '54.0', + '55.0', + '56.0', + '57.0', + '58.0', + '59.0', + '60.0.2') ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64', -- cgit v1.2.3 From 066bd916bf0c0344c978d2ea46cf9e9960841a61 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Sun, 28 May 2017 15:46:45 +0200 Subject: [mod] fetch firefox versions in a standalone script --- searx/utils.py | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'searx/utils.py') diff --git a/searx/utils.py b/searx/utils.py index bd6c3fe2f..f457284e3 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -10,8 +10,10 @@ from codecs import getincrementalencoder from imp import load_source from numbers import Number from os.path import splitext, join +from io import open from random import choice import sys +import json from searx import settings from searx.version import VERSION_STRING @@ -39,29 +41,11 @@ else: logger = logger.getChild('utils') -ua_versions = ('52.8.1', - '53.0', - '54.0', - '55.0', - '56.0', - '57.0', - '58.0', - '59.0', - '60.0.2') - -ua_os = ('Windows NT 6.3; WOW64', - 'X11; Linux x86_64', - 'X11; Linux x86') - -ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}" - blocked_tags = ('script', 'style') - -def gen_useragent(os=None): - # TODO - return ua.format(os=os or choice(ua_os), version=choice(ua_versions)) +useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__)) + + "/data/useragents.json", 'r', encoding='utf-8').read()) def searx_useragent(): @@ -70,6 +54,10 @@ def searx_useragent(): suffix=settings['outgoing'].get('useragent_suffix', '')) +def gen_useragent(): + return str(useragents['ua'].format(os=choice(useragents['os']), version=choice(useragents['versions']))) + + def highlight_content(content, query): if not content: -- cgit v1.2.3 From 50c836864a9a7a765561d886b11f44d8cea0bce9 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 5 Aug 2018 10:55:42 +0200 Subject: fetch_firefox_version.py : compatible with Python 3 and minor fixes. --- searx/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'searx/utils.py') diff --git a/searx/utils.py b/searx/utils.py index f457284e3..dfa22c5fc 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -54,8 +54,8 @@ def searx_useragent(): suffix=settings['outgoing'].get('useragent_suffix', '')) -def gen_useragent(): - return str(useragents['ua'].format(os=choice(useragents['os']), version=choice(useragents['versions']))) +def gen_useragent(os=None): + return str(useragents['ua'].format(os=os or choice(useragents['os']), version=choice(useragents['versions']))) def highlight_content(content, query): -- cgit v1.2.3