summary | refs | log | tree | commit | diff
path: root/searx/utils.py
diff options
context:
space:
mode:
author: Adam Tauber <asciimoo@gmail.com> 2018-08-19 13:22:22 +0200
committer: GitHub <noreply@github.com> 2018-08-19 13:22:22 +0200
commit: b75f1b6cc39a94989a74d52eb0f1267c3e3c665e (patch)
tree: d3bab81ca2071196b1b4223d6d2db7d408b79bf2 /searx/utils.py
parent: e7f7eda18cc69287f30c512a98b4e90453bcd8e7 (diff)
parent: 931c1bb0f663bc13998f5a78ae7cd9485d37453c (diff)
Merge branch 'master' into patch-2
Diffstat (limited to 'searx/utils.py')
-rw-r--r-- searx/utils.py 89
1 files changed, 70 insertions, 19 deletions
diff --git a/searx/utils.py b/searx/utils.py
index 8f095f3b0..dfa22c5fc 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -4,14 +4,18 @@ import hmac
import os
import re
+from babel.core import get_global
from babel.dates import format_date
from codecs import getincrementalencoder
from imp import load_source
from numbers import Number
from os.path import splitext, join
+from io import open
from random import choice
import sys
+import json
+from searx import settings
from searx.version import VERSION_STRING
from searx.languages import language_codes
from searx import settings
@@ -31,33 +35,17 @@ if sys.version_info[0] == 3:
unichr = chr
unicode = str
IS_PY2 = False
+ basestring = str
else:
IS_PY2 = True
logger = logger.getChild('utils')
-ua_versions = ('40.0',
- '41.0',
- '42.0',
- '43.0',
- '44.0',
- '45.0',
- '46.0',
- '47.0')
-
-ua_os = ('Windows NT 6.3; WOW64',
- 'X11; Linux x86_64',
- 'X11; Linux x86')
-
-ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
-
blocked_tags = ('script',
'style')
-
-def gen_useragent():
- # TODO
- return ua.format(os=choice(ua_os), version=choice(ua_versions))
+# Load the user-agent template data once, at import time.
+# The ``with`` block closes the file handle as soon as the JSON is parsed
+# instead of leaving it open until garbage collection.
+with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                       'data', 'useragents.json'),
+          'r', encoding='utf-8') as _useragents_file:
+    useragents = json.load(_useragents_file)
def searx_useragent():
@@ -66,6 +54,10 @@ def searx_useragent():
suffix=settings['outgoing'].get('useragent_suffix', ''))
+def gen_useragent(os=None):
+    """Return a randomly generated user-agent string.
+
+    The string is built from the ``useragents['ua']`` template.  ``os`` is
+    the operating-system token to insert; when it is falsy, one is picked at
+    random from ``useragents['os']``.  The version is always picked at random
+    from ``useragents['versions']``.
+    """
+    # NOTE: the ``os`` parameter shadows the ``os`` module inside this function.
+    platform = os or choice(useragents['os'])
+    version = choice(useragents['versions'])
+    user_agent = useragents['ua'].format(os=platform, version=version)
+    return str(user_agent)
+
+
def highlight_content(content, query):
if not content:
@@ -322,6 +314,65 @@ def is_valid_lang(lang):
return False
+# auxiliary function to match lang_code in lang_list
+def _match_language(lang_code, lang_list=[], custom_aliases={}):
+    """Return the entry of ``lang_list`` that matches ``lang_code``, or ``None``.
+
+    Attempts, in order:
+
+    1. ``lang_code`` itself, after replacing it through ``custom_aliases``;
+    2. the ``<first>-<last>`` code built from babel's ``likely_subtags``
+       entry for ``lang_code`` (also passed through ``custom_aliases``);
+    3. the first entry of ``lang_list`` whose part before ``'-'`` equals
+       ``lang_code``.
+
+    The default arguments are only read here, never mutated.
+    """
+    # replace language code with a custom alias if necessary
+    code = custom_aliases[lang_code] if lang_code in custom_aliases else lang_code
+    if code in lang_list:
+        return code
+
+    # try to get the most likely country for this language
+    likely = get_global('likely_subtags').get(code)
+    if likely:
+        pieces = likely.split('_')
+        candidate = '-'.join((pieces[0], pieces[-1]))
+        if candidate in custom_aliases:
+            candidate = custom_aliases[candidate]
+        if candidate in lang_list:
+            return candidate
+
+    # try to get any supported country for this language
+    return next((entry for entry in lang_list if entry.split('-')[0] == code), None)
+
+
+# get the language code from lang_list that best matches locale_code
+def match_language(locale_code, lang_list=[], custom_aliases={}, fallback='en-US'):
+    """Return the code from ``lang_list`` that best matches ``locale_code``.
+
+    Candidates are tried through ``_match_language`` in this order and the
+    first truthy result wins:
+
+    1. ``locale_code`` as given;
+    2. the language part joined with the first babel ``territory_aliases``
+       entry for the last part of ``locale_code`` (only when the code
+       contains a ``'-'``);
+    3. the babel ``language_aliases`` entry for the language part;
+    4. the language part alone, when it differs from ``locale_code``.
+
+    ``fallback`` is returned when nothing matches.
+    """
+    def lookup(code):
+        return _match_language(code, lang_list, custom_aliases)
+
+    # try to get language from given locale_code
+    found = lookup(locale_code)
+    if found:
+        return found
+
+    pieces = locale_code.split('-')
+    lang_code = pieces[0]
+
+    # try to get language using an equivalent country code
+    if len(pieces) > 1:
+        country_alias = get_global('territory_aliases').get(pieces[-1])
+        if country_alias:
+            found = lookup(lang_code + '-' + country_alias[0])
+            if found:
+                return found
+
+    # try to get language using an equivalent language code
+    language_alias = get_global('language_aliases').get(lang_code)
+    if language_alias:
+        found = lookup(language_alias)
+        if found:
+            return found
+
+    # try to get language from given language without giving the country
+    if lang_code != locale_code:
+        found = lookup(lang_code)
+
+    return found or fallback
+
+
def load_module(filename, module_dir):
modname = splitext(filename)[0]
if modname in sys.modules: