diff options
Diffstat (limited to 'searx/engines/__init__.py')
| -rw-r--r-- | searx/engines/__init__.py | 269 |
1 files changed, 269 insertions, 0 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py new file mode 100644 index 000000000..2393f52b6 --- /dev/null +++ b/searx/engines/__init__.py @@ -0,0 +1,269 @@ + +''' +searx is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +searx is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with searx. If not, see < http://www.gnu.org/licenses/ >. + +(C) 2013- by Adam Tauber, <asciimoo@gmail.com> +''' + +import sys +import threading +from os.path import realpath, dirname +from io import open +from babel.localedata import locale_identifiers +from flask_babel import gettext +from operator import itemgetter +from json import loads +from requests import get +from searx import settings +from searx import logger +from searx.utils import load_module, match_language, get_engine_from_settings + + +logger = logger.getChild('engines') + +engine_dir = dirname(realpath(__file__)) + +engines = {} + +categories = {'general': []} + +languages = loads(open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8').read()) +babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] + for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())] + +engine_shortcuts = {} +engine_default_args = {'paging': False, + 'categories': ['general'], + 'language_support': True, + 'supported_languages': [], + 'safesearch': False, + 'timeout': settings['outgoing']['request_timeout'], + 'shortcut': '-', + 'disabled': False, + 'suspend_end_time': 0, + 'continuous_errors': 0, + 'time_range_support': False, + 'offline': False} + + +def load_engine(engine_data): + engine_name = engine_data['name'] + if '_' in engine_name: + logger.error('Engine name contains underscore: "{}"'.format(engine_name)) + sys.exit(1) + + if engine_name.lower() != engine_name: + logger.warn('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name)) + engine_name = engine_name.lower() + engine_data['name'] = engine_name + + engine_module = engine_data['engine'] + + try: + engine = load_module(engine_module + '.py', engine_dir) + except: + logger.exception('Cannot load engine "{}"'.format(engine_module)) + return None + + for param_name in engine_data: + if param_name == 'engine': + continue + if param_name == 'categories': + if engine_data['categories'] == 'none': + engine.categories = [] + else: + engine.categories = list(map(str.strip, engine_data['categories'].split(','))) + continue + setattr(engine, param_name, engine_data[param_name]) + + for arg_name, arg_value in engine_default_args.items(): + if not hasattr(engine, arg_name): + setattr(engine, arg_name, arg_value) + + # checking required variables + for engine_attr in dir(engine): + if engine_attr.startswith('_'): + continue + if engine_attr == 'inactive' and getattr(engine, engine_attr) is True: + return None + if getattr(engine, engine_attr) is None: + logger.error('Missing engine config attribute: "{0}.{1}"' + .format(engine.name, engine_attr)) + sys.exit(1) + + # assign supported languages from json file + if engine_data['name'] in languages: + setattr(engine, 'supported_languages', languages[engine_data['name']]) + + # find custom aliases for non standard language codes + if hasattr(engine, 'supported_languages'): + if hasattr(engine, 'language_aliases'): + language_aliases = getattr(engine, 'language_aliases') + else: + language_aliases = {} + + for engine_lang in getattr(engine, 'supported_languages'): + iso_lang = match_language(engine_lang, babel_langs, fallback=None) + if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \ + iso_lang not in getattr(engine, 'supported_languages'): + language_aliases[iso_lang] = engine_lang + + setattr(engine, 'language_aliases', language_aliases) + + # assign language fetching method if auxiliary method exists + if hasattr(engine, '_fetch_supported_languages'): + setattr(engine, 'fetch_supported_languages', + lambda: engine._fetch_supported_languages(get(engine.supported_languages_url))) + + engine.stats = { + 'result_count': 0, + 'search_count': 0, + 'engine_time': 0, + 'engine_time_count': 0, + 'score_count': 0, + 'errors': 0 + } + + if not engine.offline: + engine.stats['page_load_time'] = 0 + engine.stats['page_load_count'] = 0 + + for category_name in engine.categories: + categories.setdefault(category_name, []).append(engine) + + if engine.shortcut in engine_shortcuts: + logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut)) + sys.exit(1) + + engine_shortcuts[engine.shortcut] = engine.name + + return engine + + +def to_percentage(stats, maxvalue): + for engine_stat in stats: + if maxvalue: + engine_stat['percentage'] = int(engine_stat['avg'] / maxvalue * 100) + else: + engine_stat['percentage'] = 0 + return stats + + +def get_engines_stats(): + # TODO refactor + pageloads = [] + engine_times = [] + results = [] + scores = [] + errors = [] + scores_per_result = [] + + max_pageload = max_engine_times = max_results = max_score = max_errors = max_score_per_result = 0 # noqa + for engine in engines.values(): + if engine.stats['search_count'] == 0: + continue + results_num = \ + engine.stats['result_count'] / float(engine.stats['search_count']) + + if engine.stats['engine_time_count'] != 0: + this_engine_time = engine.stats['engine_time'] / float(engine.stats['engine_time_count']) # noqa + else: + this_engine_time = 0 + + if results_num: + score = engine.stats['score_count'] / float(engine.stats['search_count']) # noqa + score_per_result = score / results_num + else: + score = score_per_result = 0.0 + + if not engine.offline: + load_times = 0 + if engine.stats['page_load_count'] != 0: + load_times = engine.stats['page_load_time'] / float(engine.stats['page_load_count']) # noqa + max_pageload = max(load_times, max_pageload) + pageloads.append({'avg': load_times, 'name': engine.name}) + + max_engine_times = max(this_engine_time, max_engine_times) + max_results = max(results_num, max_results) + max_score = max(score, max_score) + max_score_per_result = max(score_per_result, max_score_per_result) + max_errors = max(max_errors, engine.stats['errors']) + + engine_times.append({'avg': this_engine_time, 'name': engine.name}) + results.append({'avg': results_num, 'name': engine.name}) + scores.append({'avg': score, 'name': engine.name}) + errors.append({'avg': engine.stats['errors'], 'name': engine.name}) + scores_per_result.append({ + 'avg': score_per_result, + 'name': engine.name + }) + + pageloads = to_percentage(pageloads, max_pageload) + engine_times = to_percentage(engine_times, max_engine_times) + results = to_percentage(results, max_results) + scores = to_percentage(scores, max_score) + scores_per_result = to_percentage(scores_per_result, max_score_per_result) + erros = to_percentage(errors, max_errors) + + return [ + ( + gettext('Engine time (sec)'), + sorted(engine_times, key=itemgetter('avg')) + ), + ( + gettext('Page loads (sec)'), + sorted(pageloads, key=itemgetter('avg')) + ), + ( + gettext('Number of results'), + sorted(results, key=itemgetter('avg'), reverse=True) + ), + ( + gettext('Scores'), + sorted(scores, key=itemgetter('avg'), reverse=True) + ), + ( + gettext('Scores per result'), + sorted(scores_per_result, key=itemgetter('avg'), reverse=True) + ), + ( + gettext('Errors'), + sorted(errors, key=itemgetter('avg'), reverse=True) + ), + ] + + +def load_engines(engine_list): + global engines + engines.clear() + for engine_data in engine_list: + engine = load_engine(engine_data) + if engine is not None: + engines[engine.name] = engine + return engines + + +def initialize_engines(engine_list): + load_engines(engine_list) + + def engine_init(engine_name, init_fn): + init_fn(get_engine_from_settings(engine_name)) + logger.debug('%s engine: Initialized', engine_name) + + for engine_name, engine in engines.items(): + if hasattr(engine, 'init'): + init_fn = getattr(engine, 'init') + if init_fn: + logger.debug('%s engine: Starting background initialization', engine_name) + threading.Thread(target=engine_init, args=(engine_name, init_fn)).start() |