diff options
Diffstat (limited to 'searx/search.py')
| -rw-r--r-- | searx/search.py | 237 |
1 files changed, 58 insertions, 179 deletions
diff --git a/searx/search.py b/searx/search.py index 96fffd569..3dfdd7213 100644 --- a/searx/search.py +++ b/searx/search.py @@ -15,27 +15,22 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2013- by Adam Tauber, <asciimoo@gmail.com> ''' +import typing import gc -import sys import threading from time import time from uuid import uuid4 from _thread import start_new_thread -from flask_babel import gettext import requests.exceptions import searx.poolrequests as requests_lib -from searx.engines import ( - categories, engines, settings -) +from searx.engines import engines, settings from searx.answerers import ask from searx.external_bang import get_bang_url from searx.utils import gen_useragent -from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE from searx.results import ResultContainer from searx import logger from searx.plugins import plugins -from searx.exceptions import SearxParameterException logger = logger.getChild('search') @@ -53,6 +48,49 @@ else: exit(1) +class EngineRef: + + __slots__ = 'name', 'category', 'from_bang' + + def __init__(self, name: str, category: str, from_bang: bool=False): + self.name = name + self.category = category + self.from_bang = from_bang + + def __str__(self): + return "(" + self.name + "," + self.category + "," + str(self.from_bang) + ")" + + +class SearchQuery: + """container for all the search parameters (query, language, etc...)""" + + __slots__ = 'query', 'engineref_list', 'categories', 'lang', 'safesearch', 'pageno', 'time_range',\ + 'timeout_limit', 'external_bang' + + def __init__(self, + query: str, + engineref_list: typing.List[EngineRef], + categories: typing.List[str], + lang: str, + safesearch: bool, + pageno: int, + time_range: typing.Optional[str], + timeout_limit: typing.Optional[float]=None, + external_bang: typing.Optional[str]=False): + self.query = query + self.engineref_list = engineref_list + self.categories = categories + self.lang = lang + self.safesearch = safesearch + self.pageno = pageno + self.time_range = time_range + self.timeout_limit = timeout_limit + self.external_bang = external_bang + + def __str__(self): + return self.query + ";" + str(self.engineref_list) + + def send_http_request(engine, request_params): # create dictionary which contain all # informations about the request @@ -247,167 +285,11 @@ def default_request_params(): } -# remove duplicate queries. -# FIXME: does not fix "!music !soundcloud", because the categories are 'none' and 'music' -def deduplicate_query_engines(query_engines): - uniq_query_engines = {q["category"] + '|' + q["name"]: q for q in query_engines} - return uniq_query_engines.values() - - -def get_search_query_from_webapp(preferences, form): - # no text for the query ? - if not form.get('q'): - raise SearxParameterException('q', '') - - # set blocked engines - disabled_engines = preferences.engines.get_disabled() - - # parse query, if tags are set, which change - # the serch engine or search-language - raw_text_query = RawTextQuery(form['q'], disabled_engines) - - # set query - query = raw_text_query.getQuery() - - # get and check page number - pageno_param = form.get('pageno', '1') - if not pageno_param.isdigit() or int(pageno_param) < 1: - raise SearxParameterException('pageno', pageno_param) - query_pageno = int(pageno_param) - - # get language - # set specific language if set on request, query or preferences - # TODO support search with multible languages - if len(raw_text_query.languages): - query_lang = raw_text_query.languages[-1] - elif 'language' in form: - query_lang = form.get('language') - else: - query_lang = preferences.get_value('language') - - # check language - if not VALID_LANGUAGE_CODE.match(query_lang): - raise SearxParameterException('language', query_lang) - - # get safesearch - if 'safesearch' in form: - query_safesearch = form.get('safesearch') - # first check safesearch - if not query_safesearch.isdigit(): - raise SearxParameterException('safesearch', query_safesearch) - query_safesearch = int(query_safesearch) - else: - query_safesearch = preferences.get_value('safesearch') - - # safesearch : second check - if query_safesearch < 0 or query_safesearch > 2: - raise SearxParameterException('safesearch', query_safesearch) - - # get time_range - query_time_range = form.get('time_range') - - # check time_range - if query_time_range not in ('None', None, '', 'day', 'week', 'month', 'year'): - raise SearxParameterException('time_range', query_time_range) - - # query_engines - query_engines = raw_text_query.engines - - # timeout_limit - query_timeout = raw_text_query.timeout_limit - if query_timeout is None and 'timeout_limit' in form: - raw_time_limit = form.get('timeout_limit') - if raw_time_limit in ['None', '']: - raw_time_limit = None - else: - try: - query_timeout = float(raw_time_limit) - except ValueError: - raise SearxParameterException('timeout_limit', raw_time_limit) - - # query_categories - query_categories = [] - - # if engines are calculated from query, - # set categories by using that informations - if query_engines and raw_text_query.specific: - additional_categories = set() - for engine in query_engines: - if 'from_bang' in engine and engine['from_bang']: - additional_categories.add('none') - else: - additional_categories.add(engine['category']) - query_categories = list(additional_categories) - - # otherwise, using defined categories to - # calculate which engines should be used - else: - # set categories/engines - load_default_categories = True - for pd_name, pd in form.items(): - if pd_name == 'categories': - query_categories.extend(categ for categ in map(str.strip, pd.split(',')) if categ in categories) - elif pd_name == 'engines': - pd_engines = [{'category': engines[engine].categories[0], - 'name': engine} - for engine in map(str.strip, pd.split(',')) if engine in engines] - if pd_engines: - query_engines.extend(pd_engines) - load_default_categories = False - elif pd_name.startswith('category_'): - category = pd_name[9:] - - # if category is not found in list, skip - if category not in categories: - continue - - if pd != 'off': - # add category to list - query_categories.append(category) - elif category in query_categories: - # remove category from list if property is set to 'off' - query_categories.remove(category) - - if not load_default_categories: - if not query_categories: - query_categories = list(set(engine['category'] - for engine in query_engines)) - else: - # if no category is specified for this search, - # using user-defined default-configuration which - # (is stored in cookie) - if not query_categories: - cookie_categories = preferences.get_value('categories') - for ccateg in cookie_categories: - if ccateg in categories: - query_categories.append(ccateg) - - # if still no category is specified, using general - # as default-category - if not query_categories: - query_categories = ['general'] - - # using all engines for that search, which are - # declared under the specific categories - for categ in query_categories: - query_engines.extend({'category': categ, - 'name': engine.name} - for engine in categories[categ] - if (engine.name, categ) not in disabled_engines) - - query_engines = deduplicate_query_engines(query_engines) - external_bang = raw_text_query.external_bang - - return (SearchQuery(query, query_engines, query_categories, - query_lang, query_safesearch, query_pageno, - query_time_range, query_timeout, preferences, - external_bang=external_bang), - raw_text_query) - - class Search: """Search information container""" + __slots__ = "search_query", "result_container", "start_time", "actual_timeout" + def __init__(self, search_query): # init vars super().__init__() @@ -444,9 +326,6 @@ class Search: return False def _is_accepted(self, engine_name, engine): - if not self.search_query.preferences.validate_token(engine): - return False - # skip suspended engines if engine.suspend_end_time >= time(): logger.debug('Engine currently suspended: %s', engine_name) @@ -462,13 +341,13 @@ class Search: return True - def _get_params(self, selected_engine, user_agent): - if selected_engine['name'] not in engines: + def _get_params(self, engineref, user_agent): + if engineref.name not in engines: return None, None - engine = engines[selected_engine['name']] + engine = engines[engineref.name] - if not self._is_accepted(selected_engine['name'], engine): + if not self._is_accepted(engineref.name, engine): return None, None # set default request parameters @@ -485,15 +364,13 @@ class Search: request_params['safesearch'] = self.search_query.safesearch request_params['time_range'] = self.search_query.time_range - request_params['category'] = selected_engine['category'] + request_params['category'] = engineref.category request_params['pageno'] = self.search_query.pageno return request_params, engine.timeout # do search-request def _get_requests(self): - global number_of_searches - # init vars requests = [] @@ -505,14 +382,14 @@ class Search: default_timeout = 0 # start search-reqest for all selected engines - for selected_engine in self.search_query.engines: + for engineref in self.search_query.engineref_list: # set default request parameters - request_params, engine_timeout = self._get_params(selected_engine, user_agent) + request_params, engine_timeout = self._get_params(engineref, user_agent) if request_params is None: continue # append request to list - requests.append((selected_engine['name'], self.search_query.query, request_params)) + requests.append((engineref.name, self.search_query.query, request_params)) # update default_timeout default_timeout = max(default_timeout, engine_timeout) @@ -535,7 +412,7 @@ class Search: actual_timeout = min(query_timeout, max_request_timeout) logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})" - .format(self.actual_timeout, default_timeout, query_timeout, max_request_timeout)) + .format(actual_timeout, default_timeout, query_timeout, max_request_timeout)) return requests, actual_timeout @@ -567,6 +444,8 @@ class Search: class SearchWithPlugins(Search): """Similar to the Search class but call the plugins.""" + __slots__ = 'ordered_plugin_list', 'request' + def __init__(self, search_query, ordered_plugin_list, request): super().__init__(search_query) self.ordered_plugin_list = ordered_plugin_list |