From 2dbc0de0cd000459ebfdb3d015be8684e737e95c Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 22 Sep 2020 13:59:27 +0200 Subject: [mod] add searx/webadapter.py * move searx.search.get_search_query_from_webapp to searx.webadapter * move searx.query.SearchQuery to searx.search --- searx/webadapter.py | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 searx/webadapter.py (limited to 'searx/webadapter.py') diff --git a/searx/webadapter.py b/searx/webadapter.py new file mode 100644 index 000000000..cad834bba --- /dev/null +++ b/searx/webadapter.py @@ -0,0 +1,162 @@ +from searx.exceptions import SearxParameterException +from searx.query import RawTextQuery, VALID_LANGUAGE_CODE +from searx.engines import categories, engines +from searx.search import SearchQuery + + +# remove duplicate queries. +# FIXME: does not fix "!music !soundcloud", because the categories are 'none' and 'music' +def deduplicate_query_engines(query_engines): + uniq_query_engines = {q["category"] + '|' + q["name"]: q for q in query_engines} + return uniq_query_engines.values() + + +def get_search_query_from_webapp(preferences, form): + # no text for the query ? + if not form.get('q'): + raise SearxParameterException('q', '') + + # set blocked engines + disabled_engines = preferences.engines.get_disabled() + + # parse query, if tags are set, which change + # the serch engine or search-language + raw_text_query = RawTextQuery(form['q'], disabled_engines) + + # set query + query = raw_text_query.getQuery() + + # get and check page number + pageno_param = form.get('pageno', '1') + if not pageno_param.isdigit() or int(pageno_param) < 1: + raise SearxParameterException('pageno', pageno_param) + query_pageno = int(pageno_param) + + # get language + # set specific language if set on request, query or preferences + # TODO support search with multible languages + if len(raw_text_query.languages): + query_lang = raw_text_query.languages[-1] + elif 'language' in form: + query_lang = form.get('language') + else: + query_lang = preferences.get_value('language') + + # check language + if not VALID_LANGUAGE_CODE.match(query_lang): + raise SearxParameterException('language', query_lang) + + # get safesearch + if 'safesearch' in form: + query_safesearch = form.get('safesearch') + # first check safesearch + if not query_safesearch.isdigit(): + raise SearxParameterException('safesearch', query_safesearch) + query_safesearch = int(query_safesearch) + else: + query_safesearch = preferences.get_value('safesearch') + + # safesearch : second check + if query_safesearch < 0 or query_safesearch > 2: + raise SearxParameterException('safesearch', query_safesearch) + + # get time_range + query_time_range = form.get('time_range') + + # check time_range + if query_time_range not in ('None', None, '', 'day', 'week', 'month', 'year'): + raise SearxParameterException('time_range', query_time_range) + + # query_engines + query_engines = raw_text_query.engines + + # timeout_limit + query_timeout = raw_text_query.timeout_limit + if query_timeout is None and 'timeout_limit' in form: + raw_time_limit = form.get('timeout_limit') + if raw_time_limit in ['None', '']: + raw_time_limit = None + else: + try: + query_timeout = float(raw_time_limit) + except ValueError: + raise SearxParameterException('timeout_limit', raw_time_limit) + + # query_categories + query_categories = [] + + # if engines are calculated from query, + # set categories by using that informations + if query_engines and raw_text_query.specific: + additional_categories = set() + for engine in query_engines: + if 'from_bang' in engine and engine['from_bang']: + additional_categories.add('none') + else: + additional_categories.add(engine['category']) + query_categories = list(additional_categories) + + # otherwise, using defined categories to + # calculate which engines should be used + else: + # set categories/engines + load_default_categories = True + for pd_name, pd in form.items(): + if pd_name == 'categories': + query_categories.extend(categ for categ in map(str.strip, pd.split(',')) if categ in categories) + elif pd_name == 'engines': + pd_engines = [{'category': engines[engine].categories[0], + 'name': engine} + for engine in map(str.strip, pd.split(',')) if engine in engines] + if pd_engines: + query_engines.extend(pd_engines) + load_default_categories = False + elif pd_name.startswith('category_'): + category = pd_name[9:] + + # if category is not found in list, skip + if category not in categories: + continue + + if pd != 'off': + # add category to list + query_categories.append(category) + elif category in query_categories: + # remove category from list if property is set to 'off' + query_categories.remove(category) + + if not load_default_categories: + if not query_categories: + query_categories = list(set(engine['category'] + for engine in query_engines)) + else: + # if no category is specified for this search, + # using user-defined default-configuration which + # (is stored in cookie) + if not query_categories: + cookie_categories = preferences.get_value('categories') + for ccateg in cookie_categories: + if ccateg in categories: + query_categories.append(ccateg) + + # if still no category is specified, using general + # as default-category + if not query_categories: + query_categories = ['general'] + + # using all engines for that search, which are + # declared under the specific categories + for categ in query_categories: + query_engines.extend({'category': categ, + 'name': engine.name} + for engine in categories[categ] + if (engine.name, categ) not in disabled_engines) + + query_engines = deduplicate_query_engines(query_engines) + external_bang = raw_text_query.external_bang + + return (SearchQuery(query, query_engines, query_categories, + query_lang, query_safesearch, query_pageno, + query_time_range, query_timeout, preferences, + external_bang=external_bang), + raw_text_query) -- cgit v1.2.3 From 2929495112e9869af24986b27066163572bc558a Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 22 Sep 2020 16:22:22 +0200 Subject: [mod] add searx.search.EngineRef was previously a Dict with two or three keys: name, category, from_bang make clear that this is a engine reference (see tests/unit/test_search.py for example) all variables using this class are renamed accordingly. --- searx/webadapter.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'searx/webadapter.py') diff --git a/searx/webadapter.py b/searx/webadapter.py index cad834bba..97379b17e 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -1,14 +1,14 @@ from searx.exceptions import SearxParameterException from searx.query import RawTextQuery, VALID_LANGUAGE_CODE from searx.engines import categories, engines -from searx.search import SearchQuery +from searx.search import SearchQuery, EngineRef # remove duplicate queries. # FIXME: does not fix "!music !soundcloud", because the categories are 'none' and 'music' -def deduplicate_query_engines(query_engines): - uniq_query_engines = {q["category"] + '|' + q["name"]: q for q in query_engines} - return uniq_query_engines.values() +def deduplicate_engineref_list(engineref_list): + engineref_dict = {q.category + '|' + q.name: q for q in engineref_list} + return engineref_dict.values() def get_search_query_from_webapp(preferences, form): @@ -68,7 +68,7 @@ def get_search_query_from_webapp(preferences, form): raise SearxParameterException('time_range', query_time_range) # query_engines - query_engines = raw_text_query.engines + query_engineref_list = raw_text_query.enginerefs # timeout_limit query_timeout = raw_text_query.timeout_limit @@ -87,13 +87,13 @@ def get_search_query_from_webapp(preferences, form): # if engines are calculated from query, # set categories by using that informations - if query_engines and raw_text_query.specific: + if query_engineref_list and raw_text_query.specific: additional_categories = set() - for engine in query_engines: - if 'from_bang' in engine and engine['from_bang']: + for engineref in query_engineref_list: + if engineref.from_bang: additional_categories.add('none') else: - additional_categories.add(engine['category']) + additional_categories.add(engineref.category) query_categories = list(additional_categories) # otherwise, using defined categories to @@ -105,11 +105,10 @@ def get_search_query_from_webapp(preferences, form): if pd_name == 'categories': query_categories.extend(categ for categ in map(str.strip, pd.split(',')) if categ in categories) elif pd_name == 'engines': - pd_engines = [{'category': engines[engine].categories[0], - 'name': engine} + pd_engines = [EngineRef(engineref, engines[engineref].categories[0]) for engine in map(str.strip, pd.split(',')) if engine in engines] if pd_engines: - query_engines.extend(pd_engines) + query_engineref_list.extend(pd_engines) load_default_categories = False elif pd_name.startswith('category_'): category = pd_name[9:] @@ -128,7 +127,7 @@ def get_search_query_from_webapp(preferences, form): if not load_default_categories: if not query_categories: query_categories = list(set(engine['category'] - for engine in query_engines)) + for engine in query_engineref_list)) else: # if no category is specified for this search, # using user-defined default-configuration which @@ -147,15 +146,14 @@ def get_search_query_from_webapp(preferences, form): # using all engines for that search, which are # declared under the specific categories for categ in query_categories: - query_engines.extend({'category': categ, - 'name': engine.name} - for engine in categories[categ] - if (engine.name, categ) not in disabled_engines) + query_engineref_list.extend(EngineRef(engine.name, categ) + for engine in categories[categ] + if (engine.name, categ) not in disabled_engines) - query_engines = deduplicate_query_engines(query_engines) + query_engineref_list = deduplicate_engineref_list(query_engineref_list) external_bang = raw_text_query.external_bang - return (SearchQuery(query, query_engines, query_categories, + return (SearchQuery(query, query_engineref_list, query_categories, query_lang, query_safesearch, query_pageno, query_time_range, query_timeout, preferences, external_bang=external_bang), -- cgit v1.2.3 From eecfff268913045f957c0fceb2a1caf56f236a1f Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 22 Sep 2020 16:31:17 +0200 Subject: [mod] check time_range parameter in searx/webadapter.py --- searx/webadapter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'searx/webadapter.py') diff --git a/searx/webadapter.py b/searx/webadapter.py index 97379b17e..0567c8d34 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -64,7 +64,8 @@ def get_search_query_from_webapp(preferences, form): query_time_range = form.get('time_range') # check time_range - if query_time_range not in ('None', None, '', 'day', 'week', 'month', 'year'): + query_time_range = None if query_time_range in ('', 'None') else query_time_range + if query_time_range not in (None, 'day', 'week', 'month', 'year'): raise SearxParameterException('time_range', query_time_range) # query_engines -- cgit v1.2.3 From 691d12726be1411d8c80895e927293bdcc3e9572 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 22 Sep 2020 16:55:59 +0200 Subject: [mod] check the engine tokens in searx/webadapter.py instead of searx/search.py --- searx/webadapter.py | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) (limited to 'searx/webadapter.py') diff --git a/searx/webadapter.py b/searx/webadapter.py index 0567c8d34..667d44c86 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -11,6 +11,31 @@ def deduplicate_engineref_list(engineref_list): return engineref_dict.values() +def validate_engineref_list(engineref_list, preferences): + """ + Validate query_engines according to the preferences + Returns: + list of existing engines with a validated token + list of unknown engine + list of engine with invalid token according to the preferences + """ + valid = [] + unknown = [] + no_token = [] + for engineref in engineref_list: + if engineref.name not in engines: + unknown.append(engineref) + continue + + engine = engines[engineref.name] + if not preferences.validate_token(engine): + no_token.append(engineref) + continue + + valid.append(engineref) + return valid, unknown, no_token + + def get_search_query_from_webapp(preferences, form): # no text for the query ? if not form.get('q'): @@ -152,10 +177,14 @@ def get_search_query_from_webapp(preferences, form): if (engine.name, categ) not in disabled_engines) query_engineref_list = deduplicate_engineref_list(query_engineref_list) + query_engineref_list, query_engineref_list_unknown, query_engineref_list_notoken =\ + validate_engineref_list(query_engineref_list, preferences) external_bang = raw_text_query.external_bang return (SearchQuery(query, query_engineref_list, query_categories, query_lang, query_safesearch, query_pageno, - query_time_range, query_timeout, preferences, + query_time_range, query_timeout, external_bang=external_bang), - raw_text_query) + raw_text_query, + query_engineref_list_unknown, + query_engineref_list_notoken) -- cgit v1.2.3 From d5b5e48f04e2e156cb975c136ec4b179b170509a Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 22 Sep 2020 17:45:32 +0200 Subject: [mod] searx/webadapter.py: each web parameter has a dedicated function (parse_pageno, page_lang,....). --- searx/webadapter.py | 64 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 25 deletions(-) (limited to 'searx/webadapter.py') diff --git a/searx/webadapter.py b/searx/webadapter.py index 667d44c86..eed320a22 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -36,27 +36,14 @@ def validate_engineref_list(engineref_list, preferences): return valid, unknown, no_token -def get_search_query_from_webapp(preferences, form): - # no text for the query ? - if not form.get('q'): - raise SearxParameterException('q', '') - - # set blocked engines - disabled_engines = preferences.engines.get_disabled() - - # parse query, if tags are set, which change - # the serch engine or search-language - raw_text_query = RawTextQuery(form['q'], disabled_engines) - - # set query - query = raw_text_query.getQuery() - - # get and check page number +def parse_pageno(form): pageno_param = form.get('pageno', '1') if not pageno_param.isdigit() or int(pageno_param) < 1: raise SearxParameterException('pageno', pageno_param) - query_pageno = int(pageno_param) + return int(pageno_param) + +def parse_lang(raw_text_query, form, preferences): # get language # set specific language if set on request, query or preferences # TODO support search with multible languages @@ -71,7 +58,10 @@ def get_search_query_from_webapp(preferences, form): if not VALID_LANGUAGE_CODE.match(query_lang): raise SearxParameterException('language', query_lang) - # get safesearch + return query_lang + + +def parse_safesearch(form, preferences): if 'safesearch' in form: query_safesearch = form.get('safesearch') # first check safesearch @@ -85,30 +75,54 @@ def get_search_query_from_webapp(preferences, form): if query_safesearch < 0 or query_safesearch > 2: raise SearxParameterException('safesearch', query_safesearch) - # get time_range - query_time_range = form.get('time_range') + return query_safesearch + +def parse_time_range(form): + query_time_range = form.get('time_range') # check time_range query_time_range = None if query_time_range in ('', 'None') else query_time_range if query_time_range not in (None, 'day', 'week', 'month', 'year'): raise SearxParameterException('time_range', query_time_range) + return query_time_range - # query_engines - query_engineref_list = raw_text_query.enginerefs - # timeout_limit +def parse_timeout(raw_text_query, form): query_timeout = raw_text_query.timeout_limit if query_timeout is None and 'timeout_limit' in form: raw_time_limit = form.get('timeout_limit') if raw_time_limit in ['None', '']: - raw_time_limit = None + return None else: try: - query_timeout = float(raw_time_limit) + return float(raw_time_limit) except ValueError: raise SearxParameterException('timeout_limit', raw_time_limit) + +def get_search_query_from_webapp(preferences, form): + # no text for the query ? + if not form.get('q'): + raise SearxParameterException('q', '') + + # set blocked engines + disabled_engines = preferences.engines.get_disabled() + + # parse query, if tags are set, which change + # the serch engine or search-language + raw_text_query = RawTextQuery(form['q'], disabled_engines) + + # set query + query = raw_text_query.getQuery() + query_pageno = parse_pageno(form) + query_lang = parse_lang(raw_text_query, form, preferences) + query_safesearch = parse_safesearch(form, preferences) + query_time_range = parse_time_range(form) + query_timeout = parse_timeout(raw_text_query, form) + external_bang = raw_text_query.external_bang + # query_categories + query_engineref_list = raw_text_query.enginerefs query_categories = [] # if engines are calculated from query, -- cgit v1.2.3 From 678699beaf97184a9059b0c6514b1a51c7e6dfd0 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 22 Sep 2020 18:03:42 +0200 Subject: [mod] searx/webadapter.py: add get_selected_categories share common code with get_search_query_from_webapp Update searx/webapp.py to use get_selected_categories Close #2142 --- searx/webadapter.py | 162 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 94 insertions(+), 68 deletions(-) (limited to 'searx/webadapter.py') diff --git a/searx/webadapter.py b/searx/webadapter.py index eed320a22..982517eaa 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -100,6 +100,93 @@ def parse_timeout(raw_text_query, form): raise SearxParameterException('timeout_limit', raw_time_limit) +def parse_specific(raw_text_query: RawTextQuery): + query_engineref_list = raw_text_query.enginerefs + additional_categories = set() + for engineref in raw_text_query.enginerefs: + if engineref.from_bang: + additional_categories.add('none') + else: + additional_categories.add(engineref.category) + query_categories = list(additional_categories) + return query_engineref_list, query_categories + + +def parse_category_form(query_categories, name, value): + if name == 'categories': + query_categories.extend(categ for categ in map(str.strip, value.split(',')) if categ in categories) + elif name.startswith('category_'): + category = name[9:] + + # if category is not found in list, skip + if category not in categories: + return + + if value != 'off': + # add category to list + query_categories.append(category) + elif category in query_categories: + # remove category from list if property is set to 'off' + query_categories.remove(category) + + +def get_selected_categories(form, preferences): + selected_categories = [] + + if form is not None: + for name, value in form.items(): + parse_category_form(selected_categories, name, value) + + # if no category is specified for this search, + # using user-defined default-configuration which + # (is stored in cookie) + if not selected_categories: + cookie_categories = preferences.get_value('categories') + for ccateg in cookie_categories: + selected_categories.append(ccateg) + + # if still no category is specified, using general + # as default-category + if not selected_categories: + selected_categories = ['general'] + + return selected_categories + + +def parse_generic(form, preferences, disabled_engines): + query_engineref_list = [] + query_categories = [] + + # set categories/engines + load_default_categories = True + for pd_name, pd in form.items(): + if pd_name == 'engines': + pd_engines = [EngineRef(engine_name, engines[engine_name].categories[0]) + for engine_name in map(str.strip, pd.split(',')) if engine_name in engines] + if pd_engines: + query_engineref_list.extend(pd_engines) + load_default_categories = False + else: + parse_category_form(query_categories, pd_name, pd) + + if not load_default_categories: + if not query_categories: + query_categories = list(set(engine['category'] + for engine in query_engineref_list)) + else: + if not query_categories: + query_categories = get_selected_categories(None, preferences) + + # using all engines for that search, which are + # declared under the specific categories + for categ in query_categories: + query_engineref_list.extend(EngineRef(engine.name, categ) + for engine in categories[categ] + if (engine.name, categ) not in disabled_engines) + + return query_engineref_list, query_categories + + def get_search_query_from_webapp(preferences, form): # no text for the query ? if not form.get('q'): @@ -121,79 +208,18 @@ def get_search_query_from_webapp(preferences, form): query_timeout = parse_timeout(raw_text_query, form) external_bang = raw_text_query.external_bang - # query_categories - query_engineref_list = raw_text_query.enginerefs - query_categories = [] - - # if engines are calculated from query, - # set categories by using that informations - if query_engineref_list and raw_text_query.specific: - additional_categories = set() - for engineref in query_engineref_list: - if engineref.from_bang: - additional_categories.add('none') - else: - additional_categories.add(engineref.category) - query_categories = list(additional_categories) - - # otherwise, using defined categories to - # calculate which engines should be used + if raw_text_query.enginerefs and raw_text_query.specific: + # if engines are calculated from query, + # set categories by using that informations + query_engineref_list, query_categories = parse_specific(raw_text_query) else: - # set categories/engines - load_default_categories = True - for pd_name, pd in form.items(): - if pd_name == 'categories': - query_categories.extend(categ for categ in map(str.strip, pd.split(',')) if categ in categories) - elif pd_name == 'engines': - pd_engines = [EngineRef(engineref, engines[engineref].categories[0]) - for engine in map(str.strip, pd.split(',')) if engine in engines] - if pd_engines: - query_engineref_list.extend(pd_engines) - load_default_categories = False - elif pd_name.startswith('category_'): - category = pd_name[9:] - - # if category is not found in list, skip - if category not in categories: - continue - - if pd != 'off': - # add category to list - query_categories.append(category) - elif category in query_categories: - # remove category from list if property is set to 'off' - query_categories.remove(category) - - if not load_default_categories: - if not query_categories: - query_categories = list(set(engine['category'] - for engine in query_engineref_list)) - else: - # if no category is specified for this search, - # using user-defined default-configuration which - # (is stored in cookie) - if not query_categories: - cookie_categories = preferences.get_value('categories') - for ccateg in cookie_categories: - if ccateg in categories: - query_categories.append(ccateg) - - # if still no category is specified, using general - # as default-category - if not query_categories: - query_categories = ['general'] - - # using all engines for that search, which are - # declared under the specific categories - for categ in query_categories: - query_engineref_list.extend(EngineRef(engine.name, categ) - for engine in categories[categ] - if (engine.name, categ) not in disabled_engines) + # otherwise, using defined categories to + # calculate which engines should be used + query_engineref_list, query_categories = parse_generic(form, preferences, disabled_engines) query_engineref_list = deduplicate_engineref_list(query_engineref_list) query_engineref_list, query_engineref_list_unknown, query_engineref_list_notoken =\ validate_engineref_list(query_engineref_list, preferences) - external_bang = raw_text_query.external_bang return (SearchQuery(query, query_engineref_list, query_categories, query_lang, query_safesearch, query_pageno, -- cgit v1.2.3 From 485a502b886bc7fb8a806ffc576b0eec99990a5c Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Thu, 10 Sep 2020 18:08:14 +0200 Subject: [mod] add typing and __slots__ --- searx/webadapter.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'searx/webadapter.py') diff --git a/searx/webadapter.py b/searx/webadapter.py index 982517eaa..1ec84bbdc 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -1,19 +1,22 @@ +import typing from searx.exceptions import SearxParameterException from searx.query import RawTextQuery, VALID_LANGUAGE_CODE from searx.engines import categories, engines from searx.search import SearchQuery, EngineRef +from searx.preferences import Preferences # remove duplicate queries. # FIXME: does not fix "!music !soundcloud", because the categories are 'none' and 'music' -def deduplicate_engineref_list(engineref_list): +def deduplicate_engineref_list(engineref_list: typing.List[EngineRef]) -> typing.List[EngineRef]: engineref_dict = {q.category + '|' + q.name: q for q in engineref_list} return engineref_dict.values() -def validate_engineref_list(engineref_list, preferences): +def validate_engineref_list(engineref_list: typing.List[EngineRef], preferences: Preferences): """ Validate query_engines according to the preferences + Returns: list of existing engines with a validated token list of unknown engine @@ -36,14 +39,14 @@ def validate_engineref_list(engineref_list, preferences): return valid, unknown, no_token -def parse_pageno(form): +def parse_pageno(form: typing.Dict[str, str]) -> int: pageno_param = form.get('pageno', '1') if not pageno_param.isdigit() or int(pageno_param) < 1: raise SearxParameterException('pageno', pageno_param) return int(pageno_param) -def parse_lang(raw_text_query, form, preferences): +def parse_lang(raw_text_query: RawTextQuery, form: typing.Dict[str, str], preferences: Preferences) -> str: # get language # set specific language if set on request, query or preferences # TODO support search with multible languages @@ -61,7 +64,7 @@ def parse_lang(raw_text_query, form, preferences): return query_lang -def parse_safesearch(form, preferences): +def parse_safesearch(form: typing.Dict[str, str], preferences: Preferences) -> int: if 'safesearch' in form: query_safesearch = form.get('safesearch') # first check safesearch @@ -78,7 +81,7 @@ def parse_safesearch(form, preferences): return query_safesearch -def parse_time_range(form): +def parse_time_range(form: typing.Dict[str, str]) -> str: query_time_range = form.get('time_range') # check time_range query_time_range = None if query_time_range in ('', 'None') else query_time_range @@ -87,7 +90,7 @@ def parse_time_range(form): return query_time_range -def parse_timeout(raw_text_query, form): +def parse_timeout(raw_text_query: RawTextQuery, form: typing.Dict[str, str]) -> typing.Optional[float]: query_timeout = raw_text_query.timeout_limit if query_timeout is None and 'timeout_limit' in form: raw_time_limit = form.get('timeout_limit') @@ -187,7 +190,8 @@ def parse_generic(form, preferences, disabled_engines): return query_engineref_list, query_categories -def get_search_query_from_webapp(preferences, form): +def get_search_query_from_webapp(preferences: Preferences, form: typing.Dict[str, str])\ + -> typing.Tuple[SearchQuery, RawTextQuery, typing.List[EngineRef], typing.List[EngineRef]]: # no text for the query ? if not form.get('q'): raise SearxParameterException('q', '') -- cgit v1.2.3 From f2f3300bde0cc304f80809ff766fc557b5486098 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Thu, 24 Sep 2020 16:26:00 +0200 Subject: [mod] more typing --- searx/webadapter.py | 51 ++++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 25 deletions(-) (limited to 'searx/webadapter.py') diff --git a/searx/webadapter.py b/searx/webadapter.py index 1ec84bbdc..835cf2276 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -1,4 +1,4 @@ -import typing +from typing import Dict, List, Optional, Tuple from searx.exceptions import SearxParameterException from searx.query import RawTextQuery, VALID_LANGUAGE_CODE from searx.engines import categories, engines @@ -8,19 +8,19 @@ from searx.preferences import Preferences # remove duplicate queries. # FIXME: does not fix "!music !soundcloud", because the categories are 'none' and 'music' -def deduplicate_engineref_list(engineref_list: typing.List[EngineRef]) -> typing.List[EngineRef]: +def deduplicate_engineref_list(engineref_list: List[EngineRef]) -> List[EngineRef]: engineref_dict = {q.category + '|' + q.name: q for q in engineref_list} return engineref_dict.values() -def validate_engineref_list(engineref_list: typing.List[EngineRef], preferences: Preferences): - """ - Validate query_engines according to the preferences +def validate_engineref_list(engineref_list: List[EngineRef], preferences: Preferences)\ + -> Tuple[List[EngineRef], List[EngineRef], List[EngineRef]]: + """Validate query_engines according to the preferences - Returns: - list of existing engines with a validated token - list of unknown engine - list of engine with invalid token according to the preferences + Returns: + List[EngineRef]: list of existing engines with a validated token + List[EngineRef]: list of unknown engine + List[EngineRef]: list of engine with invalid token according to the preferences """ valid = [] unknown = [] @@ -39,14 +39,14 @@ def validate_engineref_list(engineref_list: typing.List[EngineRef], preferences: return valid, unknown, no_token -def parse_pageno(form: typing.Dict[str, str]) -> int: +def parse_pageno(form: Dict[str, str]) -> int: pageno_param = form.get('pageno', '1') if not pageno_param.isdigit() or int(pageno_param) < 1: raise SearxParameterException('pageno', pageno_param) return int(pageno_param) -def parse_lang(raw_text_query: RawTextQuery, form: typing.Dict[str, str], preferences: Preferences) -> str: +def parse_lang(preferences: Preferences, form: Dict[str, str], raw_text_query: RawTextQuery) -> str: # get language # set specific language if set on request, query or preferences # TODO support search with multible languages @@ -64,7 +64,7 @@ def parse_lang(raw_text_query: RawTextQuery, form: typing.Dict[str, str], prefer return query_lang -def parse_safesearch(form: typing.Dict[str, str], preferences: Preferences) -> int: +def parse_safesearch(preferences: Preferences, form: Dict[str, str]) -> int: if 'safesearch' in form: query_safesearch = form.get('safesearch') # first check safesearch @@ -81,7 +81,7 @@ def parse_safesearch(form: typing.Dict[str, str], preferences: Preferences) -> i return query_safesearch -def parse_time_range(form: typing.Dict[str, str]) -> str: +def parse_time_range(form: Dict[str, str]) -> str: query_time_range = form.get('time_range') # check time_range query_time_range = None if query_time_range in ('', 'None') else query_time_range @@ -90,7 +90,7 @@ def parse_time_range(form: typing.Dict[str, str]) -> str: return query_time_range -def parse_timeout(raw_text_query: RawTextQuery, form: typing.Dict[str, str]) -> typing.Optional[float]: +def parse_timeout(form: Dict[str, str], raw_text_query: RawTextQuery) -> Optional[float]: query_timeout = raw_text_query.timeout_limit if query_timeout is None and 'timeout_limit' in form: raw_time_limit = form.get('timeout_limit') @@ -103,7 +103,7 @@ def parse_timeout(raw_text_query: RawTextQuery, form: typing.Dict[str, str]) -> raise SearxParameterException('timeout_limit', raw_time_limit) -def parse_specific(raw_text_query: RawTextQuery): +def parse_specific(raw_text_query: RawTextQuery) -> Tuple[List[EngineRef], List[str]]: query_engineref_list = raw_text_query.enginerefs additional_categories = set() for engineref in raw_text_query.enginerefs: @@ -115,7 +115,7 @@ def parse_specific(raw_text_query: RawTextQuery): return query_engineref_list, query_categories -def parse_category_form(query_categories, name, value): +def parse_category_form(query_categories: List[str], name: str, value: str) -> None: if name == 'categories': query_categories.extend(categ for categ in map(str.strip, value.split(',')) if categ in categories) elif name.startswith('category_'): @@ -133,7 +133,7 @@ def parse_category_form(query_categories, name, value): query_categories.remove(category) -def get_selected_categories(form, preferences): +def get_selected_categories(preferences: Preferences, form: Dict[str, str]) -> List[str]: selected_categories = [] if form is not None: @@ -156,7 +156,8 @@ def get_selected_categories(form, preferences): return selected_categories -def parse_generic(form, preferences, disabled_engines): +def parse_generic(preferences: Preferences, form: Dict[str, str], disabled_engines: List[str])\ + -> Tuple[List[EngineRef], List[str]]: query_engineref_list = [] query_categories = [] @@ -178,7 +179,7 @@ def parse_generic(form, preferences, disabled_engines): for engine in query_engineref_list)) else: if not query_categories: - query_categories = get_selected_categories(None, preferences) + query_categories = get_selected_categories(preferences, None) # using all engines for that search, which are # declared under the specific categories @@ -190,8 +191,8 @@ def parse_generic(form, preferences, disabled_engines): return query_engineref_list, query_categories -def get_search_query_from_webapp(preferences: Preferences, form: typing.Dict[str, str])\ - -> typing.Tuple[SearchQuery, RawTextQuery, typing.List[EngineRef], typing.List[EngineRef]]: +def get_search_query_from_webapp(preferences: Preferences, form: Dict[str, str])\ + -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef]]: # no text for the query ? if not form.get('q'): raise SearxParameterException('q', '') @@ -206,10 +207,10 @@ def get_search_query_from_webapp(preferences: Preferences, form: typing.Dict[str # set query query = raw_text_query.getQuery() query_pageno = parse_pageno(form) - query_lang = parse_lang(raw_text_query, form, preferences) - query_safesearch = parse_safesearch(form, preferences) + query_lang = parse_lang(preferences, form, raw_text_query) + query_safesearch = parse_safesearch(preferences, form) query_time_range = parse_time_range(form) - query_timeout = parse_timeout(raw_text_query, form) + query_timeout = parse_timeout(form, raw_text_query) external_bang = raw_text_query.external_bang if raw_text_query.enginerefs and raw_text_query.specific: @@ -219,7 +220,7 @@ def get_search_query_from_webapp(preferences: Preferences, form: typing.Dict[str else: # otherwise, using defined categories to # calculate which engines should be used - query_engineref_list, query_categories = parse_generic(form, preferences, disabled_engines) + query_engineref_list, query_categories = parse_generic(preferences, form, disabled_engines) query_engineref_list = deduplicate_engineref_list(query_engineref_list) query_engineref_list, query_engineref_list_unknown, query_engineref_list_notoken =\ -- cgit v1.2.3