diff options
Diffstat (limited to 'searx/engines')
27 files changed, 64 insertions, 171 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index a3dd7a95a..b762c0dd9 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -13,6 +13,7 @@ usage:: import sys import copy +from typing import Dict, List, Optional from os.path import realpath, dirname from babel.localedata import locale_identifiers @@ -43,11 +44,34 @@ ENGINE_DEFAULT_ARGS = { "enable_http": False, "display_error_messages": True, "tokens": [], + "about": {}, } -"""Defaults for the namespace of an engine module, see :py:func:`load_engine`""" +# set automatically when an engine does not have any tab category +OTHER_CATEGORY = 'other' + + +class Engine: # pylint: disable=too-few-public-methods + """This class is currently never initialized and only used for type hinting.""" + + name: str + engine: str + shortcut: str + categories: List[str] + supported_languages: List[str] + about: dict + inactive: bool + disabled: bool + language_support: bool + paging: bool + safesearch: bool + time_range_support: bool + timeout: float + + +# Defaults for the namespace of an engine module, see :py:func:`load_engine` categories = {'general': []} -engines = {} +engines: Dict[str, Engine] = {} engine_shortcuts = {} """Simple map of registered *shortcuts* to name of the engine (or ``None``). @@ -58,7 +82,7 @@ engine_shortcuts = {} """ -def load_engine(engine_data): +def load_engine(engine_data: dict) -> Optional[Engine]: """Load engine from ``engine_data``. :param dict engine_data: Attributes from YAML ``settings:engines/<engine>`` @@ -113,6 +137,9 @@ def load_engine(engine_data): set_loggers(engine, engine_name) + if not any(cat in settings['categories_as_tabs'] for cat in engine.categories): + engine.categories.append(OTHER_CATEGORY) + return engine @@ -131,13 +158,15 @@ def set_loggers(engine, engine_name): module.logger = logger.getChild(module_engine_name) -def update_engine_attributes(engine, engine_data): +def update_engine_attributes(engine: Engine, engine_data): # set engine attributes from engine_data for param_name, param_value in engine_data.items(): if param_name == 'categories': if isinstance(param_value, str): param_value = list(map(str.strip, param_value.split(','))) engine.categories = param_value + elif hasattr(engine, 'about') and param_name == 'about': + engine.about = {**engine.about, **engine_data['about']} else: setattr(engine, param_name, param_value) @@ -147,7 +176,7 @@ def update_engine_attributes(engine, engine_data): setattr(engine, arg_name, copy.deepcopy(arg_value)) -def set_language_attributes(engine): +def set_language_attributes(engine: Engine): # assign supported languages from json file if engine.name in ENGINES_LANGUAGES: engine.supported_languages = ENGINES_LANGUAGES[engine.name] @@ -220,7 +249,7 @@ def is_missing_required_attributes(engine): return missing -def is_engine_active(engine): +def is_engine_active(engine: Engine): # check if engine is inactive if engine.inactive is True: return False @@ -232,7 +261,7 @@ def is_engine_active(engine): return True -def register_engine(engine): +def register_engine(engine: Engine): if engine.name in engines: logger.error('Engine config error: ambigious name: {0}'.format(engine.name)) sys.exit(1) diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py index da84bc79e..ac7cd7431 100644 --- a/searx/engines/apkmirror.py +++ b/searx/engines/apkmirror.py @@ -24,7 +24,7 @@ about = { } # engine dependent config -categories = ['files'] +categories = ['files', 'apps'] paging = True time_range_support = False diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index 1cfb3983f..b5e426107 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -20,7 +20,7 @@ about = { } # engine dependent config -categories = ['it'] +categories = ['it', 'software wikis'] paging = True base_url = 'https://wiki.archlinux.org' diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 1170227ad..9744b1800 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -20,7 +20,7 @@ about = { } # engine dependent config -categories = ['general'] +categories = ['general', 'web'] paging = True time_range_support = False safesearch = False diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index f07d07144..cb69dc172 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -27,7 +27,7 @@ about = { } # engine dependent config -categories = ['images'] +categories = ['images', 'web'] paging = True safesearch = True time_range_support = True diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index 184f564df..ae8e8d49a 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -26,7 +26,7 @@ about = { "results": 'HTML', } -categories = ['videos'] +categories = ['videos', 'web'] paging = True safesearch = True time_range_support = True diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 0d2a524df..71da72677 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -27,7 +27,7 @@ about = { } # engine dependent config -categories = ['general'] +categories = ['general', 'web'] paging = True supported_languages_url = 'https://duckduckgo.com/util/u588.js' time_range_support = True diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py index 2f75e16f1..7d844b543 100644 --- a/searx/engines/duckduckgo_images.py +++ b/searx/engines/duckduckgo_images.py @@ -27,7 +27,7 @@ about = { } # engine dependent config -categories = ['images'] +categories = ['images', 'web'] paging = True safesearch = True diff --git a/searx/engines/duden.py b/searx/engines/duden.py index 600b61f3c..da4c4f7da 100644 --- a/searx/engines/duden.py +++ b/searx/engines/duden.py @@ -19,7 +19,7 @@ about = { "language": 'de', } -categories = ['general'] +categories = ['dictionaries'] paging = True # search-url diff --git a/searx/engines/etools.py b/searx/engines/etools.py deleted file mode 100644 index 347463291..000000000 --- a/searx/engines/etools.py +++ /dev/null @@ -1,58 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -""" - eTools (Web) -""" - -from lxml import html -from urllib.parse import quote -from searx.utils import extract_text, eval_xpath - -# about -about = { - "website": 'https://www.etools.ch', - "wikidata_id": None, - "official_api_documentation": None, - "use_official_api": False, - "require_api_key": False, - "results": 'HTML', -} - -categories = ['general'] -paging = False -safesearch = True - -base_url = 'https://www.etools.ch' -search_path = ( - # fmt: off - '/searchAdvancedSubmit.do' - '?query={search_term}' - '&pageResults=20' - '&safeSearch={safesearch}' - # fmt: on -) - - -def request(query, params): - if params['safesearch']: - safesearch = 'true' - else: - safesearch = 'false' - - params['url'] = base_url + search_path.format(search_term=quote(query), safesearch=safesearch) - - return params - - -def response(resp): - results = [] - - dom = html.fromstring(resp.text) - - for result in eval_xpath(dom, '//table[@class="result"]//td[@class="record"]'): - url = eval_xpath(result, './a/@href')[0] - title = extract_text(eval_xpath(result, './a//text()')) - content = extract_text(eval_xpath(result, './/div[@class="text"]//text()')) - - results.append({'url': url, 'title': title, 'content': content}) - - return results diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py index c381b25d4..b5f004e7b 100644 --- a/searx/engines/fdroid.py +++ b/searx/engines/fdroid.py @@ -18,7 +18,7 @@ about = { } # engine dependent config -categories = ['files'] +categories = ['files', 'apps'] paging = True # search-url diff --git a/searx/engines/genius.py b/searx/engines/genius.py index b0fcb09a8..1f4b4b03e 100644 --- a/searx/engines/genius.py +++ b/searx/engines/genius.py @@ -20,7 +20,7 @@ about = { } # engine dependent config -categories = ['music'] +categories = ['music', 'lyrics'] paging = True page_size = 5 diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py index 5b9edafe0..856c93710 100644 --- a/searx/engines/gentoo.py +++ b/searx/engines/gentoo.py @@ -18,7 +18,7 @@ about = { } # engine dependent config -categories = ['it'] +categories = ['it', 'software wikis'] paging = True base_url = 'https://wiki.gentoo.org' diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index c657dca30..1c40ff331 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -22,7 +22,7 @@ about = { } # engine dependent config -categories = ['general'] +categories = ['general', 'web'] # gigablast's pagination is totally damaged, don't use it paging = False safesearch = True diff --git a/searx/engines/github.py b/searx/engines/github.py index 1d12d296a..343f3793d 100644 --- a/searx/engines/github.py +++ b/searx/engines/github.py @@ -17,7 +17,7 @@ about = { } # engine dependent config -categories = ['it'] +categories = ['it', 'repos'] # search-url search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}' # noqa diff --git a/searx/engines/google.py b/searx/engines/google.py index 685697d29..ed4381f47 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -41,7 +41,7 @@ about = { } # engine dependent config -categories = ['general'] +categories = ['general', 'web'] paging = True time_range_support = True safesearch = True diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 203df404a..2855860d8 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -45,7 +45,7 @@ about = { } # engine dependent config -categories = ['images'] +categories = ['images', 'web'] paging = False use_locale_domain = True time_range_support = True diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index 049f9138c..06aac8ae1 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -54,7 +54,7 @@ about = { # engine dependent config -categories = ['videos'] +categories = ['videos', 'web'] paging = False language_support = True use_locale_domain = True diff --git a/searx/engines/imdb.py b/searx/engines/imdb.py index bb6258cf4..0897b8dca 100644 --- a/searx/engines/imdb.py +++ b/searx/engines/imdb.py @@ -27,9 +27,7 @@ about = { "results": 'HTML', } -categories = [ - 'general', -] +categories = [] paging = False # suggestion_url = "https://sg.media-imdb.com/suggestion/{letter}/{query}.json" diff --git a/searx/engines/microsoft_academic.py b/searx/engines/microsoft_academic.py deleted file mode 100644 index a869daf2f..000000000 --- a/searx/engines/microsoft_academic.py +++ /dev/null @@ -1,77 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -""" - Microsoft Academic (Science) -""" - -from json import dumps, loads -from searx.utils import html_to_text - -# about -about = { - "website": 'https://academic.microsoft.com', - "wikidata_id": 'Q28136779', - "official_api_documentation": 'http://ma-graph.org/', - "use_official_api": False, - "require_api_key": False, - "results": 'JSON', -} - -categories = ['images'] -paging = True -search_url = 'https://academic.microsoft.com/api/search' -_paper_url = 'https://academic.microsoft.com/paper/{id}/reference' - - -def request(query, params): - params['url'] = search_url - params['method'] = 'POST' - params['headers']['content-type'] = 'application/json; charset=utf-8' - params['data'] = dumps( - { - 'query': query, - 'queryExpression': '', - 'filters': [], - 'orderBy': 0, - 'skip': (params['pageno'] - 1) * 10, - 'sortAscending': True, - 'take': 10, - 'includeCitationContexts': False, - 'profileId': '', - } - ) - - return params - - -def response(resp): - results = [] - response_data = loads(resp.text) - if not response_data: - return results - - for result in response_data.get('pr', {}): - if 'dn' not in result['paper']: - continue - - title = result['paper']['dn'] - content = _get_content(result['paper']) - url = _paper_url.format(id=result['paper']['id']) - results.append( - { - 'url': url, - 'title': html_to_text(title), - 'content': html_to_text(content), - } - ) - - return results - - -def _get_content(result): - if 'd' in result: - content = result['d'] - if len(content) > 300: - return content[:300] + '...' - return content - - return '' diff --git a/searx/engines/ccengine.py b/searx/engines/openverse.py index 93ac30c86..9f4636e41 100644 --- a/searx/engines/ccengine.py +++ b/searx/engines/openverse.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """ - Creative Commons search engine (Images) + Openverse (formerly known as: Creative Commons search engine) [Images] """ @@ -10,9 +10,9 @@ from urllib.parse import urlencode about = { - "website": 'https://search.creativecommons.org/', + "website": 'https://wordpress.org/openverse/', "wikidata_id": None, - "official_api_documentation": 'https://api.creativecommons.engineering/v1/', + "official_api_documentation": 'https://api.openverse.engineering/v1/', "use_official_api": True, "require_api_key": False, "results": 'JSON', @@ -23,8 +23,8 @@ categories = ['images'] paging = True nb_per_page = 20 -base_url = 'https://api.creativecommons.engineering/v1/images?' -search_string = '&page={page}&page_size={nb_per_page}&format=json&{query}' +base_url = 'https://api.openverse.engineering/v1/images/' +search_string = '?page={page}&page_size={nb_per_page}&format=json&{query}' def request(query, params): diff --git a/searx/engines/searx_engine.py b/searx/engines/searx_engine.py index 3e9035d6f..84a8e6449 100644 --- a/searx/engines/searx_engine.py +++ b/searx/engines/searx_engine.py @@ -10,7 +10,7 @@ from searx.engines import categories as searx_categories about = { "website": 'https://github.com/searxng/searxng', "wikidata_id": 'Q17639196', - "official_api_documentation": 'https://searxng.github.io/searxng/dev/search_api.html', + "official_api_documentation": 'https://docs.searxng.org/dev/search_api.html', "use_official_api": True, "require_api_key": False, "results": 'JSON', diff --git a/searx/engines/seznam.py b/searx/engines/seznam.py index 2e95b4769..48a167ce0 100644 --- a/searx/engines/seznam.py +++ b/searx/engines/seznam.py @@ -25,6 +25,7 @@ about = { "language": "cz", } +categories = ['general', 'web'] base_url = 'https://search.seznam.cz/' diff --git a/searx/engines/sjp.py b/searx/engines/sjp.py index ad498b847..8342a2819 100644 --- a/searx/engines/sjp.py +++ b/searx/engines/sjp.py @@ -21,7 +21,7 @@ about = { "language": 'pl', } -categories = ['general'] +categories = ['dictionaries'] paging = False URL = 'https://sjp.pwn.pl' diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 65d90debe..97891921c 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -23,7 +23,7 @@ about = { } # engine dependent config -categories = ['general'] +categories = ['general', 'web'] # there is a mechanism to block "bot" search # (probably the parameter qid), require # storing of qid's between mulitble search-calls diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 62ade49e2..9900c017b 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -14,7 +14,7 @@ about = { } engine_type = 'online_dictionary' -categories = ['general'] +categories = ['dictionaries'] url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' weight = 100 diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 08bde6665..6bf1932e4 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -31,7 +31,7 @@ about = { } # engine dependent config -categories = ['general'] +categories = ['general', 'web'] paging = True time_range_support = True supported_languages_url = 'https://search.yahoo.com/preferences/languages' |