diff options
| author | Markus Heiser <markus.heiser@darmarit.de> | 2025-03-20 07:47:38 +0100 |
|---|---|---|
| committer | Markus Heiser <markus.heiser@darmarIT.de> | 2025-03-29 10:16:43 +0100 |
| commit | 50f92779bd9f2762d92a24a63851f4069f88297d (patch) | |
| tree | 54e9a3d7b2839bf3feb30339c3e3bd63196fd1da /searx/plugins/_core.py | |
| parent | d36da0a6c34761258c43424a7d948f0c554ef5c9 (diff) | |
[refactor] migrate plugins from "module" to class SXNGPlugin
This patch brings two major changes:
- ``Result.filter_urls(..)`` to pass a filter function for URL fields
- The ``enabled_plugins:`` section in SearXNG's settings no longer exists.
To understand plugin development, build the documentation:
$ make docs.clean docs.live
and read http://0.0.0.0:8000/dev/plugins/development.html
There is no longer a distinction between built-in and external plugins; all
plugins are registered via the settings in the ``plugins:`` section.
In SearXNG, plugins can be registered via a fully qualified class name. A
configuration (`PluginCfg`) can be passed to the plugin, e.g. to make it active
by default (*opt-out*) or inactive by default (*opt-in*) from the user's point of view.
built-in plugins
================
The built-in plugins are all located in the namespace `searx.plugins`.
.. code:: yaml
plugins:
searx.plugins.calculator.SXNGPlugin:
active: true
searx.plugins.hash_plugin.SXNGPlugin:
active: true
searx.plugins.self_info.SXNGPlugin:
active: true
searx.plugins.tracker_url_remover.SXNGPlugin:
active: true
searx.plugins.unit_converter.SXNGPlugin:
active: true
searx.plugins.ahmia_filter.SXNGPlugin:
active: true
searx.plugins.hostnames.SXNGPlugin:
active: true
searx.plugins.oa_doi_rewrite.SXNGPlugin:
active: false
searx.plugins.tor_check.SXNGPlugin:
active: false
external plugins
================
SearXNG supports *external plugins*, but there is no need to install one;
SearXNG runs out of the box.
- Only show green hosted results: https://github.com/return42/tgwf-searx-plugins/
To get a developer installation in a SearXNG developer environment:
.. code:: sh
$ git clone git@github.com:return42/tgwf-searx-plugins.git
$ ./manage pyenv.cmd python -m \
pip install -e tgwf-searx-plugins
To register the plugin in SearXNG add ``only_show_green_results.SXNGPlugin`` to
the ``plugins:`` section:
.. code:: yaml
plugins:
# ...
only_show_green_results.SXNGPlugin:
active: false
Result.filter_urls(..)
======================
The ``Result.filter_urls(..)`` method can be used to filter and/or modify URL fields.
In the following example, the filter function ``my_url_filter``:
.. code:: python
def my_url_filter(result, field_name, url_src) -> bool | str:
if "google" in url_src:
return False # remove URL field from result
if "facebook" in url_src:
new_url = url_src.replace("facebook", "fb-dummy")
return new_url # return modified URL
return True # leave URL in field unchanged
is applied to all URL fields in the :py:obj:`Plugin.on_result` hook:
.. code:: python
class MyUrlFilter(Plugin):
...
def on_result(self, request, search, result) -> bool:
result.filter_urls(my_url_filter)
return True
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/plugins/_core.py')
| -rw-r--r-- | searx/plugins/_core.py | 237 |
1 files changed, 57 insertions, 180 deletions
diff --git a/searx/plugins/_core.py b/searx/plugins/_core.py index 7df9772e9..f5ae56e15 100644 --- a/searx/plugins/_core.py +++ b/searx/plugins/_core.py @@ -3,31 +3,24 @@ from __future__ import annotations -__all__ = ["PluginInfo", "Plugin", "PluginStorage"] +__all__ = ["PluginInfo", "Plugin", "PluginCfg", "PluginStorage"] import abc import importlib +import inspect import logging -import pathlib -import types +import re import typing -import warnings from dataclasses import dataclass, field -import flask - -import searx -from searx.utils import load_module from searx.extended_types import SXNG_Request from searx.result_types import Result - if typing.TYPE_CHECKING: from searx.search import SearchWithPlugins + import flask - -_default = pathlib.Path(__file__).parent log: logging.Logger = logging.getLogger("searx.plugins") @@ -69,14 +62,17 @@ class PluginInfo: """See :py:obj:`Plugin.keywords`""" +ID_REGXP = re.compile("[a-z][a-z0-9].*") + + class Plugin(abc.ABC): """Abstract base class of all Plugins.""" id: str = "" """The ID (suffix) in the HTML form.""" - default_on: bool = False - """Plugin is enabled/disabled by default.""" + active: typing.ClassVar[bool] + """Plugin is enabled/disabled by default (:py:obj:`PluginCfg.active`).""" keywords: list[str] = [] """Keywords in the search query that activate the plugin. 
The *keyword* is @@ -93,19 +89,28 @@ class Plugin(abc.ABC): fqn: str = "" - def __init__(self) -> None: + def __init__(self, plg_cfg: PluginCfg) -> None: super().__init__() if not self.fqn: self.fqn = self.__class__.__mro__[0].__module__ - for attr in ["id", "default_on"]: + # names from the configuration + for n, v in plg_cfg.__dict__.items(): + setattr(self, n, v) + + # names that must be set by the plugin implementation + for attr in [ + "id", + ]: if getattr(self, attr, None) is None: raise NotImplementedError(f"plugin {self} is missing attribute {attr}") - if not self.id: - self.id = f"{self.__class__.__module__}.{self.__class__.__name__}" + if not ID_REGXP.match(self.id): + raise ValueError(f"plugin ID {self.id} contains invalid character (use lowercase ASCII)") + if not getattr(self, "log", None): - self.log = log.getChild(self.id) + pkg_name = inspect.getmodule(self.__class__).__package__ # type: ignore + self.log = logging.getLogger(f"{pkg_name}.{self.id}") def __hash__(self) -> int: """The hash value is used in :py:obj:`set`, for example, when an object @@ -121,7 +126,7 @@ class Plugin(abc.ABC): return hash(self) == hash(other) - def init(self, app: flask.Flask) -> bool: # pylint: disable=unused-argument + def init(self, app: "flask.Flask") -> bool: # pylint: disable=unused-argument """Initialization of the plugin, the return value decides whether this plugin is active or not. Initialization only takes place once, at the time the WEB application is set up. The base methode always returns @@ -151,7 +156,8 @@ class Plugin(abc.ABC): .. hint:: - If :py:obj:`Result.url` is modified, :py:obj:`Result.parsed_url` must + If :py:obj:`Result.url <searx.result_types._base.Result.url>` is modified, + :py:obj:`Result.parsed_url <searx.result_types._base.Result.parsed_url>` must be changed accordingly: .. 
code:: python @@ -161,81 +167,24 @@ class Plugin(abc.ABC): return True def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | typing.Sequence[Result]: - """Runs AFTER the search request. Can return a list of :py:obj:`Result` - objects to be added to the final result list.""" + """Runs AFTER the search request. Can return a list of + :py:obj:`Result <searx.result_types._base.Result>` objects to be added to the + final result list.""" return -class ModulePlugin(Plugin): - """A wrapper class for legacy *plugins*. - - .. note:: - - For internal use only! +@dataclass +class PluginCfg: + """Settings of a plugin. - In a module plugin, the follwing names are mapped: + .. code:: yaml - - `module.query_keywords` --> :py:obj:`Plugin.keywords` - - `module.plugin_id` --> :py:obj:`Plugin.id` - - `module.logger` --> :py:obj:`Plugin.log` + mypackage.mymodule.MyPlugin: + active: true """ - _required_attrs = (("name", str), ("description", str), ("default_on", bool)) - - def __init__(self, mod: types.ModuleType, fqn: str): - """In case of missing attributes in the module or wrong types are given, - a :py:obj:`TypeError` exception is raised.""" - - self.fqn = fqn - self.module = mod - self.id = getattr(self.module, "plugin_id", self.module.__name__) - self.log = logging.getLogger(self.module.__name__) - self.keywords = getattr(self.module, "query_keywords", []) - - for attr, attr_type in self._required_attrs: - if not hasattr(self.module, attr): - msg = f"missing attribute {attr}, cannot load plugin" - self.log.critical(msg) - raise TypeError(msg) - if not isinstance(getattr(self.module, attr), attr_type): - msg = f"attribute {attr} is not of type {attr_type}" - self.log.critical(msg) - raise TypeError(msg) - - self.default_on = mod.default_on - self.info = PluginInfo( - id=self.id, - name=self.module.name, - description=self.module.description, - preference_section=getattr(self.module, "preference_section", None), - examples=getattr(self.module, 
"query_examples", []), - keywords=self.keywords, - ) - - # monkeypatch module - self.module.logger = self.log # type: ignore - - super().__init__() - - def init(self, app: flask.Flask) -> bool: - if not hasattr(self.module, "init"): - return True - return self.module.init(app) - - def pre_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> bool: - if not hasattr(self.module, "pre_search"): - return True - return self.module.pre_search(request, search) - - def on_result(self, request: SXNG_Request, search: "SearchWithPlugins", result: Result) -> bool: - if not hasattr(self.module, "on_result"): - return True - return self.module.on_result(request, search, result) - - def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | list[Result]: - if not hasattr(self.module, "post_search"): - return None - return self.module.post_search(request, search) + active: bool = False + """Plugin is active by default and the user can *opt-out* in the preferences.""" class PluginStorage: @@ -244,22 +193,10 @@ class PluginStorage: plugin_list: set[Plugin] """The list of :py:obj:`Plugins` in this storage.""" - legacy_plugins = [ - "ahmia_filter", - "calculator", - "hostnames", - "oa_doi_rewrite", - "tor_check", - "tracker_url_remover", - "unit_converter", - ] - """Internal plugins implemented in the legacy style (as module / deprecated!).""" - def __init__(self): self.plugin_list = set() def __iter__(self): - yield from self.plugin_list def __len__(self): @@ -267,102 +204,42 @@ class PluginStorage: @property def info(self) -> list[PluginInfo]: - return [p.info for p in self.plugin_list] - def load_builtins(self): - """Load plugin modules from: - - - the python packages in :origin:`searx/plugins` and - - the external plugins from :ref:`settings plugins`. 
- """ - - for f in _default.iterdir(): - - if f.name.startswith("_"): - continue + return [p.info for p in self.plugin_list] - if f.stem not in self.legacy_plugins: - self.register_by_fqn(f"searx.plugins.{f.stem}.SXNGPlugin") - continue + def load_settings(self, cfg: dict[str, dict]): + """Load plugins configured in SearXNG's settings :ref:`settings + plugins`.""" - # for backward compatibility - mod = load_module(f.name, str(f.parent)) - self.register(ModulePlugin(mod, f"searx.plugins.{f.stem}")) + for fqn, plg_settings in cfg.items(): + cls = None + mod_name, cls_name = fqn.rsplit('.', 1) + try: + mod = importlib.import_module(mod_name) + cls = getattr(mod, cls_name, None) + except Exception as exc: # pylint: disable=broad-exception-caught + log.exception(exc) - for fqn in searx.get_setting("plugins"): # type: ignore - self.register_by_fqn(fqn) + if cls is None: + msg = f"plugin {fqn} is not implemented" + raise ValueError(msg) + plg = cls(PluginCfg(**plg_settings)) + self.register(plg) def register(self, plugin: Plugin): """Register a :py:obj:`Plugin`. In case of name collision (if two plugins have same ID) a :py:obj:`KeyError` exception is raised. """ - if plugin in self.plugin_list: + if plugin in [p.id for p in self.plugin_list]: msg = f"name collision '{plugin.id}'" plugin.log.critical(msg) raise KeyError(msg) - if not plugin.fqn.startswith("searx.plugins."): - self.plugin_list.add(plugin) - plugin.log.debug("plugin has been registered") - return - - # backward compatibility for the enabled_plugins setting - # https://docs.searxng.org/admin/settings/settings_plugins.html#enabled-plugins-internal - en_plgs: list[str] | None = searx.get_setting("enabled_plugins") # type:ignore - - if en_plgs is None: - # enabled_plugins not listed in the /etc/searxng/settings.yml: - # check default_on before register .. 
- if plugin.default_on: - self.plugin_list.add(plugin) - plugin.log.debug("builtin plugin has been registered by SearXNG's defaults") - return - plugin.log.debug("builtin plugin is not registered by SearXNG's defaults") - return - - if plugin.info.name not in en_plgs: - # enabled_plugins listed in the /etc/searxng/settings.yml, - # but this plugin is not listed in: - plugin.log.debug("builtin plugin is not registered by maintainer's settings") - return - - # if the plugin is in enabled_plugins, then it is on by default. - plugin.default_on = True self.plugin_list.add(plugin) - plugin.log.debug("builtin plugin is registered by maintainer's settings") - - def register_by_fqn(self, fqn: str): - """Register a :py:obj:`Plugin` via its fully qualified class name (FQN). - The FQNs of external plugins could be read from a configuration, for - example, and registered using this method - """ - - mod_name, _, obj_name = fqn.rpartition('.') - if not mod_name: - # for backward compatibility - code_obj = importlib.import_module(fqn) - else: - mod = importlib.import_module(mod_name) - code_obj = getattr(mod, obj_name, None) - - if code_obj is None: - msg = f"plugin {fqn} is not implemented" - log.critical(msg) - raise ValueError(msg) - - if isinstance(code_obj, types.ModuleType): - # for backward compatibility - warnings.warn( - f"plugin {fqn} is implemented in a legacy module / migrate to searx.plugins.Plugin", DeprecationWarning - ) - - self.register(ModulePlugin(code_obj, fqn)) - return - - self.register(code_obj()) + plugin.log.debug("plugin has been loaded") - def init(self, app: flask.Flask) -> None: + def init(self, app: "flask.Flask") -> None: """Calls the method :py:obj:`Plugin.init` of each plugin in this storage. Depending on its return value, the plugin is removed from *this* storage or not.""" |