summary | refs | log | tree | commit | diff
path: root/searx/plugins/oa_doi_rewrite.py
diff options
context:
space:
mode:
author: Markus Heiser <markus.heiser@darmarit.de> 2025-03-20 07:47:38 +0100
committer: Markus Heiser <markus.heiser@darmarIT.de> 2025-03-29 10:16:43 +0100
commit: 50f92779bd9f2762d92a24a63851f4069f88297d (patch)
tree: 54e9a3d7b2839bf3feb30339c3e3bd63196fd1da /searx/plugins/oa_doi_rewrite.py
parent: d36da0a6c34761258c43424a7d948f0c554ef5c9 (diff)
[refactor] migrate plugins from "module" to class SXNGPlugin
This patch brings two major changes:

- ``Result.filter_urls(..)`` to pass a filter function for URL fields
- The ``enabled_plugins:`` section in SearXNG's settings no longer exists.

To understand plugin development, build the documentation:

    $ make docs.clean docs.live

and read http://0.0.0.0:8000/dev/plugins/development.html

There is no longer a distinction between built-in and external plugins; all
plugins are registered via the settings in the ``plugins:`` section.

In SearXNG, plugins can be registered via a fully qualified class name. A
configuration (`PluginCfg`) can be transferred to the plugin, e.g. to activate
it by default / *opt-in* or *opt-out* from the user's point of view.

built-in plugins
================

The built-in plugins are all located in the namespace `searx.plugins`.

.. code:: yaml

    plugins:

      searx.plugins.calculator.SXNGPlugin:
        active: true

      searx.plugins.hash_plugin.SXNGPlugin:
        active: true

      searx.plugins.self_info.SXNGPlugin:
        active: true

      searx.plugins.tracker_url_remover.SXNGPlugin:
        active: true

      searx.plugins.unit_converter.SXNGPlugin:
        active: true

      searx.plugins.ahmia_filter.SXNGPlugin:
        active: true

      searx.plugins.hostnames.SXNGPlugin:
        active: true

      searx.plugins.oa_doi_rewrite.SXNGPlugin:
        active: false

      searx.plugins.tor_check.SXNGPlugin:
        active: false

external plugins
================

SearXNG supports *external plugins* / there is no need to install one, SearXNG
runs out of the box.

- Only show green hosted results: https://github.com/return42/tgwf-searx-plugins/

To get a developer installation in a SearXNG developer environment:

.. code:: sh

    $ git clone git@github.com:return42/tgwf-searx-plugins.git
    $ ./manage pyenv.cmd python -m \
          pip install -e tgwf-searx-plugins

To register the plugin in SearXNG add ``only_show_green_results.SXNGPlugin`` to
the ``plugins:`` section:

.. code:: yaml

    plugins:
      # ...
      only_show_green_results.SXNGPlugin:
        active: false

Result.filter_urls(..)
======================

The ``Result.filter_urls(..)`` method can be used to filter and/or modify URL
fields. In the following example, the filter function ``my_url_filter``:

.. code:: python

    def my_url_filter(result, field_name, url_src) -> bool | str:
        if "google" in url_src:
            return False              # remove URL field from result
        if "facebook" in url_src:
            new_url = url_src.replace("facebook", "fb-dummy")
            return new_url            # return modified URL
        return True                   # leave URL in field unchanged

is applied to all URL fields in the :py:obj:`Plugin.on_result` hook:

.. code:: python

    class MyUrlFilter(Plugin):
        ...
        def on_result(self, request, search, result) -> bool:
            result.filter_urls(my_url_filter)
            return True

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/plugins/oa_doi_rewrite.py')
-rw-r--r-- searx/plugins/oa_doi_rewrite.py 106
1 files changed, 71 insertions, 35 deletions
diff --git a/searx/plugins/oa_doi_rewrite.py b/searx/plugins/oa_doi_rewrite.py
index be5a8d4a4..dac60d298 100644
--- a/searx/plugins/oa_doi_rewrite.py
+++ b/searx/plugins/oa_doi_rewrite.py
@@ -1,54 +1,90 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
-
from __future__ import annotations
+import typing
+
import re
-from urllib.parse import urlparse, parse_qsl
+from urllib.parse import parse_qsl
from flask_babel import gettext
+from searx import get_setting
+from searx.plugins import Plugin, PluginInfo
+from searx.extended_types import sxng_request
-from searx import settings
+from ._core import log
+if typing.TYPE_CHECKING:
+ from searx.search import SearchWithPlugins
+ from searx.extended_types import SXNG_Request
+ from searx.result_types import Result, LegacyResult
+ from searx.plugins import PluginCfg
-regex = re.compile(r'10\.\d{4,9}/[^\s]+')
-name = gettext('Open Access DOI rewrite')
-description = gettext('Avoid paywalls by redirecting to open-access versions of publications when available')
-default_on = False
-preference_section = 'general/doi_resolver'
+ahmia_blacklist: list = []
-def extract_doi(url):
- match = regex.search(url.path)
- if match:
- return match.group(0)
- for _, v in parse_qsl(url.query):
- match = regex.search(v)
- if match:
- return match.group(0)
- return None
+def filter_url_field(result: "Result|LegacyResult", field_name: str, url_src: str) -> bool | str:
+ """Returns bool ``True`` to use URL unchanged (``False`` to ignore URL).
+ If URL should be modified, the returned string is the new URL to use."""
+ if field_name != "url":
+ return True # use it unchanged
-def get_doi_resolver(preferences):
- doi_resolvers = settings['doi_resolvers']
- selected_resolver = preferences.get_value('doi_resolver')[0]
- if selected_resolver not in doi_resolvers:
- selected_resolver = settings['default_doi_resolver']
- return doi_resolvers[selected_resolver]
+ doi = extract_doi(result.parsed_url)
+ if doi and len(doi) < 50:
+ for suffix in ("/", ".pdf", ".xml", "/full", "/meta", "/abstract"):
+ doi = doi.removesuffix(suffix)
+ new_url = get_doi_resolver() + doi
+ if "doi" not in result:
+ result["doi"] = doi
+ log.debug("oa_doi_rewrite: [URL field: %s] %s -> %s", field_name, url_src, new_url)
+ return new_url # use new url
+
+ return True # use it unchanged
+
+
+class SXNGPlugin(Plugin):
+ """Avoid paywalls by redirecting to open-access."""
+ id = "oa_doi_rewrite"
-def on_result(request, _search, result) -> bool:
+ def __init__(self, plg_cfg: "PluginCfg") -> None:
+ super().__init__(plg_cfg)
+ self.info = PluginInfo(
+ id=self.id,
+ name=gettext("Open Access DOI rewrite"),
+ description=gettext("Avoid paywalls by redirecting to open-access versions of publications when available"),
+ preference_section="general",
+ )
- if not result.parsed_url:
+ def on_result(
+ self,
+ request: "SXNG_Request",
+ search: "SearchWithPlugins",
+ result: "Result",
+ ) -> bool: # pylint: disable=unused-argument
+ if result.parsed_url:
+ result.filter_urls(filter_url_field)
return True
- doi = extract_doi(result['parsed_url'])
- if doi and len(doi) < 50:
- for suffix in ('/', '.pdf', '.xml', '/full', '/meta', '/abstract'):
- if doi.endswith(suffix):
- doi = doi[: -len(suffix)]
- result['url'] = get_doi_resolver(request.preferences) + doi
- result['parsed_url'] = urlparse(result['url'])
- if 'doi' not in result:
- result['doi'] = doi
- return True
+
+regex = re.compile(r'10\.\d{4,9}/[^\s]+')
+
+
+def extract_doi(url):
+ m = regex.search(url.path)
+ if m:
+ return m.group(0)
+ for _, v in parse_qsl(url.query):
+ m = regex.search(v)
+ if m:
+ return m.group(0)
+ return None
+
+
+def get_doi_resolver() -> str:
+ doi_resolvers = get_setting("doi_resolvers")
+ selected_resolver = sxng_request.preferences.get_value('doi_resolver')[0]
+ if selected_resolver not in doi_resolvers:
+ selected_resolver = get_setting("default_doi_resolver")
+ return doi_resolvers[selected_resolver]