summaryrefslogtreecommitdiff
path: root/searx/result_types
diff options
context:
space:
mode:
Diffstat (limited to 'searx/result_types')
-rw-r--r-- searx/result_types/__init__.py 18
-rw-r--r-- searx/result_types/_base.py 223
-rw-r--r-- searx/result_types/answer.py 141
3 files changed, 382 insertions, 0 deletions
diff --git a/searx/result_types/__init__.py b/searx/result_types/__init__.py
new file mode 100644
index 000000000..d7b33001b
--- /dev/null
+++ b/searx/result_types/__init__.py
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Typification of the result items generated by the *engines*, *answerers* and
+*plugins*.
+
+.. note::
+
+ We are at the beginning of typing the results. Further typing will follow,
+ but this is a very large task that we will only be able to implement
+ gradually. For more, please read :ref:`result types`.
+
+"""
+
+from __future__ import annotations
+
+__all__ = ["Result", "AnswerSet", "Answer", "Translations"]
+
+from ._base import Result, LegacyResult
+from .answer import AnswerSet, Answer, Translations
diff --git a/searx/result_types/_base.py b/searx/result_types/_base.py
new file mode 100644
index 000000000..2e98530fc
--- /dev/null
+++ b/searx/result_types/_base.py
@@ -0,0 +1,223 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# pylint: disable=too-few-public-methods, missing-module-docstring
+"""Basic types for the typification of results.
+
+- :py:obj:`Result` base class
+- :py:obj:`LegacyResult` for internal use only
+
+----
+
+.. autoclass:: Result
+ :members:
+
+.. autoclass:: LegacyResult
+ :members:
+"""
+
+
+from __future__ import annotations
+
+__all__ = ["Result"]
+
+import re
+import urllib.parse
+import warnings
+
+import msgspec
+
+
class Result(msgspec.Struct, kw_only=True):
    """Base class of all result types :ref:`result types`."""

    url: str | None = None
    """A link related to this *result*"""

    template: str = "default.html"
    """Name of the template used to render the result.

    By default :origin:`result_templates/default.html
    <searx/templates/simple/result_templates/default.html>` is used.
    """

    engine: str | None = ""
    """Name of the engine *this* result comes from.  In case of *plugins* a
    prefix ``plugin:`` is set, in case of *answerer* prefix ``answerer:`` is
    set.

    The field is optional and is initialized from the context if necessary.
    """

    parsed_url: urllib.parse.ParseResult | None = None
    """:py:obj:`urllib.parse.ParseResult` of :py:obj:`Result.url`.

    The field is optional and is initialized from the context if necessary.
    """

    results: list = []  # https://jcristharif.com/msgspec/structs.html#default-values
    """Result list of an :origin:`engine <searx/engines>` response or a
    :origin:`answerer <searx/answerers>` to which the answer should be added.

    This field is only present for the sake of simplicity.  Typically, the
    response function of an engine has a result list that is returned at the
    end.  By specifying the result list in the constructor of the result, this
    result is then immediately added to the list (this parameter does not have
    another function).

    .. code:: python

       def response(resp):
           results = []
           ...
           Answer(results=results, answer=answer, url=url)
           ...
           return results

    """

    def normalize_result_fields(self) -> None:
        """Normalize a result ..

        - if field ``url`` is set and field ``parsed_url`` is unset, init
          ``parsed_url`` from field ``url``.  If the parsed URL has no scheme,
          ``http`` is used as default and ``url`` is rebuilt from it.

        This method can be extended in the inheritance.
        """

        if not self.parsed_url and self.url:
            self.parsed_url = urllib.parse.urlparse(self.url)

            # if the result has no scheme, use http as default
            if not self.parsed_url.scheme:
                self.parsed_url = self.parsed_url._replace(scheme="http")
                self.url = self.parsed_url.geturl()

    def __post_init__(self) -> None:
        """Add *this* result to the result list (field ``results``)."""

        self.results.append(self)

    def __hash__(self) -> int:
        """Generates a hash value that uniquely identifies the content of *this*
        result.  The method can be adapted in the inheritance to compare results
        from different sources.

        If two result objects are not identical but have the same content, their
        hash values should also be identical.

        The hash value is used in contexts, e.g. when checking for equality to
        identify identical results from different sources (engines).

        The base implementation returns ``id(self)``, i.e. a result is only
        equal to itself.
        """

        return id(self)

    def __eq__(self, other) -> bool:
        """:py:obj:`Result` objects are equal if the hash values of the two
        objects are equal.  If needed, it is recommended to overwrite
        :py:obj:`Result.__hash__`.

        An ``other`` object that can't be hashed (e.g. a ``list`` or a plain
        ``dict``) is never equal to a result.
        """

        own_hash = hash(self)
        try:
            other_hash = hash(other)
        except TypeError:
            # comparing with an unhashable object must not raise, the objects
            # are simply not equal
            return False
        return own_hash == other_hash

    # for legacy code where a result is treated as a Python dict

    def __setitem__(self, field_name, value):
        """Set field ``field_name`` to ``value`` (``result[field_name] = value``)."""

        return setattr(self, field_name, value)

    def __getitem__(self, field_name):
        """Return the value of field ``field_name``.

        :raises KeyError: if ``field_name`` is not a field of the struct.
        """

        if field_name not in self.__struct_fields__:
            raise KeyError(f"{field_name}")
        return getattr(self, field_name)

    def __iter__(self):
        """Iterate over the field names of the struct (like the keys of a dict)."""

        return iter(self.__struct_fields__)
+
+
class LegacyResult(dict):
    """A wrapper around a legacy result item.  The SearXNG core uses this class
    for untyped dictionaries / to be downward compatible.

    This class is needed until we have implemented an :py:obj:`Result` class for
    each result type and the old usages in the codebase have been fully
    migrated.

    There is only one place where this class is used, in the
    :py:obj:`searx.results.ResultContainer`.

    The items of the dictionary are also available as attributes
    (``result.url`` is ``result["url"]``).

    .. attention::

       Do not use this class in your own implementations!
    """

    UNSET = object()
    WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

    def __init__(self, *args, **kwargs):
        """Build the result from the arguments of a ``dict`` and init the fields
        ``engine``, ``template``, ``url``, ``parsed_url``, ``content`` and
        ``title`` with defaults if they are missing."""

        super().__init__(*args, **kwargs)

        # Attribute access is mapped to the items of the dict by __getattr__
        # and __setattr__.  Do not assign ``self.__dict__ = self`` here: the
        # assignment is routed through __setattr__ and would only store a
        # self-referencing item named "__dict__" in the result.

        # Init fields with defaults / compare with defaults of the fields in class Result
        self.engine = self.get("engine", "")
        self.template = self.get("template", "default.html")
        self.url = self.get("url", None)
        self.parsed_url = self.get("parsed_url", None)

        self.content = self.get("content", "")
        self.title = self.get("title", "")

        # Legacy types that have already been ported to a type ..

        if "answer" in self:
            warnings.warn(
                f"engine {self.engine} is using deprecated `dict` for answers"
                f" / use a class from searx.result_types.answer",
                DeprecationWarning,
            )
            self.template = "answer/legacy.html"

    def __hash__(self) -> int:  # type: ignore
        """Hash value used to identify results with the same content:

        - *answer* results are identified by the value of item ``answer``
        - results without one of the items ``suggestion``, ``correction``,
          ``infobox``, ``number_of_results`` or ``engine_data`` are identified
          by their ``url``
        - all other results are only identical to themselves (``id(self)``)
        """

        if "answer" in self:
            return hash(self["answer"])
        if not any(key in self for key in ["suggestion", "correction", "infobox", "number_of_results", "engine_data"]):
            # it is a common url-result ..
            return hash(self.url)
        return id(self)

    def __eq__(self, other):
        """Results are equal if their hash values are equal.  An ``other``
        object that can't be hashed (e.g. a plain ``dict``) is never equal to
        a result."""

        own_hash = hash(self)
        try:
            other_hash = hash(other)
        except TypeError:
            # comparing with an unhashable object must not raise
            return False
        return own_hash == other_hash

    def __ne__(self, other):
        """Inverse of ``__eq__``.  Without this method ``!=`` would fall back to
        ``dict.__ne__`` which compares the items of the dictionaries and
        contradicts the hash based ``==``."""

        return not self.__eq__(other)

    def __repr__(self) -> str:

        return f"LegacyResult: {super().__repr__()}"

    def __getattr__(self, name: str, default=UNSET):
        """Return item ``name``.  Called by Python only if the normal attribute
        lookup fails.

        :raises AttributeError: if there is no item ``name`` and no ``default``
            has been passed.
        """

        if name in self:
            return self[name]
        if default is self.UNSET:
            raise AttributeError(f"LegacyResult object has no field named: {name}")
        return default

    def __setattr__(self, name: str, val):
        """Store attribute ``name`` as an item of the dictionary."""

        self[name] = val

    def normalize_result_fields(self):
        """Normalize the fields of the result:

        - replace sequences of whitespaces in ``title`` and ``content`` by a
          single space
        - if ``url`` is set and ``parsed_url`` is unset, init ``parsed_url``
          from ``url`` (scheme ``http`` is the default)
        - drop ``content`` if it is identical to the ``title``
        """

        self.title = self.WHITESPACE_REGEX.sub(" ", self.title)

        if not self.parsed_url and self.url:
            self.parsed_url = urllib.parse.urlparse(self.url)

            # if the result has no scheme, use http as default
            if not self.parsed_url.scheme:
                self.parsed_url = self.parsed_url._replace(scheme="http")
                self.url = self.parsed_url.geturl()

        if self.content:
            self.content = self.WHITESPACE_REGEX.sub(" ", self.content)
            if self.content == self.title:
                # avoid duplicate content between the content and title fields
                self.content = ""
diff --git a/searx/result_types/answer.py b/searx/result_types/answer.py
new file mode 100644
index 000000000..1042fe00e
--- /dev/null
+++ b/searx/result_types/answer.py
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Typification of the *answer* results. Results of this type are rendered in
+the :origin:`answers.html <searx/templates/simple/elements/answers.html>`
+template.
+
+----
+
+.. autoclass:: BaseAnswer
+ :members:
+ :show-inheritance:
+
+.. autoclass:: Answer
+ :members:
+ :show-inheritance:
+
+.. autoclass:: Translations
+ :members:
+ :show-inheritance:
+
+.. autoclass:: AnswerSet
+ :members:
+ :show-inheritance:
+"""
+# pylint: disable=too-few-public-methods
+
+from __future__ import annotations
+
+__all__ = ["AnswerSet", "Answer", "Translations"]
+
+import msgspec
+
+from ._base import Result
+
+
class BaseAnswer(Result, kw_only=True):
    """Base class of all answer types.

    The class adds no fields of its own to :py:obj:`Result`.  It is not
    intended to build instances of this class (aka *abstract*)."""
+
+
class AnswerSet:
    """Aggregator for :py:obj:`BaseAnswer` items in a result container.

    Items are identified by their hash value: an item is only added if no item
    with the same hash value is already in the set.
    """

    def __init__(self):
        self._answerlist = []

    def _knows_hash(self, hash_value) -> bool:
        """True if an item with hash value ``hash_value`` is in the set."""
        for known in self._answerlist:
            if hash(known) == hash_value:
                return True
        return False

    def __len__(self):
        return len(self._answerlist)

    def __bool__(self):
        return len(self._answerlist) > 0

    def add(self, answer: BaseAnswer) -> None:
        """Add ``answer`` unless an item with the same hash value exists."""
        if self._knows_hash(hash(answer)):
            return
        self._answerlist.append(answer)

    def __iter__(self):
        """Sort items in this set (by template name) and iterate over the
        items."""

        def template_name(item):
            return item.template

        self._answerlist.sort(key=template_name)
        for item in self._answerlist:
            yield item

    def __contains__(self, answer: BaseAnswer) -> bool:
        return self._knows_hash(hash(answer))
+
+
class Answer(BaseAnswer, kw_only=True):
    """Simple answer type where the *answer* is a simple string with an optional
    :py:obj:`url <Result.url>` field to link a resource (article, map, ..)
    related to the answer."""

    template: str = "answer/legacy.html"

    answer: str
    """Text of the answer."""

    def __hash__(self) -> int:
        """The hash value of field *answer* is the hash value of the
        :py:obj:`Answer` object.  :py:obj:`Answer <Result.__eq__>` objects are
        equal, when the hash values of both objects are equal."""
        return hash(self.answer)
+
+
class Translations(BaseAnswer, kw_only=True):
    """Answer type with a list of translations.

    The items in the list of :py:obj:`Translations.translations` are of type
    :py:obj:`Translations.Item`:

    .. code:: python

       def response(resp):
           results = []
           ...
           foo_1 = Translations.Item(
               text="foobar",
               synonyms=["bar", "foo"],
               examples=["foo and bar are placeholders"],
           )
           foo_url = "https://www.deepl.com/de/translator#en/de/foo"
           ...
           Translations(results=results, translations=[foo_1], url=foo_url)
           ...
           return results

    """

    template: str = "answer/translations.html"
    """The template in :origin:`answer/translations.html
    <searx/templates/simple/answer/translations.html>`"""

    translations: list[Translations.Item]
    """List of translations."""

    class Item(msgspec.Struct, kw_only=True):
        """A single element of the translations / a translation.  A translation
        consists of at least a mandatory ``text`` property (the translation),
        optional properties such as *transliteration*, *definitions*,
        *synonyms* and *examples* are possible."""

        text: str
        """Translated text."""

        transliteration: str = ""
        """Transliteration_ of the requested translation.

        .. _Transliteration: https://en.wikipedia.org/wiki/Transliteration
        """

        examples: list[str] = []
        """List of examples for the requested translation."""

        definitions: list[str] = []
        """List of definitions for the requested translation."""

        synonyms: list[str] = []
        """List of synonyms for the requested translation."""