diff options
Diffstat (limited to 'searx/result_types')
| -rw-r--r-- | searx/result_types/__init__.py | 18 | ||||
| -rw-r--r-- | searx/result_types/_base.py | 223 | ||||
| -rw-r--r-- | searx/result_types/answer.py | 141 |
3 files changed, 382 insertions, 0 deletions
diff --git a/searx/result_types/__init__.py b/searx/result_types/__init__.py
new file mode 100644
index 000000000..d7b33001b
--- /dev/null
+++ b/searx/result_types/__init__.py
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Typification of the result items generated by the *engines*, *answerers* and
+*plugins*.
+
+.. note::
+
+   We are at the beginning of typing the results.  Further typing will follow,
+   but this is a very large task that we will only be able to implement
+   gradually.  For more, please read :ref:`result types`.
+
+"""
+
+from __future__ import annotations
+
+__all__ = ["Result", "AnswerSet", "Answer", "Translations"]
+
+from ._base import Result, LegacyResult
+from .answer import AnswerSet, Answer, Translations
diff --git a/searx/result_types/_base.py b/searx/result_types/_base.py
new file mode 100644
index 000000000..2e98530fc
--- /dev/null
+++ b/searx/result_types/_base.py
@@ -0,0 +1,223 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# pylint: disable=too-few-public-methods, missing-module-docstring
+"""Basic types for the typification of results.
+
+- :py:obj:`Result` base class
+- :py:obj:`LegacyResult` for internal use only
+
+----
+
+.. autoclass:: Result
+   :members:
+
+.. autoclass:: LegacyResult
+   :members:
+"""
+
+
+from __future__ import annotations
+
+__all__ = ["Result"]
+
+import re
+import urllib.parse
+import warnings
+
+import msgspec
+
+
+class Result(msgspec.Struct, kw_only=True):
+    """Base class of all result types :ref:`result types`."""
+
+    url: str | None = None
+    """A link related to this *result*"""
+
+    template: str = "default.html"
+    """Name of the template used to render the result.
+
+    By default :origin:`result_templates/default.html
+    <searx/templates/simple/result_templates/default.html>` is used.
+    """
+
+    engine: str | None = ""
+    """Name of the engine *this* result comes from.  In case of *plugins* a
+    prefix ``plugin:`` is set, in case of *answerer* prefix ``answerer:`` is
+    set.
+
+    The field is optional and is initialized from the context if necessary.
+    """
+
+    parsed_url: urllib.parse.ParseResult | None = None
+    """:py:obj:`urllib.parse.ParseResult` of :py:obj:`Result.url`.
+
+    The field is optional and is initialized from the context if necessary.
+    """
+
+    results: list = []  # https://jcristharif.com/msgspec/structs.html#default-values
+    """Result list of an :origin:`engine <searx/engines>` response or a
+    :origin:`answerer <searx/answerers>` to which the answer should be added.
+
+    This field is only present for the sake of simplicity.  Typically, the
+    response function of an engine has a result list that is returned at the
+    end.  By specifying the result list in the constructor of the result, this
+    result is then immediately added to the list (this parameter does not have
+    another function).
+
+    .. code:: python
+
+       def response(resp):
+           results = []
+           ...
+           Answer(results=results, answer=answer, url=url)
+           ...
+           return results
+
+    """
+
+    def normalize_result_fields(self):
+        """Normalize a result ..
+
+        - if field ``url`` is set and field ``parsed_url`` is unset, init
+          ``parsed_url`` from field ``url``.  This method can be extended in the
+          inheritance.
+
+        """
+
+        if not self.parsed_url and self.url:
+            self.parsed_url = urllib.parse.urlparse(self.url)
+
+            # if the result has no scheme, use http as default
+            if not self.parsed_url.scheme:
+                self.parsed_url = self.parsed_url._replace(scheme="http")
+                self.url = self.parsed_url.geturl()
+
+    def __post_init__(self):
+        """Add *this* result to the result list."""
+
+        self.results.append(self)
+
+    def __hash__(self) -> int:
+        """Generates a hash value that uniquely identifies the content of *this*
+        result.  The method can be adapted in the inheritance to compare results
+        from different sources.
+
+        If two result objects are not identical but have the same content, their
+        hash values should also be identical.
+
+        The hash value is used in contexts, e.g. when checking for equality to
+        identify identical results from different sources (engines).
+        """
+
+        return id(self)
+
+    def __eq__(self, other):
+        """:py:obj:`Result` objects are equal if the hash values of the two
+        objects are equal.  If needed, it is recommended to overwrite
+        :py:obj:`Result.__hash__`."""
+
+        return hash(self) == hash(other)
+
+    # for legacy code where a result is treated as a Python dict
+
+    def __setitem__(self, field_name, value):
+        """Dict-style write access: ``result[field_name] = value``."""
+        return setattr(self, field_name, value)
+
+    def __getitem__(self, field_name):
+        """Dict-style read access, raises :py:obj:`KeyError` for unknown fields."""
+        if field_name not in self.__struct_fields__:
+            raise KeyError(f"{field_name}")
+        return getattr(self, field_name)
+
+    def __iter__(self):
+        """Iterate over the names of the struct fields (like ``iter(dict)``)."""
+        return iter(self.__struct_fields__)
+
+
+class LegacyResult(dict):
+    """A wrapper around a legacy result item.  The SearXNG core uses this class
+    for untyped dictionaries / to be downward compatible.
+
+    This class is needed until we have implemented an :py:obj:`Result` class for
+    each result type and the old usages in the codebase have been fully
+    migrated.
+
+    There is only one place where this class is used, in the
+    :py:obj:`searx.results.ResultContainer`.
+
+    .. attention::
+
+       Do not use this class in your own implementations!
+    """
+
+    UNSET = object()
+    WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
+
+    def __init__(self, *args, **kwargs):
+        """Init the dict from ``*args`` / ``**kwargs`` and set defaults for missing fields."""
+        super().__init__(*args, **kwargs)
+        # attributes are mapped to dict items by __getattr__ / __setattr__ (below)
+
+        # Init fields with defaults / compare with defaults of the fields in class Result
+        self.engine = self.get("engine", "")
+        self.template = self.get("template", "default.html")
+        self.url = self.get("url", None)
+        self.parsed_url = self.get("parsed_url", None)
+
+        self.content = self.get("content", "")
+        self.title = self.get("title", "")
+
+        # Legacy types that have already been ported to a type ..
+
+        if "answer" in self:
+            warnings.warn(
+                f"engine {self.engine} is using deprecated `dict` for answers"
+                " / use a class from searx.result_types.answer",
+                DeprecationWarning,
+            )
+            self.template = "answer/legacy.html"
+
+    def __hash__(self) -> int:  # type: ignore
+        """Hash of the answer or of the URL, other legacy types hash by identity."""
+        if "answer" in self:
+            return hash(self["answer"])
+        if not any(cls in self for cls in ["suggestion", "correction", "infobox", "number_of_results", "engine_data"]):
+            # it is a common url-result ..
+            return hash(self.url)
+        return id(self)
+
+    def __eq__(self, other):
+        """Two results are equal if their hash values are equal."""
+        return hash(self) == hash(other)
+
+    def __repr__(self) -> str:
+        """Return the ``dict`` representation prefixed with ``LegacyResult:``."""
+        return f"LegacyResult: {super().__repr__()}"
+
+    def __getattr__(self, name: str, default=UNSET):
+        """Read field *name* from the dict items, a given *default* is returned for unknown fields."""
+        if default is self.UNSET and name not in self:
+            raise AttributeError(f"LegacyResult object has no field named: {name}")
+        return self.get(name, default)
+
+    def __setattr__(self, name: str, val):
+        """Store attribute *name* as an item of the dict."""
+        self[name] = val
+
+    def normalize_result_fields(self):
+        """Collapse whitespace in title & content, init ``parsed_url`` from ``url``."""
+        self.title = self.WHITESPACE_REGEX.sub(" ", self.title)
+
+        if not self.parsed_url and self.url:
+            self.parsed_url = urllib.parse.urlparse(self.url)
+
+            # if the result has no scheme, use http as default
+            if not self.parsed_url.scheme:
+                self.parsed_url = self.parsed_url._replace(scheme="http")
+                self.url = self.parsed_url.geturl()
+
+        if self.content:
+            self.content = self.WHITESPACE_REGEX.sub(" ", self.content)
+            if self.content == self.title:
+                # avoid duplicate content between the content and title fields
+                self.content = ""
diff --git a/searx/result_types/answer.py b/searx/result_types/answer.py
new file mode 100644
index 000000000..1042fe00e
--- /dev/null
+++ b/searx/result_types/answer.py
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Typification of the *answer* results.  Results of this type are rendered in
+the :origin:`answers.html <searx/templates/simple/elements/answers.html>`
+template.
+
+----
+
+.. autoclass:: BaseAnswer
+   :members:
+   :show-inheritance:
+
+.. autoclass:: Answer
+   :members:
+   :show-inheritance:
+
+.. autoclass:: Translations
+   :members:
+   :show-inheritance:
+
+.. autoclass:: AnswerSet
+   :members:
+   :show-inheritance:
+"""
+# pylint: disable=too-few-public-methods
+
+from __future__ import annotations
+
+__all__ = ["AnswerSet", "Answer", "Translations"]
+
+import msgspec
+
+from ._base import Result
+
+
+class BaseAnswer(Result, kw_only=True):
+    """Base class of all answer types.  It is not intended to build instances of
+    this class (aka *abstract*)."""
+
+
+class AnswerSet:
+    """Aggregator for :py:obj:`BaseAnswer` items in a result container."""
+
+    def __init__(self):
+        self._answerlist = []  # kept in insertion order, sorted on iteration
+
+    def __len__(self):
+        return len(self._answerlist)
+
+    def __bool__(self):
+        return bool(self._answerlist)
+
+    def add(self, answer: BaseAnswer) -> None:
+        """Add *answer* to the set, unless an item with the same hash value is
+        already stored (the hash value defines what a duplicate is)."""
+
+        if answer not in self:
+            self._answerlist.append(answer)
+
+    def __iter__(self):
+        """Sort items in this set and iterate over the items."""
+        self._answerlist.sort(key=lambda answer: answer.template)
+        yield from self._answerlist
+
+    def __contains__(self, answer: BaseAnswer) -> bool:
+        """``True`` if an item with the same hash value as *answer* is already
+        in the set."""
+
+        a_hash = hash(answer)
+        return any(hash(item) == a_hash for item in self._answerlist)
+
+
+class Answer(BaseAnswer, kw_only=True):
+    """Simple answer type where the *answer* is a simple string with an optional
+    :py:obj:`url field <Result.url>` to link a resource (article, map, ..)
+    related to the answer."""
+
+    template: str = "answer/legacy.html"
+
+    answer: str
+    """Text of the answer."""
+
+    def __hash__(self):
+        """The hash value of field *answer* is the hash value of the
+        :py:obj:`Answer` object.  :py:obj:`Answer <Result.__eq__>` objects are
+        equal, when the hash values of both objects are equal."""
+        return hash(self.answer)
+
+
+class Translations(BaseAnswer, kw_only=True):
+    """Answer type with a list of translations.
+
+    The items in the list of :py:obj:`Translations.translations` are of type
+    :py:obj:`Translations.Item`:
+
+    .. code:: python
+
+       def response(resp):
+           results = []
+           ...
+           foo_1 = Translations.Item(
+               text="foobar",
+               synonyms=["bar", "foo"],
+               examples=["foo and bar are placeholders"],
+           )
+           foo_url = "https://www.deepl.com/de/translator#en/de/foo"
+           ...
+           Translations(results=results, translations=[foo_1], url=foo_url)
+
+    """
+
+    template: str = "answer/translations.html"
+    """The template in :origin:`answer/translations.html
+    <searx/templates/simple/answer/translations.html>`"""
+
+    translations: list[Translations.Item]
+    """List of translations."""
+
+    class Item(msgspec.Struct, kw_only=True):
+        """A single element of the translations / a translation.  A translation
+        consists of at least a mandatory ``text`` property (the translation),
+        optional properties such as *definitions*, *synonyms* and *examples* are
+        possible."""
+
+        text: str
+        """Translated text."""
+
+        transliteration: str = ""
+        """Transliteration_ of the requested translation.
+
+        .. _Transliteration: https://en.wikipedia.org/wiki/Transliteration
+        """
+
+        examples: list[str] = []
+        """List of examples for the requested translation."""
+
+        definitions: list[str] = []
+        """List of definitions for the requested translation."""
+
+        synonyms: list[str] = []
+        """List of synonyms for the requested translation."""