From 7eedd44f5f9965cf2fbff14d276f96944b5c6a98 Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Wed, 10 Sep 2025 16:10:42 +0200
Subject: [mod] typification of SearXNG: add new result type Paper

This patch adds a new result type: Paper

- Python class:   searx/result_types/paper.py
- Jinja template: searx/templates/simple/result_templates/paper.html
- CSS (less):     client/simple/src/less/result_types/paper.less

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/utils.py | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

(limited to 'searx/utils.py')

diff --git a/searx/utils.py b/searx/utils.py
index a65474c9b..079a99ae2 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -21,7 +21,8 @@ from datetime import timedelta
 from markdown_it import MarkdownIt
 
 from lxml import html
-from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError
+from lxml.etree import XPath, XPathError, XPathSyntaxError
+from lxml.etree import ElementBase, _Element  # pyright: ignore[reportPrivateUsage]
 
 from searx import settings
 from searx.data import USER_AGENTS, data_dir
@@ -40,6 +41,9 @@ XPathSpecType: t.TypeAlias = str | XPath
 """Type alias used by :py:obj:`searx.utils.get_xpath`,
 :py:obj:`searx.utils.eval_xpath` and other XPath selectors."""
 
+ElementType: t.TypeAlias = ElementBase | _Element
+
+
 _BLOCKED_TAGS = ('script', 'style')
 
 _ECMA_UNESCAPE4_RE = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
@@ -204,15 +208,23 @@ def markdown_to_text(markdown_str: str) -> str:
 
 
 def extract_text(
-    xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None,
+    xpath_results: list[ElementType] | ElementType | str | Number | bool | None,
     allow_none: bool = False,
 ) -> str | None:
     """Extract text from a lxml result
 
-    * if xpath_results is list, extract the text from each result and concat the list
-    * if xpath_results is a xml element, extract all the text node from it
-      ( text_content() method from lxml )
-    * if xpath_results is a string element, then it's already done
+    - If ``xpath_results`` is a list of :py:obj:`ElementType` objects, extract
+      the text from each result and concatenate them into a single string.
+
+    - If ``xpath_results`` is an :py:obj:`ElementType` object, extract all the
+      text nodes from it ( :py:obj:`lxml.html.tostring`, ``method="text"`` )
+
+    - If ``xpath_results`` is of type :py:obj:`str`, :py:obj:`Number` or
+      :py:obj:`bool`, its string value is returned.
+
+    - If ``xpath_results`` is ``None``, a :py:obj:`ValueError` is raised,
+      unless ``allow_none`` is ``True``, in which case ``None`` is returned.
+
     """
     if isinstance(xpath_results, list):
         # it's list of result : concat everything using recursive call
@@ -220,7 +232,7 @@ def extract_text(
         for e in xpath_results:
             result = result + (extract_text(e) or '')
         return result.strip()
-    if isinstance(xpath_results, ElementBase):
+    if isinstance(xpath_results, ElementType):
         # it's a element
         text: str = html.tostring(  # type: ignore
             xpath_results,  # pyright: ignore[reportArgumentType]
@@ -289,7 +301,7 @@ def normalize_url(url: str, base_url: str) -> str:
     return url
 
 
-def extract_url(xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None, base_url: str) -> str:
+def extract_url(xpath_results: list[ElementType] | ElementType | str | Number | bool | None, base_url: str) -> str:
     """Extract and normalize URL from lxml Element
 
     Example:
@@ -520,7 +532,7 @@ def get_xpath(xpath_spec: XPathSpecType) -> XPath:
     raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath')  # pyright: ignore[reportUnreachable]
 
 
-def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any:
+def eval_xpath(element: ElementType, xpath_spec: XPathSpecType) -> t.Any:
     """Equivalent of ``element.xpath(xpath_str)`` but compile ``xpath_str`` into
     a :py:obj:`lxml.etree.XPath` object once for all.  The return value of
     ``xpath(..)`` is complex, read `XPath return values`_ for more details.
@@ -548,12 +560,12 @@ def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any:
         raise SearxEngineXPathException(xpath_spec, arg) from e
 
 
-def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]:
+def eval_xpath_list(element: ElementType, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]:
     """Same as :py:obj:`searx.utils.eval_xpath`, but additionally ensures the
     return value is a :py:obj:`list`.  The minimum length of the list is also
     checked (if ``min_len`` is set)."""
 
-    result = eval_xpath(element, xpath_spec)
+    result: list[t.Any] = eval_xpath(element, xpath_spec)
     if not isinstance(result, list):
         raise SearxEngineXPathException(xpath_spec, 'the result is not a list')
     if min_len is not None and min_len > len(result):
@@ -562,7 +574,7 @@ def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: in
 
 
 def eval_xpath_getindex(
-    element: ElementBase,
+    element: ElementType,
     xpath_spec: XPathSpecType,
     index: int,
     default: t.Any = _NOTSET,
-- 
cgit v1.2.3