summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--client/simple/src/less/result_types/paper.less72
-rw-r--r--client/simple/src/less/style.less10
-rw-r--r--docs/dev/result_types/main/paper.rst7
-rw-r--r--docs/dev/result_types/main_result.rst2
-rw-r--r--searx/result_types/__init__.py3
-rw-r--r--searx/result_types/_base.py6
-rw-r--r--searx/result_types/paper.py96
-rw-r--r--searx/templates/simple/result_templates/paper.html102
-rw-r--r--searx/utils.py36
-rw-r--r--searx/weather.py30
10 files changed, 318 insertions, 46 deletions
diff --git a/client/simple/src/less/result_types/paper.less b/client/simple/src/less/result_types/paper.less
new file mode 100644
index 000000000..0a83ef224
--- /dev/null
+++ b/client/simple/src/less/result_types/paper.less
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+/*
+ Layout of the Paper result class
+*/
+
+.result-paper {
+ .attributes {
+ display: table;
+ border-spacing: 0.125rem;
+
+ div {
+ display: table-row;
+
+ span {
+ font-size: 0.9rem;
+ margin-top: 0.25rem;
+ display: table-cell;
+
+ time {
+ font-size: 0.9rem;
+ }
+ }
+
+ span:first-child {
+ color: var(--color-base-font);
+ min-width: 10rem;
+ }
+
+ span:nth-child(2) {
+ color: var(--color-result-publishdate-font);
+ }
+ }
+ }
+
+ .content {
+ margin-top: 0.25rem;
+ }
+
+ .comments {
+ font-size: 0.9rem;
+ margin: 0.25rem 0 0 0;
+ padding: 0;
+ word-wrap: break-word;
+ line-height: 1.24;
+ font-style: italic;
+ }
+}
+
+@media screen and (max-width: @phone) {
+ .result-paper {
+ .attributes {
+ display: block;
+
+ div {
+ display: block;
+
+ span {
+ display: inline;
+ }
+
+ span:first-child {
+ font-weight: bold;
+ }
+
+ span:nth-child(2) {
+ .ltr-margin-left(0.5rem);
+ }
+ }
+ }
+ }
+}
diff --git a/client/simple/src/less/style.less b/client/simple/src/less/style.less
index 3cac7a265..258c45a0e 100644
--- a/client/simple/src/less/style.less
+++ b/client/simple/src/less/style.less
@@ -309,11 +309,11 @@ article[data-vim-selected].category-social {
}
}
-.result-paper,
.result-packages {
.attributes {
display: table;
border-spacing: 0.125rem;
+ margin-top: 0.3rem;
div {
display: table-row;
@@ -353,12 +353,6 @@ article[data-vim-selected].category-social {
}
}
-.result-packages {
- .attributes {
- margin-top: 0.3rem;
- }
-}
-
.template_group_images {
display: flex;
flex-wrap: wrap;
@@ -1118,7 +1112,6 @@ summary.title {
display: none;
}
- .result-paper,
.result-packages {
.attributes {
display: block;
@@ -1164,3 +1157,4 @@ pre code {
// import layouts of the Result types
@import "result_types/keyvalue.less";
+@import "result_types/paper.less";
diff --git a/docs/dev/result_types/main/paper.rst b/docs/dev/result_types/main/paper.rst
new file mode 100644
index 000000000..94d8a81a3
--- /dev/null
+++ b/docs/dev/result_types/main/paper.rst
@@ -0,0 +1,7 @@
+.. _result_types.paper:
+
+=============
+Paper Results
+=============
+
+.. automodule:: searx.result_types.paper
diff --git a/docs/dev/result_types/main_result.rst b/docs/dev/result_types/main_result.rst
index f072ea757..f3d09c011 100644
--- a/docs/dev/result_types/main_result.rst
+++ b/docs/dev/result_types/main_result.rst
@@ -16,6 +16,7 @@ following types have been implemented so far ..
main/mainresult
main/keyvalue
main/code
+ main/paper
The :ref:`LegacyResult <LegacyResult>` is used internally for the results that
have not yet been typed. The templates can be used as orientation until the
@@ -26,7 +27,6 @@ final typing is complete.
- :ref:`template videos`
- :ref:`template torrent`
- :ref:`template map`
-- :ref:`template paper`
- :ref:`template packages`
- :ref:`template files`
- :ref:`template products`
diff --git a/searx/result_types/__init__.py b/searx/result_types/__init__.py
index c8b8eb4f4..a1976c10f 100644
--- a/searx/result_types/__init__.py
+++ b/searx/result_types/__init__.py
@@ -22,6 +22,7 @@ __all__ = [
"Translations",
"WeatherAnswer",
"Code",
+ "Paper",
]
import typing as t
@@ -31,6 +32,7 @@ from ._base import Result, MainResult, LegacyResult
from .answer import AnswerSet, Answer, Translations, WeatherAnswer
from .keyvalue import KeyValue
from .code import Code
+from .paper import Paper
class ResultList(list[Result | LegacyResult], abc.ABC):
@@ -44,6 +46,7 @@ class ResultList(list[Result | LegacyResult], abc.ABC):
Answer = Answer
KeyValue = KeyValue
Code = Code
+ Paper = Paper
MainResult = MainResult
Result = Result
Translations = Translations
diff --git a/searx/result_types/_base.py b/searx/result_types/_base.py
index 1c614651b..b3f2afdeb 100644
--- a/searx/result_types/_base.py
+++ b/searx/result_types/_base.py
@@ -362,7 +362,11 @@ class MainResult(Result): # pylint: disable=missing-class-docstring
"""The date on which the object was published."""
pubdate: str = ""
- """String representation of :py:obj:`MainResult.publishedDate`"""
+ """String representation of :py:obj:`MainResult.publishedDate`
+
+ Deprecated: it is still partially used in the templates, but will one day be
+ completely eliminated.
+ """
length: time.struct_time | None = None
"""Playing duration in seconds."""
diff --git a/searx/result_types/paper.py b/searx/result_types/paper.py
new file mode 100644
index 000000000..33bb5f99a
--- /dev/null
+++ b/searx/result_types/paper.py
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Typification of the *paper* results.
+
+.. _BibTeX field types: https://en.wikipedia.org/wiki/BibTeX#Field_types
+.. _BibTeX format: https://www.bibtex.com/g/bibtex-format/
+
+Results of this type are rendered in the :origin:`paper.html
+<searx/templates/simple/result_templates/paper.html>` template.
+
+Related topics:
+
+- `BibTeX field types`_
+- `BibTeX format`_
+
+----
+
+.. autoclass:: Paper
+ :members:
+ :show-inheritance:
+
+"""
+# pylint: disable=too-few-public-methods, disable=invalid-name
+
+from __future__ import annotations
+
+__all__ = ["Paper"]
+
+import typing as t
+
+from searx.weather import DateTime
+from ._base import MainResult
+
+
+@t.final
+class Paper(MainResult, kw_only=True):
+ """Result type suitable for displaying scientific papers and other
+ documents."""
+
+ template: str = "paper.html"
+
+ date_of_publication: DateTime | None = None
+ """Date the document was published."""
+
+ content: str = ""
+ """An abstract or excerpt from the document."""
+
+ comments: str = ""
+ """Free text displayed in italics below the content."""
+
+ tags: list[str] = []
+ """Free tag list."""
+
+ type: str = ""
+ """Short description of medium type, e.g. *book*, *pdf* or *html* ..."""
+
+ authors: list[str] | set[str] = []
+ """List of authors of the work (authors with an "s" suffix; the single
+ "author" is in :py:obj:`MainResult.author`)."""
+
+ editor: str = ""
+ """Editor of the book/paper."""
+
+ publisher: str = ""
+ """Name of the publisher."""
+
+ journal: str = ""
+ """Name of the journal or magazine the article was published in."""
+
+ volume: str | int = ""
+ """Volume number."""
+
+ pages: str = ""
+ """Page range where the article is."""
+
+ number: str = ""
+ """Number of the report or the issue number for a journal article."""
+
+ doi: str = ""
+ """DOI number (like ``10.1038/d41586-018-07848-2``)."""
+
+ issn: list[str] = []
+ """List of ISSN numbers like ``1476-4687``"""
+
+ isbn: list[str] = []
+ """List of ISBN numbers like ``9780201896831``"""
+
+ pdf_url: str = ""
+ """URL to the full article, the PDF version"""
+
+ html_url: str = ""
+ """URL to full article, HTML version"""
+
+ def __post_init__(self):
+ super().__post_init__()
+ if self.date_of_publication is None and self.publishedDate is not None:
+ self.date_of_publication = DateTime(self.publishedDate)
diff --git a/searx/templates/simple/result_templates/paper.html b/searx/templates/simple/result_templates/paper.html
index 7e94cf174..074ad9081 100644
--- a/searx/templates/simple/result_templates/paper.html
+++ b/searx/templates/simple/result_templates/paper.html
@@ -1,34 +1,92 @@
{% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer, result_link with context %}
-{{ result_header(result, favicons, image_proxify) -}}
+{{ result_header(result, favicons, image_proxify) }}
+
<div class="attributes">
- {%- if result.publishedDate %}<div class="result_publishedDate"><span>{{ _("Published date") }}:</span><span><time class="published_date" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time></span></div>{% endif -%}
- {%- if result.authors %}<div class="result_authors"><span>{{ _("Author") }}:</span><span>{{ result.authors | join(", ") }}</span></div>{% endif -%}
+ {%- if result.date_of_publication %}
+ <div>
+ <span>{{ _("Published date") }}:</span>
+ <span>{{ result.date_of_publication.l10n_date("long", "UI") }}</span>
+ </div>
+ {% endif -%}
+ {%- if result.authors %}
+ <div>
+ <span>{{ _("Author") }}:</span>
+ <span>{{ result.authors | join(", ") }}</span>
+ </div>
+ {% endif -%}
{%- if result.journal -%}
- <div class="result_journal">
- <span>{{- _("Journal") }}:</span><span>{{ result.journal -}}
- {%- if result.volume -%}
- &nbsp;{{- result.volume -}}
- {%- if result.number -%}
- .{{- result.number -}}
+ <div>
+ <span>{{- _("Journal") }}:</span>
+ <span>{{ result.journal -}}
+ {%- if result.volume -%}
+ &nbsp;{{- result.volume -}}
+ {%- if result.number -%}.{{- result.number -}}{%- endif -%}
{%- endif -%}
- {%- endif -%}
- {%- if result.pages -%}
- &nbsp;{{- result.pages -}}
- {%- endif -%}
+ {%- if result.pages -%}&nbsp;{{- result.pages -}}{%- endif -%}
</span>
</div>
{%- endif %}
- {%- if result.editor %}<div class="result_editor"><span>{{ _("Editor") }}:</span><span>{{ result.editor }}</span></div>{% endif -%}
- {%- if result.publisher %}<div class="result_publisher"><span>{{ _("Publisher") }}:</span><span>{{ result.publisher }}</span></div>{% endif -%}
- {%- if result.type %}<div class="result_type"><span>{{ _("Type") }}:</span><span>{{ result.type }}</span></div>{% endif -%}
- {%- if result.tags %}<div class="result_tags"><span>{{ _("Tags") }}:</span><span>{{ result.tags | join(", ")}}</span></div>{%- endif -%}
- {%- if result.doi %}<div class="result_doi"><span>{{ _("DOI") }}:</span><span>{{ result_link(doi_resolver + result.doi, result.doi) }}</span></div>{% endif -%}
- {%- if result.issn %}<div class="result_issn"><span>{{ _("ISSN") }}:</span><span>{{ result.issn | join(", ") }}</span></div>{% endif -%}
- {%- if result.isbn %}<div class="result_isbn"><span>{{ _("ISBN") }}:</span><span>{{ result.isbn | join(", ") }}</span></div>{% endif -%}
+ {%- if result.editor %}
+ <div>
+ <span>{{ _("Editor") }}:</span>
+ <span>{{ result.editor }}</span>
+ </div>
+ {% endif -%}
+ {%- if result.publisher %}
+ <div>
+ <span>{{ _("Publisher") }}:</span>
+ <span>{{ result.publisher }}</span>
+ </div>
+ {% endif -%}
+ {%- if result.type %}
+ <div>
+ <span>{{ _("Type") }}:</span>
+ <span>{{ result.type }}</span>
+ </div>
+ {% endif -%}
+ {%- if result.tags %}
+ <div>
+ <span>{{ _("Tags") }}:</span>
+ <span>{{ result.tags | join(", ")}}</span>
+ </div>
+ {%- endif -%}
+ {%- if result.doi %}
+ <div>
+ <span>{{ _("DOI") }}:</span>
+ <span>{{ result_link(doi_resolver + result.doi, result.doi) }}</span>
+ </div>
+ {% endif -%}
+ {%- if result.issn %}
+ <div>
+ <span>{{ _("ISSN") }}:</span>
+ <span>{{ result.issn | join(", ") }}</span>
+ </div>
+ {% endif -%}
+ {%- if result.isbn %}
+ <div class="result_isbn">
+ <span>{{ _("ISBN") }}:</span>
+ <span>{{ result.isbn | join(", ") }}</span>
+ </div>
+ {% endif -%}
+ {%- if result.views %}
+ <div>
+ <span>{{ _('Views') }}:</span>
+ <span>{{ result.views }}</span>
+ </div>
+ {% endif -%}
</div>
-{%- if result.content -%}<p class="content">{{- result.content | safe -}}</p>{%- endif -%}
-{%- if result.comments -%}<p class="comments">{{- result.comments -}}</p>{%- endif -%}
+{%- if result.content -%}
+ <p class="content">{{- result.content | safe -}}</p>
+{%- endif -%}
+{%- if result.comments -%}
+ <p class="comments">{{- result.comments -}}</p>
+{%- endif -%}
+
+{%- if result.metadata %}
+ <div class="highlight">{{ result.metadata|safe }}</div>
+{% endif -%}
+
<p class="altlink">
{%- if result.pdf_url -%}{{ result_link(result.pdf_url, _('PDF')) }}{%- endif -%}
{%- if result.html_url -%}{{ result_link(result.html_url, _('HTML')) }}{%- endif -%}
diff --git a/searx/utils.py b/searx/utils.py
index a65474c9b..079a99ae2 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -21,7 +21,8 @@ from datetime import timedelta
from markdown_it import MarkdownIt
from lxml import html
-from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError
+from lxml.etree import XPath, XPathError, XPathSyntaxError
+from lxml.etree import ElementBase, _Element # pyright: ignore[reportPrivateUsage]
from searx import settings
from searx.data import USER_AGENTS, data_dir
@@ -40,6 +41,9 @@ XPathSpecType: t.TypeAlias = str | XPath
"""Type alias used by :py:obj:`searx.utils.get_xpath`,
:py:obj:`searx.utils.eval_xpath` and other XPath selectors."""
+ElementType: t.TypeAlias = ElementBase | _Element
+
+
_BLOCKED_TAGS = ('script', 'style')
_ECMA_UNESCAPE4_RE = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
@@ -204,15 +208,23 @@ def markdown_to_text(markdown_str: str) -> str:
def extract_text(
- xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None,
+ xpath_results: list[ElementType] | ElementType | str | Number | bool | None,
allow_none: bool = False,
) -> str | None:
"""Extract text from a lxml result
- * if xpath_results is list, extract the text from each result and concat the list
- * if xpath_results is a xml element, extract all the text node from it
- ( text_content() method from lxml )
- * if xpath_results is a string element, then it's already done
+ - If ``xpath_results`` is a list of :py:obj:`ElementType` objects, extract
+ the text from each result and concatenate the results into a single string.
+
+ - If ``xpath_results`` is a :py:obj:`ElementType` object, extract all the
+ text nodes from it ( :py:obj:`lxml.html.tostring`, ``method="text"`` ).
+
+ - If ``xpath_results`` is of type :py:obj:`str`, :py:obj:`Number` or
+ :py:obj:`bool`, the string value is returned.
+
+ - If ``xpath_results`` is ``None``, a :py:obj:`ValueError` is raised,
+ unless ``allow_none`` is ``True``, in which case ``None`` is returned.
+
"""
if isinstance(xpath_results, list):
# it's list of result : concat everything using recursive call
@@ -220,7 +232,7 @@ def extract_text(
for e in xpath_results:
result = result + (extract_text(e) or '')
return result.strip()
- if isinstance(xpath_results, ElementBase):
+ if isinstance(xpath_results, ElementType):
# it's a element
text: str = html.tostring( # type: ignore
xpath_results, # pyright: ignore[reportArgumentType]
@@ -289,7 +301,7 @@ def normalize_url(url: str, base_url: str) -> str:
return url
-def extract_url(xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None, base_url: str) -> str:
+def extract_url(xpath_results: list[ElementType] | ElementType | str | Number | bool | None, base_url: str) -> str:
"""Extract and normalize URL from lxml Element
Example:
@@ -520,7 +532,7 @@ def get_xpath(xpath_spec: XPathSpecType) -> XPath:
raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath') # pyright: ignore[reportUnreachable]
-def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any:
+def eval_xpath(element: ElementType, xpath_spec: XPathSpecType) -> t.Any:
"""Equivalent of ``element.xpath(xpath_str)`` but compile ``xpath_str`` into
a :py:obj:`lxml.etree.XPath` object once for all. The return value of
``xpath(..)`` is complex, read `XPath return values`_ for more details.
@@ -548,12 +560,12 @@ def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any:
raise SearxEngineXPathException(xpath_spec, arg) from e
-def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]:
+def eval_xpath_list(element: ElementType, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]:
"""Same as :py:obj:`searx.utils.eval_xpath`, but additionally ensures the
return value is a :py:obj:`list`. The minimum length of the list is also
checked (if ``min_len`` is set)."""
- result = eval_xpath(element, xpath_spec)
+ result: list[t.Any] = eval_xpath(element, xpath_spec)
if not isinstance(result, list):
raise SearxEngineXPathException(xpath_spec, 'the result is not a list')
if min_len is not None and min_len > len(result):
@@ -562,7 +574,7 @@ def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: in
def eval_xpath_getindex(
- element: ElementBase,
+ element: ElementType,
xpath_spec: XPathSpecType,
index: int,
default: t.Any = _NOTSET,
diff --git a/searx/weather.py b/searx/weather.py
index e5cb92bed..c8f3cf973 100644
--- a/searx/weather.py
+++ b/searx/weather.py
@@ -27,6 +27,7 @@ import babel
import babel.numbers
import babel.dates
import babel.languages
+import flask_babel
from searx import network
from searx.cache import ExpireCache, ExpireCacheCfg
@@ -197,6 +198,7 @@ class GeoLocation:
DateTimeFormats = typing.Literal["full", "long", "medium", "short"]
+DateTimeLocaleTypes = typing.Literal["UI"]
@typing.final
@@ -205,6 +207,13 @@ class DateTime:
conveniently combines :py:obj:`datetime.datetime` and
:py:obj:`babel.dates.format_datetime`. A conversion of time zones is not
provided (in the current version).
+
+ The localized string representation can be obtained via the
+ :py:obj:`DateTime.l10n` and :py:obj:`DateTime.l10n_date` methods, where the
+ ``locale`` parameter defaults to the search language. Alternatively, a
+ :py:obj:`GeoLocation` or a :py:obj:`babel.Locale` instance can be passed
+ directly. If the UI language is to be used, the string ``UI`` can be passed
+ as the value for the ``locale``.
"""
def __init__(self, time: datetime.datetime):
@@ -216,15 +225,32 @@ class DateTime:
def l10n(
self,
fmt: DateTimeFormats | str = "medium",
- locale: babel.Locale | GeoLocation | None = None,
+ locale: DateTimeLocaleTypes | babel.Locale | GeoLocation | None = None,
) -> str:
"""Localized representation of date & time."""
- if isinstance(locale, GeoLocation):
+ if isinstance(locale, str) and locale == "UI":
+ locale = flask_babel.get_locale()
+ elif isinstance(locale, GeoLocation):
locale = locale.locale()
elif locale is None:
locale = babel.Locale.parse(_get_sxng_locale_tag(), sep='-')
return babel.dates.format_datetime(self.datetime, format=fmt, locale=locale)
+ def l10n_date(
+ self,
+ fmt: DateTimeFormats | str = "medium",
+ locale: DateTimeLocaleTypes | babel.Locale | GeoLocation | None = None,
+ ) -> str:
+ """Localized representation of date."""
+
+ if isinstance(locale, str) and locale == "UI":
+ locale = flask_babel.get_locale()
+ elif isinstance(locale, GeoLocation):
+ locale = locale.locale()
+ elif locale is None:
+ locale = babel.Locale.parse(_get_sxng_locale_tag(), sep='-')
+ return babel.dates.format_date(self.datetime, format=fmt, locale=locale)
+
@typing.final
class Temperature: