diff options
| -rw-r--r-- | client/simple/src/less/result_types/paper.less | 72 | ||||
| -rw-r--r-- | client/simple/src/less/style.less | 10 | ||||
| -rw-r--r-- | docs/dev/result_types/main/paper.rst | 7 | ||||
| -rw-r--r-- | docs/dev/result_types/main_result.rst | 2 | ||||
| -rw-r--r-- | searx/result_types/__init__.py | 3 | ||||
| -rw-r--r-- | searx/result_types/_base.py | 6 | ||||
| -rw-r--r-- | searx/result_types/paper.py | 96 | ||||
| -rw-r--r-- | searx/templates/simple/result_templates/paper.html | 102 | ||||
| -rw-r--r-- | searx/utils.py | 36 | ||||
| -rw-r--r-- | searx/weather.py | 30 |
10 files changed, 318 insertions, 46 deletions
diff --git a/client/simple/src/less/result_types/paper.less b/client/simple/src/less/result_types/paper.less new file mode 100644 index 000000000..0a83ef224 --- /dev/null +++ b/client/simple/src/less/result_types/paper.less @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +/* + Layout of the Paper result class +*/ + +.result-paper { + .attributes { + display: table; + border-spacing: 0.125rem; + + div { + display: table-row; + + span { + font-size: 0.9rem; + margin-top: 0.25rem; + display: table-cell; + + time { + font-size: 0.9rem; + } + } + + span:first-child { + color: var(--color-base-font); + min-width: 10rem; + } + + span:nth-child(2) { + color: var(--color-result-publishdate-font); + } + } + } + + .content { + margin-top: 0.25rem; + } + + .comments { + font-size: 0.9rem; + margin: 0.25rem 0 0 0; + padding: 0; + word-wrap: break-word; + line-height: 1.24; + font-style: italic; + } +} + +@media screen and (max-width: @phone) { + .result-paper { + .attributes { + display: block; + + div { + display: block; + + span { + display: inline; + } + + span:first-child { + font-weight: bold; + } + + span:nth-child(2) { + .ltr-margin-left(0.5rem); + } + } + } + } +} diff --git a/client/simple/src/less/style.less b/client/simple/src/less/style.less index 3cac7a265..258c45a0e 100644 --- a/client/simple/src/less/style.less +++ b/client/simple/src/less/style.less @@ -309,11 +309,11 @@ article[data-vim-selected].category-social { } } -.result-paper, .result-packages { .attributes { display: table; border-spacing: 0.125rem; + margin-top: 0.3rem; div { display: table-row; @@ -353,12 +353,6 @@ article[data-vim-selected].category-social { } } -.result-packages { - .attributes { - margin-top: 0.3rem; - } -} - .template_group_images { display: flex; flex-wrap: wrap; @@ -1118,7 +1112,6 @@ summary.title { display: none; } - .result-paper, .result-packages { .attributes { display: block; @@ -1164,3 +1157,4 @@ pre code { // import layouts of the Result types @import "result_types/keyvalue.less"; +@import "result_types/paper.less"; diff --git a/docs/dev/result_types/main/paper.rst b/docs/dev/result_types/main/paper.rst new file mode 100644 index 000000000..94d8a81a3 --- /dev/null +++ b/docs/dev/result_types/main/paper.rst @@ -0,0 +1,7 @@ +.. _result_types.paper: + +============= +Paper Results +============= + +.. automodule:: searx.result_types.paper diff --git a/docs/dev/result_types/main_result.rst b/docs/dev/result_types/main_result.rst index f072ea757..f3d09c011 100644 --- a/docs/dev/result_types/main_result.rst +++ b/docs/dev/result_types/main_result.rst @@ -16,6 +16,7 @@ following types have been implemented so far .. main/mainresult main/keyvalue main/code + main/paper The :ref:`LegacyResult <LegacyResult>` is used internally for the results that have not yet been typed. The templates can be used as orientation until the @@ -26,7 +27,6 @@ final typing is complete. - :ref:`template videos` - :ref:`template torrent` - :ref:`template map` -- :ref:`template paper` - :ref:`template packages` - :ref:`template files` - :ref:`template products` diff --git a/searx/result_types/__init__.py b/searx/result_types/__init__.py index c8b8eb4f4..a1976c10f 100644 --- a/searx/result_types/__init__.py +++ b/searx/result_types/__init__.py @@ -22,6 +22,7 @@ __all__ = [ "Translations", "WeatherAnswer", "Code", + "Paper", ] import typing as t @@ -31,6 +32,7 @@ from ._base import Result, MainResult, LegacyResult from .answer import AnswerSet, Answer, Translations, WeatherAnswer from .keyvalue import KeyValue from .code import Code +from .paper import Paper class ResultList(list[Result | LegacyResult], abc.ABC): @@ -44,6 +46,7 @@ class ResultList(list[Result | LegacyResult], abc.ABC): Answer = Answer KeyValue = KeyValue Code = Code + Paper = Paper MainResult = MainResult Result = Result Translations = Translations diff --git a/searx/result_types/_base.py b/searx/result_types/_base.py index 1c614651b..b3f2afdeb 100644 --- a/searx/result_types/_base.py +++ b/searx/result_types/_base.py @@ -362,7 +362,11 @@ class MainResult(Result): # pylint: disable=missing-class-docstring """The date on which the object was published.""" pubdate: str = "" - """String representation of :py:obj:`MainResult.publishedDate`""" + """String representation of :py:obj:`MainResult.publishedDate` + + Deprecated: it is still partially used in the templates, but will one day be + completely eliminated. + """ length: time.struct_time | None = None """Playing duration in seconds.""" diff --git a/searx/result_types/paper.py b/searx/result_types/paper.py new file mode 100644 index 000000000..33bb5f99a --- /dev/null +++ b/searx/result_types/paper.py @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Typification of the *paper* results. + +.. _BibTeX field types: https://en.wikipedia.org/wiki/BibTeX#Field_types +.. _BibTeX format: https://www.bibtex.com/g/bibtex-format/ + +Results of this type are rendered in the :origin:`paper.html +<searx/templates/simple/result_templates/paper.html>` template. + +Related topics: + +- `BibTeX field types`_ +- `BibTeX format`_ + +---- + +.. autoclass:: Paper + :members: + :show-inheritance: + +""" +# pylint: disable=too-few-public-methods, disable=invalid-name + +from __future__ import annotations + +__all__ = ["Paper"] + +import typing as t + +from searx.weather import DateTime +from ._base import MainResult + + +@t.final +class Paper(MainResult, kw_only=True): + """Result type suitable for displaying scientific papers and other + documents.""" + + template: str = "paper.html" + + date_of_publication: DateTime | None = None + """Date the document was published.""" + + content: str = "" + """An abstract or excerpt from the document.""" + + comments: str = "" + """Free text display in italic below the content.""" + + tags: list[str] = [] + """Free tag list.""" + + type: str = "" + """Short description of medium type, e.g. *book*, *pdf* or *html* ...""" + + authors: list[str] | set[str] = [] + """List of authors of the work (authors with a "s" suffix, the "author" is + in the :py:obj:`MainResult.author`).""" + + editor: str = "" + """Editor of the book/paper.""" + + publisher: str = "" + """Name of the publisher.""" + + journal: str = "" + """Name of the journal or magazine the article was published in.""" + + volume: str | int = "" + """Volume number.""" + + pages: str = "" + """Page range where the article is.""" + + number: str = "" + """Number of the report or the issue number for a journal article.""" + + doi: str = "" + """DOI number (like ``10.1038/d41586-018-07848-2``).""" + + issn: list[str] = [] + """List of ISSN numbers like ``1476-4687``""" + + isbn: list[str] = [] + """List of ISBN numbers like ``9780201896831``""" + + pdf_url: str = "" + """URL to the full article, the PDF version""" + + html_url: str = "" + """URL to full article, HTML version""" + + def __post_init__(self): + super().__post_init__() + if self.date_of_publication is None and self.publishedDate is not None: + self.date_of_publication = DateTime(self.publishedDate) diff --git a/searx/templates/simple/result_templates/paper.html b/searx/templates/simple/result_templates/paper.html index 7e94cf174..074ad9081 100644 --- a/searx/templates/simple/result_templates/paper.html +++ b/searx/templates/simple/result_templates/paper.html @@ -1,34 +1,92 @@ {% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer, result_link with context %} -{{ result_header(result, favicons, image_proxify) -}} +{{ result_header(result, favicons, image_proxify) }} + <div class="attributes"> - {%- if result.publishedDate %}<div class="result_publishedDate"><span>{{ _("Published date") }}:</span><span><time class="published_date" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time></span></div>{% endif -%} - {%- if result.authors %}<div class="result_authors"><span>{{ _("Author") }}:</span><span>{{ result.authors | join(", ") }}</span></div>{% endif -%} + {%- if result.date_of_publication %} + <div> + <span>{{ _("Published date") }}:</span> + <span>{{ result.date_of_publication.l10n_date("long", "UI") }}</span> + </div> + {% endif -%} + {%- if result.authors %} + <div> + <span>{{ _("Author") }}:</span> + <span>{{ result.authors | join(", ") }}</span> + </div> + {% endif -%} {%- if result.journal -%} - <div class="result_journal"> - <span>{{- _("Journal") }}:</span><span>{{ result.journal -}} - {%- if result.volume -%} - {{- result.volume -}} - {%- if result.number -%} - .{{- result.number -}} + <div> + <span>{{- _("Journal") }}:</span> + <span>{{ result.journal -}} + {%- if result.volume -%} + {{- result.volume -}} + {%- if result.number -%}.{{- result.number -}}{%- endif -%} {%- endif -%} - {%- endif -%} - {%- if result.pages -%} - {{- result.pages -}} - {%- endif -%} + {%- if result.pages -%} {{- result.pages -}}{%- endif -%} </span> </div> {%- endif %} - {%- if result.editor %}<div class="result_editor"><span>{{ _("Editor") }}:</span><span>{{ result.editor }}</span></div>{% endif -%} - {%- if result.publisher %}<div class="result_publisher"><span>{{ _("Publisher") }}:</span><span>{{ result.publisher }}</span></div>{% endif -%} - {%- if result.type %}<div class="result_type"><span>{{ _("Type") }}:</span><span>{{ result.type }}</span></div>{% endif -%} - {%- if result.tags %}<div class="result_tags"><span>{{ _("Tags") }}:</span><span>{{ result.tags | join(", ")}}</span></div>{%- endif -%} - {%- if result.doi %}<div class="result_doi"><span>{{ _("DOI") }}:</span><span>{{ result_link(doi_resolver + result.doi, result.doi) }}</span></div>{% endif -%} - {%- if result.issn %}<div class="result_issn"><span>{{ _("ISSN") }}:</span><span>{{ result.issn | join(", ") }}</span></div>{% endif -%} - {%- if result.isbn %}<div class="result_isbn"><span>{{ _("ISBN") }}:</span><span>{{ result.isbn | join(", ") }}</span></div>{% endif -%} + {%- if result.editor %} + <div> + <span>{{ _("Editor") }}:</span> + <span>{{ result.editor }}</span> + </div> + {% endif -%} + {%- if result.publisher %} + <div> + <span>{{ _("Publisher") }}:</span> + <span>{{ result.publisher }}</span> + </div> + {% endif -%} + {%- if result.type %} + <div> + <span>{{ _("Type") }}:</span> + <span>{{ result.type }}</span> + </div> + {% endif -%} + {%- if result.tags %} + <div> + <span>{{ _("Tags") }}:</span> + <span>{{ result.tags | join(", ")}}</span> + </div> + {%- endif -%} + {%- if result.doi %} + <div> + <span>{{ _("DOI") }}:</span> + <span>{{ result_link(doi_resolver + result.doi, result.doi) }}</span> + </div> + {% endif -%} + {%- if result.issn %} + <div> + <span>{{ _("ISSN") }}:</span> + <span>{{ result.issn | join(", ") }}</span> + </div> + {% endif -%} + {%- if result.isbn %} + <div class="result_isbn"> + <span>{{ _("ISBN") }}:</span> + <span>{{ result.isbn | join(", ") }}</span> + </div> + {% endif -%} + {%- if result.views %} + <div> + <span>{{ _('Views') }}:</span> + <span>{{ result.views }}</span> + </div> + {% endif -%} </div> -{%- if result.content -%}<p class="content">{{- result.content | safe -}}</p>{%- endif -%} -{%- if result.comments -%}<p class="comments">{{- result.comments -}}</p>{%- endif -%} +{%- if result.content -%} + <p class="content">{{- result.content | safe -}}</p> +{%- endif -%} +{%- if result.comments -%} + <p class="comments">{{- result.comments -}}</p> +{%- endif -%} + +{%- if result.metadata %} + <div class="highlight">{{ result.metadata|safe }}</div> +{% endif -%} + <p class="altlink"> {%- if result.pdf_url -%}{{ result_link(result.pdf_url, _('PDF')) }}{%- endif -%} {%- if result.html_url -%}{{ result_link(result.html_url, _('HTML')) }}{%- endif -%} diff --git a/searx/utils.py b/searx/utils.py index a65474c9b..079a99ae2 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -21,7 +21,8 @@ from datetime import timedelta from markdown_it import MarkdownIt from lxml import html -from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError +from lxml.etree import XPath, XPathError, XPathSyntaxError +from lxml.etree import ElementBase, _Element # pyright: ignore[reportPrivateUsage] from searx import settings from searx.data import USER_AGENTS, data_dir @@ -40,6 +41,9 @@ XPathSpecType: t.TypeAlias = str | XPath """Type alias used by :py:obj:`searx.utils.get_xpath`, :py:obj:`searx.utils.eval_xpath` and other XPath selectors.""" +ElementType: t.TypeAlias = ElementBase | _Element + + _BLOCKED_TAGS = ('script', 'style') _ECMA_UNESCAPE4_RE = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE) @@ -204,15 +208,23 @@ def markdown_to_text(markdown_str: str) -> str: def extract_text( - xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None, + xpath_results: list[ElementType] | ElementType | str | Number | bool | None, allow_none: bool = False, ) -> str | None: """Extract text from a lxml result - * if xpath_results is list, extract the text from each result and concat the list - * if xpath_results is a xml element, extract all the text node from it - ( text_content() method from lxml ) - * if xpath_results is a string element, then it's already done + - If ``xpath_results`` is a list of :py:obj:`ElementType` objects, extract + the text from each result and concatenate the list in a string. + + - If ``xpath_results`` is a :py:obj:`ElementType` object, extract all the + text node from it ( :py:obj:`lxml.html.tostring`, ``method="text"`` ) + + - If ``xpath_results`` is of type :py:obj:`str` or :py:obj:`Number`, + :py:obj:`bool` the string value is returned. + + - If ``xpath_results`` is of type ``None`` a :py:obj:`ValueError` is raised, + except ``allow_none`` is ``True`` where ``None`` is returned. + """ if isinstance(xpath_results, list): # it's list of result : concat everything using recursive call @@ -220,7 +232,7 @@ def extract_text( for e in xpath_results: result = result + (extract_text(e) or '') return result.strip() - if isinstance(xpath_results, ElementBase): + if isinstance(xpath_results, ElementType): # it's a element text: str = html.tostring( # type: ignore xpath_results, # pyright: ignore[reportArgumentType] @@ -289,7 +301,7 @@ def normalize_url(url: str, base_url: str) -> str: return url -def extract_url(xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None, base_url: str) -> str: +def extract_url(xpath_results: list[ElementType] | ElementType | str | Number | bool | None, base_url: str) -> str: """Extract and normalize URL from lxml Element Example: @@ -520,7 +532,7 @@ def get_xpath(xpath_spec: XPathSpecType) -> XPath: raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath') # pyright: ignore[reportUnreachable] -def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any: +def eval_xpath(element: ElementType, xpath_spec: XPathSpecType) -> t.Any: """Equivalent of ``element.xpath(xpath_str)`` but compile ``xpath_str`` into a :py:obj:`lxml.etree.XPath` object once for all. The return value of ``xpath(..)`` is complex, read `XPath return values`_ for more details. @@ -548,12 +560,12 @@ def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any: raise SearxEngineXPathException(xpath_spec, arg) from e -def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]: +def eval_xpath_list(element: ElementType, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]: """Same as :py:obj:`searx.utils.eval_xpath`, but additionally ensures the return value is a :py:obj:`list`. The minimum length of the list is also checked (if ``min_len`` is set).""" - result = eval_xpath(element, xpath_spec) + result: list[t.Any] = eval_xpath(element, xpath_spec) if not isinstance(result, list): raise SearxEngineXPathException(xpath_spec, 'the result is not a list') if min_len is not None and min_len > len(result): @@ -562,7 +574,7 @@ def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: in def eval_xpath_getindex( - element: ElementBase, + element: ElementType, xpath_spec: XPathSpecType, index: int, default: t.Any = _NOTSET, diff --git a/searx/weather.py b/searx/weather.py index e5cb92bed..c8f3cf973 100644 --- a/searx/weather.py +++ b/searx/weather.py @@ -27,6 +27,7 @@ import babel import babel.numbers import babel.dates import babel.languages +import flask_babel from searx import network from searx.cache import ExpireCache, ExpireCacheCfg @@ -197,6 +198,7 @@ class GeoLocation: DateTimeFormats = typing.Literal["full", "long", "medium", "short"] +DateTimeLocaleTypes = typing.Literal["UI"] @typing.final @@ -205,6 +207,13 @@ class DateTime: conveniently combines :py:obj:`datetime.datetime` and :py:obj:`babel.dates.format_datetime`. A conversion of time zones is not provided (in the current version). + + The localized string representation can be obtained via the + :py:obj:`DateTime.l10n` and :py:obj:`DateTime.l10n_date` methods, where the + ``locale`` parameter defaults to the search language. Alternatively, a + :py:obj:`GeoLocation` or a :py:obj:`babel.Locale` instance can be passed + directly. If the UI language is to be used, the string ``UI`` can be passed + as the value for the ``locale``. """ def __init__(self, time: datetime.datetime): @@ -216,15 +225,32 @@ class DateTime: def l10n( self, fmt: DateTimeFormats | str = "medium", - locale: babel.Locale | GeoLocation | None = None, + locale: DateTimeLocaleTypes | babel.Locale | GeoLocation | None = None, ) -> str: """Localized representation of date & time.""" - if isinstance(locale, GeoLocation): + if isinstance(locale, str) and locale == "UI": + locale = flask_babel.get_locale() + elif isinstance(locale, GeoLocation): locale = locale.locale() elif locale is None: locale = babel.Locale.parse(_get_sxng_locale_tag(), sep='-') return babel.dates.format_datetime(self.datetime, format=fmt, locale=locale) + def l10n_date( + self, + fmt: DateTimeFormats | str = "medium", + locale: DateTimeLocaleTypes | babel.Locale | GeoLocation | None = None, + ) -> str: + """Localized representation of date.""" + + if isinstance(locale, str) and locale == "UI": + locale = flask_babel.get_locale() + elif isinstance(locale, GeoLocation): + locale = locale.locale() + elif locale is None: + locale = babel.Locale.parse(_get_sxng_locale_tag(), sep='-') + return babel.dates.format_date(self.datetime, format=fmt, locale=locale) + @typing.final class Temperature: |