summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--  docs/dev/engines/online/reuters.rst  |  8
-rw-r--r--  searx/engines/reuters.py             | 90
-rw-r--r--  searx/result_types/_base.py          | 34
-rw-r--r--  searx/settings.yml                   |  6
-rwxr-xr-x  searx/webapp.py                      |  8
5 files changed, 137 insertions, 9 deletions
diff --git a/docs/dev/engines/online/reuters.rst b/docs/dev/engines/online/reuters.rst
new file mode 100644
index 000000000..e0f685d17
--- /dev/null
+++ b/docs/dev/engines/online/reuters.rst
@@ -0,0 +1,8 @@
+.. _reuters engine:
+
+=======
+Reuters
+=======
+
+.. automodule:: searx.engines.reuters
+ :members:
diff --git a/searx/engines/reuters.py b/searx/engines/reuters.py
new file mode 100644
index 000000000..113124c48
--- /dev/null
+++ b/searx/engines/reuters.py
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Reuters_ (news) is an international news agency.
+
+.. _Reuters: https://www.reuters.com
+
+Configuration
+=============
+
+The engine has the following additional settings:
+
+- :py:obj:`sort_order`
+
+.. code:: yaml
+
+ - name: reuters
+ engine: reuters
+ shortcut: reu
+ sort_order: "relevance"
+
+
+Implementations
+===============
+
+"""
+
+from json import dumps
+from urllib.parse import quote_plus
+from datetime import datetime, timedelta
+
+from searx.result_types import EngineResults
+
+about = {
+ "website": "https://www.reuters.com",
+ "wikidata_id": "Q130879",
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": "JSON",
+}
+
+categories = ["news"]
+time_range_support = True
+paging = True
+
+base_url = "https://www.reuters.com"
+
+results_per_page = 20
+sort_order = "relevance"
+"""Sort order, one of ``relevance``, ``display_date:desc`` or ``display_date:asc``."""
+
+time_range_duration_map = {
+ "day": 1,
+ "week": 7,
+ "month": 30,
+ "year": 365,
+}
+
+
+def request(query, params):
+ args = {
+ "keyword": query,
+ "offset": (params["pageno"] - 1) * results_per_page,
+ "orderby": sort_order,
+ "size": results_per_page,
+ "website": "reuters",
+ }
+ if params["time_range"]:
+ time_diff_days = time_range_duration_map[params["time_range"]]
+ start_date = datetime.now() - timedelta(days=time_diff_days)
+ args["start_date"] = start_date.isoformat()
+
+ params["url"] = f"{base_url}/pf/api/v3/content/fetch/articles-by-search-v2?query={quote_plus(dumps(args))}"
+ return params
+
+
+def response(resp) -> EngineResults:
+ res = EngineResults()
+
+ for result in resp.json().get("result", {}).get("articles", []):
+ res.add(
+ res.types.MainResult(
+ url=base_url + result["canonical_url"],
+ title=result["web"],
+ content=result["description"],
+ thumbnail=result.get("thumbnail", {}).get("url", ""),
+ metadata=result.get("kicker", {}).get("name"),
+ publishedDate=datetime.strptime(result["display_time"], "%Y-%m-%dT%H:%M:%SZ"),
+ )
+ )
+ return res
diff --git a/searx/result_types/_base.py b/searx/result_types/_base.py
index c4c0b18b2..caf7e2a4f 100644
--- a/searx/result_types/_base.py
+++ b/searx/result_types/_base.py
@@ -25,6 +25,8 @@ import re
import urllib.parse
import warnings
import typing
+import time
+import datetime
from collections.abc import Callable
@@ -212,6 +214,15 @@ def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result |
result.normalize_result_fields()
+def _normalize_date_fields(result: MainResult | LegacyResult):
+
+ if result.publishedDate: # do not try to get a date from an empty string or a None type
+ try: # test if publishedDate >= 1900 (datetime module bug)
+ result.pubdate = result.publishedDate.strftime('%Y-%m-%d %H:%M:%S%z')
+ except ValueError:
+ result.publishedDate = None
+
+
class Result(msgspec.Struct, kw_only=True):
"""Base class of all result types :ref:`result types`."""
@@ -347,6 +358,24 @@ class MainResult(Result): # pylint: disable=missing-class-docstring
thumbnail: str = ""
"""URL of a thumbnail that is displayed in the result item."""
+ publishedDate: datetime.datetime | None = None
+ """The date on which the object was published."""
+
+ pubdate: str = ""
+ """String representation of :py:obj:`MainResult.publishedDate`"""
+
+ length: time.struct_time | None = None
+ """Playing duration in seconds."""
+
+ views: str = ""
+ """View count in humanized number format."""
+
+ author: str = ""
+ """Author of the title."""
+
+ metadata: str = ""
+ """Miscellaneous metadata."""
+
priority: typing.Literal["", "high", "low"] = ""
"""The priority can be set via :ref:`hostnames plugin`, for example."""
@@ -379,8 +408,8 @@ class MainResult(Result): # pylint: disable=missing-class-docstring
def normalize_result_fields(self):
super().normalize_result_fields()
-
_normalize_text_fields(self)
+ _normalize_date_fields(self)
if self.engine:
self.engines.add(self.engine)
@@ -419,6 +448,8 @@ class LegacyResult(dict):
positions: list[int]
score: float
category: str
+ publishedDate: datetime.datetime | None = None
+ pubdate: str = ""
# infobox result
urls: list[dict[str, str]]
@@ -514,6 +545,7 @@ class LegacyResult(dict):
return f"LegacyResult: {super().__repr__()}"
def normalize_result_fields(self):
+ _normalize_date_fields(self)
_normalize_url_fields(self)
_normalize_text_fields(self)
if self.engine:
diff --git a/searx/settings.yml b/searx/settings.yml
index a08e2a625..d8d5f4170 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1741,6 +1741,12 @@ engines:
page_size: 25
disabled: true
+ - name: reuters
+ engine: reuters
+ shortcut: reu
+ # https://docs.searxng.org/dev/engines/online/reuters.html
+ # sort_order = "relevance"
+
- name: right dao
engine: xpath
paging: true
diff --git a/searx/webapp.py b/searx/webapp.py
index 8d9fd9393..b721c7132 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -694,14 +694,6 @@ def search():
if 'title' in result and result['title']:
result['title'] = highlight_content(escape(result['title'] or ''), search_query.query)
- if getattr(result, 'publishedDate', None): # do not try to get a date from an empty string or a None type
- try: # test if publishedDate >= 1900 (datetime module bug)
- result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
- except ValueError:
- result['publishedDate'] = None
- else:
- result['publishedDate'] = webutils.searxng_l10n_timespan(result['publishedDate'])
-
# set result['open_group'] = True when the template changes from the previous result
# set result['close_group'] = True when the template changes on the next result
if current_template != result.template: