1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Reuters_ (news) is an international news agency.
.. _Reuters: https://www.reuters.com
Configuration
=============
The engine has the following additional settings:
- :py:obj:`sort_order`
.. code:: yaml
- name: reuters
engine: reuters
shortcut: reu
sort_order: "relevance"
Implementations
===============
"""
from json import dumps
from urllib.parse import quote_plus
from datetime import datetime, timedelta
from searx.result_types import EngineResults
about = {
"website": "https://www.reuters.com",
"wikidata_id": "Q130879",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
categories = ["news"]
time_range_support = True
paging = True
base_url = "https://www.reuters.com"
results_per_page = 20
sort_order = "relevance"
"""Sort order, one of ``relevance``, ``display_date:desc`` or ``display_data:asc``."""
time_range_duration_map = {
"day": 1,
"week": 7,
"month": 30,
"year": 365,
}
def request(query, params):
args = {
"keyword": query,
"offset": (params["pageno"] - 1) * results_per_page,
"orderby": sort_order,
"size": results_per_page,
"website": "reuters",
}
if params["time_range"]:
time_diff_days = time_range_duration_map[params["time_range"]]
start_date = datetime.now() - timedelta(days=time_diff_days)
args["start_date"] = start_date.isoformat()
params["url"] = f"{base_url}/pf/api/v3/content/fetch/articles-by-search-v2?query={quote_plus(dumps(args))}"
return params
def response(resp) -> EngineResults:
res = EngineResults()
for result in resp.json().get("result", {}).get("articles", []):
res.add(
res.types.MainResult(
url=base_url + result["canonical_url"],
title=result["web"],
content=result["description"],
thumbnail=result.get("thumbnail", {}).get("url", ""),
metadata=result.get("kicker", {}).get("name"),
publishedDate=datetime.fromisoformat(result["display_time"]),
)
)
return res
|