1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Reuters_ (news) is an international news agency.
.. _Reuters: https://www.reuters.com
Configuration
=============
The engine has the following additional settings:
- :py:obj:`sort_order`
.. code:: yaml
- name: reuters
engine: reuters
shortcut: reu
sort_order: "relevance"
Implementations
===============
"""
from json import dumps
from urllib.parse import quote_plus
from datetime import datetime, timedelta
from dateutil import parser
from searx.result_types import EngineResults
about = {
"website": "https://www.reuters.com",
"wikidata_id": "Q130879",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
categories = ["news"]
time_range_support = True
paging = True
base_url = "https://www.reuters.com"
results_per_page = 20
sort_order = "relevance"
"""Sort order, one of ``relevance``, ``display_date:desc`` or ``display_data:asc``."""
time_range_duration_map = {
"day": 1,
"week": 7,
"month": 30,
"year": 365,
}
def request(query, params):
    """Build the paged request URL for Reuters' article-search API.

    The query arguments are serialized as a JSON document and passed,
    URL-encoded, in the single ``query`` parameter of the fetch endpoint.
    When a time range is selected, a ``start_date`` (ISO format) is added,
    computed from :py:obj:`time_range_duration_map`.
    """
    page_offset = (params["pageno"] - 1) * results_per_page
    query_args = {
        "keyword": query,
        "offset": page_offset,
        "orderby": sort_order,
        "size": results_per_page,
        "website": "reuters",
    }

    time_range = params["time_range"]
    if time_range:
        days_back = time_range_duration_map[time_range]
        since = datetime.now() - timedelta(days=days_back)
        query_args["start_date"] = since.isoformat()

    encoded_args = quote_plus(dumps(query_args))
    params["url"] = f"{base_url}/pf/api/v3/content/fetch/articles-by-search-v2?query={encoded_args}"
    return params
def response(resp) -> EngineResults:
    """Parse the JSON answer of the article-search API into engine results.

    Returns an empty result container when the answer carries no ``result``
    object; otherwise one :py:obj:`MainResult` per article, with a resized
    thumbnail (height 80) and the article's kicker name as metadata.
    """
    results = EngineResults()
    data = resp.json()

    search_result = data.get("result")
    if not search_result:
        return results

    for article in search_result.get("articles", []):
        kicker = article.get("kicker", {})
        results.add(
            results.types.MainResult(
                url=base_url + article["canonical_url"],
                title=article["web"],
                content=article["description"],
                thumbnail=resize_url(article.get("thumbnail", {}), height=80),
                metadata=kicker.get("name"),
                publishedDate=parser.isoparse(article["display_time"]),
            )
        )
    return results
def resize_url(thumbnail: dict[str, str], width: int = 0, height: int = 0) -> str:
    """Build a resizer-service URL for a Reuters thumbnail.

    If the *thumbnail data* carries no ``resizer_url``, an empty string is
    returned.  Positive *width* / *height* values are appended as query
    arguments; zero or negative values are ignored.  When only one dimension
    is given, the service keeps the original aspect ratio.

    The file size of a full-size image is usually several MB; when reduced to
    a height of, for example, 80 points, only a few KB remain!

    Fields of the *thumbnail data* (``result.articles.[<int>].thumbnail``):

    thumbnail.url:
      The full-size image (>MB).

    thumbnail.width & .height:
      Dimensions of the full-size image.

    thumbnail.resizer_url:
      Reuters has a *resizer* `REST-API for the images`_, this is the URL of
      the service.  This URL already includes the ``&auth`` argument, so
      ``&width=<int>`` and ``&height=<int>`` can simply be appended.

    .. _REST-API for the images:
       https://dev.arcxp.com/photo-center/image-resizer/resizer-v2-how-to-transform-images/#query-parameters
    """
    resizer = thumbnail.get("resizer_url")
    if not resizer:
        return ""

    parts = [resizer]
    if int(width) > 0:
        parts.append(f"&width={int(width)}")
    if int(height) > 0:
        parts.append(f"&height={int(height)}")
    return "".join(parts)
|