1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Reuters_ (news) is an international news agency.
.. _Reuters: https://www.reuters.com
Configuration
=============
The engine has the following additional settings:
- :py:obj:`sort_order`
.. code:: yaml
- name: reuters
engine: reuters
shortcut: reu
sort_order: "relevance"
Implementations
===============
"""
from json import dumps
from urllib.parse import quote_plus
from datetime import datetime, timedelta
from dateutil import parser
from searx.result_types import EngineResults
about = {
"website": "https://www.reuters.com",
"wikidata_id": "Q130879",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
categories = ["news"]
time_range_support = True
paging = True
base_url = "https://www.reuters.com"
results_per_page = 20
sort_order = "relevance"
"""Sort order, one of ``relevance``, ``display_date:desc`` or ``display_data:asc``."""
time_range_duration_map = {
"day": 1,
"week": 7,
"month": 30,
"year": 365,
}
def request(query, params):
    """Build the paged request URL for Reuters' article-search API.

    The query arguments are serialized as a JSON document and passed,
    URL-encoded, in the single ``query`` parameter of the fetch endpoint.
    When a time range is selected, a ``start_date`` (ISO format) is added,
    computed from :py:obj:`time_range_duration_map`.
    """
    page_offset = (params["pageno"] - 1) * results_per_page
    query_args = {
        "keyword": query,
        "offset": page_offset,
        "orderby": sort_order,
        "size": results_per_page,
        "website": "reuters",
    }

    time_range = params["time_range"]
    if time_range:
        days_back = time_range_duration_map[time_range]
        since = datetime.now() - timedelta(days=days_back)
        query_args["start_date"] = since.isoformat()

    encoded_args = quote_plus(dumps(query_args))
    params["url"] = f"{base_url}/pf/api/v3/content/fetch/articles-by-search-v2?query={encoded_args}"
    return params
def response(resp) -> EngineResults:
    """Parse the JSON answer of the article-search API into engine results.

    Returns an empty result container when the answer carries no ``result``
    object; otherwise one :py:obj:`MainResult` per article, with a resized
    thumbnail (height 80) and the article's kicker name as metadata.
    """
    results = EngineResults()
    data = resp.json()

    search_result = data.get("result")
    if not search_result:
        return results

    for article in search_result.get("articles", []):
        kicker = article.get("kicker", {})
        results.add(
            results.types.MainResult(
                url=base_url + article["canonical_url"],
                title=article["web"],
                content=article["description"],
                thumbnail=resize_url(article.get("thumbnail", {}), height=80),
                metadata=kicker.get("name"),
                publishedDate=parser.isoparse(article["display_time"]),
            )
        )
    return results
def resize_url(thumbnail: dict[str, str], width: int = 0, height: int = 0) -> str:
    """Build a resizer-service URL for a Reuters thumbnail.

    If the *thumbnail data* carries no ``resizer_url``, an empty string is
    returned.  Positive *width* / *height* values are appended as query
    arguments; zero or negative values are ignored.  When only one dimension
    is given, the service keeps the original aspect ratio.

    The file size of a full-size image is usually several MB; when reduced to
    a height of, for example, 80 points, only a few KB remain!

    Fields of the *thumbnail data* (``result.articles.[<int>].thumbnail``):

    thumbnail.url:
      The full-size image (>MB).

    thumbnail.width & .height:
      Dimensions of the full-size image.

    thumbnail.resizer_url:
      Reuters has a *resizer* `REST-API for the images`_, this is the URL of
      the service.  This URL already includes the ``&auth`` argument, so
      ``&width=<int>`` and ``&height=<int>`` can simply be appended.

    .. _REST-API for the images:
       https://dev.arcxp.com/photo-center/image-resizer/resizer-v2-how-to-transform-images/#query-parameters
    """
    resizer = thumbnail.get("resizer_url")
    if not resizer:
        return ""

    parts = [resizer]
    if int(width) > 0:
        parts.append(f"&width={int(width)}")
    if int(height) > 0:
        parts.append(f"&height={int(height)}")
    return "".join(parts)
|