summaryrefslogtreecommitdiff
path: root/searx/engines/astrophysics_data_system.py
blob: 59efa226ada44d2bddb68659aa83f66327a5fe6b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# SPDX-License-Identifier: AGPL-3.0-or-later
"""The Astrophysics Data System (ADS_) is a digital library portal for
researchers in astronomy and physics, operated by the Smithsonian Astrophysical
Observatory (SAO) under a NASA grant.  The ADS_ is a solr instance, but not with
the standard API paths.

.. note::

   The ADS_ engine requires an :py:obj:`API key <api_key>`.

This engine uses the `search/query`_ API endpoint.  Since the user's search term
is passed through, the `search syntax`_ of ADS can be used (at least to some
extent).

.. _ADS: https://ui.adsabs.harvard.edu
.. _search/query: https://ui.adsabs.harvard.edu/help/api/api-docs.html#get-/search/query
.. _search syntax: https://ui.adsabs.harvard.edu/help/search/search-syntax


Configuration
=============

The engine has the following additional settings:

- :py:obj:`api_key`
- :py:obj:`ads_sort`

.. code:: yaml

  - name: astrophysics data system
    api_key: "..."
    inactive: false


Implementations
===============
"""

import typing as t

from datetime import datetime
from urllib.parse import urlencode

from searx.utils import html_to_text
from searx.exceptions import SearxEngineAPIException
from searx.result_types import EngineResults

if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response
    from searx.search.processors import OnlineParams

# Engine metadata: the project's website, its Wikidata id, the location of the
# API documentation, and flags stating that the official API is used, that an
# API key is required and that results arrive as JSON.
about: dict[str, str | bool] = {
    "website": "https://ui.adsabs.harvard.edu/",
    "wikidata_id": "Q752099",
    "official_api_documentation": "https://ui.adsabs.harvard.edu/help/api/api-docs.html",
    "use_official_api": True,
    "require_api_key": True,
    "results": "JSON",
}

# Category labels of this engine.
categories: list[str] = ["science", "scientific publications"]
# Paging is enabled: request() converts the page number into a ``start`` offset.
paging: bool = True
# Endpoint that request() appends the URL-encoded query arguments to.
base_url: str = "https://api.adsabs.harvard.edu/v1/search/query"

# Placeholder value; setup() treats "unset" (as well as "unknown" and "...")
# as "no key configured".  request() sends the key as a bearer token.
api_key: str = "unset"
"""Get an API token as described in https://ui.adsabs.harvard.edu/help/api"""

# request() joins these names with commas into the ``fl`` query argument;
# response() reads a subset of them from every returned record.
ads_field_list: list[str] = [
    "abstract",
    "author",
    "bibcode",
    "comment",
    "date",
    "doi",
    "isbn",
    "issn",
    "keyword",
    "page",
    "page_count",
    "page_range",
    "pub",
    "pubdate",
    "pubnote",
    "read_count",
    "title",
    "volume",
    "year",
]
"""Set of fields to return in the response from ADS."""

# Sent as the ``rows`` query argument and used by request() as the page size
# when computing the ``start`` offset.
ads_rows: int = 10
"""How many records to return for the ADS request."""

# Sent as the ``sort`` query argument; request() omits the argument when this
# value is empty.
ads_sort: str = "read_count desc"
"""The format is 'field' + 'direction' where direction is one of 'asc' or 'desc'
and field is any of the valid indexes."""


def setup(engine_settings: dict[str, t.Any]) -> bool:
    """Initialization of the ADS_ engine, checks whether the :py:obj:`api_key`
    is set, otherwise the engine is inactive.
    """
    key: str = engine_settings.get("api_key", "")
    if key and key not in ("unset", "unknown", "..."):
        return True
    logger.error("Astrophysics Data System (ADS) API key is not set or invalid.")
    return False


def request(query: str, params: "OnlineParams") -> None:
    """Fill ``params`` with the URL and the authorization header of the request.

    The search term is passed through unchanged as ``q``.  The page number in
    ``params["pageno"]`` is converted into a ``start`` offset of
    :py:obj:`ads_rows` records per page, and ``sort`` is only sent when
    :py:obj:`ads_sort` is non-empty.  The :py:obj:`api_key` is sent as a bearer
    token in the ``Authorization`` header.

    :param query: the user's search term.
    :param params: request parameters; ``params["headers"]`` and
        ``params["url"]`` are modified in place.
    """
    offset = (params["pageno"] - 1) * ads_rows
    sort_arg: dict[str, str | int] = {"sort": ads_sort} if ads_sort else {}

    query_args: dict[str, str | int] = {
        "q": query,
        "fl": ",".join(ads_field_list),
        "rows": ads_rows,
        "start": offset,
        **sort_arg,
    }
    query_string = urlencode(query_args)

    params["headers"]["Authorization"] = f"Bearer {api_key}"
    params["url"] = f"{base_url}?{query_string}"


def response(resp: "SXNG_Response") -> EngineResults:
    """Build paper results from the JSON reply of the ADS search API.

    Every entry of ``response.docs`` in the reply becomes one ``Paper`` result.
    Fields that are absent from a record fall back to an empty value instead of
    aborting the whole result list: a record without ``author``, ``title``,
    ``doi`` or without a parsable ``date`` is still returned.

    :param resp: HTTP response whose body is the JSON reply of the API.
    :returns: the collected results.
    :raises SearxEngineAPIException: if the reply contains an ``error`` object;
        the exception message is its ``msg`` entry.
    """

    res = EngineResults()
    json_data: dict[str, dict[str, t.Any]] = resp.json()

    if "error" in json_data:
        raise SearxEngineAPIException(json_data["error"]["msg"])

    # The helpers take the record explicitly instead of closing over the loop
    # variable, so they do not depend on where in the function they are called.

    def _str(doc: dict[str, t.Any], k: str) -> str:
        """Field ``k`` as string, ``""`` if the field is absent."""
        return str(doc.get(k, ""))

    def _list(doc: dict[str, t.Any], k: str) -> list[str]:
        """Field ``k`` as list, ``[]`` if the field is absent or null."""
        return doc.get(k) or []

    def _first(doc: dict[str, t.Any], k: str) -> str:
        """First item of the list field ``k``, ``""`` if there is none."""
        values = _list(doc, k)
        return values[0] if values else ""

    def _date(doc: dict[str, t.Any]) -> t.Optional[datetime]:
        """The ``date`` field as datetime, ``None`` if absent or unparsable."""
        raw = _str(doc, "date")
        if not raw:
            return None
        # datetime.fromisoformat() accepts a trailing "Z" (UTC designator) only
        # from Python 3.11 on; the explicit offset is equivalent and is
        # understood by older versions as well.
        if raw.endswith("Z"):
            raw = raw[:-1] + "+00:00"
        try:
            return datetime.fromisoformat(raw)
        except ValueError:
            return None

    for doc in json_data["response"]["docs"]:
        authors: list[str] = _list(doc, "author")
        if len(authors) > 15:
            # There are articles with hundreds of authors
            authors = authors[:15] + ["et al."]

        paper = res.types.Paper(
            url=f"https://ui.adsabs.harvard.edu/abs/{doc.get('bibcode')}/",
            title=html_to_text(_first(doc, "title")),
            authors=authors,
            content=html_to_text(_str(doc, "abstract")),
            doi=_first(doc, "doi"),
            issn=_list(doc, "issn"),
            isbn=_list(doc, "isbn"),
            tags=_list(doc, "keyword"),
            pages=",".join(_list(doc, "page")),
            # strip() avoids a dangling blank when "pub" or "year" is missing
            publisher=(_str(doc, "pub") + " " + _str(doc, "year")).strip(),
            publishedDate=_date(doc),
            volume=_str(doc, "volume"),
            views=_str(doc, "read_count"),
            comments=" / ".join(_list(doc, "pubnote")),
        )
        res.add(paper)

    return res