summary | refs | log | tree | commit | diff
path: root/searx/engines
diff options
context:
space:
mode:
author: Markus Heiser <markus.heiser@darmarit.de> 2025-09-15 09:02:54 +0200
committer: Markus Heiser <markus.heiser@darmarIT.de> 2025-09-20 10:56:46 +0200
commit: 1520a8d545e436e43c0ea0aff916cc4cc9ba5494 (patch)
tree: ae907e160a011f9364c8cf1361fd5736b770205d /searx/engines
parent: f8f7adce6b7fcf5cb03a6d6bfe78205c3549d1ea (diff)
[mod] ADS engine: revision of the engine (Paper result)
Revision of the Astrophysics Data System (ADS) engine / use of the result type Paper as well as other typifications.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/astrophysics_data_system.py 214
1 files changed, 142 insertions, 72 deletions
diff --git a/searx/engines/astrophysics_data_system.py b/searx/engines/astrophysics_data_system.py
index a1d942b50..59efa226a 100644
--- a/searx/engines/astrophysics_data_system.py
+++ b/searx/engines/astrophysics_data_system.py
@@ -1,93 +1,163 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-""".. sidebar:: info
+"""The Astrophysics Data System (ADS_) is a digital library portal for
+researchers in astronomy and physics, operated by the Smithsonian Astrophysical
+Observatory (SAO) under a NASA grant. The ADS_ is a Solr instance, but it does
+not use the standard Solr API paths.
-The Astrophysics Data System (ADS) is a digital library portal for researchers in astronomy and physics,
-operated by the Smithsonian Astrophysical Observatory (SAO) under a NASA grant.
-The engine is adapted from the solr engine.
+.. note::
-"""
-
-# pylint: disable=global-statement
+ The ADS_ engine requires an :py:obj:`API key <api_key>`.
-from datetime import datetime
-from json import loads
-from urllib.parse import urlencode
-from searx.exceptions import SearxEngineAPIException
+This engine uses the `search/query`_ API endpoint. Since the user's search term
+is passed through, the `search syntax`_ of ADS can be used (at least to some
+extent).
-about = {
- "website": 'https://ui.adsabs.harvard.edu/',
- "wikidata_id": 'Q752099',
- "official_api_documentation": 'https://ui.adsabs.harvard.edu/help/api/api-docs.html',
- "use_official_api": True,
- "require_api_key": True,
- "results": 'JSON',
-}
-
-base_url = 'https://api.adsabs.harvard.edu/v1/search'
-result_base_url = 'https://ui.adsabs.harvard.edu/abs/'
-rows = 10
-sort = '' # sorting: asc or desc
-field_list = ['bibcode', 'author', 'title', 'abstract', 'doi', 'date'] # list of field names to display on the UI
-default_fields = '' # default field to query
-query_fields = '' # query fields
-paging = True
-api_key = 'unset'
+.. _ADS: https://ui.adsabs.harvard.edu
+.. _search/query: https://ui.adsabs.harvard.edu/help/api/api-docs.html#get-/search/query
+.. _search syntax: https://ui.adsabs.harvard.edu/help/search/search-syntax
-def init(_):
- if api_key == 'unset':
- raise SearxEngineAPIException('missing ADS API key')
+Configuration
+=============
+The engine has the following additional settings:
-def request(query, params):
- query_params = {'q': query, 'rows': rows}
- if field_list:
- query_params['fl'] = ','.join(field_list)
- if query_fields:
- query_params['qf'] = ','.join(query_fields)
- if default_fields:
- query_params['df'] = default_fields
- if sort:
- query_params['sort'] = sort
+- :py:obj:`api_key`
+- :py:obj:`ads_sort`
- query_params['start'] = rows * (params['pageno'] - 1)
+.. code:: yaml
- params['headers']['Authorization'] = f'Bearer {api_key}'
- params['url'] = f"{base_url}/query?{urlencode(query_params)}"
+ - name: astrophysics data system
+ api_key: "..."
+ inactive: false
- return params
+Implementations
+===============
+"""
-def response(resp):
- try:
- resp_json = loads(resp.text)
- except Exception as e:
- raise SearxEngineAPIException("failed to parse response") from e
+import typing as t
- if 'error' in resp_json:
- raise SearxEngineAPIException(resp_json['error']['msg'])
+from datetime import datetime
+from urllib.parse import urlencode
- resp_json = resp_json["response"]
- result_len = resp_json["numFound"]
- results = []
+from searx.utils import html_to_text
+from searx.exceptions import SearxEngineAPIException
+from searx.result_types import EngineResults
- for res in resp_json["docs"]:
- author = res.get("author")
+if t.TYPE_CHECKING:
+ from searx.extended_types import SXNG_Response
+ from searx.search.processors import OnlineParams
- if author:
- author = author[0] + ' et al.'
+# Static description of the ADS service: website, Wikidata ID, API docs, key requirement, result format.
+about = {
+ "website": "https://ui.adsabs.harvard.edu/",
+ "wikidata_id": "Q752099",
+ "official_api_documentation": "https://ui.adsabs.harvard.edu/help/api/api-docs.html",
+ "use_official_api": True,
+ "require_api_key": True,
+ "results": "JSON",
+}
- results.append(
- {
- 'url': result_base_url + res.get("bibcode") + "/",
- 'title': res.get("title")[0],
- 'author': author,
- 'content': res.get("abstract"),
- 'doi': res.get("doi"),
- 'publishedDate': datetime.fromisoformat(res.get("date")),
- }
+categories = ["science", "scientific publications"]
+# request() reads params["pageno"] to compute the ``start`` offset of the page.
+paging = True
+# Full URL of the search/query endpoint; request() appends the URL-encoded arguments to it.
+base_url = "https://api.adsabs.harvard.edu/v1/search/query"
+
+# Sent by request() as ``Authorization: Bearer <api_key>``.
+api_key = "unset"
+"""Get an API token as described in https://ui.adsabs.harvard.edu/help/api"""
+
+# Joined with commas by request() and sent as the ``fl`` argument.
+ads_field_list = [
+ "abstract",
+ "author",
+ "bibcode",
+ "comment",
+ "date",
+ "doi",
+ "isbn",
+ "issn",
+ "keyword",
+ "page",
+ "page_count",
+ "page_range",
+ "pub",
+ "pubdate",
+ "pubnote",
+ "read_count",
+ "title",
+ "volume",
+ "year",
+]
+"""Set of fields to return in the response from ADS."""
+
+# Sent as the ``rows`` argument and also used as the page size when computing ``start``.
+ads_rows = 10
+"""How many records to return for the ADS request."""
+
+# Sent as the ``sort`` argument; request() omits the argument when this value is empty.
+ads_sort = "read_count desc"
+"""The format is 'field' + 'direction' where direction is one of 'asc' or 'desc'
+and field is any of the valid indexes."""
+
+
+def setup(engine_settings: dict[str, t.Any]) -> bool:
+ """Initialization of the ADS_ engine, checks whether the :py:obj:`api_key`
+ is set, otherwise the engine is inactive.
+
+ Returns ``True`` when ``engine_settings["api_key"]`` is a non-empty value
+ other than the placeholders ``"unset"``, ``"unknown"`` or ``"..."``;
+ otherwise an error is logged and ``False`` is returned.
+ """
+ key: str = engine_settings.get("api_key", "")
+ if key and key not in ("unset", "unknown", "..."):
+ return True
+ # NOTE(review): ``logger`` is neither defined nor imported in this module as shown;
+ # presumably it is injected by the engine loader -- confirm.
+ logger.error("Astrophysics Data System (ADS) API key is not set or invalid.")
+ return False
+
+
+def request(query: str, params: "OnlineParams") -> None:
+ """Fill *params* in place with the URL and the auth header of an ADS query.
+
+ The user's *query* is passed through unchanged as ``q``. ``start`` is
+ computed as ``ads_rows * (pageno - 1)``, so page 1 starts at offset 0.
+ Nothing is returned; ``params["url"]`` and
+ ``params["headers"]["Authorization"]`` are set.
+ """
+
+ args: dict[str, str | int] = {
+ "q": query,
+ "fl": ",".join(ads_field_list),
+ "rows": ads_rows,
+ "start": ads_rows * (params["pageno"] - 1),
+ }
+ # Only send ``sort`` when a sort order is configured.
+ if ads_sort:
+ args["sort"] = ads_sort
+
+ params["headers"]["Authorization"] = f"Bearer {api_key}"
+ params["url"] = f"{base_url}?{urlencode(args)}"
+
+
+def response(resp: "SXNG_Response") -> EngineResults:
+ """Convert the JSON answer of the ADS search API into ``Paper`` results.
+
+ One ``Paper`` is added per entry of ``response.docs``. Raises
+ :py:obj:`SearxEngineAPIException` with the message from the API when the
+ payload contains an ``error`` object.
+ """
+
+ res = EngineResults()
+ json_data: dict[str, dict[str, t.Any]] = resp.json()
+
+ if "error" in json_data:
+ raise SearxEngineAPIException(json_data["error"]["msg"])
+
+ # Both helpers read ``doc`` through the closure, i.e. the current item of
+ # the loop below; they must only be called inside that loop.
+ def _str(k: str) -> str:
+ return str(doc.get(k, ""))
+
+ def _list(k: str) -> list[str]:
+ return doc.get(k, [])
+
+ for doc in json_data["response"]["docs"]:
+ # NOTE(review): unlike the other fields, ``author`` is read without a
+ # default -- a record without authors raises KeyError here.
+ authors: list[str] = doc["author"]
+ if len(authors) > 15:
+ # There are articles with hundreds of authors
+ authors = authors[:15] + ["et al."]
+
+ paper = res.types.Paper(
+ url=f"https://ui.adsabs.harvard.edu/abs/{doc.get('bibcode')}/",
+ # NOTE(review): ``[0]`` on the empty-list default raises IndexError
+ # when ``title`` is missing from the record.
+ title=html_to_text(_list("title")[0]),
+ authors=authors,
+ content=html_to_text(_str("abstract")),
+ # NOTE(review): same IndexError risk as ``title`` when the record
+ # has no ``doi``.
+ doi=_list("doi")[0],
+ issn=_list("issn"),
+ isbn=_list("isbn"),
+ tags=_list("keyword"),
+ pages=",".join(_list("page")),
+ publisher=_str("pub") + " " + _str("year"),
+ # NOTE(review): ``datetime.fromisoformat("")`` raises ValueError, so
+ # a record without ``date`` fails here.
+ publishedDate=datetime.fromisoformat(_str("date")),
+ volume=_str("volume"),
+ # ``read_count`` is passed on as a string (see ``_str``).
+ views=_str("read_count"),
+ comments=" / ".join(_list("pubnote")),
 )
+ res.add(paper)
- results.append({'number_of_results': result_len})
-
- return results
+ return res