summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2025-09-10 16:47:31 +0200
committerMarkus Heiser <markus.heiser@darmarIT.de>2025-09-20 10:56:46 +0200
commit4c42704c803a8c7962ea779a47b39308dbec3eb8 (patch)
tree36b55cff35027174f6c289f1224d7e1852c8c84f /searx
parent4b4bf0ecafdfdfa5f9d2dd532dcc42496f34a864 (diff)
[mod] Springer Nature engine: revision of the engine (Paper result)
Revision of the engine / use of the result type Paper as well as other typifications. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx')
-rw-r--r--searx/engines/springer.py214
-rw-r--r--searx/settings.yml14
2 files changed, 166 insertions, 62 deletions
diff --git a/searx/engines/springer.py b/searx/engines/springer.py
index ba59fa926..098a438d7 100644
--- a/searx/engines/springer.py
+++ b/searx/engines/springer.py
@@ -1,71 +1,175 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""Springer Nature (science)
+"""`Springer Nature`_ is a global publisher dedicated to providing service to
+the research community with an official Springer-API_ (API-Playground_).
+
+.. note::
+
+ The Springer engine requires an API key, which can be obtained via the
+ `Springer subscription`_.
+
+Since the search term is passed 1:1 to the API, SearXNG users can use the
+`Supported Query Parameters`_.
+
+- ``!springer (doi:10.1007/s10948-025-07019-1 OR doi:10.1007/s10948-025-07035-1)``
+- ``!springer keyword:ybco``
+
+However, please note that the available options depend on the subscription type.
+
+For example, the ``year:`` filter requires a *Premium Plan* subscription.
+
+- ``!springer keyword:ybco year:2024``
+
+The engine uses the REST Meta-API_ `v2` endpoint, but there is also a `Python
+API Wrapper`_.
+
+.. _Python API Wrapper: https://pypi.org/project/springernature-api-client/
+.. _Springer Nature: https://www.springernature.com/
+.. _Springer subscription: https://dev.springernature.com/subscription/
+.. _Springer-API: https://dev.springernature.com/docs/introduction/
+.. _API-Playground: https://dev.springernature.com/docs/live-documentation/
+.. _Meta-API: https://dev.springernature.com/docs/api-endpoints/meta-api/
+.. _Supported Query Parameters: https://dev.springernature.com/docs/supported-query-params/
+
+
+Configuration
+=============
+
+The engine has the following additional settings:
+
+- :py:obj:`api_key`
+
+.. code:: yaml
+
+ - name: springer nature
+ api_key: "..."
+ inactive: false
+
+
+Implementations
+===============
"""
+import typing as t
+
from datetime import datetime
-from json import loads
from urllib.parse import urlencode
-from searx.exceptions import SearxEngineAPIException
+from searx.network import raise_for_httperror
+from searx.result_types import EngineResults
+
+if t.TYPE_CHECKING:
+ from searx.extended_types import SXNG_Response
+ from searx.search.processors import OnlineParams
about = {
- "website": 'https://www.springernature.com/',
- "wikidata_id": 'Q21096327',
- "official_api_documentation": 'https://dev.springernature.com/',
+ "website": "https://www.springernature.com/",
+ "wikidata_id": "Q21096327",
+ "official_api_documentation": "https://dev.springernature.com/docs/live-documentation/",
"use_official_api": True,
"require_api_key": True,
- "results": 'JSON',
+ "results": "JSON",
}
-categories = ['science', 'scientific publications']
+categories = ["science", "scientific publications"]
+
paging = True
nb_per_page = 10
-api_key = 'unset'
-
-base_url = 'https://api.springernature.com/metadata/json?'
-
-
-def request(query, params):
- if api_key == 'unset':
- raise SearxEngineAPIException('missing Springer-Nature API key')
- args = urlencode({'q': query, 's': nb_per_page * (params['pageno'] - 1), 'p': nb_per_page, 'api_key': api_key})
- params['url'] = base_url + args
- logger.debug("query_url --> %s", params['url'])
- return params
-
-
-def response(resp):
- results = []
- json_data = loads(resp.text)
-
- for record in json_data['records']:
- published = datetime.strptime(record['publicationDate'], '%Y-%m-%d')
- authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']]
- tags = record.get('genre')
- if isinstance(tags, str):
- tags = [tags]
- results.append(
- {
- 'template': 'paper.html',
- 'url': record['url'][0]['value'].replace('http://', 'https://', 1),
- 'title': record['title'],
- 'content': record['abstract'],
- 'comments': record['publicationName'],
- 'tags': tags,
- 'publishedDate': published,
- 'type': record.get('contentType'),
- 'authors': authors,
- # 'editor': '',
- 'publisher': record.get('publisher'),
- 'journal': record.get('publicationName'),
- 'volume': record.get('volume') or None,
- 'pages': '-'.join([x for x in [record.get('startingPage'), record.get('endingPage')] if x]),
- 'number': record.get('number') or None,
- 'doi': record.get('doi'),
- 'issn': [x for x in [record.get('issn')] if x],
- 'isbn': [x for x in [record.get('isbn')] if x],
- # 'pdf_url' : ''
- }
+"""Number of results to return in the request, see `Pagination and Limits`_ for
+more details.
+
+.. _Pagination and Limits:
+ https://dev.springernature.com/docs/advanced-querying/pagination-limits/
+"""
+
+api_key = ""
+"""Key used for the Meta-API_. Get your API key from: `Springer subscription`_"""
+
+base_url = "https://api.springernature.com/meta/v2/json"
+"""An enhanced endpoint with additional metadata fields and optimized queries
+for more efficient and comprehensive retrieval (Meta-API_ `v2`).
+"""
+
+
+def setup(engine_settings: dict[str, t.Any]) -> bool:
+ """Initialization of the Springer engine, checks whether the
+ :py:obj:`api_key` is set, otherwise the engine is inactive.
+ """
+ key: str = engine_settings.get("api_key", "")
+ try:
+ # Springer's API key is a hex value
+ int(key, 16)
+ return True
+ except ValueError:
+ logger.error("Springer's API key is not set or invalid.")
+ return False
+
+
+def request(query: str, params: "OnlineParams") -> None:
+ args = {
+ "api_key": api_key,
+ "q": query,
+ "s": nb_per_page * (params["pageno"] - 1),
+ "p": nb_per_page,
+ }
+ params["url"] = f"{base_url}?{urlencode(args)}"
+ # For example, the ``year:`` filter requires a *Premium Plan* subscription.
+ params["raise_for_httperror"] = False
+
+
+def response(resp: "SXNG_Response") -> EngineResults:
+ """Convert the JSON answer of the Meta-API into ``Paper`` results.
+
+ A 403 answer whose JSON body has ``status`` "fail" and a ``message``
+ containing "premium feature" gives an empty result list.  For every other
+ answer ``raise_for_httperror`` is called before the records are read.
+ """
+
+ res = EngineResults()
+ # NOTE(review): the body is parsed before the status check below, so an
+ # error answer that is not JSON would presumably fail here first -- confirm.
+ json_data = resp.json()
+
+ # A query that uses a premium-only feature is answered with 403: return the
+ # (empty) result list instead of raising an HTTP error.
+ if (
+ resp.status_code == 403
+ and json_data["status"].lower() == "fail"
+ and "premium feature" in json_data["message"].lower()
+ ):
+ return res
+ raise_for_httperror(resp)
+
+ # Reads from ``record``, the loop variable of the ``for`` below; a missing
+ # key gives "".
+ # NOTE(review): a key that is present with value None would become the
+ # string "None" -- confirm the API never sends null for these fields.
+ def field(k: str) -> str:
+ return str(record.get(k, ""))
+
+ for record in json_data["records"]:
+ published = datetime.strptime(record["publicationDate"], "%Y-%m-%d")
+ # "Last, First" is turned into "First Last"
+ authors: list[str] = [" ".join(author["creator"].split(", ")[::-1]) for author in record["creators"]]
+
+ pdf_url = ""
+ html_url = ""
+ url_list: list[dict[str, str]] = record["url"]
+
+ # Only links of the "web" platform are used; http:// is replaced by
+ # https://.  If several links share a format, the last one wins.
+ # NOTE(review): html_url stays "" when no web/html link is listed --
+ # confirm that Paper accepts an empty url.
+ for item in url_list:
+ if item["platform"] != "web":
+ continue
+ val = item["value"].replace("http://", "https://", 1)
+ if item["format"] == "html":
+ html_url = val
+ elif item["format"] == "pdf":
+ pdf_url = val
+
+ paper = res.types.Paper(
+ url=html_url,
+ # html_url=html_url,
+ pdf_url=pdf_url,
+ title=field("title"),
+ content=field("abstract"),
+ comments=field("publicationName"),
+ tags=record.get("keyword", []),
+ publishedDate=published,
+ type=field("contentType"),
+ authors=authors,
+ publisher=field("publisher"),
+ journal=field("publicationName"),
+ volume=field("volume"),
+ # "start-end", or only one of both if the other one is empty
+ pages="-".join([x for x in [field("startingPage"), field("endingPage")] if x]),
+ number=field("number"),
+ doi=field("doi"),
+ # list with the single value, or an empty list if the value is empty
+ issn=[x for x in [field("issn")] if x],
+ isbn=[x for x in [field("isbn")] if x],
)
- return results
+ res.add(paper)
+
+ return res
diff --git a/searx/settings.yml b/searx/settings.yml
index d72e84ff7..200f96de9 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1984,13 +1984,13 @@ engines:
# query_fields: '' # query fields
# enable_http: true
- # - name: springer nature
- # engine: springer
- # # get your API key from: https://dev.springernature.com/signup
- # # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601"
- # api_key: 'unset'
- # shortcut: springer
- # timeout: 15.0
+ - name: springer nature
+ engine: springer
+ shortcut: springer
+ timeout: 5
+ # read https://docs.searxng.org/dev/engines/online/springer.html
+ api_key: ""
+ inactive: true
- name: startpage
engine: startpage