summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--searx/engines/core.py148
-rw-r--r--searx/settings.yml12
2 files changed, 85 insertions, 75 deletions
diff --git a/searx/engines/core.py b/searx/engines/core.py
index 489b6252b..0da931792 100644
--- a/searx/engines/core.py
+++ b/searx/engines/core.py
@@ -5,6 +5,10 @@ research from repositories and journals.
.. _CORE: https://core.ac.uk/about
+.. note::
+
+ The CORE engine requires an :py:obj:`API key <api_key>`.
+
.. _core engine config:
Configuration
@@ -17,135 +21,141 @@ The engine has the following additional settings:
.. code:: yaml
- name: core.ac.uk
- engine: core
- categories: science
- shortcut: cor
api_key: "..."
- timeout: 5
+ inactive: false
Implementations
===============
"""
-# pylint: disable=too-many-branches
+
+import typing as t
from datetime import datetime
from urllib.parse import urlencode
-from searx.exceptions import SearxEngineAPIException
+from searx.result_types import EngineResults
+
+if t.TYPE_CHECKING:
+ from searx.extended_types import SXNG_Response
+ from searx.search.processors import OnlineParams
+
about = {
- "website": 'https://core.ac.uk',
- "wikidata_id": 'Q22661180',
- "official_api_documentation": 'https://api.core.ac.uk/docs/v3',
+ "website": "https://core.ac.uk",
+ "wikidata_id": "Q22661180",
+ "official_api_documentation": "https://api.core.ac.uk/docs/v3",
"use_official_api": True,
"require_api_key": True,
- "results": 'JSON',
+ "results": "JSON",
}
-api_key = 'unset'
+api_key = ""
"""For an API key register at https://core.ac.uk/services/api and insert
the API key in the engine :ref:`core engine config`."""
-categories = ['science', 'scientific publications']
+categories = ["science", "scientific publications"]
paging = True
nb_per_page = 10
-base_url = 'https://api.core.ac.uk/v3/search/works/'
+base_url = "https://api.core.ac.uk/v3/search/works/"
-def request(query, params):
- if api_key == 'unset':
- raise SearxEngineAPIException('missing CORE API key')
+def setup(engine_settings: dict[str, t.Any]) -> bool:
+ """Initialization of the CORE_ engine, checks whether the :py:obj:`api_key`
+ is set, otherwise the engine is inactive.
+ """
+
+ key: str = engine_settings.get("api_key", "")
+ if key and key not in ("unset", "unknown", "..."):
+ return True
+ logger.error("CORE's API key is not set or invalid.")
+ return False
+
+
+def request(query: str, params: "OnlineParams") -> None:
# API v3 uses different parameters
search_params = {
- 'q': query,
- 'offset': (params['pageno'] - 1) * nb_per_page,
- 'limit': nb_per_page,
- 'sort': 'relevance',
+ "q": query,
+ "offset": (params["pageno"] - 1) * nb_per_page,
+ "limit": nb_per_page,
+ "sort": "relevance",
}
- params['url'] = base_url + '?' + urlencode(search_params)
- params['headers'] = {'Authorization': f'Bearer {api_key}'}
-
- return params
+ params["url"] = base_url + "?" + urlencode(search_params)
+ params["headers"] = {"Authorization": f"Bearer {api_key}"}
-def response(resp):
- results = []
+def response(resp: "SXNG_Response") -> EngineResults:
+ # pylint: disable=too-many-branches
+ res = EngineResults()
json_data = resp.json()
- for result in json_data.get('results', []):
+ for result in json_data.get("results", []):
# Get title
- if not result.get('title'):
+ if not result.get("title"):
continue
# Get URL - try different options
- url = None
+ url: str | None = None
# Try DOI first
- doi = result.get('doi')
+    doi: str | None = result.get("doi")
if doi:
- url = f'https://doi.org/{doi}'
+ url = f"https://doi.org/{doi}"
- if url is None and result.get('doi'):
+ if url is None and result.get("doi"):
# use the DOI reference
- url = 'https://doi.org/' + str(result['doi'])
- elif result.get('id'):
- url = 'https://core.ac.uk/works/' + str(result['id'])
- elif result.get('downloadUrl'):
- url = result['downloadUrl']
- elif result.get('sourceFulltextUrls'):
- url = result['sourceFulltextUrls']
+ url = "https://doi.org/" + str(result["doi"])
+ elif result.get("id"):
+ url = "https://core.ac.uk/works/" + str(result["id"])
+ elif result.get("downloadUrl"):
+ url = result["downloadUrl"]
+ elif result.get("sourceFulltextUrls"):
+ url = result["sourceFulltextUrls"]
else:
continue
# Published date
published_date = None
- raw_date = result.get('publishedDate') or result.get('depositedDate')
+ raw_date = result.get("publishedDate") or result.get("depositedDate")
if raw_date:
try:
- published_date = datetime.fromisoformat(result['publishedDate'].replace('Z', '+00:00'))
+            published_date = datetime.fromisoformat(raw_date.replace("Z", "+00:00"))
except (ValueError, AttributeError):
pass
# Handle journals
journals = []
- if result.get('journals'):
- journals = [j.get('title') for j in result['journals'] if j.get('title')]
+ if result.get("journals"):
+ journals = [j.get("title") for j in result["journals"] if j.get("title")]
# Handle publisher
- publisher = result.get('publisher', '').strip("'")
- if publisher:
- publisher = publisher.strip("'")
+ publisher = result.get("publisher", "").strip("'")
# Handle authors
- authors = set()
- for i in result.get('authors', []):
- name = i.get("name")
+ authors: set[str] = set()
+ for i in result.get("authors", []):
+ name: str | None = i.get("name")
if name:
authors.add(name)
- results.append(
- {
- 'template': 'paper.html',
- 'title': result.get('title'),
- 'url': url,
- 'content': result.get('fullText', '') or '',
- # 'comments': '',
- 'tags': result.get('fieldOfStudy', []),
- 'publishedDate': published_date,
- 'type': result.get('documentType', '') or '',
- 'authors': authors,
- 'editor': ', '.join(result.get('contributors', [])),
- 'publisher': publisher,
- 'journal': ', '.join(journals),
- 'doi': result.get('doi'),
- # 'issn' : ''
- # 'isbn' : ''
- 'pdf_url': result.get('downloadUrl', {}) or result.get("sourceFulltextUrls", {}),
- }
+ res.add(
+ res.types.Paper(
+ title=result.get("title"),
+ url=url,
+ content=result.get("fullText", "") or "",
+ tags=result.get("fieldOfStudy", []),
+ publishedDate=published_date,
+ type=result.get("documentType", "") or "",
+ authors=authors,
+ editor=", ".join(result.get("contributors", [])),
+ publisher=publisher,
+ journal=", ".join(journals),
+ doi=result.get("doi"),
+ pdf_url=result.get("downloadUrl", {}) or result.get("sourceFulltextUrls", {}),
+ )
)
- return results
+ return res
diff --git a/searx/settings.yml b/searx/settings.yml
index 7d4d98c95..e34f501d2 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -659,12 +659,12 @@ engines:
timeout: 30
disabled: true
- # - name: core.ac.uk
- # engine: core
- # categories: science
- # shortcut: cor
- # # get your API key from: https://core.ac.uk/api-keys/register/
- # api_key: 'unset'
+ - name: core.ac.uk
+ engine: core
+ shortcut: cor
+ # read https://docs.searxng.org/dev/engines/online/core.html
+ api_key: ""
+ inactive: true
- name: cppreference
engine: cppreference