# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""CrossRef (Science)

Query the Crossref REST API ``/works`` endpoint and map every returned record
onto a ``paper.html`` result.
"""
# Provenance (state of searx/engines/crossref.py after this patch series):
#   e36f85b8365e  Alexandre FLAMENT  2022-08-26
#       Science category: update the engines
#       * use the paper.html template
#       * fetch more data from the engines
#       * add crossref.py
#   d6446be38f3f  Alexandre Flament  2022-09-23
#       [mod] science category: various update of about PR 1705
#       (titles are passed through html_to_text)

from urllib.parse import urlencode
from searx.utils import html_to_text

about = {
    "website": 'https://www.crossref.org/',
    "wikidata_id": 'Q5188229',
    "official_api_documentation": 'https://github.com/CrossRef/rest-api-doc',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['science', 'scientific publications']
paging = True
search_url = 'https://api.crossref.org/works'

# Number of records requested per page.  It is sent explicitly as ``rows`` so
# that the ``offset`` arithmetic in request() does not depend on a server-side
# default.
page_size = 20


def request(query, params):
    """Store the Crossref search URL for ``query`` in ``params['url']``.

    ``params['pageno']`` is read as a 1-based page number and converted to a
    record offset.  The (mutated) ``params`` dict is returned.
    """
    args = {
        'query': query,
        'rows': page_size,
        'offset': page_size * (params['pageno'] - 1),
    }
    params['url'] = search_url + '?' + urlencode(args)
    return params


def _first(record, key):
    """Return the first item of the list stored under ``key``, or ``None``
    when the key is missing or the list is empty."""
    values = record.get(key)
    return values[0] if values else None


def _title_and_journal(record):
    """Return a ``(title, journal)`` tuple for ``record``.

    For a ``book-chapter`` the container (book) title is the result title and
    the chapter title is appended in parentheses when it differs; there is no
    journal.  For every other type the record title is used and the container
    title is reported as journal.  ``title`` is ``None`` when the record
    carries no usable title at all.
    """
    container = _first(record, 'container-title')
    own_title = _first(record, 'title')

    if record.get('type') == 'book-chapter':
        if container and own_title and own_title.lower().strip() != container.lower().strip():
            return html_to_text(container) + ' (' + html_to_text(own_title) + ')', None
        title = container or own_title
        return (html_to_text(title) if title else None), None

    if own_title:
        return html_to_text(own_title), container
    # No title of its own: fall back to the container title rather than
    # failing on the whole response.
    return (html_to_text(container) if container else None), None


def _author_name(author):
    """Return a display name for one entry of the record's ``author`` list.

    ``given`` and ``family`` are joined without stray blanks; if both are
    missing the ``name`` field is used (presumably set for organisations).
    """
    full_name = (author.get('given', '') + ' ' + author.get('family', '')).strip()
    return full_name or author.get('name', '')


def response(resp):
    """Convert the JSON body of ``resp`` into a list of ``paper.html`` results.

    Records without any title or without a URL are skipped so that a single
    incomplete record does not abort the whole result page.
    """
    res = resp.json()
    results = []
    for record in res['message']['items']:
        title, journal = _title_and_journal(record)
        # Prefer the primary resource URL, fall back to the record's URL.
        url = record.get('resource', {}).get('primary', {}).get('URL') or record.get('URL')
        if not title or not url:
            continue

        authors = [name for name in map(_author_name, record.get('author', [])) if name]
        isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])]
        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': title,
                'journal': journal,
                'volume': record.get('volume'),
                'type': record.get('type'),
                'content': html_to_text(record.get('abstract', '')),
                'publisher': record.get('publisher'),
                'authors': authors,
                'doi': record.get('DOI'),
                'isbn': isbn,
            }
        )
    return results