From e36f85b8365e5d6a9263dd78242a10a305a9000c Mon Sep 17 00:00:00 2001
From: Alexandre FLAMENT <alexandre.flament@hesge.ch>
Date: Fri, 26 Aug 2022 16:10:12 +0000
Subject: Science category: update the engines

* use the paper.html template
* fetch more data from the engines
* add crossref.py
---
 searx/engines/crossref.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 searx/engines/crossref.py

(limited to 'searx/engines/crossref.py')

diff --git a/searx/engines/crossref.py b/searx/engines/crossref.py
new file mode 100644
index 000000000..d61318146
--- /dev/null
+++ b/searx/engines/crossref.py
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""CrossRef (Science)
+"""
+
+from urllib.parse import urlencode
+from searx.utils import html_to_text
+
+about = {
+    "website": 'https://www.crossref.org/',
+    "wikidata_id": 'Q5188229',
+    "official_api_documentation": 'https://github.com/CrossRef/rest-api-doc',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
+categories = ['science', 'scientific publications']
+paging = True
+search_url = 'https://api.crossref.org/works'
+
+
+def request(query, params):
+    params['url'] = search_url + '?' + urlencode(dict(query=query, offset=20 * (params['pageno'] - 1)))
+    return params
+
+
+def response(resp):
+    res = resp.json()
+    results = []
+    for record in res['message']['items']:
+        record_type = record['type']
+        if record_type == 'book-chapter':
+            title = record['container-title'][0]
+            if record['title'][0].lower().strip() != title.lower().strip():
+                title = title + ' (' + record['title'][0] + ')'
+            journal = None
+        else:
+            title = record['title'][0]
+            journal = record.get('container-title', [None])[0]
+        url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']
+        authors = [author.get('given', '') + ' ' + author.get('family', '') for author in record.get('author', [])]
+        isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])]
+        results.append(
+            {
+                'template': 'paper.html',
+                'url': url,
+                'title': title,
+                'journal': journal,
+                'volume': record.get('volume'),
+                'type': record['type'],
+                'content': html_to_text(record.get('abstract', '')),
+                'publisher': record.get('publisher'),
+                'authors': authors,
+                'doi': record['DOI'],
+                'isbn': isbn,
+            }
+        )
+    return results
-- 
cgit v1.2.3


From d6446be38f3f858c09887a89c8fc490a3c300b95 Mon Sep 17 00:00:00 2001
From: Alexandre Flament <alex@al-f.net>
Date: Fri, 23 Sep 2022 19:58:14 +0200
Subject: [mod] science category: various updates related to PR 1705

---
 searx/engines/crossref.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'searx/engines/crossref.py')

diff --git a/searx/engines/crossref.py b/searx/engines/crossref.py
index d61318146..fbe2f0c2a 100644
--- a/searx/engines/crossref.py
+++ b/searx/engines/crossref.py
@@ -33,10 +33,10 @@ def response(resp):
         if record_type == 'book-chapter':
             title = record['container-title'][0]
             if record['title'][0].lower().strip() != title.lower().strip():
-                title = title + ' (' + record['title'][0] + ')'
+                title = html_to_text(title) + ' (' + html_to_text(record['title'][0]) + ')'
             journal = None
         else:
-            title = record['title'][0]
+            title = html_to_text(record['title'][0])
             journal = record.get('container-title', [None])[0]
         url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']
         authors = [author.get('given', '') + ' ' + author.get('family', '') for author in record.get('author', [])]
-- 
cgit v1.2.3