summary refs log tree commit diff
path: root/searx
diff options
context:
space:
mode:
Diffstat (limited to 'searx')
-rw-r--r-- searx/engines/arxiv.py | 141
-rw-r--r-- searx/settings.yml | 1
2 files changed, 80 insertions, 62 deletions
diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py
index 39fcb1a34..c6fbb71a7 100644
--- a/searx/engines/arxiv.py
+++ b/searx/engines/arxiv.py
@@ -1,110 +1,129 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""ArXiV (Scientific preprints)
+"""arXiv is a free distribution service and an open-access archive for nearly
+2.4 million scholarly articles in the fields of physics, mathematics, computer
+science, quantitative biology, quantitative finance, statistics, electrical
+engineering and systems science, and economics.
+The engine uses the `arXiv API`_.
+
+.. _arXiv API: https://info.arxiv.org/help/api/user-manual.html
"""
+import typing as t
+
from datetime import datetime
+from urllib.parse import urlencode
from lxml import etree
from lxml.etree import XPath
from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex
+from searx.result_types import EngineResults
+
+if t.TYPE_CHECKING:
+ from searx.extended_types import SXNG_Response
+ from searx.search.processors import OnlineParams
-# about
about = {
- "website": 'https://arxiv.org',
- "wikidata_id": 'Q118398',
- "official_api_documentation": 'https://arxiv.org/help/api',
+ "website": "https://arxiv.org",
+ "wikidata_id": "Q118398",
+ "official_api_documentation": "https://info.arxiv.org/help/api/user-manual.html",
"use_official_api": True,
"require_api_key": False,
- "results": 'XML-RSS',
+ "results": "XML-RSS",
}
-categories = ['science', 'scientific publications']
+categories = ["science", "scientific publications"]
paging = True
+arxiv_max_results = 10
+arxiv_search_prefix = "all"
+"""Search fields, for more details see, `Details of Query Construction`_.
-base_url = (
- 'https://export.arxiv.org/api/query?search_query=all:' + '{query}&start={offset}&max_results={number_of_results}'
-)
+.. _Details of Query Construction:
+ https://info.arxiv.org/help/api/user-manual.html#51-details-of-query-construction
+"""
-# engine dependent config
-number_of_results = 10
+base_url = "https://export.arxiv.org/api/query"
+"""`arXiv API`_ URL, for more details see Query-Interface_
+
+.. _Query-Interface: https://info.arxiv.org/help/api/user-manual.html#_query_interface
+"""
-# xpaths
arxiv_namespaces = {
"atom": "http://www.w3.org/2005/Atom",
"arxiv": "http://arxiv.org/schemas/atom",
}
-xpath_entry = XPath('//atom:entry', namespaces=arxiv_namespaces)
-xpath_title = XPath('.//atom:title', namespaces=arxiv_namespaces)
-xpath_id = XPath('.//atom:id', namespaces=arxiv_namespaces)
-xpath_summary = XPath('.//atom:summary', namespaces=arxiv_namespaces)
-xpath_author_name = XPath('.//atom:author/atom:name', namespaces=arxiv_namespaces)
-xpath_doi = XPath('.//arxiv:doi', namespaces=arxiv_namespaces)
-xpath_pdf = XPath('.//atom:link[@title="pdf"]', namespaces=arxiv_namespaces)
-xpath_published = XPath('.//atom:published', namespaces=arxiv_namespaces)
-xpath_journal = XPath('.//arxiv:journal_ref', namespaces=arxiv_namespaces)
-xpath_category = XPath('.//atom:category/@term', namespaces=arxiv_namespaces)
-xpath_comment = XPath('./arxiv:comment', namespaces=arxiv_namespaces)
+xpath_entry = XPath("//atom:entry", namespaces=arxiv_namespaces)
+xpath_title = XPath(".//atom:title", namespaces=arxiv_namespaces)
+xpath_id = XPath(".//atom:id", namespaces=arxiv_namespaces)
+xpath_summary = XPath(".//atom:summary", namespaces=arxiv_namespaces)
+xpath_author_name = XPath(".//atom:author/atom:name", namespaces=arxiv_namespaces)
+xpath_doi = XPath(".//arxiv:doi", namespaces=arxiv_namespaces)
+xpath_pdf = XPath(".//atom:link[@title='pdf']", namespaces=arxiv_namespaces)
+xpath_published = XPath(".//atom:published", namespaces=arxiv_namespaces)
+xpath_journal = XPath(".//arxiv:journal_ref", namespaces=arxiv_namespaces)
+xpath_category = XPath(".//atom:category/@term", namespaces=arxiv_namespaces)
+xpath_comment = XPath("./arxiv:comment", namespaces=arxiv_namespaces)
-def request(query, params):
- # basic search
- offset = (params['pageno'] - 1) * number_of_results
+def request(query: str, params: "OnlineParams") -> None:
- string_args = {'query': query, 'offset': offset, 'number_of_results': number_of_results}
+ args = {
+ "search_query": f"{arxiv_search_prefix}:{query}",
+ "start": (params["pageno"] - 1) * arxiv_max_results,
+ "max_results": arxiv_max_results,
+ }
+ params["url"] = f"{base_url}?{urlencode(args)}"
- params['url'] = base_url.format(**string_args)
- return params
+def response(resp: "SXNG_Response") -> EngineResults:
+ res = EngineResults()
-def response(resp):
- results = []
dom = etree.fromstring(resp.content)
for entry in eval_xpath_list(dom, xpath_entry):
- title = eval_xpath_getindex(entry, xpath_title, 0).text
- url = eval_xpath_getindex(entry, xpath_id, 0).text
- abstract = eval_xpath_getindex(entry, xpath_summary, 0).text
+ title: str = eval_xpath_getindex(entry, xpath_title, 0).text
+
+ url: str = eval_xpath_getindex(entry, xpath_id, 0).text
+ abstract: str = eval_xpath_getindex(entry, xpath_summary, 0).text
- authors = [author.text for author in eval_xpath_list(entry, xpath_author_name)]
+ authors: list[str] = [author.text for author in eval_xpath_list(entry, xpath_author_name)]
# doi
doi_element = eval_xpath_getindex(entry, xpath_doi, 0, default=None)
- doi = None if doi_element is None else doi_element.text
+ doi: str = "" if doi_element is None else doi_element.text
# pdf
pdf_element = eval_xpath_getindex(entry, xpath_pdf, 0, default=None)
- pdf_url = None if pdf_element is None else pdf_element.attrib.get('href')
+ pdf_url: str = "" if pdf_element is None else pdf_element.attrib.get("href")
# journal
journal_element = eval_xpath_getindex(entry, xpath_journal, 0, default=None)
- journal = None if journal_element is None else journal_element.text
+ journal: str = "" if journal_element is None else journal_element.text
# tags
tag_elements = eval_xpath(entry, xpath_category)
- tags = [str(tag) for tag in tag_elements]
+ tags: list[str] = [str(tag) for tag in tag_elements]
# comments
comments_elements = eval_xpath_getindex(entry, xpath_comment, 0, default=None)
- comments = None if comments_elements is None else comments_elements.text
-
- publishedDate = datetime.strptime(eval_xpath_getindex(entry, xpath_published, 0).text, '%Y-%m-%dT%H:%M:%SZ')
-
- res_dict = {
- 'template': 'paper.html',
- 'url': url,
- 'title': title,
- 'publishedDate': publishedDate,
- 'content': abstract,
- 'doi': doi,
- 'authors': authors,
- 'journal': journal,
- 'tags': tags,
- 'comments': comments,
- 'pdf_url': pdf_url,
- }
-
- results.append(res_dict)
-
- return results
+ comments: str = "" if comments_elements is None else comments_elements.text
+
+ publishedDate = datetime.strptime(eval_xpath_getindex(entry, xpath_published, 0).text, "%Y-%m-%dT%H:%M:%SZ")
+
+ res.add(
+ res.types.Paper(
+ url=url,
+ title=title,
+ publishedDate=publishedDate,
+ content=abstract,
+ doi=doi,
+ authors=authors,
+ journal=journal,
+ tags=tags,
+ comments=comments,
+ pdf_url=pdf_url,
+ )
+ )
+
+ return res
diff --git a/searx/settings.yml b/searx/settings.yml
index c3dee3173..3b77bfe09 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -490,7 +490,6 @@ engines:
- name: arxiv
engine: arxiv
shortcut: arx
- timeout: 4.0
- name: ask
engine: ask