diff options
| author | Paolo Basso <12545838+paolobasso99@users.noreply.github.com> | 2023-06-25 17:12:17 +0200 |
|---|---|---|
| committer | Markus Heiser <markus.heiser@darmarit.de> | 2023-06-29 09:32:57 +0200 |
| commit | e5637fe7b98d7fb06cbbe0e0f24deb12a33187ba (patch) | |
| tree | 1967810e2b4391e5dfe9b50010938d1a8172fbba /searx | |
| parent | fd26f37073ae17cd85549ab461e3ca7eb7a44d4d (diff) | |
[feat] engine: implementation of Anna's Archive
Anna's Archive [1] is a free non-profit online shadow library metasearch engine
providing access to a variety of book resources (also via IPFS), created by a
team of anonymous archivists [2].
[1] https://annas-archive.org/
[2] https://annas-software.org/AnnaArchivist/annas-archive
Diffstat (limited to 'searx')
| -rw-r--r-- | searx/engines/annas-archive.py | 63 | ||||
| -rw-r--r-- | searx/settings.yml | 7 |
2 files changed, 70 insertions, 0 deletions
diff --git a/searx/engines/annas-archive.py b/searx/engines/annas-archive.py
new file mode 100644
index 000000000..56d1ca77a
--- /dev/null
+++ b/searx/engines/annas-archive.py
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Anna's Archive
+
+"""
+from typing import List, Dict, Any, Optional
+from urllib.parse import quote
+from lxml import html
+
+from searx.utils import extract_text, eval_xpath
+
+# about
+about: Dict[str, Any] = {
+    "website": "https://annas-archive.org/",
+    "wikidata_id": "Q115288326",
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": "HTML",
+}
+
+# engine dependent config
+categories: List[str] = ["files"]
+paging: bool = False
+
+# search-url
+base_url: str = "https://annas-archive.org"
+
+# xpath queries
+xpath_results: str = '//main//a[starts-with(@href,"/md5")]'
+xpath_url: str = ".//@href"
+xpath_title: str = ".//h3/text()[1]"
+xpath_authors: str = './/div[contains(@class, "italic")]'
+xpath_publisher: str = './/div[contains(@class, "text-sm")]'
+xpath_file_info: str = './/div[contains(@class, "text-xs")]'
+
+
+def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
+    search_url: str = base_url + "/search?q={search_query}"
+    params["url"] = search_url.format(search_query=quote(query))
+    return params
+
+
+def response(resp) -> List[Dict[str, Optional[str]]]:
+    results: List[Dict[str, Optional[str]]] = []
+    dom = html.fromstring(resp.text)
+
+    for item in dom.xpath(xpath_results):
+        result: Dict[str, Optional[str]] = {}
+
+        result["url"] = base_url + item.xpath(xpath_url)[0]
+
+        result["title"] = extract_text(eval_xpath(item, xpath_title))
+
+        result["content"] = "{publisher}. {authors}. {file_info}".format(
+            authors=extract_text(eval_xpath(item, xpath_authors)),
+            publisher=extract_text(eval_xpath(item, xpath_publisher)),
+            file_info=extract_text(eval_xpath(item, xpath_file_info)),
+        )
+
+        results.append(result)
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index b6bb0a0e3..561ec41a9 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -297,6 +297,13 @@ engines:
     shortcut: 9g
     disabled: true
 
+  - name: anna's archive
+    engine: annas-archive
+    paging: False
+    categories: files
+    disabled: true
+    shortcut: aa
+
   - name: apk mirror
     engine: apkmirror
     timeout: 4.0 |