summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
Diffstat (limited to 'searx')
-rw-r--r--searx/engines/zlibrary.py78
-rw-r--r--searx/settings.yml3
2 files changed, 44 insertions, 37 deletions
diff --git a/searx/engines/zlibrary.py b/searx/engines/zlibrary.py
index a1729861f..233f776f3 100644
--- a/searx/engines/zlibrary.py
+++ b/searx/engines/zlibrary.py
@@ -39,15 +39,16 @@ from urllib.parse import quote
from lxml import html
from flask_babel import gettext # pyright: ignore[reportUnknownVariableType]
-from searx.utils import extract_text, eval_xpath, eval_xpath_list
+from searx.utils import extract_text, eval_xpath, eval_xpath_list, ElementType
from searx.enginelib.traits import EngineTraits
from searx.data import ENGINE_TRAITS
from searx.exceptions import SearxException
+from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
+ from searx.search.processors import OnlineParams
-# about
about: dict[str, t.Any] = {
"website": "https://zlibrary-global.se",
"wikidata_id": "Q104863992",
@@ -57,7 +58,7 @@ about: dict[str, t.Any] = {
"results": "HTML",
}
-categories: list[str] = ["files"]
+categories: list[str] = ["files", "books"]
paging: bool = True
base_url: str = "https://zlibrary-global.se"
@@ -74,8 +75,12 @@ zlib_ext: str = ""
``PDF`` and ``EPUB``.
"""
+i18n_language = gettext("Language")
+i18n_book_rating = gettext("Book rating")
+i18n_file_quality = gettext("File quality")
+
-def init(engine_settings: dict[str, t.Any] | None = None) -> None: # pylint: disable=unused-argument
+def setup(engine_settings: dict[str, t.Any]) -> bool: # pylint: disable=unused-argument
"""Check of engine's settings."""
traits: EngineTraits = EngineTraits(**ENGINE_TRAITS["z-library"])
@@ -85,10 +90,11 @@ def init(engine_settings: dict[str, t.Any] | None = None) -> None: # pylint: di
raise ValueError(f"invalid setting year_from: {zlib_year_from}")
if zlib_year_to and zlib_year_to not in traits.custom["year_to"]:
raise ValueError(f"invalid setting year_to: {zlib_year_to}")
+ return True
-def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]:
- lang: str = traits.get_language(params["language"], traits.all_locale) # type: ignore
+def request(query: str, params: "OnlineParams") -> None:
+ lang: str | None = traits.get_language(params["searxng_locale"], traits.all_locale)
search_url: str = (
base_url
+ "/s/{search_query}/?page={pageno}"
@@ -106,41 +112,35 @@ def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]:
zlib_ext=zlib_ext,
)
params["verify"] = False
- return params
-
-
-def domain_is_seized(dom):
- return bool(dom.xpath('//title') and "seized" in dom.xpath('//title')[0].text.lower())
-def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]:
- results: list[dict[str, t.Any]] = []
+def response(resp: "SXNG_Response") -> EngineResults:
+ res = EngineResults()
dom = html.fromstring(resp.text)
if domain_is_seized(dom):
raise SearxException(f"zlibrary domain is seized: {base_url}")
for item in dom.xpath('//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'):
- results.append(_parse_result(item))
+ kwargs = _parse_result(item)
+ res.add(res.types.Paper(**kwargs))
- return results
+ return res
-def _text(item, selector: str) -> str | None:
- return extract_text(eval_xpath(item, selector))
+def domain_is_seized(dom: ElementType):
+ return bool(dom.xpath('//title') and "seized" in dom.xpath('//title')[0].text.lower())
-i18n_language = gettext("Language")
-i18n_book_rating = gettext("Book rating")
-i18n_file_quality = gettext("File quality")
+def _text(item: ElementType, selector: str) -> str | None:
+ return extract_text(eval_xpath(item, selector))
-def _parse_result(item) -> dict[str, t.Any]:
+def _parse_result(item: ElementType) -> dict[str, t.Any]:
author_elements = eval_xpath_list(item, './/div[@class="authors"]//a[@itemprop="author"]')
result = {
- "template": "paper.html",
"url": base_url + item.xpath('(.//a[starts-with(@href, "/book/")])[1]/@href')[0],
"title": _text(item, './/*[@itemprop="name"]'),
"authors": [extract_text(author) for author in author_elements],
@@ -148,15 +148,15 @@ def _parse_result(item) -> dict[str, t.Any]:
"type": _text(item, './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]'),
}
- thumbnail: str = _text(item, './/img[contains(@class, "cover")]/@data-src')
- if not thumbnail.startswith('/'):
+ thumbnail = _text(item, './/img[contains(@class, "cover")]/@data-src')
+ if thumbnail and not thumbnail.startswith('/'):
result["thumbnail"] = thumbnail
year = _text(item, './/div[contains(@class, "property_year")]//div[contains(@class, "property_value")]')
if year:
result["publishedDate"] = datetime.strptime(year, '%Y')
- content = []
+ content: list[str] = []
language = _text(item, './/div[contains(@class, "property_language")]//div[contains(@class, "property_value")]')
if language:
content.append(f"{i18n_language}: {language.capitalize()}")
@@ -173,9 +173,10 @@ def _parse_result(item) -> dict[str, t.Any]:
def fetch_traits(engine_traits: EngineTraits) -> None:
"""Fetch languages and other search arguments from zlibrary's search form."""
- # pylint: disable=import-outside-toplevel, too-many-branches
+ # pylint: disable=import-outside-toplevel, too-many-branches, too-many-statements
import babel
+ import babel.core
import httpx
from searx.network import get # see https://github.com/searxng/searxng/issues/762
@@ -197,7 +198,7 @@ def fetch_traits(engine_traits: EngineTraits) -> None:
if not resp.ok:
raise RuntimeError("Response from zlibrary's search page is not OK.")
- dom = html.fromstring(resp.text) # type: ignore
+ dom = html.fromstring(resp.text)
if domain_is_seized(dom):
print(f"ERROR: zlibrary domain is seized: {base_url}")
@@ -206,25 +207,30 @@ def fetch_traits(engine_traits: EngineTraits) -> None:
engine_traits.all_locale = ""
engine_traits.custom["ext"] = []
- engine_traits.custom["year_from"] = []
- engine_traits.custom["year_to"] = []
+ l: list[str]
+ # years_from
+ l = []
for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearFrom']/option"):
- engine_traits.custom["year_from"].append(year.get("value"))
+ l.append(year.get("value") or "")
+ engine_traits.custom["year_from"] = l
+ # years_to
+ l = []
for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearTo']/option"):
- engine_traits.custom["year_to"].append(year.get("value"))
+ l.append(year.get("value") or "")
+ engine_traits.custom["year_to"] = l
+ # ext (file extensions)
+ l = []
for ext in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_extensions']/option"):
- value: str | None = ext.get("value")
- if value is None:
- value = ""
- engine_traits.custom["ext"].append(value)
+ l.append(ext.get("value") or "")
+ engine_traits.custom["ext"] = l
# Handle languages
# Z-library uses English names for languages, so we need to map them to their respective locales
language_name_locale_map: dict[str, babel.Locale] = {}
- for locale in babel.core.localedata.locale_identifiers(): # type: ignore
+ for locale in babel.core.localedata.locale_identifiers():
# Create a Locale object for the current locale
loc = babel.Locale.parse(locale)
if loc.english_name is None:
diff --git a/searx/settings.yml b/searx/settings.yml
index 200f96de9..217658088 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1254,9 +1254,10 @@ engines:
- name: z-library
engine: zlibrary
shortcut: zlib
- categories: files
timeout: 7.0
disabled: true
+ # https://github.com/searxng/searxng/issues/3610
+ inactive: true
- name: library of congress
engine: loc