diff options
| author | Zhijie He <hezhijie0327@hotmail.com> | 2025-03-29 12:06:41 +0800 |
|---|---|---|
| committer | Markus Heiser <markus.heiser@darmarIT.de> | 2025-04-17 16:43:32 +0200 |
| commit | f94802f2d2268e5a458bef5985e0f4f53ab9882a (patch) | |
| tree | 0a9a2e7f10c928320869ef804fc8ea6a488324df /searx/engines/huggingface.py | |
| parent | d1c584b9619cdc11d6d22ecc8cde5f7dc85054e9 (diff) | |
[feat] engines: add Hugging Face engine
Diffstat (limited to 'searx/engines/huggingface.py')
| -rw-r--r-- | searx/engines/huggingface.py | 116 |
1 files changed, 116 insertions, 0 deletions
# SPDX-License-Identifier: AGPL-3.0-or-later
#
# Provenance (header of the unified diff this module was extracted from):
#   diff --git a/searx/engines/huggingface.py b/searx/engines/huggingface.py
#   new file mode 100644
#   index 000000000..b49bb3f21
#   --- /dev/null
#   +++ b/searx/engines/huggingface.py
#   @@ -0,0 +1,116 @@   (hunk header of the original patch)
"""`Hugging Face`_ search engine for SearXNG.

.. _Hugging Face: https://huggingface.co

Configuration
=============

The engine has the following additional settings:

- :py:obj:`huggingface_endpoint`

Configurations for endpoints:

.. code:: yaml

  - name: huggingface
    engine: huggingface
    shortcut: hf

  - name: huggingface datasets
    huggingface_endpoint: datasets
    engine: huggingface
    shortcut: hfd

  - name: huggingface spaces
    huggingface_endpoint: spaces
    engine: huggingface
    shortcut: hfs

Implementations
===============

"""

from urllib.parse import urlencode
from datetime import datetime

from searx.exceptions import SearxEngineAPIException
from searx.utils import html_to_text
from searx.result_types import EngineResults, MainResult

about = {
    "website": "https://huggingface.co/",
    "wikidata_id": "Q108943604",
    "official_api_documentation": "https://huggingface.co/docs/hub/en/api",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}

categories = ['it', 'repos']

base_url = "https://huggingface.co"

huggingface_endpoint = 'models'
"""Hugging Face supports datasets, models, spaces as search endpoint.

- ``datasets``: search for datasets
- ``models``: search for models
- ``spaces``: search for spaces
"""


def init(_) -> None:
    """Validate the configured :py:obj:`huggingface_endpoint`.

    The single positional argument is ignored.

    :raises SearxEngineAPIException: if :py:obj:`huggingface_endpoint` is not
        one of ``datasets``, ``models`` or ``spaces``.
    """
    if huggingface_endpoint not in ('datasets', 'models', 'spaces'):
        raise SearxEngineAPIException(f"Unsupported Hugging Face endpoint: {huggingface_endpoint}")


def request(query, params):
    """Build the API request URL for ``query``.

    The URL ``{base_url}/api/{huggingface_endpoint}`` is extended by the
    URL-encoded arguments ``direction=-1`` and ``search=<query>`` and stored in
    ``params["url"]``.

    :param query: the search term.
    :param params: request parameters; the ``"url"`` key is (over)written.
    :returns: the same ``params`` object.
    """
    query_params = {
        "direction": -1,
        "search": query,
    }

    params["url"] = f"{base_url}/api/{huggingface_endpoint}?{urlencode(query_params)}"

    return params


def _parse_created_at(value):
    """Parse a ``createdAt`` value of the form ``%Y-%m-%dT%H:%M:%S.%fZ``.

    :param value: the raw value taken from an API entry, may be ``None``.
    :returns: a naive :py:class:`datetime.datetime`, or ``None`` when the value
        is missing, not a string, or does not match the expected format.
    """
    try:
        return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%fZ")
    except (ValueError, TypeError):
        # ValueError: string in an unexpected format
        # TypeError: value is None (key missing) or otherwise not a string
        return None


def _build_content(entry) -> str:
    """Join the optional fields of ``entry`` into one summary line.

    Only fields that are present and truthy are included; the parts are
    separated by ``" | "``.  An entry without any of these fields yields an
    empty string.
    """
    contents = []
    if entry.get("likes"):
        contents.append(f"Likes: {entry['likes']}")
    if entry.get("downloads"):
        # ``:,`` adds thousands separators, e.g. ``1,234,567``
        contents.append(f"Downloads: {entry['downloads']:,}")
    if entry.get("tags"):
        contents.append(f"Tags: {', '.join(entry['tags'])}")
    if entry.get("description"):
        contents.append(f"Description: {entry['description']}")
    return " | ".join(contents)


def response(resp) -> EngineResults:
    """Convert the JSON answer of the API into SearXNG results.

    ``resp.json()`` is iterated and every entry becomes one
    :py:class:`MainResult` whose title is the entry's ``id``.  Results of the
    ``models`` endpoint are linked as ``{base_url}/{id}``, all other endpoints
    as ``{base_url}/{huggingface_endpoint}/{id}``.

    A missing or unparsable ``createdAt`` field leaves ``publishedDate`` unset
    instead of aborting the whole response.
    """
    results = EngineResults()

    for entry in resp.json():
        if huggingface_endpoint != 'models':
            url = f"{base_url}/{huggingface_endpoint}/{entry['id']}"
        else:
            url = f"{base_url}/{entry['id']}"

        item = MainResult(
            title=entry["id"],
            content=html_to_text(_build_content(entry)),
            url=url,
            # .get(): an entry without "createdAt" must not raise KeyError
            publishedDate=_parse_created_at(entry.get("createdAt")),
        )
        results.add(item)

    return results