summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Zhijie He <hezhijie0327@hotmail.com> 2025-03-29 12:06:41 +0800
committer Markus Heiser <markus.heiser@darmarIT.de> 2025-04-17 16:43:32 +0200
commit f94802f2d2268e5a458bef5985e0f4f53ab9882a (patch)
tree 0a9a2e7f10c928320869ef804fc8ea6a488324df
parent d1c584b9619cdc11d6d22ecc8cde5f7dc85054e9 (diff)
[feat] engines: add Hugging Face engine
-rw-r--r-- docs/dev/engines/online/huggingface.rst 8
-rw-r--r-- searx/engines/huggingface.py 116
-rw-r--r-- searx/settings.yml 16
3 files changed, 140 insertions, 0 deletions
diff --git a/docs/dev/engines/online/huggingface.rst b/docs/dev/engines/online/huggingface.rst
new file mode 100644
index 000000000..06727e719
--- /dev/null
+++ b/docs/dev/engines/online/huggingface.rst
@@ -0,0 +1,8 @@
+.. _huggingface engine:
+
+============
+Hugging Face
+============
+
+.. automodule:: searx.engines.huggingface
+ :members:
diff --git a/searx/engines/huggingface.py b/searx/engines/huggingface.py
new file mode 100644
index 000000000..b49bb3f21
--- /dev/null
+++ b/searx/engines/huggingface.py
@@ -0,0 +1,116 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""`Hugging Face`_ search engine for SearXNG.
+
+.. _Hugging Face: https://huggingface.co
+
+Configuration
+=============
+
+The engine has the following additional settings:
+
+- :py:obj:`huggingface_endpoint`
+
+Configurations for endpoints:
+
+.. code:: yaml
+
+ - name: huggingface
+ engine: huggingface
+ shortcut: hf
+
+ - name: huggingface datasets
+ huggingface_endpoint: datasets
+ engine: huggingface
+ shortcut: hfd
+
+ - name: huggingface spaces
+ huggingface_endpoint: spaces
+ engine: huggingface
+ shortcut: hfs
+
+Implementations
+===============
+
+"""
+
+from urllib.parse import urlencode
+from datetime import datetime
+
+from searx.exceptions import SearxEngineAPIException
+from searx.utils import html_to_text
+from searx.result_types import EngineResults, MainResult
+
+# Descriptive metadata about the upstream service.
+# NOTE(review): not referenced anywhere else in this module; presumably read
+# by the SearXNG engine loader -- confirm against the engine documentation.
+about = {
+ "website": "https://huggingface.co/",
+ "wikidata_id": "Q108943604",
+ "official_api_documentation": "https://huggingface.co/docs/hub/en/api",
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": "JSON",
+}
+
+# NOTE(review): presumably the SearXNG categories this engine is listed
+# under -- not used inside this module.
+categories = ['it', 'repos']
+
+# Root of both the API URL built in request() and the result links built in
+# response().
+base_url = "https://huggingface.co"
+
+# Endpoint used when the engine settings do not set ``huggingface_endpoint``;
+# read by init(), request() and response().
+huggingface_endpoint = 'models'
+"""Hugging Face supports datasets, models, spaces as search endpoint.
+
+- ``datasets``: search for datasets
+- ``models``: search for models
+- ``spaces``: search for spaces
+"""
+
+
+def init(_):
+    """Validate the configured :py:obj:`huggingface_endpoint`.
+
+    The single positional argument is ignored.
+
+    :raises SearxEngineAPIException: if the endpoint is not one of
+        ``datasets``, ``models`` or ``spaces``.
+    """
+    supported_endpoints = ('datasets', 'models', 'spaces')
+    if huggingface_endpoint in supported_endpoints:
+        return
+    raise SearxEngineAPIException(f"Unsupported Hugging Face endpoint: {huggingface_endpoint}")
+
+
+def request(query, params):
+    """Store the Hugging Face API search URL for ``query`` in ``params["url"]``.
+
+    The URL targets ``{base_url}/api/{huggingface_endpoint}`` and carries the
+    URL-encoded arguments ``direction=-1`` and ``search=<query>``.
+
+    :param query: the search string.
+    :param params: request parameter dict; its ``"url"`` key is overwritten.
+    :return: the same ``params`` object.
+    """
+    query_string = urlencode({"direction": -1, "search": query})
+    params["url"] = f"{base_url}/api/{huggingface_endpoint}?{query_string}"
+
+    return params
+
+
+def response(resp) -> EngineResults:
+    """Convert the JSON reply of the Hugging Face API into search results.
+
+    Each entry of the decoded JSON becomes one :py:obj:`MainResult`:
+
+    - ``title`` is the entry's ``id``,
+    - ``url`` points to the entry's page below :py:obj:`base_url`,
+    - ``content`` joins the available ``likes``, ``downloads``, ``tags`` and
+      ``description`` fields with ``" | "``,
+    - ``publishedDate`` is parsed from ``createdAt`` and stays ``None`` when
+      the field is missing or not in the expected format.
+
+    :param resp: response object; only its ``json()`` method is used.
+    :return: the collected results.
+    """
+    results = EngineResults()
+
+    data = resp.json()
+
+    for entry in data:
+        # Model pages sit directly below the base URL; datasets and spaces
+        # sit below a path segment named after the endpoint.
+        if huggingface_endpoint != 'models':
+            url = f"{base_url}/{huggingface_endpoint}/{entry['id']}"
+        else:
+            url = f"{base_url}/{entry['id']}"
+
+        # The date is best effort.  KeyError is handled as well so that a
+        # single entry without ``createdAt`` does not abort the whole result
+        # set; the result is then emitted without a date.
+        published_date = None
+        try:
+            published_date = datetime.strptime(entry["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+        except (KeyError, ValueError, TypeError):
+            pass
+
+        # Only fields that are present and truthy are shown, so zero likes
+        # or downloads are omitted.
+        contents = []
+        if entry.get("likes"):
+            contents.append(f"Likes: {entry['likes']}")
+        if entry.get("downloads"):
+            contents.append(f"Downloads: {entry['downloads']:,}")
+        if entry.get("tags"):
+            contents.append(f"Tags: {', '.join(entry['tags'])}")
+        if entry.get("description"):
+            contents.append(f"Description: {entry['description']}")
+
+        item = MainResult(
+            title=entry["id"],
+            content=html_to_text(" | ".join(contents)),
+            url=url,
+            publishedDate=published_date,
+        )
+        results.add(item)
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 5a87da3cd..c9435e4de 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1134,6 +1134,22 @@ engines:
- name: il post
engine: il_post
shortcut: pst
+
+ - name: huggingface
+ engine: huggingface
+ shortcut: hf
+ disabled: true
+
+ - name: huggingface datasets
+ huggingface_endpoint: datasets
+ engine: huggingface
+ shortcut: hfd
+ disabled: true
+
+ - name: huggingface spaces
+ huggingface_endpoint: spaces
+ engine: huggingface
+ shortcut: hfs
disabled: true
- name: imdb