summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--searx/engines/acfun.py108
-rw-r--r--searx/settings.yml5
2 files changed, 113 insertions, 0 deletions
diff --git a/searx/engines/acfun.py b/searx/engines/acfun.py
new file mode 100644
index 000000000..ab30c13f2
--- /dev/null
+++ b/searx/engines/acfun.py
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Acfun search engine for searxng"""
+
+from urllib.parse import urlencode
+import re
+import json
+from datetime import datetime, timedelta
+from lxml import html
+
+from searx.utils import extract_text
+
# Descriptive metadata about the upstream site and how results are obtained
# (scraped from HTML, no official API and no API key involved).
about = {
    "website": "https://www.acfun.cn/",
    "wikidata_id": "Q3077675",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# Engine configuration: result category and paging support
# (request() forwards params["pageno"] to the site).
categories = ["videos"]
paging = True

# Root of the site; search, watch and player URLs are built from it.
base_url = "https://www.acfun.cn"
+
+
def request(query, params):
    """Fill in ``params["url"]`` with the Acfun search URL for *query*.

    The search term is sent as ``keyword`` and the requested page number
    (``params["pageno"]``) as ``pCursor``.  The same *params* dict is
    returned after being updated in place.
    """
    query_string = urlencode({"keyword": query, "pCursor": params["pageno"]})
    params["url"] = f"{base_url}/search?{query_string}"
    return params
+
+
def response(resp):
    """Extract video results from an Acfun search page.

    The page delivers its content through ``bigPipe.onPageletArrive({...});``
    calls.  Each call's JSON argument is decoded, its ``html`` member is
    parsed, and every ``search-video`` block found in it is converted with
    ``extract_video_data``.

    Pagelets that cannot be decoded, carry no usable HTML, or cannot be
    parsed are skipped so that one bad pagelet does not discard the rest.

    :param resp: HTTP response object; only ``resp.text`` is read.
    :returns: list of result dicts (possibly empty).
    """
    results = []

    pagelets = re.findall(r'bigPipe\.onPageletArrive\((\{.*?\})\);', resp.text, re.DOTALL)
    for pagelet in pagelets:
        try:
            json_data = json.loads(pagelet)
        except json.JSONDecodeError:
            continue

        raw_html = json_data.get("html", "")
        # Blank markup makes lxml raise "Document is empty", so reject it
        # (and any non-string value) before handing it to the parser.
        if not isinstance(raw_html, str) or not raw_html.strip():
            continue

        try:
            tree = html.fromstring(raw_html)
        except html.etree.ParserError:
            # Markup without any element (e.g. only a comment).
            continue

        for video_block in tree.xpath('//div[contains(@class, "search-video")]'):
            video_info = extract_video_data(video_block)
            if video_info and video_info["title"] and video_info["url"]:
                results.append(video_info)

    return results
+
+
def _parse_duration(text):
    """Convert a ``MM:SS`` or ``HH:MM:SS`` string into a ``timedelta``.

    The leading field may exceed 59 (e.g. ``"75:30"``), which
    ``strptime("%M:%S")`` would reject.  Returns ``None`` when *text* is not
    a recognisable duration.
    """
    fields = text.strip().split(":")
    if len(fields) not in (2, 3):
        return None
    try:
        numbers = [int(field) for field in fields]
    except ValueError:
        return None
    # Only the leading field is unbounded; the others are clock fields.
    if numbers[0] < 0 or any(not 0 <= number < 60 for number in numbers[1:]):
        return None

    total_seconds = 0
    for number in numbers:
        total_seconds = total_seconds * 60 + number
    return timedelta(seconds=total_seconds)


def _parse_published_date(text):
    """Parse a ``YYYY-MM-DD`` date string; return ``None`` for any other format."""
    try:
        return datetime.strptime(text.strip(), "%Y-%m-%d")
    except ValueError:
        return None


def extract_video_data(video_block):
    """Build a result dict from one ``search-video`` element.

    The video id and title come from the JSON stored in the element's
    ``data-exposure-log`` attribute; creation date, cover image, duration
    and description are read from child nodes.

    :param video_block: lxml element of a single search hit.
    :returns: dict with the keys ``title``, ``url``, ``content``,
        ``thumbnail``, ``length``, ``publishedDate`` and ``iframe_src``, or
        ``None`` when the block has no decodable exposure log or no content
        id (without an id no valid video URL can be built).
    """
    try:
        exposure_log = video_block.get('data-exposure-log')
        if not exposure_log:
            return None

        video_data = json.loads(exposure_log)

        content_id = video_data.get("content_id")
        if not content_id:
            # Would otherwise yield the bogus URL ".../v/ac".
            return None
        title = video_data.get("title") or ""

        url = f"{base_url}/v/ac{content_id}"
        iframe_src = f"{base_url}/player/ac{content_id}"

        create_time = extract_text(video_block.xpath('.//span[contains(@class, "info__create-time")]'))
        # A hit may lack a cover image; indexing [0] unconditionally would
        # raise IndexError, which the handler below does not cover.
        cover_sources = video_block.xpath('.//div[contains(@class, "video__cover")]/a/img/@src')
        video_cover = extract_text(cover_sources[0]) if cover_sources else ""
        video_duration = extract_text(video_block.xpath('.//span[contains(@class, "video__duration")]'))
        video_intro = extract_text(video_block.xpath('.//div[contains(@class, "video__main__intro")]'))

        published_date = _parse_published_date(create_time) if create_time else None
        length = _parse_duration(video_duration) if video_duration else None

        return {
            "title": title,
            "url": url,
            "content": video_intro,
            "thumbnail": video_cover,
            "length": length,
            "publishedDate": published_date,
            "iframe_src": iframe_src,
        }

    except (json.JSONDecodeError, AttributeError, TypeError, ValueError):
        # Best effort: a malformed block is dropped rather than failing the page.
        return None
diff --git a/searx/settings.yml b/searx/settings.yml
index 9662d2d09..c0b740e99 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -352,6 +352,11 @@ engines:
shortcut: 9g
disabled: true
+ - name: acfun
+ engine: acfun
+ shortcut: acf
+ disabled: true
+
- name: adobe stock
engine: adobe_stock
shortcut: asi