summaryrefslogtreecommitdiff
path: root/searx/engines
diff options
context:
space:
mode:
author Zhijie He <hezhijie0327@hotmail.com> 2025-03-15 18:44:46 +0800
committer Markus Heiser <markus.heiser@darmarIT.de> 2025-03-15 17:14:54 +0100
commit 38caa4954035c74bb690b551bfbee3148ba96f29 (patch)
tree fbc081c3f7f940e933fe0a24d41a377ff6b0f10d /searx/engines
parent 4ce7f1accc322d6936b2cc332e842f283fa2bf8e (diff)
[fix] fix invalid escape error in Baidu Images & default config typo
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/baidu.py 37
1 files changed, 24 insertions, 13 deletions
diff --git a/searx/engines/baidu.py b/searx/engines/baidu.py
index b4aba587d..9ac28532a 100644
--- a/searx/engines/baidu.py
+++ b/searx/engines/baidu.py
@@ -11,6 +11,7 @@ from urllib.parse import urlencode
from datetime import datetime
import time
import json
+import re
from searx.exceptions import SearxEngineAPIException
from searx.utils import html_to_text
@@ -92,11 +93,12 @@ def request(query, params):
def response(resp):
- try:
- data = json.loads(resp.text, strict=False)
- except Exception as e:
- raise SearxEngineAPIException(f"Invalid response: {e}") from e
+ text = resp.text
+ if baidu_category == 'images':
+ # baidu's JSON encoder wrongly escapes the / and ' characters as \/ and \'
+ text = text.replace(r"\/", "/").replace(r"\'", "'")
+ data = json.loads(text, strict=False)
parsers = {'general': parse_general, 'images': parse_images, 'it': parse_it}
return parsers[baidu_category](data)
@@ -133,19 +135,28 @@ def parse_images(data):
results = []
if "data" in data:
for item in data["data"]:
+ if not item:
+ # the last item in the JSON list is empty: the JSON string ends with "}, {}]"
+ continue
replace_url = item.get("replaceUrl", [{}])[0]
- from_url = replace_url.get("FromURL", "").replace("\\/", "/")
- img_src = replace_url.get("ObjURL", "").replace("\\/", "/")
-
+ width = item.get("width")
+ height = item.get("height")
+ img_date = item.get("bdImgnewsDate")
+ publishedDate = None
+ if img_date:
+ publishedDate = datetime.strptime(img_date, "%Y-%m-%d %H:%M")
results.append(
{
"template": "images.html",
- "url": from_url,
- "thumbnail_src": item.get("thumbURL", ""),
- "img_src": img_src,
- "content": html_to_text(item.get("fromPageTitleEnc", "")),
- "title": html_to_text(item.get("fromPageTitle", "")),
- "source": item.get("fromURLHost", ""),
+ "url": replace_url.get("FromURL"),
+ "thumbnail_src": item.get("thumbURL"),
+ "img_src": replace_url.get("ObjURL"),
+ "title": html_to_text(item.get("fromPageTitle")),
+ "source": item.get("fromURLHost"),
+ "resolution": f"{width} x {height}",
+ "img_format": item.get("type"),
+ "filesize": item.get("filesize"),
+ "publishedDate": publishedDate,
}
)
return results