[fix] fix invalid escape error in Baidu Images & default config typo

author: Zhijie He <hezhijie0327@hotmail.com> 2025-03-15 18:44:46 +0800
committer: Markus Heiser <markus.heiser@darmarIT.de> 2025-03-15 17:14:54 +0100
commit: 38caa4954035c74bb690b551bfbee3148ba96f29 (patch)
tree: fbc081c3f7f940e933fe0a24d41a377ff6b0f10d /searx/engines
parent: 4ce7f1accc322d6936b2cc332e842f283fa2bf8e (diff)
1 files changed, 24 insertions, 13 deletions
diff --git a/searx/engines/baidu.py b/searx/engines/baidu.py
index b4aba587d..9ac28532a 100644
--- a/searx/engines/baidu.py
+++ b/searx/engines/baidu.py
@@ -11,6 +11,7 @@ from urllib.parse import urlencode
 from datetime import datetime
 import time
 import json
+import re
 
 from searx.exceptions import SearxEngineAPIException
 from searx.utils import html_to_text
@@ -92,11 +93,12 @@ def request(query, params):
 
 
 def response(resp):
-    try:
-        data = json.loads(resp.text, strict=False)
-    except Exception as e:
-        raise SearxEngineAPIException(f"Invalid response: {e}") from e
 
+    text = resp.text
+    if baidu_category == 'images':
+        # baidu's JSON encoder wrongly quotes / and ' characters by \\ and \'
+        text = text.replace(r"\/", "/").replace(r"\'", "'")
+    data = json.loads(text, strict=False)
     parsers = {'general': parse_general, 'images': parse_images, 'it': parse_it}
 
     return parsers[baidu_category](data)
@@ -133,19 +135,28 @@ def parse_images(data):
     results = []
     if "data" in data:
         for item in data["data"]:
+            if not item:
+                # the last item in the JSON list is empty, the JSON string ends with "}, {}]"
+                continue
             replace_url = item.get("replaceUrl", [{}])[0]
-            from_url = replace_url.get("FromURL", "").replace("\\/", "/")
-            img_src = replace_url.get("ObjURL", "").replace("\\/", "/")
-
+            width = item.get("width")
+            height = item.get("height")
+            img_date = item.get("bdImgnewsDate")
+            publishedDate = None
+            if img_date:
+                publishedDate = datetime.strptime(img_date, "%Y-%m-%d %H:%M")
             results.append(
                 {
                     "template": "images.html",
-                    "url": from_url,
-                    "thumbnail_src": item.get("thumbURL", ""),
-                    "img_src": img_src,
-                    "content": html_to_text(item.get("fromPageTitleEnc", "")),
-                    "title": html_to_text(item.get("fromPageTitle", "")),
-                    "source": item.get("fromURLHost", ""),
+                    "url": replace_url.get("FromURL"),
+                    "thumbnail_src": item.get("thumbURL"),
+                    "img_src": replace_url.get("ObjURL"),
+                    "title": html_to_text(item.get("fromPageTitle")),
+                    "source": item.get("fromURLHost"),
+                    "resolution": f"{width} x {height}",
+                    "img_format": item.get("type"),
+                    "filesize": item.get("filesize"),
+                    "publishedDate": publishedDate,
                 }
             )
     return results
author	Zhijie He <hezhijie0327@hotmail.com>	2025-03-15 18:44:46 +0800
committer	Markus Heiser <markus.heiser@darmarIT.de>	2025-03-15 17:14:54 +0100
commit	38caa4954035c74bb690b551bfbee3148ba96f29 (patch)
tree	fbc081c3f7f940e933fe0a24d41a377ff6b0f10d /searx/engines
parent	4ce7f1accc322d6936b2cc332e842f283fa2bf8e (diff)