summary | refs | log | tree | commit | diff
path: root/searx/engines
diff options
context:
space:
mode:
author: Aadniz <8147434+Aadniz@users.noreply.github.com> 2025-03-26 19:56:58 +0100
committer: Markus Heiser <markus.heiser@darmarIT.de> 2025-03-27 06:11:39 +0100
commit: 02f5002a5f1c5d05a5876af66870d818eb37286e (patch)
tree: 83ec4cb1b820ac081bcd92b939b70120ef7b70f3 /searx/engines
parent: 4dfc47584d7c946b9682dc1e4858fae003b16d1f (diff)
[fix] baidu engine: properly decoding HTML escape codes
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/baidu.py 9
1 files changed, 7 insertions, 2 deletions
diff --git a/searx/engines/baidu.py b/searx/engines/baidu.py
index 1c9d86733..29c9c0e4d 100644
--- a/searx/engines/baidu.py
+++ b/searx/engines/baidu.py
@@ -9,6 +9,7 @@
from urllib.parse import urlencode
from datetime import datetime
+from html import unescape
import time
import json
@@ -119,11 +120,15 @@ def parse_general(data):
except (ValueError, TypeError):
published_date = None
+ # title and content sometimes containing characters such as &amp; &#39; &quot; etc...
+ title = unescape(entry["title"])
+ content = unescape(entry.get("abs", ""))
+
results.append(
{
- "title": entry["title"],
+ "title": title,
"url": entry["url"],
- "content": entry.get("abs", ""),
+ "content": content,
"publishedDate": published_date,
}
)