diff options
| author | Aadniz <8147434+Aadniz@users.noreply.github.com> | 2025-03-26 19:56:58 +0100 |
|---|---|---|
| committer | Markus Heiser <markus.heiser@darmarIT.de> | 2025-03-27 06:11:39 +0100 |
| commit | 02f5002a5f1c5d05a5876af66870d818eb37286e (patch) | |
| tree | 83ec4cb1b820ac081bcd92b939b70120ef7b70f3 /searx/engines | |
| parent | 4dfc47584d7c946b9682dc1e4858fae003b16d1f (diff) | |
[fix] baidu engine: properly decoding HTML escape codes
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/baidu.py | 9 |
1 file changed, 7 insertions, 2 deletions
```diff
diff --git a/searx/engines/baidu.py b/searx/engines/baidu.py
index 1c9d86733..29c9c0e4d 100644
--- a/searx/engines/baidu.py
+++ b/searx/engines/baidu.py
@@ -9,6 +9,7 @@
 
 from urllib.parse import urlencode
 from datetime import datetime
+from html import unescape
 import time
 import json
 
@@ -119,11 +120,15 @@ def parse_general(data):
         except (ValueError, TypeError):
             published_date = None
 
+        # title and content sometimes containing characters such as & ' " etc...
+        title = unescape(entry["title"])
+        content = unescape(entry.get("abs", ""))
+
         results.append(
             {
-                "title": entry["title"],
+                "title": title,
                 "url": entry["url"],
-                "content": entry.get("abs", ""),
+                "content": content,
                 "publishedDate": published_date,
             }
         )
```