summary | refs | log | tree | commit | diff
path: root/searx/engines
diff options
context:
space:
mode:
author: Hermógenes Oliveira <OliveiraHermogenes@users.noreply.github.com> 2025-11-24 02:54:45 -0300
committer: GitHub <noreply@github.com> 2025-11-24 06:54:45 +0100
commit: 5fcee9bc307f6d3592ebcb1db4f4f8834df6f495 (patch)
tree: 12f246dc228876011b71445ca1c5cf629806926e /searx/engines
parent: 2f0e52d6ebad4c4f825e88142de2c62660053456 (diff)
[fix] recoll engine: remove HTML markup from result snippets (#5472)
Recoll inserts markup tags into result snippets to highlight the terms that match the search query. We strip these tags so that they are not displayed to users.
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/recoll.py 6
1 file changed, 5 insertions, 1 deletion
diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py
index c9e85344c..d58f60b2c 100644
--- a/searx/engines/recoll.py
+++ b/searx/engines/recoll.py
@@ -41,6 +41,7 @@ from datetime import date, timedelta
from urllib.parse import urlencode
from searx.result_types import EngineResults
+from searx.utils import html_to_text
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
@@ -133,11 +134,14 @@ def response(resp: "SXNG_Response") -> EngineResults:
if mtype in ["image"] and subtype in ["bmp", "gif", "jpeg", "png"]:
thumbnail = url
+ # remove HTML from snippet
+ content = html_to_text(result.get("snippet", ""))
+
res.add(
res.types.File(
title=result.get("label", ""),
url=url,
- content=result.get("snippet", ""),
+ content=content,
size=result.get("size", ""),
filename=result.get("filename", ""),
abstract=result.get("abstract", ""),