diff options
| author | Hermógenes Oliveira <OliveiraHermogenes@users.noreply.github.com> | 2025-11-24 02:54:45 -0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-11-24 06:54:45 +0100 |
| commit | 5fcee9bc307f6d3592ebcb1db4f4f8834df6f495 (patch) | |
| tree | 12f246dc228876011b71445ca1c5cf629806926e | |
| parent | 2f0e52d6ebad4c4f825e88142de2c62660053456 (diff) | |
[fix] recoll engine: remove HTML markup from result snippets (#5472)
Recoll inserts markup tags in snippets to indicate matching terms in a
search query. We remove them so that they don't show to users.
| -rw-r--r-- | searx/engines/recoll.py | 6 |
1 file changed, 5 insertions, 1 deletion
```diff
diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py
index c9e85344c..d58f60b2c 100644
--- a/searx/engines/recoll.py
+++ b/searx/engines/recoll.py
@@ -41,6 +41,7 @@ from datetime import date, timedelta
 from urllib.parse import urlencode
 
 from searx.result_types import EngineResults
+from searx.utils import html_to_text
 
 if t.TYPE_CHECKING:
     from searx.extended_types import SXNG_Response
@@ -133,11 +134,14 @@ def response(resp: "SXNG_Response") -> EngineResults:
             if mtype in ["image"] and subtype in ["bmp", "gif", "jpeg", "png"]:
                 thumbnail = url
 
+        # remove HTML from snippet
+        content = html_to_text(result.get("snippet", ""))
+
         res.add(
             res.types.File(
                 title=result.get("label", ""),
                 url=url,
-                content=result.get("snippet", ""),
+                content=content,
                 size=result.get("size", ""),
                 filename=result.get("filename", ""),
                 abstract=result.get("abstract", ""),
```