From ab8224c9394236d2cbcf6ec7d9bf0d7c602ca6ac Mon Sep 17 00:00:00 2001 From: Bnyro Date: Mon, 1 Dec 2025 15:19:06 +0100 Subject: [fix] brave: content description also contains website URL (#5502) There are other classes, like 'site-name-content', that we don't want to match. However, using only contains(@class, 'content') would also match e.g. `site-name-content`. Thus, we explicitly also require spaces as class separators. --- searx/engines/brave.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'searx/engines') diff --git a/searx/engines/brave.py b/searx/engines/brave.py index 2c5854705..a19046b77 100644 --- a/searx/engines/brave.py +++ b/searx/engines/brave.py @@ -301,7 +301,10 @@ def _parse_search(resp: SXNG_Response) -> EngineResults: content: str = "" pub_date = None - _content = eval_xpath_getindex(result, ".//div[contains(@class, 'content')]", 0, default="") + # there are other classes like 'site-name-content' we don't want to match, + # however only using contains(@class, 'content') would e.g. also match `site-name-content` + # thus, we explicitly also require the spaces as class separator + _content = eval_xpath_getindex(result, ".//div[contains(concat(' ', @class, ' '), ' content ')]", 0, default="") if len(_content): content = extract_text(_content) # type: ignore _pub_date = extract_text( -- cgit v1.2.3