summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
authorBnyro <bnyro@tutanota.com>2025-12-01 15:19:06 +0100
committerGitHub <noreply@github.com>2025-12-01 15:19:06 +0100
commitab8224c9394236d2cbcf6ec7d9bf0d7c602ca6ac (patch)
tree84245e49b761b87806abeda80068a27265a111e6 /searx
parentc954e71f87d41ef5b70c0d20eb8c1ee5284573e2 (diff)
[fix] brave: content description also contains website URL (#5502)
There are other classes, such as `site-name-content`, that we don't want to match. Using only contains(@class, 'content') would also match those, so we explicitly require spaces around the class name as separators: contains(concat(' ', @class, ' '), ' content ').
Diffstat (limited to 'searx')
-rw-r--r--searx/engines/brave.py5
1 files changed, 4 insertions, 1 deletions
diff --git a/searx/engines/brave.py b/searx/engines/brave.py
index 2c5854705..a19046b77 100644
--- a/searx/engines/brave.py
+++ b/searx/engines/brave.py
@@ -301,7 +301,10 @@ def _parse_search(resp: SXNG_Response) -> EngineResults:
content: str = ""
pub_date = None
- _content = eval_xpath_getindex(result, ".//div[contains(@class, 'content')]", 0, default="")
+ # there are other classes like 'site-name-content' we don't want to match,
+ # however only using contains(@class, 'content') would e.g. also match `site-name-content`
+ # thus, we explicitly also require the spaces as class separator
+ _content = eval_xpath_getindex(result, ".//div[contains(concat(' ', @class, ' '), ' content ')]", 0, default="")
if len(_content):
content = extract_text(_content) # type: ignore
_pub_date = extract_text(