summary refs log tree commit diff
diff options
context:
space:
mode:
author: Markus Heiser <markus.heiser@darmarit.de> 2025-01-20 11:45:00 +0100
committer: Markus Heiser <markus.heiser@darmarIT.de> 2025-01-20 16:40:36 +0100
commit: e581921c9229f11a9ab23de2963b020546f2be0d (patch)
tree: f2ad01d6481618b22f5c9424dd31863760a22a3b
parent: 073d9549a00362cb6fe0398396e139bc93ed4918 (diff)
[fix] engine brave: remove date from the content string
Related: https://github.com/searxng/searxng/issues/4211#issuecomment-2601941440
Closes: https://github.com/searxng/searxng/issues/4006
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
-rw-r--r-- searx/engines/brave.py 12
1 file changed, 9 insertions, 3 deletions
diff --git a/searx/engines/brave.py b/searx/engines/brave.py
index 648aee562..db1fc7976 100644
--- a/searx/engines/brave.py
+++ b/searx/engines/brave.py
@@ -291,15 +291,21 @@ def _parse_search(resp):
if url is None or title_tag is None or not urlparse(url).netloc: # partial url likely means it's an ad
continue
- content_tag = eval_xpath_getindex(result, './/div[contains(@class, "snippet-description")]', 0, default='')
+ content: str = extract_text(
+ eval_xpath_getindex(result, './/div[contains(@class, "snippet-description")]', 0, default='')
+ ) # type: ignore
pub_date_raw = eval_xpath(result, 'substring-before(.//div[contains(@class, "snippet-description")], "-")')
+ pub_date = _extract_published_date(pub_date_raw)
+ if pub_date and content.startswith(pub_date_raw):
+ content = content.lstrip(pub_date_raw).strip("- \n\t")
+
thumbnail = eval_xpath_getindex(result, './/img[contains(@class, "thumb")]/@src', 0, default='')
item = {
'url': url,
'title': extract_text(title_tag),
- 'content': extract_text(content_tag),
- 'publishedDate': _extract_published_date(pub_date_raw),
+ 'content': content,
+ 'publishedDate': pub_date,
'thumbnail': thumbnail,
}