summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Markus Heiser <markus.heiser@darmarit.de>, 2025-02-20 09:51:16 +0100
committer: Markus Heiser <markus.heiser@darmarIT.de>, 2025-02-20 10:08:03 +0100
commit: feb15e3878920ee7bf6e3d726fac0fcd1f89a896 (patch)
tree: f37327b2d4253d8c5c92551183d74cb1bbbc8ca1
parent: c2804c51e2d89766eb14e1776a50e616bfd12cd1 (diff)
[fix] brave.news engine: response is HTML and no longer JSON
The response from brave.com for news is no longer a JSON string.

Closes: https://github.com/searxng/searxng/issues/4352
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
-rw-r--r-- searx/engines/brave.py | 35
1 files changed, 24 insertions, 11 deletions
diff --git a/searx/engines/brave.py b/searx/engines/brave.py
index 828f6154e..90cce4045 100644
--- a/searx/engines/brave.py
+++ b/searx/engines/brave.py
@@ -254,14 +254,14 @@ def response(resp) -> EngineResults:
if brave_category in ('search', 'goggles'):
return _parse_search(resp)
+ if brave_category in ('news'):
+ return _parse_news(resp)
+
datastr = extr(resp.text, "const data = ", ";\n").strip()
json_data = js_variable_to_python(datastr)
json_resp = json_data[1]['data']['body']['response']
- if brave_category == 'news':
- return _parse_news(json_resp['news'])
-
if brave_category == 'images':
return _parse_images(json_resp)
if brave_category == 'videos':
@@ -339,18 +339,31 @@ def _parse_search(resp) -> EngineResults:
return result_list
-def _parse_news(json_resp) -> EngineResults:
+def _parse_news(resp) -> EngineResults:
+
result_list = EngineResults()
+ dom = html.fromstring(resp.text)
+
+ for result in eval_xpath_list(dom, '//div[contains(@class, "results")]//div[@data-type="news"]'):
+
+ # import pdb
+ # pdb.set_trace()
+
+ url = eval_xpath_getindex(result, './/a[contains(@class, "result-header")]/@href', 0, default=None)
+ if url is None:
+ continue
+
+ title = extract_text(eval_xpath_list(result, './/span[contains(@class, "snippet-title")]'))
+ content = extract_text(eval_xpath_list(result, './/p[contains(@class, "desc")]'))
+ thumbnail = eval_xpath_getindex(result, './/div[contains(@class, "image-wrapper")]//img/@src', 0, default='')
- for result in json_resp["results"]:
item = {
- 'url': result['url'],
- 'title': result['title'],
- 'content': result['description'],
- 'publishedDate': _extract_published_date(result['age']),
+ "url": url,
+ "title": title,
+ "content": content,
+ "thumbnail": thumbnail,
}
- if result['thumbnail'] is not None:
- item['thumbnail'] = result['thumbnail']['src']
+
result_list.append(item)
return result_list