summary | refs | log | tree | commit | diff
path: root/searx/engines/brave.py
diff options
context:
space:
mode:
author Markus Heiser <markus.heiser@darmarit.de> 2025-02-20 16:52:35 +0100
committer Markus Heiser <markus.heiser@darmarIT.de> 2025-02-20 17:03:25 +0100
commit 4994fbb5af9b855ce326f558ef43b6c9ca2d97f8 (patch)
tree f39b49c2e34207b175d3132f922eb8a06c4df5bd /searx/engines/brave.py
parent caf0dd5372382229f4e3a651440cf281492baf21 (diff)
[fix] engines bing.images & brave.videos - fix parse data string
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/brave.py')
-rw-r--r-- searx/engines/brave.py 33
1 files changed, 29 insertions, 4 deletions
diff --git a/searx/engines/brave.py b/searx/engines/brave.py
index 90cce4045..4a9c2828f 100644
--- a/searx/engines/brave.py
+++ b/searx/engines/brave.py
@@ -131,7 +131,6 @@ from lxml import html
from searx import locales
from searx.utils import (
extract_text,
- extr,
eval_xpath,
eval_xpath_list,
eval_xpath_getindex,
@@ -249,6 +248,33 @@ def _extract_published_date(published_date_raw):
return None
def parse_data_string(resp):
    """Extract and parse the ``data: [...]`` literal passed to ``kit.start``.

    The page embeds its payload in a script call of this shape::

        kit.start(app, element, {
            node_ids: [0, 19],
            data: [{"type":"data","data" .... ["q","goggles_id"],"route":1,"url":1}}]
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

    The marked bracketed literal is located by matching square brackets and
    then handed to ``js_variable_to_python``.

    Brackets that appear inside quoted strings (e.g. a result title such as
    ``"foo ] bar"``) are ignored while matching; counting them would cut the
    literal short or run past its real end.

    :param resp: response object whose ``text`` attribute holds the page source.
    :returns: whatever ``js_variable_to_python`` yields for the extracted
        literal (the caller indexes it like a list).
    :raises ValueError: if ``kit.start(app,`` or the ``data: [{"type":"data"``
        marker is not present in ``resp.text``.
    """
    text = resp.text
    kit_start = text.index("kit.start(app,")
    # Search only after the kit.start( call; skip the 'data: ' label so that
    # ``start`` points at the opening '[' of the literal.
    start = text.index('data: [{"type":"data"', kit_start) + len('data: ')

    depth = 0  # nesting level of '[' ... ']' outside of string literals
    quote = None  # the quote character of the string we are inside, if any
    escaped = False  # True when the previous in-string char was a backslash
    # If the brackets never balance, fall back to the rest of the text (the
    # behaviour of the previous implementation).
    end = len(text)

    for pos, c in enumerate(text[start:], start):
        if quote is not None:
            # Inside a string literal: only look for its (unescaped) end.
            if escaped:
                escaped = False
            elif c == "\\":
                escaped = True
            elif c == quote:
                quote = None
            continue
        if c in "\"'":
            quote = c
        elif c == "[":
            depth += 1
        elif c == "]":
            depth -= 1
            if depth == 0:
                # Matching bracket of the outermost '[' found.
                end = pos + 1
                break

    return js_variable_to_python(text[start:end])
+
+
def response(resp) -> EngineResults:
if brave_category in ('search', 'goggles'):
@@ -257,9 +283,8 @@ def response(resp) -> EngineResults:
if brave_category in ('news'):
return _parse_news(resp)
- datastr = extr(resp.text, "const data = ", ";\n").strip()
-
- json_data = js_variable_to_python(datastr)
+ json_data = parse_data_string(resp)
+ # json_data is a list and at the second position (0,1) in this list we find the "response" data we need ..
json_resp = json_data[1]['data']['body']['response']
if brave_category == 'images':