diff options
| author | Markus Heiser <markus.heiser@darmarit.de> | 2025-02-20 16:52:35 +0100 |
|---|---|---|
| committer | Markus Heiser <markus.heiser@darmarIT.de> | 2025-02-20 17:03:25 +0100 |
| commit | 4994fbb5af9b855ce326f558ef43b6c9ca2d97f8 (patch) | |
| tree | f39b49c2e34207b175d3132f922eb8a06c4df5bd /searx/engines/brave.py | |
| parent | caf0dd5372382229f4e3a651440cf281492baf21 (diff) | |
[fix] engines bing.images & brave.videos - fix parse data string
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/brave.py')
| -rw-r--r-- | searx/engines/brave.py | 33 |
1 files changed, 29 insertions, 4 deletions
```diff
diff --git a/searx/engines/brave.py b/searx/engines/brave.py
index 90cce4045..4a9c2828f 100644
--- a/searx/engines/brave.py
+++ b/searx/engines/brave.py
@@ -131,7 +131,6 @@ from lxml import html
 from searx import locales
 from searx.utils import (
     extract_text,
-    extr,
     eval_xpath,
     eval_xpath_list,
     eval_xpath_getindex,
@@ -249,6 +248,33 @@ def _extract_published_date(published_date_raw):
         return None
 
 
+def parse_data_string(resp):
+    # kit.start(app, element, {
+    #    node_ids: [0, 19],
+    #    data: [{"type":"data","data" .... ["q","goggles_id"],"route":1,"url":1}}]
+    #          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    kit_start = resp.text.index("kit.start(app,")
+    start = resp.text[kit_start:].index('data: [{"type":"data"')
+    start = kit_start + start + len('data: ')
+
+    lev = 0
+    end = start
+    inner = False
+    for c in resp.text[start:]:
+        if inner and lev == 0:
+            break
+        end += 1
+        if c == "[":
+            lev += 1
+            inner = True
+            continue
+        if c == "]":
+            lev -= 1
+
+    json_data = js_variable_to_python(resp.text[start:end])
+    return json_data
+
+
 def response(resp) -> EngineResults:
 
     if brave_category in ('search', 'goggles'):
@@ -257,9 +283,8 @@ def response(resp) -> EngineResults:
     if brave_category in ('news'):
         return _parse_news(resp)
 
-    datastr = extr(resp.text, "const data = ", ";\n").strip()
-
-    json_data = js_variable_to_python(datastr)
+    json_data = parse_data_string(resp)
+    # json_data is a list and at the second position (0,1) in this list we find the "response" data we need ..
     json_resp = json_data[1]['data']['body']['response']
 
     if brave_category == 'images':
```