summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
author0xhtml <34682885+0xhtml@users.noreply.github.com>2025-03-26 14:44:36 +0100
committerBnyro <bnyro@tutanota.com>2025-06-08 22:09:27 +0200
commit6a2cb4579179bb1e3ecede926013508482286ed4 (patch)
treefcb51b00a6144189b5dac1be4b88bbec623d14af /searx
parent49a1f08a17772cbe8713eb4c5e6d260e01644b12 (diff)
[feat] bing: raise error upon receiving wrong results page
Diffstat (limited to 'searx')
-rw-r--r--searx/engines/bing.py35
1 files changed, 21 insertions, 14 deletions
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 47e8bb66d..c1f152ea3 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -38,6 +38,7 @@ import babel.languages
from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
from searx.locales import language_tag, region_tag
from searx.enginelib.traits import EngineTraits
+from searx.exceptions import SearxEngineAPIException
if TYPE_CHECKING:
import logging
@@ -161,27 +162,33 @@ def response(resp):
results.append({'url': url, 'title': title, 'content': content})
# get number_of_results
- try:
+ if results:
result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
if "-" in result_len_container:
-
- # Remove the part "from-to" for paginated request ...
- result_len_container = result_len_container[result_len_container.find("-") * 2 + 2 :]
+ start_str, result_len_container = re.split(r'-\d+', result_len_container)
+ start = int(start_str)
+ else:
+ start = 1
result_len_container = re.sub('[^0-9]', '', result_len_container)
-
if len(result_len_container) > 0:
result_len = int(result_len_container)
- except Exception as e: # pylint: disable=broad-except
- logger.debug('result error :\n%s', e)
-
- if result_len and _page_offset(resp.search_params.get("pageno", 0)) > result_len:
- # Avoid reading more results than available.
- # For example, if there is 100 results from some search and we try to get results from 120 to 130,
- # Bing will send back the results from 0 to 10 and no error.
- # If we compare results count with the first parameter of the request we can avoid this "invalid" results.
- return []
+ expected_start = _page_offset(resp.search_params.get("pageno", 1))
+
+ if expected_start != start:
+ if expected_start > result_len:
+ # Avoid reading more results than available.
+ # For example, if there are 100 results for some search and we try to get results from 120 to 130,
+ # Bing will send back the results from 0 to 10 and no error.
+ # If we compare the result count with the first parameter of the request we can avoid these "invalid"
+ # results.
+ return []
+
+ # Sometimes Bing will send back the first result page instead of the requested page as a rate-limiting
+ # measure.
+ msg = f"Expected results to start at {expected_start}, but got results starting at {start}"
+ raise SearxEngineAPIException(msg)
results.append({'number_of_results': result_len})
return results