From baec54c49295020bd925e34a705afca29d7458f3 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 22 Jan 2021 18:49:45 +0100 Subject: [fix] revise of the google-news engine This revision is based on the methods developed in the revision of the google engine (see commit 410c2f9). Signed-off-by: Markus Heiser --- searx/engines/google.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'searx/engines/google.py') diff --git a/searx/engines/google.py b/searx/engines/google.py index 4198de640..fe9cd63e0 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -155,6 +155,11 @@ def get_lang_country(params, lang_list, custom_aliases): return language, country, lang_country +def detect_google_sorry(resp): + resp_url = urlparse(resp.url) + if resp_url.netloc == 'sorry.google.com' or resp_url.path.startswith('/sorry'): + raise SearxEngineCaptchaException() + def request(query, params): """Google search request""" @@ -200,16 +205,10 @@ def request(query, params): def response(resp): """Get response from google's search request""" - results = [] - # detect google sorry - resp_url = urlparse(resp.url) - if resp_url.netloc == 'sorry.google.com' or resp_url.path == '/sorry/IndexRedirect': - raise SearxEngineCaptchaException() - - if resp_url.path.startswith('/sorry'): - raise SearxEngineCaptchaException() + detect_google_sorry(resp) + results = [] # which subdomain ? # subdomain = resp.search_params.get('google_subdomain') -- cgit v1.2.3