From baec54c49295020bd925e34a705afca29d7458f3 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 22 Jan 2021 18:49:45 +0100 Subject: [fix] revise of the google-news engine This revise is based on the methods developed in the revise of the google engine (see commit 410c2f9). Signed-off-by: Markus Heiser --- searx/engines/google_images.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'searx/engines/google_images.py') diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 8c2cb9d2a..612682c44 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -12,10 +12,9 @@ Definitions`_. Header set Content-Security-Policy "img-src 'self' data: ;" """ -from urllib.parse import urlencode, urlparse, unquote +from urllib.parse import urlencode, unquote from lxml import html from searx import logger -from searx.exceptions import SearxEngineCaptchaException from searx.utils import extract_text, eval_xpath from searx.engines.google import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import @@ -23,6 +22,7 @@ from searx.engines.google import ( get_lang_country, google_domains, time_range_dict, + detect_google_sorry, ) logger = logger.getChild('google images') @@ -123,13 +123,7 @@ def response(resp): """Get response from google's search request""" results = [] - # detect google sorry - resp_url = urlparse(resp.url) - if resp_url.netloc == 'sorry.google.com' or resp_url.path == '/sorry/IndexRedirect': - raise SearxEngineCaptchaException() - - if resp_url.path.startswith('/sorry'): - raise SearxEngineCaptchaException() + detect_google_sorry(resp) # which subdomain ? # subdomain = resp.search_params.get('google_subdomain') -- cgit v1.2.3