summary refs log tree commit diff
diff options
context:
space:
mode:
author Alexandre Flament <alex@al-f.net> 2020-12-03 13:23:19 +0100
committer Alexandre Flament <alex@al-f.net> 2020-12-03 13:23:19 +0100
commit fa909c7c024d9ec98f6611fde0f99b0e797b1f3b (patch)
tree 00793b129b8e3cd80a5feee1224c270687c40cae
parent 7905d4148718816de60cafff8a05ce95f97ce6aa (diff)
[mod] stackoverflow & yandex: detect CAPTCHA response
-rw-r--r-- searx/engines/stackoverflow.py 7
-rw-r--r-- searx/engines/yandex.py 7
2 files changed, 12 insertions, 2 deletions
diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
index c6d58de65..f730264e2 100644
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@@ -10,9 +10,10 @@
@parse url, title, content
"""
-from urllib.parse import urlencode, urljoin
+from urllib.parse import urlencode, urljoin, urlparse
from lxml import html
from searx.utils import extract_text
+from searx.exceptions import SearxEngineCaptchaException
# engine dependent config
categories = ['it']
@@ -37,6 +38,10 @@ def request(query, params):
# get response from search-request
def response(resp):
+ resp_url = urlparse(resp.url)
+ if resp_url.path.startswith('/nocaptcha'):
+ raise SearxEngineCaptchaException()
+
results = []
dom = html.fromstring(resp.text)
diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py
index ff1ef5a26..b4a6a54cf 100644
--- a/searx/engines/yandex.py
+++ b/searx/engines/yandex.py
@@ -9,9 +9,10 @@
@parse url, title, content
"""
-from urllib.parse import urlencode
+from urllib.parse import urlencode, urlparse
from lxml import html
from searx import logger
+from searx.exceptions import SearxEngineCaptchaException
logger = logger.getChild('yandex engine')
@@ -47,6 +48,10 @@ def request(query, params):
# get response from search-request
def response(resp):
+ resp_url = urlparse(resp.url)
+ if resp_url.path.startswith('/showcaptcha'):
+ raise SearxEngineCaptchaException()
+
dom = html.fromstring(resp.text)
results = []