summary | refs | log | tree | commit | diff
path: root/searx/search
diff options
context:
space:
mode:
author: Alexandre Flament <alex@al-f.net> 2021-04-12 17:34:21 +0200
committer: GitHub <noreply@github.com> 2021-04-12 17:34:21 +0200
commit: 01cefffbf6efa8a027e0e7d720970fffadb6337a (patch)
tree: a4c37f7b73897c7635ee5fab01c1e8e967e23d8a /searx/search
parent: 6c0114567e7ba1b3f4a54327eddf658b7474ca58 (diff)
parent: d14994dc73ba5c95382812581dac146d9eceaafa (diff)
Merge pull request #1 from metasearch-lab/httpx_networks
Httpx networks
Diffstat (limited to 'searx/search')
-rw-r--r-- searx/search/checker/impl.py 12
-rw-r--r-- searx/search/processors/online.py 42
2 files changed, 25 insertions, 29 deletions
diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py
index ad45440ea..e54b3f68d 100644
--- a/searx/search/checker/impl.py
+++ b/searx/search/checker/impl.py
@@ -11,9 +11,9 @@ from urllib.parse import urlparse
import re
from langdetect import detect_langs
from langdetect.lang_detect_exception import LangDetectException
-import requests.exceptions
+import httpx
-from searx import poolrequests, logger
+from searx import network, logger
from searx.results import ResultContainer
from searx.search.models import SearchQuery, EngineRef
from searx.search.processors import EngineProcessor
@@ -75,8 +75,8 @@ def _is_url_image(image_url):
while retry > 0:
a = time()
try:
- poolrequests.set_timeout_for_thread(10.0, time())
- r = poolrequests.get(image_url, timeout=10.0, allow_redirects=True, headers={
+ network.set_timeout_for_thread(10.0, time())
+ r = network.get(image_url, timeout=10.0, allow_redirects=True, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US;q=0.5,en;q=0.3',
@@ -90,10 +90,10 @@ def _is_url_image(image_url):
if r.headers["content-type"].startswith('image/'):
return True
return False
- except requests.exceptions.Timeout:
+ except httpx.TimeoutException:
logger.error('Timeout for %s: %i', image_url, int(time() - a))
retry -= 1
- except requests.exceptions.RequestException:
+ except httpx.HTTPError:
logger.exception('Exception for %s', image_url)
return False
diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py
index 1fc6444ad..66719ea9b 100644
--- a/searx/search/processors/online.py
+++ b/searx/search/processors/online.py
@@ -1,12 +1,12 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-from urllib.parse import urlparse
from time import time
import threading
+import asyncio
-import requests.exceptions
+import httpx
-import searx.poolrequests as poolrequests
+import searx.network
from searx.engines import settings
from searx import logger
from searx.utils import gen_useragent
@@ -64,10 +64,6 @@ class OnlineProcessor(EngineProcessor):
auth=params['auth']
)
- # setting engine based proxies
- if hasattr(self.engine, 'proxies'):
- request_args['proxies'] = poolrequests.get_proxies(self.engine.proxies)
-
# max_redirects
max_redirects = params.get('max_redirects')
if max_redirects:
@@ -85,9 +81,9 @@ class OnlineProcessor(EngineProcessor):
# specific type of request (GET or POST)
if params['method'] == 'GET':
- req = poolrequests.get
+ req = searx.network.get
else:
- req = poolrequests.post
+ req = searx.network.post
request_args['data'] = params['data']
@@ -99,8 +95,8 @@ class OnlineProcessor(EngineProcessor):
# unexpected redirect : record an error
# but the engine might still return valid results.
status_code = str(response.status_code or '')
- reason = response.reason or ''
- hostname = str(urlparse(response.url or '').netloc)
+ reason = response.reason_phrase or ''
+ hostname = response.url.host
record_error(self.engine_name,
'{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
(status_code, reason, hostname))
@@ -128,14 +124,14 @@ class OnlineProcessor(EngineProcessor):
def search(self, query, params, result_container, start_time, timeout_limit):
# set timeout for all HTTP requests
- poolrequests.set_timeout_for_thread(timeout_limit, start_time=start_time)
+ searx.network.set_timeout_for_thread(timeout_limit, start_time=start_time)
# reset the HTTP total time
- poolrequests.reset_time_for_thread()
- # enable HTTP only if explicitly enabled
- poolrequests.set_enable_http_protocol(self.engine.enable_http)
+ searx.network.reset_time_for_thread()
+ # set the network
+ searx.network.set_context_network_name(self.engine_name)
# suppose everything will be alright
- requests_exception = False
+ http_exception = False
suspended_time = None
try:
@@ -149,7 +145,7 @@ class OnlineProcessor(EngineProcessor):
# update engine time when there is no exception
engine_time = time() - start_time
- page_load_time = poolrequests.get_time_for_thread()
+ page_load_time = searx.network.get_time_for_thread()
result_container.add_timing(self.engine_name, engine_time, page_load_time)
with threading.RLock():
self.engine.stats['engine_time'] += engine_time
@@ -162,27 +158,27 @@ class OnlineProcessor(EngineProcessor):
# Timing
engine_time = time() - start_time
- page_load_time = poolrequests.get_time_for_thread()
+ page_load_time = searx.network.get_time_for_thread()
result_container.add_timing(self.engine_name, engine_time, page_load_time)
# Record the errors
with threading.RLock():
self.engine.stats['errors'] += 1
- if (issubclass(e.__class__, requests.exceptions.Timeout)):
+ if (issubclass(e.__class__, (httpx.TimeoutException, asyncio.TimeoutError))):
result_container.add_unresponsive_engine(self.engine_name, 'HTTP timeout')
# requests timeout (connect or read)
logger.error("engine {0} : HTTP requests timeout"
"(search duration : {1} s, timeout: {2} s) : {3}"
.format(self.engine_name, engine_time, timeout_limit, e.__class__.__name__))
- requests_exception = True
- elif (issubclass(e.__class__, requests.exceptions.RequestException)):
+ http_exception = True
+ elif (issubclass(e.__class__, (httpx.HTTPError, httpx.StreamError))):
result_container.add_unresponsive_engine(self.engine_name, 'HTTP error')
# other requests exception
logger.exception("engine {0} : requests exception"
"(search duration : {1} s, timeout: {2} s) : {3}"
.format(self.engine_name, engine_time, timeout_limit, e))
- requests_exception = True
+ http_exception = True
elif (issubclass(e.__class__, SearxEngineCaptchaException)):
result_container.add_unresponsive_engine(self.engine_name, 'CAPTCHA required')
logger.exception('engine {0} : CAPTCHA'.format(self.engine_name))
@@ -206,7 +202,7 @@ class OnlineProcessor(EngineProcessor):
# suspend the engine if there is an HTTP error
# or suspended_time is defined
with threading.RLock():
- if requests_exception or suspended_time:
+ if http_exception or suspended_time:
# update continuous_errors / suspend_end_time
self.engine.continuous_errors += 1
if suspended_time is None: