From eaa694fb7d0e47b943bc6d6edb6cb6a40ab2d85e Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Thu, 18 Mar 2021 19:59:01 +0100 Subject: [enh] replace requests by httpx --- searx/webapp.py | 93 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 62 insertions(+), 31 deletions(-) (limited to 'searx/webapp.py') diff --git a/searx/webapp.py b/searx/webapp.py index 072f140ca..1571df8f1 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -26,12 +26,26 @@ if __name__ == '__main__': from os.path import realpath, dirname sys.path.append(realpath(dirname(realpath(__file__)) + '/../')) +# set Unix thread name +try: + import setproctitle +except ImportError: + pass +else: + import threading + old_thread_init = threading.Thread.__init__ + + def new_thread_init(self, *args, **kwargs): + old_thread_init(self, *args, **kwargs) + setproctitle.setthreadtitle(self._name) + threading.Thread.__init__ = new_thread_init + import hashlib import hmac import json import os -import requests +import httpx from searx import logger logger = logger.getChild('webapp') @@ -79,7 +93,7 @@ from searx.plugins import plugins from searx.plugins.oa_doi_rewrite import get_doi_resolver from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES from searx.answerers import answerers -from searx.poolrequests import get_global_proxies +from searx import poolrequests from searx.answerers import ask from searx.metrology.error_recorder import errors_per_engines @@ -890,50 +904,62 @@ def _is_selected_language_supported(engine, preferences): @app.route('/image_proxy', methods=['GET']) def image_proxy(): - url = request.args.get('url').encode() + url = request.args.get('url') if not url: return '', 400 - h = new_hmac(settings['server']['secret_key'], url) + h = new_hmac(settings['server']['secret_key'], url.encode()) if h != request.args.get('h'): return '', 400 - headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) - headers['User-Agent'] = 
gen_useragent() - - resp = requests.get(url, - stream=True, - timeout=settings['outgoing']['request_timeout'], - headers=headers, - proxies=get_global_proxies()) + maximum_size = 5 * 1024 * 1024 - if resp.status_code == 304: - return '', resp.status_code - - if resp.status_code != 200: - logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code)) - if resp.status_code >= 400: + try: + headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) + headers['User-Agent'] = gen_useragent() + stream = poolrequests.stream( + method='GET', + url=url, + headers=headers, + timeout=settings['outgoing']['request_timeout'], + allow_redirects=True, + max_redirects=20) + + resp = next(stream) + content_length = resp.headers.get('Content-Length') + if content_length and content_length.isdigit() and int(content_length) > maximum_size: + return 'Max size', 400 + + if resp.status_code == 304: return '', resp.status_code - return '', 400 - if not resp.headers.get('content-type', '').startswith('image/'): - logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type'))) - return '', 400 + if resp.status_code != 200: + logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code)) + if resp.status_code >= 400: + return '', resp.status_code + return '', 400 + + if not resp.headers.get('content-type', '').startswith('image/'): + logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type'))) + return '', 400 - img = b'' - chunk_counter = 0 + headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'}) - for chunk in resp.iter_content(1024 * 1024): - chunk_counter += 1 - if chunk_counter > 5: - return '', 502 # Bad gateway - file is too big (>5M) - img += chunk + total_length = 0 - headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'}) + def forward_chunk(): + nonlocal 
total_length + for chunk in stream: + total_length += len(chunk) + if total_length > maximum_size: + break + yield chunk - return Response(img, mimetype=resp.headers['content-type'], headers=headers) + return Response(forward_chunk(), mimetype=resp.headers['Content-Type'], headers=headers) + except httpx.HTTPError: + return '', 400 @app.route('/stats', methods=['GET']) @@ -1083,6 +1109,11 @@ def config(): }) +@app.route('/config/http') +def config_http(): + return jsonify(poolrequests.debug_asyncclients()) + + @app.errorhandler(404) def page_not_found(e): return render('404.html'), 404 -- cgit v1.2.3 From d14994dc73ba5c95382812581dac146d9eceaafa Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Mon, 5 Apr 2021 10:43:33 +0200 Subject: [httpx] replace searx.poolrequests by searx.network settings.yml: * outgoing.networks: * can contain network definitions * properties: enable_http, verify, http2, max_connections, max_keepalive_connections, keepalive_expiry, local_addresses, support_ipv4, support_ipv6, proxies, max_redirects, retries * retries: 0 by default, number of times searx retries to send the HTTP request (using different IP & proxy each time) * local_addresses can be "192.168.0.1/24" (it supports IPv6) * support_ipv4 & support_ipv6: both True by default see https://github.com/searx/searx/pull/1034 * each engine can define a "network" section: * either a full network description * or a reference to an existing network * all HTTP requests of an engine use the same HTTP configuration (it was not the case before, see proxy configuration in master) --- searx/webapp.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'searx/webapp.py') diff --git a/searx/webapp.py b/searx/webapp.py index 1571df8f1..8c59b8f3a 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -93,7 +93,7 @@ from searx.plugins import plugins from searx.plugins.oa_doi_rewrite import get_doi_resolver from searx.preferences import Preferences, ValidationException, 
LANGUAGE_CODES from searx.answerers import answerers -from searx import poolrequests +from searx.network import stream as http_stream from searx.answerers import ask from searx.metrology.error_recorder import errors_per_engines @@ -919,7 +919,7 @@ def image_proxy(): try: headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) headers['User-Agent'] = gen_useragent() - stream = poolrequests.stream( + stream = http_stream( method='GET', url=url, headers=headers, @@ -1109,11 +1109,6 @@ def config(): }) -@app.route('/config/http') -def config_http(): - return jsonify(poolrequests.debug_asyncclients()) - - @app.errorhandler(404) def page_not_found(e): return render('404.html'), 404 -- cgit v1.2.3