diff options
| author | Alexandre Flament <alex@al-f.net> | 2021-04-12 17:34:21 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-04-12 17:34:21 +0200 |
| commit | 01cefffbf6efa8a027e0e7d720970fffadb6337a (patch) | |
| tree | a4c37f7b73897c7635ee5fab01c1e8e967e23d8a /searx/webapp.py | |
| parent | 6c0114567e7ba1b3f4a54327eddf658b7474ca58 (diff) | |
| parent | d14994dc73ba5c95382812581dac146d9eceaafa (diff) | |
Merge pull request #1 from metasearch-lab/httpx_networks
Httpx networks
Diffstat (limited to 'searx/webapp.py')
| -rwxr-xr-x | searx/webapp.py | 88 |
1 file changed, 57 insertions, 31 deletions
diff --git a/searx/webapp.py b/searx/webapp.py index 072f140ca..8c59b8f3a 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -26,12 +26,26 @@ if __name__ == '__main__': from os.path import realpath, dirname sys.path.append(realpath(dirname(realpath(__file__)) + '/../')) +# set Unix thread name +try: + import setproctitle +except ImportError: + pass +else: + import threading + old_thread_init = threading.Thread.__init__ + + def new_thread_init(self, *args, **kwargs): + old_thread_init(self, *args, **kwargs) + setproctitle.setthreadtitle(self._name) + threading.Thread.__init__ = new_thread_init + import hashlib import hmac import json import os -import requests +import httpx from searx import logger logger = logger.getChild('webapp') @@ -79,7 +93,7 @@ from searx.plugins import plugins from searx.plugins.oa_doi_rewrite import get_doi_resolver from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES from searx.answerers import answerers -from searx.poolrequests import get_global_proxies +from searx.network import stream as http_stream from searx.answerers import ask from searx.metrology.error_recorder import errors_per_engines @@ -890,50 +904,62 @@ def _is_selected_language_supported(engine, preferences): @app.route('/image_proxy', methods=['GET']) def image_proxy(): - url = request.args.get('url').encode() + url = request.args.get('url') if not url: return '', 400 - h = new_hmac(settings['server']['secret_key'], url) + h = new_hmac(settings['server']['secret_key'], url.encode()) if h != request.args.get('h'): return '', 400 - headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) - headers['User-Agent'] = gen_useragent() - - resp = requests.get(url, - stream=True, - timeout=settings['outgoing']['request_timeout'], - headers=headers, - proxies=get_global_proxies()) + maximum_size = 5 * 1024 * 1024 - if resp.status_code == 304: - return '', resp.status_code - - if resp.status_code != 200: - logger.debug('image-proxy: 
wrong response code: {0}'.format(resp.status_code)) - if resp.status_code >= 400: + try: + headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) + headers['User-Agent'] = gen_useragent() + stream = http_stream( + method='GET', + url=url, + headers=headers, + timeout=settings['outgoing']['request_timeout'], + allow_redirects=True, + max_redirects=20) + + resp = next(stream) + content_length = resp.headers.get('Content-Length') + if content_length and content_length.isdigit() and int(content_length) > maximum_size: + return 'Max size', 400 + + if resp.status_code == 304: return '', resp.status_code - return '', 400 - if not resp.headers.get('content-type', '').startswith('image/'): - logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type'))) - return '', 400 + if resp.status_code != 200: + logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code)) + if resp.status_code >= 400: + return '', resp.status_code + return '', 400 - img = b'' - chunk_counter = 0 + if not resp.headers.get('content-type', '').startswith('image/'): + logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type'))) + return '', 400 - for chunk in resp.iter_content(1024 * 1024): - chunk_counter += 1 - if chunk_counter > 5: - return '', 502 # Bad gateway - file is too big (>5M) - img += chunk + headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'}) - headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'}) + total_length = 0 - return Response(img, mimetype=resp.headers['content-type'], headers=headers) + def forward_chunk(): + nonlocal total_length + for chunk in stream: + total_length += len(chunk) + if total_length > maximum_size: + break + yield chunk + + return Response(forward_chunk(), mimetype=resp.headers['Content-Type'], headers=headers) + except httpx.HTTPError: + return '', 
400 @app.route('/stats', methods=['GET']) |