diff options
Diffstat (limited to 'searx/webapp.py')
| -rwxr-xr-x | searx/webapp.py | 113 |
1 files changed, 74 insertions, 39 deletions
diff --git a/searx/webapp.py b/searx/webapp.py index 8058c6cd0..c3cd38ae8 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -26,12 +26,26 @@ if __name__ == '__main__': from os.path import realpath, dirname sys.path.append(realpath(dirname(realpath(__file__)) + '/../')) +# set Unix thread name +try: + import setproctitle +except ImportError: + pass +else: + import threading + old_thread_init = threading.Thread.__init__ + + def new_thread_init(self, *args, **kwargs): + old_thread_init(self, *args, **kwargs) + setproctitle.setthreadtitle(self._name) + threading.Thread.__init__ = new_thread_init + import hashlib import hmac import json import os -import requests +import httpx from searx import logger logger = logger.getChild('webapp') @@ -79,7 +93,7 @@ from searx.plugins import plugins from searx.plugins.oa_doi_rewrite import get_doi_resolver from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES from searx.answerers import answerers -from searx.poolrequests import get_global_proxies +from searx.network import stream as http_stream from searx.answerers import ask from searx.metrology.error_recorder import errors_per_engines @@ -774,20 +788,26 @@ def autocompleter(): # parse query raw_text_query = RawTextQuery(request.form.get('q', ''), disabled_engines) + sug_prefix = raw_text_query.getQuery() # normal autocompletion results only appear if no inner results returned # and there is a query part - if len(raw_text_query.autocomplete_list) == 0 and len(raw_text_query.getQuery()) > 0: + if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0: + # get language from cookie language = request.preferences.get_value('language') if not language or language == 'all': language = 'en' else: language = language.split('-')[0] + # run autocompletion - raw_results = search_autocomplete(request.preferences.get_value('autocomplete'), - raw_text_query.getQuery(), language) + raw_results = search_autocomplete( + request.preferences.get_value('autocomplete'), sug_prefix, language + ) for result in raw_results: + # attention: this loop will change raw_text_query object and this is + # the reason why the sug_prefix was stored before (see above) results.append(raw_text_query.changeQuery(result).getFullQuery()) if len(raw_text_query.autocomplete_list) > 0: @@ -798,13 +818,16 @@ def autocompleter(): for answer in answers: results.append(str(answer['answer'])) - # return autocompleter results if request.headers.get('X-Requested-With') == 'XMLHttpRequest': - return Response(json.dumps(results), - mimetype='application/json') + # the suggestion request comes from the searx search form + suggestions = json.dumps(results) + mimetype = 'application/json' + else: + # the suggestion request comes from browser's URL bar + suggestions = json.dumps([sug_prefix, results]) + mimetype = 'application/x-suggestions+json' - return Response(json.dumps([raw_text_query.query, results]), - mimetype='application/x-suggestions+json') + return Response(suggestions, mimetype=mimetype) @app.route('/preferences', methods=['GET', 'POST']) @@ -890,50 +913,62 @@ def _is_selected_language_supported(engine, preferences): @app.route('/image_proxy', methods=['GET']) def image_proxy(): - url = request.args.get('url').encode() + url = request.args.get('url') if not url: return '', 400 - h = new_hmac(settings['server']['secret_key'], url) + h = new_hmac(settings['server']['secret_key'], url.encode()) if h != request.args.get('h'): return '', 400 - headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) - headers['User-Agent'] = gen_useragent() + maximum_size = 5 * 1024 * 1024 - resp = requests.get(url, - stream=True, - timeout=settings['outgoing']['request_timeout'], - headers=headers, - proxies=get_global_proxies()) - - if resp.status_code == 304: - return '', resp.status_code - - if resp.status_code != 200: - logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code)) - if resp.status_code >= 400: + try: + headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) + headers['User-Agent'] = gen_useragent() + stream = http_stream( + method='GET', + url=url, + headers=headers, + timeout=settings['outgoing']['request_timeout'], + allow_redirects=True, + max_redirects=20) + + resp = next(stream) + content_length = resp.headers.get('Content-Length') + if content_length and content_length.isdigit() and int(content_length) > maximum_size: + return 'Max size', 400 + + if resp.status_code == 304: return '', resp.status_code - return '', 400 - if not resp.headers.get('content-type', '').startswith('image/'): - logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type'))) - return '', 400 + if resp.status_code != 200: + logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code)) + if resp.status_code >= 400: + return '', resp.status_code + return '', 400 + + if not resp.headers.get('content-type', '').startswith('image/'): + logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type'))) + return '', 400 - img = b'' - chunk_counter = 0 + headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'}) - for chunk in resp.iter_content(1024 * 1024): - chunk_counter += 1 - if chunk_counter > 5: - return '', 502 # Bad gateway - file is too big (>5M) - img += chunk + total_length = 0 - headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'}) + def forward_chunk(): + nonlocal total_length + for chunk in stream: + total_length += len(chunk) + if total_length > maximum_size: + break + yield chunk - return Response(img, mimetype=resp.headers['content-type'], headers=headers) + return Response(forward_chunk(), mimetype=resp.headers['Content-Type'], headers=headers) + except httpx.HTTPError: + return '', 400 @app.route('/stats', methods=['GET']) |