summary | refs | log | tree | commit | diff
path: root/searx/webapp.py
diff options
context:
space:
mode:
Diffstat (limited to 'searx/webapp.py')
-rwxr-xr-x searx/webapp.py 113
1 files changed, 74 insertions, 39 deletions
diff --git a/searx/webapp.py b/searx/webapp.py
index 8058c6cd0..c3cd38ae8 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -26,12 +26,26 @@ if __name__ == '__main__':
from os.path import realpath, dirname
sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
+# set Unix thread name
+try:
+ import setproctitle
+except ImportError:
+ pass
+else:
+ import threading
+ old_thread_init = threading.Thread.__init__
+
+ def new_thread_init(self, *args, **kwargs):
+ old_thread_init(self, *args, **kwargs)
+ setproctitle.setthreadtitle(self._name)
+ threading.Thread.__init__ = new_thread_init
+
import hashlib
import hmac
import json
import os
-import requests
+import httpx
from searx import logger
logger = logger.getChild('webapp')
@@ -79,7 +93,7 @@ from searx.plugins import plugins
from searx.plugins.oa_doi_rewrite import get_doi_resolver
from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
from searx.answerers import answerers
-from searx.poolrequests import get_global_proxies
+from searx.network import stream as http_stream
from searx.answerers import ask
from searx.metrology.error_recorder import errors_per_engines
@@ -774,20 +788,26 @@ def autocompleter():
# parse query
raw_text_query = RawTextQuery(request.form.get('q', ''), disabled_engines)
+ sug_prefix = raw_text_query.getQuery()
# normal autocompletion results only appear if no inner results returned
# and there is a query part
- if len(raw_text_query.autocomplete_list) == 0 and len(raw_text_query.getQuery()) > 0:
+ if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0:
+
# get language from cookie
language = request.preferences.get_value('language')
if not language or language == 'all':
language = 'en'
else:
language = language.split('-')[0]
+
# run autocompletion
- raw_results = search_autocomplete(request.preferences.get_value('autocomplete'),
- raw_text_query.getQuery(), language)
+ raw_results = search_autocomplete(
+ request.preferences.get_value('autocomplete'), sug_prefix, language
+ )
for result in raw_results:
+ # attention: this loop will change raw_text_query object and this is
+ # the reason why the sug_prefix was stored before (see above)
results.append(raw_text_query.changeQuery(result).getFullQuery())
if len(raw_text_query.autocomplete_list) > 0:
@@ -798,13 +818,16 @@ def autocompleter():
for answer in answers:
results.append(str(answer['answer']))
- # return autocompleter results
if request.headers.get('X-Requested-With') == 'XMLHttpRequest':
- return Response(json.dumps(results),
- mimetype='application/json')
+ # the suggestion request comes from the searx search form
+ suggestions = json.dumps(results)
+ mimetype = 'application/json'
+ else:
+ # the suggestion request comes from browser's URL bar
+ suggestions = json.dumps([sug_prefix, results])
+ mimetype = 'application/x-suggestions+json'
- return Response(json.dumps([raw_text_query.query, results]),
- mimetype='application/x-suggestions+json')
+ return Response(suggestions, mimetype=mimetype)
@app.route('/preferences', methods=['GET', 'POST'])
@@ -890,50 +913,62 @@ def _is_selected_language_supported(engine, preferences):
@app.route('/image_proxy', methods=['GET'])
def image_proxy():
- url = request.args.get('url').encode()
+ url = request.args.get('url')
if not url:
return '', 400
- h = new_hmac(settings['server']['secret_key'], url)
+ h = new_hmac(settings['server']['secret_key'], url.encode())
if h != request.args.get('h'):
return '', 400
- headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
- headers['User-Agent'] = gen_useragent()
+ maximum_size = 5 * 1024 * 1024
- resp = requests.get(url,
- stream=True,
- timeout=settings['outgoing']['request_timeout'],
- headers=headers,
- proxies=get_global_proxies())
-
- if resp.status_code == 304:
- return '', resp.status_code
-
- if resp.status_code != 200:
- logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
- if resp.status_code >= 400:
+ try:
+ headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
+ headers['User-Agent'] = gen_useragent()
+ stream = http_stream(
+ method='GET',
+ url=url,
+ headers=headers,
+ timeout=settings['outgoing']['request_timeout'],
+ allow_redirects=True,
+ max_redirects=20)
+
+ resp = next(stream)
+ content_length = resp.headers.get('Content-Length')
+ if content_length and content_length.isdigit() and int(content_length) > maximum_size:
+ return 'Max size', 400
+
+ if resp.status_code == 304:
return '', resp.status_code
- return '', 400
- if not resp.headers.get('content-type', '').startswith('image/'):
- logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
- return '', 400
+ if resp.status_code != 200:
+ logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
+ if resp.status_code >= 400:
+ return '', resp.status_code
+ return '', 400
+
+ if not resp.headers.get('content-type', '').startswith('image/'):
+ logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
+ return '', 400
- img = b''
- chunk_counter = 0
+ headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})
- for chunk in resp.iter_content(1024 * 1024):
- chunk_counter += 1
- if chunk_counter > 5:
- return '', 502 # Bad gateway - file is too big (>5M)
- img += chunk
+ total_length = 0
- headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})
+ def forward_chunk():
+ nonlocal total_length
+ for chunk in stream:
+ total_length += len(chunk)
+ if total_length > maximum_size:
+ break
+ yield chunk
- return Response(img, mimetype=resp.headers['content-type'], headers=headers)
+ return Response(forward_chunk(), mimetype=resp.headers['Content-Type'], headers=headers)
+ except httpx.HTTPError:
+ return '', 400
@app.route('/stats', methods=['GET'])