summaryrefslogtreecommitdiff
path: root/searx/webapp.py
diff options
context:
space:
mode:
Diffstat (limited to 'searx/webapp.py')
-rwxr-xr-xsearx/webapp.py145
1 files changed, 91 insertions, 54 deletions
diff --git a/searx/webapp.py b/searx/webapp.py
index cb5183d06..e05488bf5 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -26,12 +26,26 @@ if __name__ == '__main__':
from os.path import realpath, dirname
sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
+# set Unix thread name
+try:
+ import setproctitle
+except ImportError:
+ pass
+else:
+ import threading
+ old_thread_init = threading.Thread.__init__
+
+ def new_thread_init(self, *args, **kwargs):
+ old_thread_init(self, *args, **kwargs)
+ setproctitle.setthreadtitle(self._name)
+ threading.Thread.__init__ = new_thread_init
+
import hashlib
import hmac
import json
import os
-import requests
+import httpx
from searx import logger
logger = logger.getChild('webapp')
@@ -40,7 +54,7 @@ from datetime import datetime, timedelta
from time import time
from html import escape
from io import StringIO
-from urllib.parse import urlencode, urljoin, urlparse
+from urllib.parse import urlencode, urlparse
from pygments import highlight
from pygments.lexers import get_lexer_by_name
@@ -79,7 +93,7 @@ from searx.plugins import plugins
from searx.plugins.oa_doi_rewrite import get_doi_resolver
from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
from searx.answerers import answerers
-from searx.poolrequests import get_global_proxies
+from searx.network import stream as http_stream
from searx.answerers import ask
from searx.metrology.error_recorder import errors_per_engines
@@ -270,14 +284,7 @@ def extract_domain(url):
def get_base_url():
- if settings['server']['base_url']:
- hostname = settings['server']['base_url']
- else:
- scheme = 'http'
- if request.is_secure:
- scheme = 'https'
- hostname = url_for('index', _external=True, _scheme=scheme)
- return hostname
+ return url_for('index', _external=True)
def get_current_theme_name(override=None):
@@ -310,10 +317,6 @@ def url_for_theme(endpoint, override_theme=None, **values):
if filename_with_theme in static_files:
values['filename'] = filename_with_theme
url = url_for(endpoint, **values)
- if settings['server']['base_url']:
- if url.startswith('/'):
- url = url[1:]
- url = urljoin(settings['server']['base_url'], url)
return url
@@ -650,7 +653,7 @@ def search():
result['pretty_url'] = prettify_url(result['url'])
# TODO, check if timezone is calculated right
- if 'publishedDate' in result:
+ if result.get('publishedDate'): # do not try to get a date from an empty string or a None type
try: # test if publishedDate >= 1900 (datetime module bug)
result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
except ValueError:
@@ -833,7 +836,7 @@ def preferences():
# save preferences
if request.method == 'POST':
- resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index'))))
+ resp = make_response(url_for('index', _external=True))
try:
request.preferences.parse_form(request.form)
except ValidationException:
@@ -910,50 +913,62 @@ def _is_selected_language_supported(engine, preferences):
@app.route('/image_proxy', methods=['GET'])
def image_proxy():
- url = request.args.get('url').encode()
+ url = request.args.get('url')
if not url:
return '', 400
- h = new_hmac(settings['server']['secret_key'], url)
+ h = new_hmac(settings['server']['secret_key'], url.encode())
if h != request.args.get('h'):
return '', 400
- headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
- headers['User-Agent'] = gen_useragent()
-
- resp = requests.get(url,
- stream=True,
- timeout=settings['outgoing']['request_timeout'],
- headers=headers,
- proxies=get_global_proxies())
+ maximum_size = 5 * 1024 * 1024
- if resp.status_code == 304:
- return '', resp.status_code
-
- if resp.status_code != 200:
- logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
- if resp.status_code >= 400:
+ try:
+ headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
+ headers['User-Agent'] = gen_useragent()
+ stream = http_stream(
+ method='GET',
+ url=url,
+ headers=headers,
+ timeout=settings['outgoing']['request_timeout'],
+ allow_redirects=True,
+ max_redirects=20)
+
+ resp = next(stream)
+ content_length = resp.headers.get('Content-Length')
+ if content_length and content_length.isdigit() and int(content_length) > maximum_size:
+ return 'Max size', 400
+
+ if resp.status_code == 304:
return '', resp.status_code
- return '', 400
- if not resp.headers.get('content-type', '').startswith('image/'):
- logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
- return '', 400
+ if resp.status_code != 200:
+ logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
+ if resp.status_code >= 400:
+ return '', resp.status_code
+ return '', 400
- img = b''
- chunk_counter = 0
+ if not resp.headers.get('content-type', '').startswith('image/'):
+ logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
+ return '', 400
- for chunk in resp.iter_content(1024 * 1024):
- chunk_counter += 1
- if chunk_counter > 5:
- return '', 502 # Bad gateway - file is too big (>5M)
- img += chunk
+ headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})
- headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})
+ total_length = 0
- return Response(img, mimetype=resp.headers['content-type'], headers=headers)
+ def forward_chunk():
+ nonlocal total_length
+ for chunk in stream:
+ total_length += len(chunk)
+ if total_length > maximum_size:
+ break
+ yield chunk
+
+ return Response(forward_chunk(), mimetype=resp.headers['Content-Type'], headers=headers)
+ except httpx.HTTPError:
+ return '', 400
@app.route('/stats', methods=['GET'])
@@ -1022,11 +1037,11 @@ def opensearch():
if request.headers.get('User-Agent', '').lower().find('webkit') >= 0:
method = 'get'
- ret = render('opensearch.xml',
- opensearch_method=method,
- host=get_base_url(),
- urljoin=urljoin,
- override_theme='__common__')
+ ret = render(
+ 'opensearch.xml',
+ opensearch_method=method,
+ override_theme='__common__'
+ )
resp = Response(response=ret,
status=200,
@@ -1047,7 +1062,7 @@ def favicon():
@app.route('/clear_cookies')
def clear_cookies():
- resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index'))))
+ resp = make_response(redirect(url_for('index', _external=True)))
for cookie_name in request.cookies:
resp.delete_cookie(cookie_name)
return resp
@@ -1140,19 +1155,41 @@ class ReverseProxyPathFix:
'''
def __init__(self, app):
+
self.app = app
+ self.script_name = None
+ self.scheme = None
+ self.server = None
+
+ if settings['server']['base_url']:
+
+ # If base_url is specified, then these values from are given
+ # preference over any Flask's generics.
+
+ base_url = urlparse(settings['server']['base_url'])
+ self.script_name = base_url.path
+ if self.script_name.endswith('/'):
+ # remove trailing slash to avoid infinite redirect on the index
+ # see https://github.com/searx/searx/issues/2729
+ self.script_name = self.script_name[:-1]
+ self.scheme = base_url.scheme
+ self.server = base_url.netloc
def __call__(self, environ, start_response):
- script_name = environ.get('HTTP_X_SCRIPT_NAME', '')
+ script_name = self.script_name or environ.get('HTTP_X_SCRIPT_NAME', '')
if script_name:
environ['SCRIPT_NAME'] = script_name
path_info = environ['PATH_INFO']
if path_info.startswith(script_name):
environ['PATH_INFO'] = path_info[len(script_name):]
- scheme = environ.get('HTTP_X_SCHEME', '')
+ scheme = self.scheme or environ.get('HTTP_X_SCHEME', '')
if scheme:
environ['wsgi.url_scheme'] = scheme
+
+ server = self.server or environ.get('HTTP_X_FORWARDED_HOST', '')
+ if server:
+ environ['HTTP_HOST'] = server
return self.app(environ, start_response)