diff options
Diffstat (limited to 'searx/webapp.py')
| -rw-r--r-- | searx/webapp.py | 186 |
1 files changed, 118 insertions, 68 deletions
diff --git a/searx/webapp.py b/searx/webapp.py index 460681b35..096e1f269 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -22,10 +22,11 @@ if __name__ == '__main__': from os.path import realpath, dirname path.append(realpath(dirname(realpath(__file__)) + '/../')) -import json import cStringIO -import os import hashlib +import hmac +import json +import os import requests from searx import logger @@ -39,7 +40,7 @@ except: logger.critical("cannot import dependency: pygments") from sys import exit exit(1) - +from cgi import escape from datetime import datetime, timedelta from urllib import urlencode from urlparse import urlparse, urljoin @@ -50,7 +51,7 @@ from flask import ( ) from flask_babel import Babel, gettext, format_date, format_decimal from flask.json import jsonify -from searx import settings, searx_dir +from searx import settings, searx_dir, searx_debug from searx.engines import ( categories, engines, get_engines_stats, engine_shortcuts ) @@ -61,11 +62,12 @@ from searx.utils import ( ) from searx.version import VERSION_STRING from searx.languages import language_codes -from searx.search import Search -from searx.query import Query +from searx.search import SearchWithPlugins, get_search_query_from_webapp +from searx.query import RawTextQuery from searx.autocomplete import searx_bang, backends as autocomplete_backends from searx.plugins import plugins from searx.preferences import Preferences, ValidationException +from searx.answerers import answerers # check if the pyopenssl, ndg-httpsclient, pyasn1 packages are installed. # They are needed for SSL connection without trouble, see #298 @@ -77,6 +79,9 @@ except ImportError: logger.critical("The pyopenssl, ndg-httpsclient, pyasn1 packages have to be installed.\n" "Some HTTPS connections will fail") +# serve pages with HTTP/1.1 +from werkzeug.serving import WSGIRequestHandler +WSGIRequestHandler.protocol_version = "HTTP/1.1" static_path, templates_path, themes =\ get_themes(settings['ui']['themes_path'] @@ -242,6 +247,24 @@ def url_for_theme(endpoint, override_theme=None, **values): return url_for(endpoint, **values) +def proxify(url): + if url.startswith('//'): + url = 'https:' + url + + if not settings.get('result_proxy'): + return url + + url_params = dict(mortyurl=url.encode('utf-8')) + + if settings['result_proxy'].get('key'): + url_params['mortyhash'] = hmac.new(settings['result_proxy']['key'], + url.encode('utf-8'), + hashlib.sha256).hexdigest() + + return '{0}?{1}'.format(settings['result_proxy']['url'], + urlencode(url_params)) + + def image_proxify(url): if url.startswith('//'): @@ -250,8 +273,10 @@ def image_proxify(url): if not request.preferences.get_value('image_proxy'): return url - hash_string = url + settings['server']['secret_key'] - h = hashlib.sha256(hash_string.encode('utf-8')).hexdigest() + if settings.get('result_proxy'): + return proxify(url) + + h = hmac.new(settings['server']['secret_key'], url.encode('utf-8'), hashlib.sha256).hexdigest() return '{0}?{1}'.format(url_for('image_proxy'), urlencode(dict(url=url.encode('utf-8'), h=h))) @@ -310,6 +335,8 @@ def render(template_name, override_theme=None, **kwargs): kwargs['image_proxify'] = image_proxify + kwargs['proxify'] = proxify if settings.get('result_proxy') else None + kwargs['get_result_template'] = get_result_template kwargs['theme'] = get_current_theme_name(override=override_theme) @@ -318,8 +345,12 @@ def render(template_name, override_theme=None, **kwargs): kwargs['cookies'] = request.cookies + kwargs['errors'] = request.errors + kwargs['instance_name'] = settings['general']['instance_name'] + kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab') + kwargs['scripts'] = set() for plugin in request.user_plugins: for script in plugin.js_dependencies: @@ -336,16 +367,23 @@ def render(template_name, override_theme=None, **kwargs): @app.before_request def pre_request(): - # merge GET, POST vars + request.errors = [] + preferences = Preferences(themes, categories.keys(), engines, plugins) - preferences.parse_cookies(request.cookies) request.preferences = preferences + try: + preferences.parse_cookies(request.cookies) + except: + request.errors.append(gettext('Invalid settings, please edit your preferences')) + # merge GET, POST vars + # request.form request.form = dict(request.form.items()) for k, v in request.args.items(): if k not in request.form: request.form[k] = v + # request.user_plugins request.user_plugins = [] allowed_plugins = preferences.plugins.get_enabled() disabled_plugins = preferences.plugins.get_disabled() @@ -363,37 +401,42 @@ def index(): Supported outputs: html, json, csv, rss. """ - if not request.args and not request.form: + if request.form.get('q') is None: return render( 'index.html', ) + # search + search_query = None + result_container = None try: - search = Search(request) + search_query = get_search_query_from_webapp(request.preferences, request.form) + # search = Search(search_query) # without plugins + search = SearchWithPlugins(search_query, request) + result_container = search.search() except: + request.errors.append(gettext('search error')) + logger.exception('search error') return render( 'index.html', ) - if plugins.call('pre_search', request, locals()): - search.search(request) - - plugins.call('post_search', request, locals()) + results = result_container.get_ordered_results() - results = search.result_container.get_ordered_results() + # UI + advanced_search = request.form.get('advanced_search', None) + output_format = request.form.get('format', 'html') + if output_format not in ['html', 'csv', 'json', 'rss']: + output_format = 'html' + # output for result in results: - - plugins.call('on_result', request, locals()) - if not search.paging and engines[result['engine']].paging: - search.paging = True - - if search.request_data.get('format', 'html') == 'html': - if 'content' in result: - result['content'] = highlight_content(result['content'], - search.query.encode('utf-8')) # noqa - result['title'] = highlight_content(result['title'], - search.query.encode('utf-8')) + if output_format == 'html': + if 'content' in result and result['content']: + result['content'] = highlight_content(escape(result['content'][:1024]), + search_query.query.encode('utf-8')) + result['title'] = highlight_content(escape(result['title'] or u''), + search_query.query.encode('utf-8')) else: if result.get('content'): result['content'] = html_to_text(result['content']).strip() @@ -420,16 +463,19 @@ def index(): else: result['publishedDate'] = format_date(result['publishedDate']) - number_of_results = search.result_container.results_number() - if number_of_results < search.result_container.results_length(): + number_of_results = result_container.results_number() + if number_of_results < result_container.results_length(): number_of_results = 0 - if search.request_data.get('format') == 'json': - return Response(json.dumps({'query': search.query, + if output_format == 'json': + return Response(json.dumps({'query': search_query.query, 'number_of_results': number_of_results, - 'results': results}), + 'results': results, + 'answers': list(result_container.answers), + 'infoboxes': result_container.infoboxes, + 'suggestions': list(result_container.suggestions)}), mimetype='application/json') - elif search.request_data.get('format') == 'csv': + elif output_format == 'csv': csv = UnicodeWriter(cStringIO.StringIO()) keys = ('title', 'url', 'content', 'host', 'engine', 'score') csv.writerow(keys) @@ -438,14 +484,14 @@ def index(): csv.writerow([row.get(key, '') for key in keys]) csv.stream.seek(0) response = Response(csv.stream.read(), mimetype='application/csv') - cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query.encode('utf-8')) + cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8')) response.headers.add('Content-Disposition', cont_disp) return response - elif search.request_data.get('format') == 'rss': + elif output_format == 'rss': response_rss = render( 'opensearch_response_rss.xml', results=results, - q=search.request_data['q'], + q=request.form['q'], number_of_results=number_of_results, base_url=get_base_url() ) @@ -454,17 +500,17 @@ def index(): return render( 'results.html', results=results, - q=search.request_data['q'], - selected_categories=search.categories, - paging=search.paging, + q=request.form['q'], + selected_categories=search_query.categories, + pageno=search_query.pageno, + time_range=search_query.time_range, number_of_results=format_decimal(number_of_results), - pageno=search.pageno, - advanced_search=search.is_advanced, - time_range=search.time_range, + advanced_search=advanced_search, + suggestions=result_container.suggestions, + answers=result_container.answers, + infoboxes=result_container.infoboxes, + paging=result_container.paging, base_url=get_base_url(), - suggestions=search.result_container.suggestions, - answers=search.result_container.answers, - infoboxes=search.result_container.infoboxes, theme=get_current_theme_name(), favicons=global_favicons[themes.index(get_current_theme_name())] ) @@ -481,30 +527,23 @@ def about(): @app.route('/autocompleter', methods=['GET', 'POST']) def autocompleter(): """Return autocompleter results""" - request_data = {} - - # select request method - if request.method == 'POST': - request_data = request.form - else: - request_data = request.args # set blocked engines disabled_engines = request.preferences.engines.get_disabled() # parse query - query = Query(request_data.get('q', '').encode('utf-8'), disabled_engines) - query.parse_query() + raw_text_query = RawTextQuery(request.form.get('q', '').encode('utf-8'), disabled_engines) + raw_text_query.parse_query() # check if search query is set - if not query.getSearchQuery(): + if not raw_text_query.getSearchQuery(): return '', 400 # run autocompleter completer = autocomplete_backends.get(request.preferences.get_value('autocomplete')) # parse searx specific autocompleter results like !bang - raw_results = searx_bang(query) + raw_results = searx_bang(raw_text_query) # normal autocompletion results only appear if max 3 inner results returned if len(raw_results) <= 3 and completer: @@ -515,19 +554,19 @@ def autocompleter(): else: language = language.split('_')[0] # run autocompletion - raw_results.extend(completer(query.getSearchQuery(), language)) + raw_results.extend(completer(raw_text_query.getSearchQuery(), language)) # parse results (write :language and !engine back to result string) results = [] for result in raw_results: - query.changeSearchQuery(result) + raw_text_query.changeSearchQuery(result) # add parsed result - results.append(query.getFullQuery()) + results.append(raw_text_query.getFullQuery()) # return autocompleter results - if request_data.get('format') == 'x-suggestions': - return Response(json.dumps([query.query, results]), + if request.form.get('format') == 'x-suggestions': + return Response(json.dumps([raw_text_query.query, results]), mimetype='application/json') return Response(json.dumps(results), @@ -544,7 +583,7 @@ def preferences(): try: request.preferences.parse_form(request.form) except ValidationException: - # TODO use flash feature of flask + request.errors.append(gettext('Invalid settings, please edit your preferences')) return resp return request.preferences.save(resp) @@ -565,6 +604,8 @@ def preferences(): if e.timeout > settings['outgoing']['request_timeout']: stats[e.name]['warn_timeout'] = True + # get first element [0], the engine time, + # and then the second element [1] : the time (the first one is the label) for engine_stat in get_engines_stats()[0][1]: stats[engine_stat.get('name')]['time'] = round(engine_stat.get('avg'), 3) if engine_stat.get('avg') > settings['outgoing']['request_timeout']: @@ -579,6 +620,7 @@ def preferences(): language_codes=language_codes, engines_by_category=categories, stats=stats, + answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers], disabled_engines=disabled_engines, autocomplete_backends=autocomplete_backends, shortcuts={y: x for x, y in engine_shortcuts.items()}, @@ -595,7 +637,7 @@ def image_proxy(): if not url: return '', 400 - h = hashlib.sha256(url + settings['server']['secret_key'].encode('utf-8')).hexdigest() + h = hmac.new(settings['server']['secret_key'], url, hashlib.sha256).hexdigest() if h != request.args.get('h'): return '', 400 @@ -653,6 +695,7 @@ Allow: / Allow: /about Disallow: /stats Disallow: /preferences +Disallow: /*?*q=* """, mimetype='text/plain') @@ -712,15 +755,22 @@ def config(): 'default_locale': settings['ui']['default_locale'], 'autocomplete': settings['search']['autocomplete'], 'safe_search': settings['search']['safe_search'], - 'default_theme': settings['ui']['default_theme']}) + 'default_theme': settings['ui']['default_theme'], + 'version': VERSION_STRING}) + + +@app.errorhandler(404) +def page_not_found(e): + return render('404.html'), 404 def run(): app.run( - debug=settings['general']['debug'], - use_debugger=settings['general']['debug'], + debug=searx_debug, + use_debugger=searx_debug, port=settings['server']['port'], - host=settings['server']['bind_address'] + host=settings['server']['bind_address'], + threaded=True ) |