diff options
Diffstat (limited to 'searx/webapp.py')
| -rw-r--r-- | searx/webapp.py | 95 |
1 files changed, 84 insertions, 11 deletions
diff --git a/searx/webapp.py b/searx/webapp.py index 42cb42678..830cf440a 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -47,15 +47,19 @@ from searx.utils import ( from searx.https_rewrite import https_rules from searx.languages import language_codes from searx.search import Search +from searx.query import Query from searx.autocomplete import backends as autocomplete_backends +from urlparse import urlparse +import re + static_path, templates_path, themes =\ get_themes(settings['themes_path'] if settings.get('themes_path') else searx_dir) -default_theme = settings['default_theme'] if \ - settings.get('default_theme', None) else 'default' + +default_theme = settings['server'].get('default_theme', 'default') app = Flask( __name__, @@ -198,23 +202,67 @@ def index(): 'index.html', ) - search.results, search.suggestions = search.search(request) + search.results, search.suggestions, search.answers, search.infoboxes = search.search(request) for result in search.results: if not search.paging and engines[result['engine']].paging: search.paging = True + # check if HTTPS rewrite is required if settings['server']['https_rewrite']\ and result['parsed_url'].scheme == 'http': - for http_regex, https_url in https_rules: - if http_regex.match(result['url']): - result['url'] = http_regex.sub(https_url, result['url']) - # TODO result['parsed_url'].scheme + skip_https_rewrite = False + + # check if HTTPS rewrite is possible + for target, rules, exclusions in https_rules: + + # check if target regex match with url + if target.match(result['url']): + # process exclusions + for exclusion in exclusions: + # check if exclusion match with url + if exclusion.match(result['url']): + skip_https_rewrite = True + break + + # skip https rewrite if required + if skip_https_rewrite: + break + + # process rules + for rule in rules: + try: + # TODO, precompile rule + p = re.compile(rule[0]) + + # rewrite url if possible + new_result_url = p.sub(rule[1], result['url']) + except: + break + + # parse new url + new_parsed_url = urlparse(new_result_url) + + # continiue if nothing was rewritten + if result['url'] == new_result_url: + continue + + # get domainname from result + # TODO, does only work correct with TLD's like asdf.com, not for asdf.com.de + # TODO, using publicsuffix instead of this rewrite rule + old_result_domainname = '.'.join(result['parsed_url'].hostname.split('.')[-2:]) + new_result_domainname = '.'.join(new_parsed_url.hostname.split('.')[-2:]) + + # check if rewritten hostname is the same, to protect against wrong or malicious rewrite rules + if old_result_domainname == new_result_domainname: + # set new url + result['url'] = new_result_url + + # target has matched, do not search over the other rules break - # HTTPS rewrite if search.request_data.get('format', 'html') == 'html': if 'content' in result: result['content'] = highlight_content(result['content'], @@ -291,6 +339,8 @@ def index(): pageno=search.pageno, base_url=get_base_url(), suggestions=search.suggestions, + answers=search.answers, + infoboxes=search.infoboxes, theme=get_current_theme_name() ) @@ -308,23 +358,46 @@ def autocompleter(): """Return autocompleter results""" request_data = {} + # select request method if request.method == 'POST': request_data = request.form else: request_data = request.args - query = request_data.get('q', '').encode('utf-8') + # set blocked engines + if request.cookies.get('blocked_engines'): + blocked_engines = request.cookies['blocked_engines'].split(',') # noqa + else: + blocked_engines = [] + + # parse query + query = Query(request_data.get('q', '').encode('utf-8'), blocked_engines) + query.parse_query() - if not query: + # check if search query is set + if not query.getSearchQuery(): return + # run autocompleter completer = autocomplete_backends.get(request.cookies.get('autocomplete')) + # check if valid autocompleter is selected if not completer: return - results = completer(query) + # run autocompletion + raw_results = completer(query.getSearchQuery()) + + # parse results (write :language and !engine back to result string) + results = [] + for result in raw_results: + result_query = query + result_query.changeSearchQuery(result) + + # add parsed result + results.append(result_query.getFullQuery()) + # return autocompleter results if request_data.get('format') == 'x-suggestions': return Response(json.dumps([query, results]), mimetype='application/json') |