From 4da795895d3699e0c185a344806d60b40027cc4f Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Thu, 9 Oct 2014 19:26:02 +0200 Subject: [fix] default template handling --- searx/webapp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'searx/webapp.py') diff --git a/searx/webapp.py b/searx/webapp.py index 74f8fad7e..18dc89a39 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -55,8 +55,8 @@ static_path, templates_path, themes =\ get_themes(settings['themes_path'] if settings.get('themes_path') else searx_dir) -default_theme = settings['default_theme'] if \ - settings.get('default_theme', None) else 'default' + +default_theme = settings['server'].get('default_theme', 'default') app = Flask( __name__, -- cgit v1.2.3 From 9b9f097adbf39d7908931203e9d8966748900bde Mon Sep 17 00:00:00 2001 From: Thomas Pointhuber Date: Sun, 14 Sep 2014 11:09:44 +0200 Subject: Implementing https rewrite support #71 * parsing XML-Files which contain target, exclusions and rules * convert regex if required (is a little hack, probably does not work for all rules) * check if target rule apply for http url, and use the rules to rewrite it * add piece of code, to check if domain name has not changed during rewrite (should be rewritten, using publicsuffix instead of little hack) --- searx/webapp.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 6 deletions(-) (limited to 'searx/webapp.py') diff --git a/searx/webapp.py b/searx/webapp.py index 2bf3afaf4..7952415af 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -49,6 +49,9 @@ from searx.languages import language_codes from searx.search import Search from searx.autocomplete import backends as autocomplete_backends +from urlparse import urlparse +import re + static_path, templates_path, themes =\ get_themes(settings['themes_path'] @@ -197,16 +200,53 @@ def index(): if not search.paging and engines[result['engine']].paging: search.paging = True + # check if HTTPS rewrite is 
required if settings['server']['https_rewrite']\ and result['parsed_url'].scheme == 'http': - for http_regex, https_url in https_rules: - if http_regex.match(result['url']): - result['url'] = http_regex.sub(https_url, result['url']) - # TODO result['parsed_url'].scheme - break + skip_https_rewrite = False + + # check if HTTPS rewrite is possible + for target, rules, exclusions in https_rules: + + # check if target regex match with url + if target.match(result['url']): + # process exclusions + for exclusion in exclusions: + # check if exclusion match with url + if exclusion.match(result['url']): + skip_https_rewrite = True + break + + # skip https rewrite if required + if skip_https_rewrite: + break + + # process rules + for rule in rules: + # TODO, precompile rule + p = re.compile(rule[0]) + # rewrite url if possible + new_result_url = p.sub(rule[1], result['url']) + + # parse new url + new_parsed_url = urlparse(new_result_url) + + # continiue if nothing was rewritten + if result['url'] == new_result_url: + continue + + # get domainname from result + # TODO, does only work correct with TLD's like asdf.com, not for asdf.com.de + # TODO, using publicsuffix instead of this rewrite rule + old_result_domainname = '.'.join(result['parsed_url'].hostname.split('.')[-2:]) + new_result_domainname = '.'.join(new_parsed_url.hostname.split('.')[-2:]) + + # check if rewritten hostname is the same, to protect against wrong or malicious rewrite rules + if old_result_domainname == new_result_domainname: + # set new url + result['url'] = new_result_url - # HTTPS rewrite if search.request_data.get('format', 'html') == 'html': if 'content' in result: result['content'] = highlight_content(result['content'], -- cgit v1.2.3 From 0616d26feb0f96b3d4fd6b2744ae0288c9fed96b Mon Sep 17 00:00:00 2001 From: Thomas Pointhuber Date: Sun, 14 Sep 2014 14:17:12 +0200 Subject: improve https rewriting --- searx/webapp.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'searx/webapp.py') diff --git 
a/searx/webapp.py b/searx/webapp.py index 7952415af..d9dc5f710 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -247,6 +247,9 @@ def index(): # set new url result['url'] = new_result_url + # target has matched, do not search over the other rules + break + if search.request_data.get('format', 'html') == 'html': if 'content' in result: result['content'] = highlight_content(result['content'], -- cgit v1.2.3 From bb126da0fb49d1c9640eeb3371d0bbcf005bcd2b Mon Sep 17 00:00:00 2001 From: Thomas Pointhuber Date: Wed, 15 Oct 2014 14:47:03 +0200 Subject: improve https rewrite code --- searx/webapp.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'searx/webapp.py') diff --git a/searx/webapp.py b/searx/webapp.py index d9dc5f710..25c99d94c 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -224,10 +224,14 @@ def index(): # process rules for rule in rules: - # TODO, precompile rule - p = re.compile(rule[0]) - # rewrite url if possible - new_result_url = p.sub(rule[1], result['url']) + try: + # TODO, precompile rule + p = re.compile(rule[0]) + + # rewrite url if possible + new_result_url = p.sub(rule[1], result['url']) + except: + break # parse new url new_parsed_url = urlparse(new_result_url) -- cgit v1.2.3 From 5740cfbf1cb468af74d0e7e1c9358ce702eb4f59 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sun, 19 Oct 2014 12:41:04 +0200 Subject: [fix] pep8 part II. 
--- searx/webapp.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'searx/webapp.py') diff --git a/searx/webapp.py b/searx/webapp.py index 830cf440a..0555d6ca9 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -71,7 +71,7 @@ app.secret_key = settings['server']['secret_key'] babel = Babel(app) -#TODO configurable via settings.yml +# TODO configurable via settings.yml favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud', 'twitter', 'stackoverflow', 'github'] @@ -146,14 +146,14 @@ def render(template_name, override_theme=None, **kwargs): nonblocked_categories = set(chain.from_iterable(nonblocked_categories)) - if not 'categories' in kwargs: + if 'categories' not in kwargs: kwargs['categories'] = ['general'] kwargs['categories'].extend(x for x in sorted(categories.keys()) if x != 'general' and x in nonblocked_categories) - if not 'selected_categories' in kwargs: + if 'selected_categories' not in kwargs: kwargs['selected_categories'] = [] for arg in request.args: if arg.startswith('category_'): @@ -168,7 +168,7 @@ def render(template_name, override_theme=None, **kwargs): if not kwargs['selected_categories']: kwargs['selected_categories'] = ['general'] - if not 'autocomplete' in kwargs: + if 'autocomplete' not in kwargs: kwargs['autocomplete'] = autocomplete kwargs['method'] = request.cookies.get('method', 'POST') @@ -202,14 +202,15 @@ def index(): 'index.html', ) - search.results, search.suggestions, search.answers, search.infoboxes = search.search(request) + search.results, search.suggestions,\ + search.answers, search.infoboxes = search.search(request) for result in search.results: if not search.paging and engines[result['engine']].paging: search.paging = True - # check if HTTPS rewrite is required + # check if HTTPS rewrite is required if settings['server']['https_rewrite']\ and result['parsed_url'].scheme == 'http': @@ -236,7 +237,7 @@ def index(): try: # TODO, precompile rule p = re.compile(rule[0]) - + # 
rewrite url if possible new_result_url = p.sub(rule[1], result['url']) except: @@ -250,17 +251,21 @@ def index(): continue # get domainname from result - # TODO, does only work correct with TLD's like asdf.com, not for asdf.com.de + # TODO, does only work correct with TLD's like + # asdf.com, not for asdf.com.de # TODO, using publicsuffix instead of this rewrite rule - old_result_domainname = '.'.join(result['parsed_url'].hostname.split('.')[-2:]) - new_result_domainname = '.'.join(new_parsed_url.hostname.split('.')[-2:]) + old_result_domainname = '.'.join( + result['parsed_url'].hostname.split('.')[-2:]) + new_result_domainname = '.'.join( + new_parsed_url.hostname.split('.')[-2:]) - # check if rewritten hostname is the same, to protect against wrong or malicious rewrite rules + # check if rewritten hostname is the same, + # to protect against wrong or malicious rewrite rules if old_result_domainname == new_result_domainname: # set new url result['url'] = new_result_url - # target has matched, do not search over the other rules + # target has matched, do not search over the other rules break if search.request_data.get('format', 'html') == 'html': @@ -429,7 +434,7 @@ def preferences(): for pd_name, pd in request.form.items(): if pd_name.startswith('category_'): category = pd_name[9:] - if not category in categories: + if category not in categories: continue selected_categories.append(category) elif pd_name == 'locale' and pd in settings['locales']: -- cgit v1.2.3