diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2014-12-19 22:40:37 +0100 |
|---|---|---|
| committer | Adam Tauber <asciimoo@gmail.com> | 2014-12-19 22:40:37 +0100 |
| commit | f14177381433618a5b4f5bcff83e4e1a19487f02 (patch) | |
| tree | f349bb5f1ab15a8d6d3ce33e713e24c24028ade4 /searx/webapp.py | |
| parent | 813247b37ab00a1496468df4cff33199ae04d6b4 (diff) | |
[mod][fix] https rewrite refactor ++ fixes
Diffstat (limited to 'searx/webapp.py')
| -rw-r--r-- | searx/webapp.py | 59 |
1 file changed, 2 insertions, 57 deletions
diff --git a/searx/webapp.py b/searx/webapp.py index a2a135e9a..915fb3564 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -41,15 +41,12 @@ from searx.utils import ( UnicodeWriter, highlight_content, html_to_text, get_themes ) from searx.version import VERSION_STRING -from searx.https_rewrite import https_rules from searx.languages import language_codes +from searx.https_rewrite import https_url_rewrite from searx.search import Search from searx.query import Query from searx.autocomplete import backends as autocomplete_backends -from urlparse import urlparse -import re - static_path, templates_path, themes =\ get_themes(settings['themes_path'] @@ -215,59 +212,7 @@ def index(): if settings['server']['https_rewrite']\ and result['parsed_url'].scheme == 'http': - skip_https_rewrite = False - - # check if HTTPS rewrite is possible - for target, rules, exclusions in https_rules: - - # check if target regex match with url - if target.match(result['url']): - # process exclusions - for exclusion in exclusions: - # check if exclusion match with url - if exclusion.match(result['url']): - skip_https_rewrite = True - break - - # skip https rewrite if required - if skip_https_rewrite: - break - - # process rules - for rule in rules: - try: - # TODO, precompile rule - p = re.compile(rule[0]) - - # rewrite url if possible - new_result_url = p.sub(rule[1], result['url']) - except: - break - - # parse new url - new_parsed_url = urlparse(new_result_url) - - # continiue if nothing was rewritten - if result['url'] == new_result_url: - continue - - # get domainname from result - # TODO, does only work correct with TLD's like - # asdf.com, not for asdf.com.de - # TODO, using publicsuffix instead of this rewrite rule - old_result_domainname = '.'.join( - result['parsed_url'].hostname.split('.')[-2:]) - new_result_domainname = '.'.join( - new_parsed_url.hostname.split('.')[-2:]) - - # check if rewritten hostname is the same, - # to protect against wrong or malicious rewrite rules - 
if old_result_domainname == new_result_domainname: - # set new url - result['url'] = new_result_url - - # target has matched, do not search over the other rules - break + result = https_url_rewrite(result) if search.request_data.get('format', 'html') == 'html': if 'content' in result: |