From d2a636f75d24953f5094ea97ab54a8a4353a65ff Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Mon, 13 Apr 2015 00:30:12 +0200 Subject: [mod] https rewrite pluginification --- searx/__init__.py | 11 - searx/https_rewrite.py | 209 -- searx/https_rules/00README | 17 - searx/https_rules/Bing.xml | 56 - searx/https_rules/Dailymotion.xml | 69 - searx/https_rules/Deviantart.xml | 53 - searx/https_rules/DuckDuckGo.xml | 38 - searx/https_rules/Flickr.xml | 44 - searx/https_rules/Github-Pages.xml | 11 - searx/https_rules/Github.xml | 94 - searx/https_rules/Google-mismatches.xml | 26 - searx/https_rules/Google.org.xml | 14 - searx/https_rules/GoogleAPIs.xml | 143 -- searx/https_rules/GoogleCanada.xml | 6 - searx/https_rules/GoogleImages.xml | 65 - searx/https_rules/GoogleMainSearch.xml | 78 - searx/https_rules/GoogleMaps.xml | 67 - searx/https_rules/GoogleMelange.xml | 6 - searx/https_rules/GoogleSearch.xml | 135 -- searx/https_rules/GoogleServices.xml | 345 ---- searx/https_rules/GoogleShopping.xml | 28 - searx/https_rules/GoogleSorry.xml | 7 - searx/https_rules/GoogleTranslate.xml | 8 - searx/https_rules/GoogleVideos.xml | 83 - searx/https_rules/GoogleWatchBlog.xml | 17 - searx/https_rules/Google_App_Engine.xml | 21 - searx/https_rules/Googleplex.com.xml | 16 - searx/https_rules/OpenStreetMap.xml | 15 - searx/https_rules/Rawgithub.com.xml | 14 - searx/https_rules/Soundcloud.xml | 101 - searx/https_rules/ThePirateBay.xml | 36 - searx/https_rules/Torproject.xml | 18 - searx/https_rules/Twitter.xml | 169 -- searx/https_rules/Vimeo.xml | 75 - searx/https_rules/WikiLeaks.xml | 13 - searx/https_rules/Wikimedia.xml | 107 - searx/https_rules/Yahoo.xml | 2450 ----------------------- searx/https_rules/YouTube.xml | 46 - searx/plugins/__init__.py | 9 +- searx/plugins/https_rewrite.py | 227 +++ searx/plugins/https_rules/00README | 17 + searx/plugins/https_rules/Bing.xml | 56 + searx/plugins/https_rules/Dailymotion.xml | 69 + searx/plugins/https_rules/Deviantart.xml | 53 + searx/plugins/https_rules/DuckDuckGo.xml | 38 + searx/plugins/https_rules/Flickr.xml | 44 + searx/plugins/https_rules/Github-Pages.xml | 11 + searx/plugins/https_rules/Github.xml | 94 + searx/plugins/https_rules/Google-mismatches.xml | 26 + searx/plugins/https_rules/Google.org.xml | 14 + searx/plugins/https_rules/GoogleAPIs.xml | 143 ++ searx/plugins/https_rules/GoogleCanada.xml | 6 + searx/plugins/https_rules/GoogleImages.xml | 65 + searx/plugins/https_rules/GoogleMainSearch.xml | 78 + searx/plugins/https_rules/GoogleMaps.xml | 67 + searx/plugins/https_rules/GoogleMelange.xml | 6 + searx/plugins/https_rules/GoogleSearch.xml | 135 ++ searx/plugins/https_rules/GoogleServices.xml | 345 ++++ searx/plugins/https_rules/GoogleShopping.xml | 28 + searx/plugins/https_rules/GoogleSorry.xml | 7 + searx/plugins/https_rules/GoogleTranslate.xml | 8 + searx/plugins/https_rules/GoogleVideos.xml | 83 + searx/plugins/https_rules/GoogleWatchBlog.xml | 17 + searx/plugins/https_rules/Google_App_Engine.xml | 21 + searx/plugins/https_rules/Googleplex.com.xml | 16 + searx/plugins/https_rules/OpenStreetMap.xml | 15 + searx/plugins/https_rules/Rawgithub.com.xml | 14 + searx/plugins/https_rules/Soundcloud.xml | 101 + searx/plugins/https_rules/ThePirateBay.xml | 36 + searx/plugins/https_rules/Torproject.xml | 18 + searx/plugins/https_rules/Twitter.xml | 169 ++ searx/plugins/https_rules/Vimeo.xml | 75 + searx/plugins/https_rules/WikiLeaks.xml | 13 + searx/plugins/https_rules/Wikimedia.xml | 107 + searx/plugins/https_rules/Yahoo.xml | 2450 +++++++++++++++++++++++ searx/plugins/https_rules/YouTube.xml | 46 + searx/settings.yml | 1 - searx/webapp.py | 8 +- 78 files changed, 4725 insertions(+), 4722 deletions(-) delete mode 100644 searx/https_rewrite.py delete mode 100644 searx/https_rules/00README delete mode 100644 searx/https_rules/Bing.xml delete mode 100644 searx/https_rules/Dailymotion.xml delete mode 100644 searx/https_rules/Deviantart.xml delete mode 100644 searx/https_rules/DuckDuckGo.xml delete mode 100644 searx/https_rules/Flickr.xml delete mode 100644 searx/https_rules/Github-Pages.xml delete mode 100644 searx/https_rules/Github.xml delete mode 100644 searx/https_rules/Google-mismatches.xml delete mode 100644 searx/https_rules/Google.org.xml delete mode 100644 searx/https_rules/GoogleAPIs.xml delete mode 100644 searx/https_rules/GoogleCanada.xml delete mode 100644 searx/https_rules/GoogleImages.xml delete mode 100644 searx/https_rules/GoogleMainSearch.xml delete mode 100644 searx/https_rules/GoogleMaps.xml delete mode 100644 searx/https_rules/GoogleMelange.xml delete mode 100644 searx/https_rules/GoogleSearch.xml delete mode 100644 searx/https_rules/GoogleServices.xml delete mode 100644 searx/https_rules/GoogleShopping.xml delete mode 100644 searx/https_rules/GoogleSorry.xml delete mode 100644 searx/https_rules/GoogleTranslate.xml delete mode 100644 searx/https_rules/GoogleVideos.xml delete mode 100644 searx/https_rules/GoogleWatchBlog.xml delete mode 100644 searx/https_rules/Google_App_Engine.xml delete mode 100644 searx/https_rules/Googleplex.com.xml delete mode 100644 searx/https_rules/OpenStreetMap.xml delete mode 100644 searx/https_rules/Rawgithub.com.xml delete mode 100644 searx/https_rules/Soundcloud.xml delete mode 100644 searx/https_rules/ThePirateBay.xml delete mode 100644 searx/https_rules/Torproject.xml delete mode 100644 searx/https_rules/Twitter.xml delete mode 100644 searx/https_rules/Vimeo.xml delete mode 100644 searx/https_rules/WikiLeaks.xml delete mode 100644 searx/https_rules/Wikimedia.xml delete mode 100644 searx/https_rules/Yahoo.xml delete mode 100644 searx/https_rules/YouTube.xml create mode 100644 searx/plugins/https_rewrite.py create mode 100644 searx/plugins/https_rules/00README create mode 100644 searx/plugins/https_rules/Bing.xml create mode 100644 searx/plugins/https_rules/Dailymotion.xml create mode 100644 searx/plugins/https_rules/Deviantart.xml create mode 100644 searx/plugins/https_rules/DuckDuckGo.xml create mode 100644 searx/plugins/https_rules/Flickr.xml create mode 100644 searx/plugins/https_rules/Github-Pages.xml create mode 100644 searx/plugins/https_rules/Github.xml create mode 100644 searx/plugins/https_rules/Google-mismatches.xml create mode 100644 searx/plugins/https_rules/Google.org.xml create mode 100644 searx/plugins/https_rules/GoogleAPIs.xml create mode 100644 searx/plugins/https_rules/GoogleCanada.xml create mode 100644 searx/plugins/https_rules/GoogleImages.xml create mode 100644 searx/plugins/https_rules/GoogleMainSearch.xml create mode 100644 searx/plugins/https_rules/GoogleMaps.xml create mode 100644 searx/plugins/https_rules/GoogleMelange.xml create mode 100644 searx/plugins/https_rules/GoogleSearch.xml create mode 100644 searx/plugins/https_rules/GoogleServices.xml create mode 100644 searx/plugins/https_rules/GoogleShopping.xml create mode 100644 searx/plugins/https_rules/GoogleSorry.xml create mode 100644 searx/plugins/https_rules/GoogleTranslate.xml create mode 100644 searx/plugins/https_rules/GoogleVideos.xml create mode 100644 searx/plugins/https_rules/GoogleWatchBlog.xml create mode 100644 searx/plugins/https_rules/Google_App_Engine.xml create mode 100644 searx/plugins/https_rules/Googleplex.com.xml create mode 100644 searx/plugins/https_rules/OpenStreetMap.xml create mode 100644 searx/plugins/https_rules/Rawgithub.com.xml create mode 100644 searx/plugins/https_rules/Soundcloud.xml create mode 100644 searx/plugins/https_rules/ThePirateBay.xml create mode 100644 searx/plugins/https_rules/Torproject.xml create mode 100644 searx/plugins/https_rules/Twitter.xml create mode 100644 searx/plugins/https_rules/Vimeo.xml create mode 100644 searx/plugins/https_rules/WikiLeaks.xml create mode 100644 searx/plugins/https_rules/Wikimedia.xml create mode 100644 searx/plugins/https_rules/Yahoo.xml create mode 100644 searx/plugins/https_rules/YouTube.xml diff --git a/searx/__init__.py b/searx/__init__.py index 110f46af8..2d545a809 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -36,11 +36,6 @@ if 'SEARX_SETTINGS_PATH' in environ: else: settings_path = join(searx_dir, 'settings.yml') -if 'SEARX_HTTPS_REWRITE_PATH' in environ: - https_rewrite_path = environ['SEARX_HTTPS_REWRITE_PATH'] -else: - https_rewrite_path = join(searx_dir, 'https_rules') - # load settings with open(settings_path) as settings_yaml: settings = load(settings_yaml) @@ -52,10 +47,4 @@ else: logger = logging.getLogger('searx') -# load https rules only if https rewrite is enabled -if settings.get('server', {}).get('https_rewrite'): - # loade https rules - from searx.https_rewrite import load_https_rules - load_https_rules(https_rewrite_path) - logger.info('Initialisation done') diff --git a/searx/https_rewrite.py b/searx/https_rewrite.py deleted file mode 100644 index 71aec1c9b..000000000 --- a/searx/https_rewrite.py +++ /dev/null @@ -1,209 +0,0 @@ -''' -searx is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -searx is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with searx. If not, see < http://www.gnu.org/licenses/ >. - -(C) 2013- by Adam Tauber, -''' - -import re -from urlparse import urlparse -from lxml import etree -from os import listdir -from os.path import isfile, isdir, join -from searx import logger - - -logger = logger.getChild("https_rewrite") - -# https://gitweb.torproject.org/\ -# pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules - -# HTTPS rewrite rules -https_rules = [] - - -# load single ruleset from a xml file -def load_single_https_ruleset(filepath): - ruleset = () - - # init parser - parser = etree.XMLParser() - - # load and parse xml-file - try: - tree = etree.parse(filepath, parser) - except: - # TODO, error message - return () - - # get root node - root = tree.getroot() - - # check if root is a node with the name ruleset - # TODO improve parsing - if root.tag != 'ruleset': - return () - - # check if rule is deactivated by default - if root.attrib.get('default_off'): - return () - - # check if rule does only work for specific platforms - if root.attrib.get('platform'): - return () - - hosts = [] - rules = [] - exclusions = [] - - # parse childs from ruleset - for ruleset in root: - # this child define a target - if ruleset.tag == 'target': - # check if required tags available - if not ruleset.attrib.get('host'): - continue - - # convert host-rule to valid regex - host = ruleset.attrib.get('host')\ - .replace('.', '\.').replace('*', '.*') - - # append to host list - hosts.append(host) - - # this child define a rule - elif ruleset.tag == 'rule': - # check if required tags available - if not ruleset.attrib.get('from')\ - or not ruleset.attrib.get('to'): - continue - - # TODO hack, which convert a javascript regex group - # into a valid python regex group - rule_from = ruleset.attrib['from'].replace('$', '\\') - if rule_from.endswith('\\'): - rule_from = rule_from[:-1]+'$' - rule_to = ruleset.attrib['to'].replace('$', '\\') - if rule_to.endswith('\\'): - rule_to = rule_to[:-1]+'$' - - # TODO, not working yet because of the hack above, - # currently doing that in webapp.py - # rule_from_rgx = re.compile(rule_from, re.I) - - # append rule - try: - rules.append((re.compile(rule_from, re.I | re.U), rule_to)) - except: - # TODO log regex error - continue - - # this child define an exclusion - elif ruleset.tag == 'exclusion': - # check if required tags available - if not ruleset.attrib.get('pattern'): - continue - - exclusion_rgx = re.compile(ruleset.attrib.get('pattern')) - - # append exclusion - exclusions.append(exclusion_rgx) - - # convert list of possible hosts to a simple regex - # TODO compress regex to improve performance - try: - target_hosts = re.compile('^(' + '|'.join(hosts) + ')', re.I | re.U) - except: - return () - - # return ruleset - return (target_hosts, rules, exclusions) - - -# load all https rewrite rules -def load_https_rules(rules_path): - # check if directory exists - if not isdir(rules_path): - logger.error("directory not found: '" + rules_path + "'") - return - - # search all xml files which are stored in the https rule directory - xml_files = [join(rules_path, f) - for f in listdir(rules_path) - if isfile(join(rules_path, f)) and f[-4:] == '.xml'] - - # load xml-files - for ruleset_file in xml_files: - # calculate rewrite-rules - ruleset = load_single_https_ruleset(ruleset_file) - - # skip if no ruleset returned - if not ruleset: - continue - - # append ruleset - https_rules.append(ruleset) - - logger.info('{n} rules loaded'.format(n=len(https_rules))) - - -def https_url_rewrite(result): - skip_https_rewrite = False - # check if HTTPS rewrite is possible - for target, rules, exclusions in https_rules: - - # check if target regex match with url - if target.match(result['parsed_url'].netloc): - # process exclusions - for exclusion in exclusions: - # check if exclusion match with url - if exclusion.match(result['url']): - skip_https_rewrite = True - break - - # skip https rewrite if required - if skip_https_rewrite: - break - - # process rules - for rule in rules: - try: - new_result_url = rule[0].sub(rule[1], result['url']) - except: - break - - # parse new url - new_parsed_url = urlparse(new_result_url) - - # continiue if nothing was rewritten - if result['url'] == new_result_url: - continue - - # get domainname from result - # TODO, does only work correct with TLD's like - # asdf.com, not for asdf.com.de - # TODO, using publicsuffix instead of this rewrite rule - old_result_domainname = '.'.join( - result['parsed_url'].hostname.split('.')[-2:]) - new_result_domainname = '.'.join( - new_parsed_url.hostname.split('.')[-2:]) - - # check if rewritten hostname is the same, - # to protect against wrong or malicious rewrite rules - if old_result_domainname == new_result_domainname: - # set new url - result['url'] = new_result_url - - # target has matched, do not search over the other rules - break - return result diff --git a/searx/https_rules/00README b/searx/https_rules/00README deleted file mode 100644 index fcd8a7724..000000000 --- a/searx/https_rules/00README +++ /dev/null @@ -1,17 +0,0 @@ - diff --git a/searx/https_rules/Bing.xml b/searx/https_rules/Bing.xml deleted file mode 100644 index 8b403f108..000000000 --- a/searx/https_rules/Bing.xml +++ /dev/null @@ -1,56 +0,0 @@ - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/Dailymotion.xml b/searx/https_rules/Dailymotion.xml deleted file mode 100644 index 743100cb7..000000000 --- a/searx/https_rules/Dailymotion.xml +++ /dev/null @@ -1,69 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/Deviantart.xml b/searx/https_rules/Deviantart.xml deleted file mode 100644 index 7830fc20f..000000000 --- a/searx/https_rules/Deviantart.xml +++ /dev/null @@ -1,53 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/DuckDuckGo.xml b/searx/https_rules/DuckDuckGo.xml deleted file mode 100644 index 173a9ad9f..000000000 --- a/searx/https_rules/DuckDuckGo.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/Flickr.xml b/searx/https_rules/Flickr.xml deleted file mode 100644 index 85c6e8065..000000000 --- a/searx/https_rules/Flickr.xml +++ /dev/null @@ -1,44 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/Github-Pages.xml b/searx/https_rules/Github-Pages.xml deleted file mode 100644 index d3be58a4c..000000000 --- a/searx/https_rules/Github-Pages.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - diff --git a/searx/https_rules/Github.xml b/searx/https_rules/Github.xml deleted file mode 100644 index a9a3a1e53..000000000 --- a/searx/https_rules/Github.xml +++ /dev/null @@ -1,94 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/Google-mismatches.xml b/searx/https_rules/Google-mismatches.xml deleted file mode 100644 index de9d3eb18..000000000 --- a/searx/https_rules/Google-mismatches.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/Google.org.xml b/searx/https_rules/Google.org.xml deleted file mode 100644 index d6cc47881..000000000 --- a/searx/https_rules/Google.org.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/searx/https_rules/GoogleAPIs.xml b/searx/https_rules/GoogleAPIs.xml deleted file mode 100644 index 85a5a8081..000000000 --- a/searx/https_rules/GoogleAPIs.xml +++ /dev/null @@ -1,143 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/GoogleCanada.xml b/searx/https_rules/GoogleCanada.xml deleted file mode 100644 index d5eefe816..000000000 --- a/searx/https_rules/GoogleCanada.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - diff --git a/searx/https_rules/GoogleImages.xml b/searx/https_rules/GoogleImages.xml deleted file mode 100644 index 0112001e0..000000000 --- a/searx/https_rules/GoogleImages.xml +++ /dev/null @@ -1,65 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/GoogleMainSearch.xml b/searx/https_rules/GoogleMainSearch.xml deleted file mode 100644 index df504d90c..000000000 --- a/searx/https_rules/GoogleMainSearch.xml +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/GoogleMaps.xml b/searx/https_rules/GoogleMaps.xml deleted file mode 100644 index 0f82c5267..000000000 --- a/searx/https_rules/GoogleMaps.xml +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/GoogleMelange.xml b/searx/https_rules/GoogleMelange.xml deleted file mode 100644 index ec23cd45f..000000000 --- a/searx/https_rules/GoogleMelange.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - diff --git a/searx/https_rules/GoogleSearch.xml b/searx/https_rules/GoogleSearch.xml deleted file mode 100644 index 66b7ffdb0..000000000 --- a/searx/https_rules/GoogleSearch.xml +++ /dev/null @@ -1,135 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/GoogleServices.xml b/searx/https_rules/GoogleServices.xml deleted file mode 100644 index 704646b53..000000000 --- a/searx/https_rules/GoogleServices.xml +++ /dev/null @@ -1,345 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/GoogleShopping.xml b/searx/https_rules/GoogleShopping.xml deleted file mode 100644 index 6ba69a91d..000000000 --- a/searx/https_rules/GoogleShopping.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/GoogleSorry.xml b/searx/https_rules/GoogleSorry.xml deleted file mode 100644 index 72a19210d..000000000 --- a/searx/https_rules/GoogleSorry.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/searx/https_rules/GoogleTranslate.xml b/searx/https_rules/GoogleTranslate.xml deleted file mode 100644 index a004025ae..000000000 --- a/searx/https_rules/GoogleTranslate.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - diff --git a/searx/https_rules/GoogleVideos.xml b/searx/https_rules/GoogleVideos.xml deleted file mode 100644 index a5e88fcf0..000000000 --- a/searx/https_rules/GoogleVideos.xml +++ /dev/null @@ -1,83 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/GoogleWatchBlog.xml b/searx/https_rules/GoogleWatchBlog.xml deleted file mode 100644 index afec70c97..000000000 --- a/searx/https_rules/GoogleWatchBlog.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/searx/https_rules/Google_App_Engine.xml b/searx/https_rules/Google_App_Engine.xml deleted file mode 100644 index 851e051d1..000000000 --- a/searx/https_rules/Google_App_Engine.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/searx/https_rules/Googleplex.com.xml b/searx/https_rules/Googleplex.com.xml deleted file mode 100644 index 7ddbb5ba9..000000000 --- a/searx/https_rules/Googleplex.com.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - diff --git a/searx/https_rules/OpenStreetMap.xml b/searx/https_rules/OpenStreetMap.xml deleted file mode 100644 index 58a661823..000000000 --- a/searx/https_rules/OpenStreetMap.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - diff --git a/searx/https_rules/Rawgithub.com.xml b/searx/https_rules/Rawgithub.com.xml deleted file mode 100644 index 3868f332a..000000000 --- a/searx/https_rules/Rawgithub.com.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - diff --git a/searx/https_rules/Soundcloud.xml b/searx/https_rules/Soundcloud.xml deleted file mode 100644 index 6958e8cbc..000000000 --- a/searx/https_rules/Soundcloud.xml +++ /dev/null @@ -1,101 +0,0 @@ - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/ThePirateBay.xml b/searx/https_rules/ThePirateBay.xml deleted file mode 100644 index 010387b6b..000000000 --- a/searx/https_rules/ThePirateBay.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/Torproject.xml b/searx/https_rules/Torproject.xml deleted file mode 100644 index 69269af7e..000000000 --- a/searx/https_rules/Torproject.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/Twitter.xml b/searx/https_rules/Twitter.xml deleted file mode 100644 index 3285f44e0..000000000 --- a/searx/https_rules/Twitter.xml +++ /dev/null @@ -1,169 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/Vimeo.xml b/searx/https_rules/Vimeo.xml deleted file mode 100644 index f2a3e5764..000000000 --- a/searx/https_rules/Vimeo.xml +++ /dev/null @@ -1,75 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/WikiLeaks.xml b/searx/https_rules/WikiLeaks.xml deleted file mode 100644 index 977709d2d..000000000 --- a/searx/https_rules/WikiLeaks.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/searx/https_rules/Wikimedia.xml b/searx/https_rules/Wikimedia.xml deleted file mode 100644 index 9f25831a2..000000000 --- a/searx/https_rules/Wikimedia.xml +++ /dev/null @@ -1,107 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/Yahoo.xml b/searx/https_rules/Yahoo.xml deleted file mode 100644 index 33548c4ab..000000000 --- a/searx/https_rules/Yahoo.xml +++ /dev/null @@ -1,2450 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/https_rules/YouTube.xml b/searx/https_rules/YouTube.xml deleted file mode 100644 index bddc2a5f3..000000000 --- a/searx/https_rules/YouTube.xml +++ /dev/null @@ -1,46 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index a8e400c93..5ac3f447c 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -14,13 +14,15 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2015 by Adam Tauber, ''' -from searx.plugins import (self_ip, - search_on_category_select) -from searx import logger from sys import exit +from searx import logger logger = logger.getChild('plugins') +from searx.plugins import (https_rewrite, + self_ip, + search_on_category_select) + required_attrs = (('name', str), ('description', str), ('default_on', bool)) @@ -68,5 +70,6 @@ class PluginStore(): plugins = PluginStore() +plugins.register(https_rewrite) plugins.register(self_ip) plugins.register(search_on_category_select) diff --git a/searx/plugins/https_rewrite.py b/searx/plugins/https_rewrite.py new file mode 100644 index 000000000..409b122e6 --- /dev/null +++ b/searx/plugins/https_rewrite.py @@ -0,0 +1,227 @@ +''' +searx is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +searx is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with searx. If not, see < http://www.gnu.org/licenses/ >. + +(C) 2013- by Adam Tauber, +''' + +import re +from urlparse import urlparse +from lxml import etree +from os import listdir, environ +from os.path import isfile, isdir, join +from searx.plugins import logger +from flask.ext.babel import gettext +from searx import searx_dir + + +name = "HTTPS rewrite" +description = gettext('Rewrite HTTP links to HTTPS if possible') +default_on = True + +if 'SEARX_HTTPS_REWRITE_PATH' in environ: + rules_path = environ['SEARX_rules_path'] +else: + rules_path = join(searx_dir, 'plugins/https_rules') + +logger = logger.getChild("https_rewrite") + +# https://gitweb.torproject.org/\ +# pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules + +# HTTPS rewrite rules +https_rules = [] + + +# load single ruleset from a xml file +def load_single_https_ruleset(rules_path): + ruleset = () + + # init parser + parser = etree.XMLParser() + + # load and parse xml-file + try: + tree = etree.parse(rules_path, parser) + except: + # TODO, error message + return () + + # get root node + root = tree.getroot() + + # check if root is a node with the name ruleset + # TODO improve parsing + if root.tag != 'ruleset': + return () + + # check if rule is deactivated by default + if root.attrib.get('default_off'): + return () + + # check if rule does only work for specific platforms + if root.attrib.get('platform'): + return () + + hosts = [] + rules = [] + exclusions = [] + + # parse childs from ruleset + for ruleset in root: + # this child define a target + if ruleset.tag == 'target': + # check if required tags available + if not ruleset.attrib.get('host'): + continue + + # convert host-rule to valid regex + host = ruleset.attrib.get('host')\ + .replace('.', '\.').replace('*', '.*') + + # append to host list + hosts.append(host) + + # this child define a rule + elif ruleset.tag == 'rule': + # check if required tags available + if not ruleset.attrib.get('from')\ + or not ruleset.attrib.get('to'): + continue + + # TODO hack, which convert a javascript regex group + # into a valid python regex group + rule_from = ruleset.attrib['from'].replace('$', '\\') + if rule_from.endswith('\\'): + rule_from = rule_from[:-1]+'$' + rule_to = ruleset.attrib['to'].replace('$', '\\') + if rule_to.endswith('\\'): + rule_to = rule_to[:-1]+'$' + + # TODO, not working yet because of the hack above, + # currently doing that in webapp.py + # rule_from_rgx = re.compile(rule_from, re.I) + + # append rule + try: + rules.append((re.compile(rule_from, re.I | re.U), rule_to)) + except: + # TODO log regex error + continue + + # this child define an exclusion + elif ruleset.tag == 'exclusion': + # check if required tags available + if not ruleset.attrib.get('pattern'): + continue + + exclusion_rgx = re.compile(ruleset.attrib.get('pattern')) + + # append exclusion + exclusions.append(exclusion_rgx) + + # convert list of possible hosts to a simple regex + # TODO compress regex to improve performance + try: + target_hosts = re.compile('^(' + '|'.join(hosts) + ')', re.I | re.U) + except: + return () + + # return ruleset + return (target_hosts, rules, exclusions) + + +# load all https rewrite rules +def load_https_rules(rules_path): + # check if directory exists + if not isdir(rules_path): + logger.error("directory not found: '" + rules_path + "'") + return + + # search all xml files which are stored in the https rule directory + xml_files = [join(rules_path, f) + for f in listdir(rules_path) + if isfile(join(rules_path, f)) and f[-4:] == '.xml'] + + # load xml-files + for ruleset_file in xml_files: + # calculate rewrite-rules + ruleset = load_single_https_ruleset(ruleset_file) + + # skip if no ruleset returned + if not ruleset: + continue + + # append ruleset + https_rules.append(ruleset) + + logger.info('{n} rules loaded'.format(n=len(https_rules))) + + +def https_url_rewrite(result): + skip_https_rewrite = False + # check if HTTPS rewrite is possible + for target, rules, exclusions in https_rules: + + # check if target regex match with url + if target.match(result['parsed_url'].netloc): + # process exclusions + for exclusion in exclusions: + # check if exclusion match with url + if exclusion.match(result['url']): + skip_https_rewrite = True + break + + # skip https rewrite if required + if skip_https_rewrite: + break + + # process rules + for rule in rules: + try: + new_result_url = rule[0].sub(rule[1], result['url']) + except: + break + + # parse new url + new_parsed_url = urlparse(new_result_url) + + # continiue if nothing was rewritten + if result['url'] == new_result_url: + continue + + # get domainname from result + # TODO, does only work correct with TLD's like + # asdf.com, not for asdf.com.de + # TODO, using publicsuffix instead of this rewrite rule + old_result_domainname = '.'.join( + result['parsed_url'].hostname.split('.')[-2:]) + new_result_domainname = '.'.join( + new_parsed_url.hostname.split('.')[-2:]) + + # check if rewritten hostname is the same, + # to protect against wrong or malicious rewrite rules + if old_result_domainname == new_result_domainname: + # set new url + result['url'] = new_result_url + + # target has matched, do not search over the other rules + break + return result + + +def on_result(request, ctx): + result = ctx['result'] + if result['parsed_url'].scheme == 'http': + https_url_rewrite(result) + return True diff --git a/searx/plugins/https_rules/00README b/searx/plugins/https_rules/00README new file mode 100644 index 000000000..fcd8a7724 --- /dev/null +++ b/searx/plugins/https_rules/00README @@ -0,0 +1,17 @@ + diff --git a/searx/plugins/https_rules/Bing.xml b/searx/plugins/https_rules/Bing.xml new file mode 100644 index 000000000..8b403f108 --- /dev/null +++ b/searx/plugins/https_rules/Bing.xml @@ -0,0 +1,56 @@ + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/Dailymotion.xml b/searx/plugins/https_rules/Dailymotion.xml new file mode 100644 index 000000000..743100cb7 --- /dev/null +++ b/searx/plugins/https_rules/Dailymotion.xml @@ -0,0 +1,69 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/Deviantart.xml b/searx/plugins/https_rules/Deviantart.xml new file mode 100644 index 000000000..7830fc20f --- /dev/null +++ b/searx/plugins/https_rules/Deviantart.xml @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/DuckDuckGo.xml b/searx/plugins/https_rules/DuckDuckGo.xml new file mode 100644 index 000000000..173a9ad9f --- /dev/null +++ b/searx/plugins/https_rules/DuckDuckGo.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/Flickr.xml b/searx/plugins/https_rules/Flickr.xml new file mode 100644 index 000000000..85c6e8065 --- /dev/null +++ b/searx/plugins/https_rules/Flickr.xml @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/Github-Pages.xml b/searx/plugins/https_rules/Github-Pages.xml new file mode 100644 index 000000000..d3be58a4c --- /dev/null +++ b/searx/plugins/https_rules/Github-Pages.xml @@ -0,0 +1,11 @@ + + + + + + + + diff --git a/searx/plugins/https_rules/Github.xml b/searx/plugins/https_rules/Github.xml new file mode 100644 index 000000000..a9a3a1e53 --- /dev/null +++ b/searx/plugins/https_rules/Github.xml @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/Google-mismatches.xml b/searx/plugins/https_rules/Google-mismatches.xml new file mode 100644 index 000000000..de9d3eb18 --- /dev/null +++ b/searx/plugins/https_rules/Google-mismatches.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/Google.org.xml b/searx/plugins/https_rules/Google.org.xml new file mode 100644 index 000000000..d6cc47881 --- /dev/null +++ b/searx/plugins/https_rules/Google.org.xml @@ -0,0 +1,14 @@ + + + + + + + + + + \ No newline at end of file diff --git a/searx/plugins/https_rules/GoogleAPIs.xml b/searx/plugins/https_rules/GoogleAPIs.xml new file mode 100644 index 000000000..85a5a8081 --- /dev/null +++ b/searx/plugins/https_rules/GoogleAPIs.xml @@ -0,0 +1,143 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/GoogleCanada.xml b/searx/plugins/https_rules/GoogleCanada.xml new file mode 100644 index 000000000..d5eefe816 --- /dev/null +++ b/searx/plugins/https_rules/GoogleCanada.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/searx/plugins/https_rules/GoogleImages.xml b/searx/plugins/https_rules/GoogleImages.xml new file mode 100644 index 000000000..0112001e0 --- /dev/null +++ b/searx/plugins/https_rules/GoogleImages.xml @@ -0,0 +1,65 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/GoogleMainSearch.xml b/searx/plugins/https_rules/GoogleMainSearch.xml new file mode 100644 index 000000000..df504d90c --- /dev/null +++ b/searx/plugins/https_rules/GoogleMainSearch.xml @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/GoogleMaps.xml b/searx/plugins/https_rules/GoogleMaps.xml new file mode 100644 index 000000000..0f82c5267 --- /dev/null +++ b/searx/plugins/https_rules/GoogleMaps.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/GoogleMelange.xml b/searx/plugins/https_rules/GoogleMelange.xml new file mode 100644 index 000000000..ec23cd45f --- /dev/null +++ b/searx/plugins/https_rules/GoogleMelange.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/searx/plugins/https_rules/GoogleSearch.xml b/searx/plugins/https_rules/GoogleSearch.xml new file mode 100644 index 000000000..66b7ffdb0 --- /dev/null +++ b/searx/plugins/https_rules/GoogleSearch.xml @@ -0,0 +1,135 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/GoogleServices.xml b/searx/plugins/https_rules/GoogleServices.xml new file mode 100644 index 000000000..704646b53 --- /dev/null +++ b/searx/plugins/https_rules/GoogleServices.xml @@ -0,0 +1,345 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/GoogleShopping.xml b/searx/plugins/https_rules/GoogleShopping.xml new file mode 100644 index 000000000..6ba69a91d --- /dev/null +++ b/searx/plugins/https_rules/GoogleShopping.xml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/GoogleSorry.xml b/searx/plugins/https_rules/GoogleSorry.xml new file mode 100644 index 000000000..72a19210d --- /dev/null +++ b/searx/plugins/https_rules/GoogleSorry.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/searx/plugins/https_rules/GoogleTranslate.xml b/searx/plugins/https_rules/GoogleTranslate.xml new file mode 100644 index 000000000..a004025ae --- /dev/null +++ b/searx/plugins/https_rules/GoogleTranslate.xml @@ -0,0 +1,8 @@ + + + + + + + diff --git a/searx/plugins/https_rules/GoogleVideos.xml b/searx/plugins/https_rules/GoogleVideos.xml new file mode 100644 index 000000000..a5e88fcf0 --- /dev/null +++ b/searx/plugins/https_rules/GoogleVideos.xml @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/GoogleWatchBlog.xml b/searx/plugins/https_rules/GoogleWatchBlog.xml new file mode 100644 index 000000000..afec70c97 --- /dev/null +++ b/searx/plugins/https_rules/GoogleWatchBlog.xml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/searx/plugins/https_rules/Google_App_Engine.xml b/searx/plugins/https_rules/Google_App_Engine.xml new file mode 100644 index 000000000..851e051d1 --- /dev/null +++ b/searx/plugins/https_rules/Google_App_Engine.xml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/searx/plugins/https_rules/Googleplex.com.xml b/searx/plugins/https_rules/Googleplex.com.xml new file mode 100644 index 000000000..7ddbb5ba9 --- /dev/null +++ b/searx/plugins/https_rules/Googleplex.com.xml @@ -0,0 +1,16 @@ + + + + + + + + diff --git a/searx/plugins/https_rules/OpenStreetMap.xml b/searx/plugins/https_rules/OpenStreetMap.xml new file mode 100644 index 000000000..58a661823 --- /dev/null +++ b/searx/plugins/https_rules/OpenStreetMap.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/Rawgithub.com.xml b/searx/plugins/https_rules/Rawgithub.com.xml new file mode 100644 index 000000000..3868f332a --- /dev/null +++ b/searx/plugins/https_rules/Rawgithub.com.xml @@ -0,0 +1,14 @@ + + + + + + + + + + diff --git a/searx/plugins/https_rules/Soundcloud.xml b/searx/plugins/https_rules/Soundcloud.xml new file mode 100644 index 000000000..6958e8cbc --- /dev/null +++ b/searx/plugins/https_rules/Soundcloud.xml @@ -0,0 +1,101 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/ThePirateBay.xml b/searx/plugins/https_rules/ThePirateBay.xml new file mode 100644 index 000000000..010387b6b --- /dev/null +++ b/searx/plugins/https_rules/ThePirateBay.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/Torproject.xml b/searx/plugins/https_rules/Torproject.xml new file mode 100644 index 000000000..69269af7e --- /dev/null +++ b/searx/plugins/https_rules/Torproject.xml @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/Twitter.xml b/searx/plugins/https_rules/Twitter.xml new file mode 100644 index 000000000..3285f44e0 --- /dev/null +++ b/searx/plugins/https_rules/Twitter.xml @@ -0,0 +1,169 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/Vimeo.xml b/searx/plugins/https_rules/Vimeo.xml new file mode 100644 index 000000000..f2a3e5764 --- /dev/null +++ b/searx/plugins/https_rules/Vimeo.xml @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/WikiLeaks.xml b/searx/plugins/https_rules/WikiLeaks.xml new file mode 100644 index 000000000..977709d2d --- /dev/null +++ b/searx/plugins/https_rules/WikiLeaks.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/searx/plugins/https_rules/Wikimedia.xml b/searx/plugins/https_rules/Wikimedia.xml new file mode 100644 index 000000000..9f25831a2 --- /dev/null +++ b/searx/plugins/https_rules/Wikimedia.xml @@ -0,0 +1,107 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/Yahoo.xml b/searx/plugins/https_rules/Yahoo.xml new file mode 100644 index 000000000..33548c4ab --- /dev/null +++ b/searx/plugins/https_rules/Yahoo.xml @@ -0,0 +1,2450 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/plugins/https_rules/YouTube.xml b/searx/plugins/https_rules/YouTube.xml new file mode 100644 index 000000000..bddc2a5f3 --- /dev/null +++ b/searx/plugins/https_rules/YouTube.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searx/settings.yml b/searx/settings.yml index 5594c54c5..f37c56b26 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -6,7 +6,6 @@ server: base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/" themes_path : "" # Custom ui themes path - leave it blank if you didn't change default_theme : oscar # ui theme - https_rewrite : True # Force rewrite result urls. See searx/https_rewrite.py useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator image_proxy : False # Proxying image results through searx default_locale : "" # Default interface locale - leave blank to detect from browser information or use codes from the 'locales' config section diff --git a/searx/webapp.py b/searx/webapp.py index 89ab9b543..52ced1363 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -59,7 +59,6 @@ from searx.utils import ( ) from searx.version import VERSION_STRING from searx.languages import language_codes -from searx.https_rewrite import https_url_rewrite from searx.search import Search from searx.query import Query from searx.autocomplete import searx_bang, backends as autocomplete_backends @@ -359,15 +358,10 @@ def index(): for result in search.results: + plugins.call('on_result', request, locals()) if not search.paging and engines[result['engine']].paging: search.paging = True - # check if HTTPS rewrite is required - if settings['server']['https_rewrite']\ - and result['parsed_url'].scheme == 'http': - - result = https_url_rewrite(result) - if search.request_data.get('format', 'html') == 'html': if 'content' in result: result['content'] = highlight_content(result['content'], -- cgit v1.2.3