From 28c75c187f8ec9971e09cf002a2ea7028f515473 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Mon, 14 Oct 2019 14:58:20 +0200 Subject: [fix] update query params sanitization - closes #722 --- searx/plugins/tracker_url_remover.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'searx/plugins') diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index 630c8a638..a1ed0d052 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -17,10 +17,10 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. from flask_babel import gettext import re -from searx.url_utils import urlunparse +from searx.url_utils import urlunparse, parse_qsl, urlencode -regexes = {re.compile(r'utm_[^&]+&?'), - re.compile(r'(wkey|wemail)[^&]+&?'), +regexes = {re.compile(r'utm_[^&]+'), + re.compile(r'(wkey|wemail)[^&]*'), re.compile(r'&$')} name = gettext('Tracker URL remover') @@ -34,12 +34,18 @@ def on_result(request, search, result): if query == "": return True - - for reg in regexes: - query = reg.sub('', query) - - if query != result['parsed_url'].query: - result['parsed_url'] = result['parsed_url']._replace(query=query) - result['url'] = urlunparse(result['parsed_url']) + parsed_query = parse_qsl(query) + + changed = False + for i,(param_name,_) in enumerate(list(parsed_query)): + for reg in regexes: + if reg.match(param_name): + parsed_query.pop(i) + changed = True + break + + if changed: + result['parsed_url'] = result['parsed_url']._replace(query=urlencode(parsed_query)) + result['url'] = urlunparse(result['parsed_url']) return True -- cgit v1.2.3 From ed1c1bdb047f9fe93026739b9dc074393e5c1d46 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Mon, 14 Oct 2019 15:09:39 +0200 Subject: [fix] pep8 --- searx/plugins/tracker_url_remover.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'searx/plugins') diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index a1ed0d052..8cc063bba 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -37,7 +37,7 @@ def on_result(request, search, result): parsed_query = parse_qsl(query) changed = False - for i,(param_name,_) in enumerate(list(parsed_query)): + for i, (param_name, _) in enumerate(list(parsed_query)): for reg in regexes: if reg.match(param_name): parsed_query.pop(i) -- cgit v1.2.3 From a6f20caf32af463b57a026ee7cb7ed6317db6b8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Mon, 23 Sep 2019 17:14:32 +0200 Subject: add initial support for offline engines && command engine --- searx/plugins/https_rewrite.py | 3 +++ searx/plugins/oa_doi_rewrite.py | 3 +++ searx/plugins/tracker_url_remover.py | 3 +++ 3 files changed, 9 insertions(+) (limited to 'searx/plugins') diff --git a/searx/plugins/https_rewrite.py b/searx/plugins/https_rewrite.py index 3d986770e..82556017e 100644 --- a/searx/plugins/https_rewrite.py +++ b/searx/plugins/https_rewrite.py @@ -225,6 +225,9 @@ def https_url_rewrite(result): def on_result(request, search, result): + if 'parsed_url' not in result: + return True + if result['parsed_url'].scheme == 'http': https_url_rewrite(result) return True diff --git a/searx/plugins/oa_doi_rewrite.py b/searx/plugins/oa_doi_rewrite.py index d4942498f..be80beb26 100644 --- a/searx/plugins/oa_doi_rewrite.py +++ b/searx/plugins/oa_doi_rewrite.py @@ -35,6 +35,9 @@ def get_doi_resolver(args, preference_doi_resolver): def on_result(request, search, result): + if 'parsed_url' not in result: + return True + doi = extract_doi(result['parsed_url']) if doi and len(doi) < 50: for suffix in ('/', '.pdf', '/full', '/meta', '/abstract'): diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index 8cc063bba..9e18867b9 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -30,6 +30,9 @@ preference_section = 'privacy' def on_result(request, search, result): + if 'parsed_url' not in result: + return True + query = result['parsed_url'].query if query == "": -- cgit v1.2.3 From dede46a2d5ad1504991b05b8edab4d1ffd781f46 Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Tue, 22 Oct 2019 23:17:00 -0700 Subject: fix out of range error in tracker remover plugin --- searx/plugins/tracker_url_remover.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'searx/plugins') diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index 9e18867b9..33dd621e1 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -39,16 +39,14 @@ def on_result(request, search, result): return True parsed_query = parse_qsl(query) - changed = False + changes = 0 for i, (param_name, _) in enumerate(list(parsed_query)): for reg in regexes: if reg.match(param_name): - parsed_query.pop(i) - changed = True + parsed_query.pop(i - changes) + changes += 1 + result['parsed_url'] = result['parsed_url']._replace(query=urlencode(parsed_query)) + result['url'] = urlunparse(result['parsed_url']) break - if changed: - result['parsed_url'] = result['parsed_url']._replace(query=urlencode(parsed_query)) - result['url'] = urlunparse(result['parsed_url']) - return True -- cgit v1.2.3