diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2019-10-14 14:58:20 +0200 |
|---|---|---|
| committer | Adam Tauber <asciimoo@gmail.com> | 2019-10-14 14:58:20 +0200 |
| commit | 28c75c187f8ec9971e09cf002a2ea7028f515473 (patch) | |
| tree | 90c5e2ec53f1d2800e03d2a2821000aa439b3e10 /searx | |
| parent | 07a0a50e0df8aa1576a9688602e3a30024534d5c (diff) | |
[fix] update query params sanitization - closes #722
Diffstat (limited to 'searx')
| -rw-r--r-- | searx/plugins/tracker_url_remover.py | 26 |
1 files changed, 16 insertions, 10 deletions
```diff
diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py
index 630c8a638..a1ed0d052 100644
--- a/searx/plugins/tracker_url_remover.py
+++ b/searx/plugins/tracker_url_remover.py
@@ -17,10 +17,10 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 
 from flask_babel import gettext
 import re
-from searx.url_utils import urlunparse
+from searx.url_utils import urlunparse, parse_qsl, urlencode
 
-regexes = {re.compile(r'utm_[^&]+&?'),
-           re.compile(r'(wkey|wemail)[^&]+&?'),
+regexes = {re.compile(r'utm_[^&]+'),
+           re.compile(r'(wkey|wemail)[^&]*'),
            re.compile(r'&$')}
 
 name = gettext('Tracker URL remover')
@@ -34,12 +34,18 @@ def on_result(request, search, result):
 
     if query == "":
         return True
-
-    for reg in regexes:
-        query = reg.sub('', query)
-
-    if query != result['parsed_url'].query:
-        result['parsed_url'] = result['parsed_url']._replace(query=query)
-        result['url'] = urlunparse(result['parsed_url'])
+    parsed_query = parse_qsl(query)
+
+    changed = False
+    for i,(param_name,_) in enumerate(list(parsed_query)):
+        for reg in regexes:
+            if reg.match(param_name):
+                parsed_query.pop(i)
+                changed = True
+                break
+
+        if changed:
+            result['parsed_url'] = result['parsed_url']._replace(query=urlencode(parsed_query))
+            result['url'] = urlunparse(result['parsed_url'])
 
     return True
```