diff options
| author | Markus Heiser <markus.heiser@darmarIT.de> | 2019-12-24 13:33:07 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-12-24 13:33:07 +0100 |
| commit | fb668e2075484084a1f7a9b205ecbe7957ea5e8e (patch) | |
| tree | c6f2e83d9d222d69d79348faac342c07c32dbbf3 /searx/plugins/tracker_url_remover.py | |
| parent | f407dd8ef4e3f6c82bef31f678139d6db2a4d810 (diff) | |
| parent | 6d232e9b695c2553b7594efe00c4f63aa96fc62d (diff) | |
Merge branch 'master' into libgen
Diffstat (limited to 'searx/plugins/tracker_url_remover.py')
| -rw-r--r-- | searx/plugins/tracker_url_remover.py | 27 |
1 files changed, 17 insertions, 10 deletions
```diff
diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py
index 630c8a638..33dd621e1 100644
--- a/searx/plugins/tracker_url_remover.py
+++ b/searx/plugins/tracker_url_remover.py
@@ -17,10 +17,10 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 
 from flask_babel import gettext
 import re
-from searx.url_utils import urlunparse
+from searx.url_utils import urlunparse, parse_qsl, urlencode
 
-regexes = {re.compile(r'utm_[^&]+&?'),
-           re.compile(r'(wkey|wemail)[^&]+&?'),
+regexes = {re.compile(r'utm_[^&]+'),
+           re.compile(r'(wkey|wemail)[^&]*'),
            re.compile(r'&$')}
 
 name = gettext('Tracker URL remover')
@@ -30,16 +30,23 @@ preference_section = 'privacy'
 
 
 def on_result(request, search, result):
+    if 'parsed_url' not in result:
+        return True
+
     query = result['parsed_url'].query
 
     if query == "":
         return True
-
-    for reg in regexes:
-        query = reg.sub('', query)
-
-    if query != result['parsed_url'].query:
-        result['parsed_url'] = result['parsed_url']._replace(query=query)
-        result['url'] = urlunparse(result['parsed_url'])
+    parsed_query = parse_qsl(query)
+
+    changes = 0
+    for i, (param_name, _) in enumerate(list(parsed_query)):
+        for reg in regexes:
+            if reg.match(param_name):
+                parsed_query.pop(i - changes)
+                changes += 1
+                result['parsed_url'] = result['parsed_url']._replace(query=urlencode(parsed_query))
+                result['url'] = urlunparse(result['parsed_url'])
+                break
 
     return True
```