From 32957cdf49c306a5f50ca78bb50c0978ffe5c072 Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Sun, 18 Oct 2020 23:55:57 -0700 Subject: add Ahmia filter plugin for onion results --- searx/plugins/__init__.py | 5 +++++ searx/plugins/ahmia_filter.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 searx/plugins/ahmia_filter.py (limited to 'searx/plugins') diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index b6dc4875b..8221f7c1d 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -28,6 +28,7 @@ from searx import logger, settings, static_path logger = logger.getChild('plugins') from searx.plugins import (oa_doi_rewrite, + ahmia_filter, hash_plugin, https_rewrite, infinite_scroll, @@ -181,3 +182,7 @@ if 'enabled_plugins' in settings: plugin.default_on = True else: plugin.default_on = False + +# load tor specific plugins +if settings['outgoing'].get('using_tor_proxy'): + plugins.register(ahmia_filter) diff --git a/searx/plugins/ahmia_filter.py b/searx/plugins/ahmia_filter.py new file mode 100644 index 000000000..8eb7f9413 --- /dev/null +++ b/searx/plugins/ahmia_filter.py @@ -0,0 +1,36 @@ +''' + SPDX-License-Identifier: AGPL-3.0-or-later +''' + +from hashlib import md5 +from os.path import join +from urllib.parse import urlparse +from searx import searx_dir + +name = "Ahmia blacklist" +description = "Filter out onion results that appear in Ahmia's blacklist. 
description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
default_on = True
preference_section = 'onions'

# Lazily populated cache of blacklisted host hashes.
# ``None`` means "not loaded yet"; an empty frozenset is a valid loaded state.
ahmia_blacklist = None


def get_ahmia_blacklist():
    """Return the Ahmia blacklist, loading it from disk on first use.

    The file ``<searx_dir>/data/ahmia_blacklist.txt`` is read once and its
    whitespace-separated tokens are cached in the module-level
    ``ahmia_blacklist`` variable.

    Returns:
        A ``frozenset`` of the tokens found in the blacklist file.  It
        supports membership tests and iteration like the list it replaces,
        but with constant-time lookups.

    Raises:
        OSError: if the blacklist file cannot be opened on first load.
    """
    global ahmia_blacklist
    # Compare against None explicitly: an empty blacklist is falsy, and a
    # truthiness check would re-read the file on every single call.
    if ahmia_blacklist is None:
        blacklist_path = join(searx_dir, "data", "ahmia_blacklist.txt")
        with open(blacklist_path, 'r', encoding='utf-8') as f:
            ahmia_blacklist = frozenset(f.read().split())
    return ahmia_blacklist


def not_blacklisted(result):
    """Tell whether a search result may be kept.

    Args:
        result: mapping describing one result; the keys ``'is_onion'`` and
            ``'url'`` are read.

    Returns:
        ``True`` when the result is not flagged ``is_onion``, when no
        hostname can be extracted from its URL, or when the MD5 hex digest
        of its hostname is absent from the blacklist; ``False`` otherwise.
    """
    if not result.get('is_onion'):
        return True

    url = result.get('url')
    try:
        hostname = urlparse(url).hostname if url else None
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. an unbalanced '[').
        hostname = None

    if hostname is None:
        # Nothing to hash, so the result cannot be matched against the
        # blacklist.  Keep it instead of crashing the whole post_search hook.
        return True

    result_hash = md5(hostname.encode()).hexdigest()
    return result_hash not in get_ahmia_blacklist()


def post_search(request, search):
    """Plugin hook: drop blacklisted onion results from a finished search.

    Args:
        request: unused by this plugin.
        search: object whose ``result_container._merged_results`` list is
            replaced by a filtered copy.

    Returns:
        Always ``True``.
    """
    # NOTE(review): this reaches into the container's private
    # ``_merged_results`` attribute; confirm no public accessor exists.
    container = search.result_container
    container._merged_results = [
        result for result in container._merged_results if not_blacklisted(result)
    ]
    return True