From 575159b194440052d7b48aa073d7e03c80799c90 Mon Sep 17 00:00:00 2001 From: jibe-b Date: Fri, 22 Sep 2017 23:43:05 +0200 Subject: [enh] oa_doi_rewrite plugin broadens doai_rewrite --- searx/plugins/__init__.py | 4 ++-- searx/plugins/doai_rewrite.py | 32 ----------------------------- searx/plugins/oa_doi_rewrite.py | 45 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 34 deletions(-) delete mode 100644 searx/plugins/doai_rewrite.py create mode 100644 searx/plugins/oa_doi_rewrite.py (limited to 'searx/plugins') diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index 46c1f8918..4dbcbbd28 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -22,7 +22,7 @@ if version_info[0] == 3: logger = logger.getChild('plugins') -from searx.plugins import (doai_rewrite, +from searx.plugins import (oa_doi_rewrite, https_rewrite, infinite_scroll, open_results_on_new_tab, @@ -78,7 +78,7 @@ class PluginStore(): plugins = PluginStore() -plugins.register(doai_rewrite) +plugins.register(oa_doi_rewrite) plugins.register(https_rewrite) plugins.register(infinite_scroll) plugins.register(open_results_on_new_tab) diff --git a/searx/plugins/doai_rewrite.py b/searx/plugins/doai_rewrite.py deleted file mode 100644 index b7b814fac..000000000 --- a/searx/plugins/doai_rewrite.py +++ /dev/null @@ -1,32 +0,0 @@ -from flask_babel import gettext -import re -from searx.url_utils import urlparse, parse_qsl - -regex = re.compile(r'10\.\d{4,9}/[^\s]+') - -name = gettext('DOAI rewrite') -description = gettext('Avoid paywalls by redirecting to open-access versions of publications when available') -default_on = False -preference_section = 'privacy' - - -def extract_doi(url): - match = regex.search(url.path) - if match: - return match.group(0) - for _, v in parse_qsl(url.query): - match = regex.search(v) - if match: - return match.group(0) - return None - - -def on_result(request, search, result): - doi = extract_doi(result['parsed_url']) - if doi and len(doi) < 50: - for suffix in ('/', '.pdf', '/full', '/meta', '/abstract'): - if doi.endswith(suffix): - doi = doi[:-len(suffix)] - result['url'] = 'http://doai.io/' + doi - result['parsed_url'] = urlparse(result['url']) - return True diff --git a/searx/plugins/oa_doi_rewrite.py b/searx/plugins/oa_doi_rewrite.py new file mode 100644 index 000000000..e952c86f5 --- /dev/null +++ b/searx/plugins/oa_doi_rewrite.py @@ -0,0 +1,45 @@ +from flask_babel import gettext +import re +from searx.url_utils import urlparse, parse_qsl +from flask import request +from searx import settings + + +regex = re.compile(r'10\.\d{4,9}/[^\s]+') + +name = gettext('Open Access DOI rewrite') +description = gettext('Avoid paywalls by redirecting to open-access versions of publications when available') +default_on = False +preference_section = 'privacy' + +doi_resolvers = settings['doi_resolvers'] + + +def extract_doi(url): + match = regex.search(url.path) + if match: + return match.group(0) + for _, v in parse_qsl(url.query): + match = regex.search(v) + if match: + return match.group(0) + return None + + +def get_doi_resolver(): + doi_resolvers = settings['doi_resolvers'] + doi_resolver = request.args.get('doi_resolver', request.preferences.get_value('doi_resolver'))[0] + if doi_resolver not in doi_resolvers: + doi_resolvers = settings['default_doi_resolver'] + return doi_resolvers[doi_resolver] + + +def on_result(request, search, result): + doi = extract_doi(result['parsed_url']) + if doi and len(doi) < 50: + for suffix in ('/', '.pdf', '/full', '/meta', '/abstract'): + if doi.endswith(suffix): + doi = doi[:-len(suffix)] + result['url'] = get_doi_resolver() + doi + result['parsed_url'] = urlparse(result['url']) + return True -- cgit v1.2.3 From 1b10abfc92afed36627c7052b89d2de5e7f537ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Wed, 1 Nov 2017 13:58:48 +0100 Subject: minor fixes of doi resolver Closes #1047 --- searx/plugins/oa_doi_rewrite.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'searx/plugins') diff --git a/searx/plugins/oa_doi_rewrite.py b/searx/plugins/oa_doi_rewrite.py index e952c86f5..b62ef0b1e 100644 --- a/searx/plugins/oa_doi_rewrite.py +++ b/searx/plugins/oa_doi_rewrite.py @@ -1,7 +1,6 @@ from flask_babel import gettext import re from searx.url_utils import urlparse, parse_qsl -from flask import request from searx import settings @@ -26,12 +25,12 @@ def extract_doi(url): return None -def get_doi_resolver(): +def get_doi_resolver(args, preference_doi_resolver): doi_resolvers = settings['doi_resolvers'] - doi_resolver = request.args.get('doi_resolver', request.preferences.get_value('doi_resolver'))[0] + doi_resolver = args.get('doi_resolver', preference_doi_resolver)[0] if doi_resolver not in doi_resolvers: doi_resolvers = settings['default_doi_resolver'] - return doi_resolvers[doi_resolver] + return doi_resolver def on_result(request, search, result): @@ -40,6 +39,6 @@ def on_result(request, search, result): for suffix in ('/', '.pdf', '/full', '/meta', '/abstract'): if doi.endswith(suffix): doi = doi[:-len(suffix)] - result['url'] = get_doi_resolver() + doi + result['url'] = get_doi_resolver(request.args, request.preferences.get_value('doi_resolver')) + doi result['parsed_url'] = urlparse(result['url']) return True -- cgit v1.2.3