diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2017-11-01 21:27:57 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-11-01 21:27:57 +0100 |
| commit | 3d50b0288dc2ba42baf550353f3fb5bee6462754 (patch) | |
| tree | 3931e34b8501cd58db58cb651b0193ee46c23eb8 /searx/plugins/oa_doi_rewrite.py | |
| parent | 6d28e9d6945b5510b3d861e20521554435a10f63 (diff) | |
| parent | 5954a8e16a64a369072a7487f62b6396a451ae5f (diff) | |
Merge pull request #1075 from kvch/finish-jibe-b-engines
Finish PRs of @jibe-b: pubmed, oa_doi_rewrite, openaire, arxiv
Diffstat (limited to 'searx/plugins/oa_doi_rewrite.py')
| -rw-r--r-- | searx/plugins/oa_doi_rewrite.py | 44 |
1 files changed, 44 insertions, 0 deletions
# Reconstructed from the collapsed one-line diff that adds the new file
# searx/plugins/oa_doi_rewrite.py (index 000000000..b62ef0b1e).
import re

from flask_babel import gettext

from searx import settings
from searx.url_utils import urlparse, parse_qsl


# DOI-shaped token: "10.", 4 to 9 digits, "/", then a run of non-whitespace.
regex = re.compile(r'10\.\d{4,9}/[^\s]+')

# Plugin metadata read by the plugin loader.
name = gettext('Open Access DOI rewrite')
description = gettext('Avoid paywalls by redirecting to open-access versions of publications when available')
default_on = False
preference_section = 'privacy'

doi_resolvers = settings['doi_resolvers']


def extract_doi(url):
    """Return the first DOI-like string found in a parsed URL, or None.

    The URL path is searched first; if it holds no match, every value of
    the query string is searched in order.

    :param url: parsed URL object exposing ``path`` and ``query``.
    :returns: the matched text, or ``None`` when nothing matches.
    """
    match = regex.search(url.path)
    if match:
        return match.group(0)
    for _, value in parse_qsl(url.query):
        match = regex.search(value)
        if match:
            return match.group(0)
    return None


def get_doi_resolver(args, preference_doi_resolver):
    """Select the DOI resolver for the current request.

    The ``doi_resolver`` request argument wins over the user preference.
    A value that is not listed in ``settings['doi_resolvers']`` is replaced
    by ``settings['default_doi_resolver']``.

    :param args: request arguments; must provide ``get(key, default)``.
    :param preference_doi_resolver: value used when ``args`` has no
        ``doi_resolver`` entry.
    :returns: the selected resolver.
    """
    doi_resolvers = settings['doi_resolvers']
    # NOTE(review): the [0] assumes the looked-up value is a sequence whose
    # first item is the resolver; if ``args.get`` yields a plain string this
    # takes its first character instead -- confirm against the caller.
    doi_resolver = args.get('doi_resolver', preference_doi_resolver)[0]
    if doi_resolver not in doi_resolvers:
        # Unknown resolver: fall back to the configured default. The result
        # must be stored in ``doi_resolver`` (singular), which is what gets
        # returned; rebinding ``doi_resolvers`` would leave the invalid
        # choice in place.
        doi_resolver = settings['default_doi_resolver']
    # NOTE(review): on_result() concatenates this value with the DOI, so it
    # has to be a URL prefix. If settings['doi_resolvers'] maps names to
    # URLs, this should presumably return doi_resolvers[doi_resolver] --
    # verify against settings.yml.
    return doi_resolver


def on_result(request, search, result):
    """Rewrite a result URL so that it points at the chosen DOI resolver.

    When a DOI shorter than 50 characters is found in the result's parsed
    URL, trailing page suffixes are stripped from it and ``result['url']``
    and ``result['parsed_url']`` are replaced in place.

    :param request: current request; ``args`` and ``preferences`` are read.
    :param search: unused.
    :param result: result dict, modified in place.
    :returns: always ``True``.
    """
    doi = extract_doi(result['parsed_url'])
    if doi and len(doi) < 50:
        # The DOI regex is greedy up to the next whitespace, so it can
        # swallow trailing path pieces; each suffix is checked once, in
        # this order, against the progressively shortened DOI.
        for suffix in ('/', '.pdf', '/full', '/meta', '/abstract'):
            if doi.endswith(suffix):
                doi = doi[:-len(suffix)]
        result['url'] = get_doi_resolver(request.args, request.preferences.get_value('doi_resolver')) + doi
        result['parsed_url'] = urlparse(result['url'])
    return True