diff options
| author | David A Roberts <d@vidr.cc> | 2016-07-16 19:26:29 +1000 |
|---|---|---|
| committer | David A Roberts <d@vidr.cc> | 2016-08-10 11:15:30 +1000 |
| commit | 117d36b66eae1a36d730135f6a659d515f2514f2 (patch) | |
| tree | 7a8fd1965e26799409a9717c37ecf3bd5d02f47f /searx/plugins/doai_rewrite.py | |
| parent | 53c9185cbeb05026909adec164b859e09850c74e (diff) | |
Add Crossref search engine and DOAI rewrite plugin
Diffstat (limited to 'searx/plugins/doai_rewrite.py')
| -rw-r--r-- | searx/plugins/doai_rewrite.py | 31 |
1 file changed, 31 insertions, 0 deletions
# Origin: searx/plugins/doai_rewrite.py (added as a new file, mode 100644).
import re

from flask_babel import gettext

try:
    # Python 3: the URL helpers live in urllib.parse.
    from urllib.parse import parse_qsl, urlparse
except ImportError:
    # Python 2: fall back to the standalone ``urlparse`` module.
    from urlparse import parse_qsl, urlparse

# Matches a DOI-shaped token: "10.", a 4-9 digit registrant prefix, "/",
# then every following non-whitespace character.
regex = re.compile(r'10\.\d{4,9}/[^\s]+')

# Plugin metadata exposed as module-level attributes.
# NOTE(review): presumably read by the searx plugin loader -- confirm there.
name = gettext('DOAI rewrite')
description = gettext('Avoid paywalls by redirecting to open-access versions of publications when available')
default_on = False


def extract_doi(url):
    """Return the first DOI-like string found in a parsed URL, or None.

    The URL path is searched first; if it holds no match, each
    query-string value is searched in order.

    :param url: parsed URL object exposing ``path`` and ``query``
        attributes (e.g. the result of ``urlparse``).
    :returns: the matched text, or ``None`` when nothing matches.
    """
    match = regex.search(url.path)
    if match:
        return match.group(0)
    # Query keys are ignored; only the values are searched.
    for _, value in parse_qsl(url.query):
        match = regex.search(value)
        if match:
            return match.group(0)
    return None


def on_result(request, ctx):
    """Rewrite a result's URL to doai.io when the URL contains a DOI.

    Reads ``ctx['result']['parsed_url']``; when a DOI shorter than 50
    characters is found, replaces ``ctx['result']['url']`` and
    ``ctx['result']['parsed_url']`` in place.

    :param request: unused by this hook.
    :param ctx: mapping holding the current ``result`` dict.
    :returns: always ``True``.
        NOTE(review): presumably tells the caller to keep the result --
        confirm against the plugin dispatcher.
    """
    doi = extract_doi(ctx['result']['parsed_url'])
    if doi and len(doi) < 50:
        # The regex grabs everything after the DOI prefix, so trailing
        # page markers are stripped. Each suffix is checked once, in this
        # order, so e.g. "....pdf/" loses "/" first and then ".pdf".
        for suffix in ('/', '.pdf', '/full', '/meta', '/abstract'):
            if doi.endswith(suffix):
                doi = doi[:-len(suffix)]
        ctx['result']['url'] = 'http://doai.io/' + doi
        # Keep the parsed form in sync with the rewritten URL.
        ctx['result']['parsed_url'] = urlparse(ctx['result']['url'])
    return True