diff options
| author | Noémi Ványi <kvch@users.noreply.github.com> | 2020-11-30 08:35:15 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-11-30 08:35:15 +0100 |
| commit | 4a36a3044d6e39bc60d026d99ed7a010f6505a5f (patch) | |
| tree | 7037ea2a1e86ec7d308a5d231eae3aa0897e0f87 /searx/engines/recoll.py | |
| parent | 93c2603561c039fb43137c251493e77032f91743 (diff) | |
Add recoll engine (#2325)
recoll is a local search engine based on Xapian:
http://www.lesbonscomptes.com/recoll/
By itself, recoll does not offer web or API access;
this can be achieved using recoll-webui:
https://framagit.org/medoc92/recollwebui.git
This engine uses a custom 'files' result template.
Set `base_url` to the location where recoll-webui can be reached.
Set `dl_prefix` to a location where the file hierarchy as indexed by recoll can be reached.
Set `search_dir` to the part of the indexed file hierarchy to be searched; use an empty string to search the entire search domain.
Diffstat (limited to 'searx/engines/recoll.py')
| -rw-r--r-- | searx/engines/recoll.py | 104 |
1 files changed, 104 insertions, 0 deletions
# diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py
# new file mode 100644, index 000000000..5a956b8bf
"""
 Recoll (local search engine)

 @using-api   yes
 @results     JSON
 @stable      yes
 @parse       url, content, size, abstract, author, mtype, subtype, time,
              filename, label, type, embedded
"""

from datetime import date, timedelta
from json import loads
from urllib.parse import urlencode, quote

# engine dependent config
time_range_support = True

# parameters from settings.yml
base_url = None      # prefix of the JSON endpoint; 'json?...' is appended to it
search_dir = ''      # sent as the 'dir' query argument
mount_prefix = None  # 'file://' + mount_prefix is replaced in result URLs ...
dl_prefix = None     # ... by this prefix

# embedded: HTML snippet used to preview audio / video results
embedded_url = (
    '<{ttype} controls height="166px" '
    'src="{url}" type="{mtype}"></{ttype}>'
)


# helper functions
def get_time_range(time_range):
    """Return the ISO date that lies ``time_range`` before today.

    ``time_range`` is one of 'day', 'week', 'month' or 'year'.  Any other
    value (including ``None`` or an empty string) yields an empty string.
    """
    days_by_range = {
        'day': 1,
        'week': 7,
        'month': 30,
        'year': 365,
    }

    offset = days_by_range.get(time_range, 0)
    if not offset:
        return ''

    return (date.today() - timedelta(days=offset)).isoformat()


# do search-request
def request(query, params):
    """Store the search URL for ``query`` in ``params['url']``.

    The URL is ``base_url`` followed by ``json?`` and the url-encoded
    arguments ``query``, ``after`` (derived from ``params['time_range']``)
    and ``dir`` (``search_dir``), plus ``highlight=0``.  Returns ``params``.
    """
    search_after = get_time_range(params.get('time_range'))
    args = urlencode({
        'query': query,
        'after': search_after,
        'dir': search_dir,
    })
    # Plain concatenation instead of str.format(): braces that happen to be
    # part of the configured base_url must not be read as format fields.
    params['url'] = base_url + 'json?' + args + '&highlight=0'

    return params


# get response from search-request
def response(resp):
    """Convert the JSON text in ``resp.text`` into a list of result items.

    Every entry of the ``results`` list becomes a dict that uses the
    'files.html' template.  ``url`` and ``label`` are required for each
    entry; all other fields are optional and are copied only when present
    and non-empty.  If the payload has an ``nres`` key, a final
    ``{'number_of_results': ...}`` entry is appended.
    """
    results = []

    response_json = loads(resp.text)

    if not response_json:
        return []

    for result in response_json.get('results', []):
        url = result['url']
        # Rewrite the local file URL only when both prefixes are configured.
        if mount_prefix is not None and dl_prefix is not None:
            url = url.replace('file://' + mount_prefix, dl_prefix)

        # append result
        item = {'url': url,
                'title': result['label'],
                'content': '{}'.format(result.get('snippet', '')),
                'template': 'files.html'}

        size = result.get('size')
        if size:
            item['size'] = int(size)

        for parameter in ('filename', 'abstract', 'author', 'mtype', 'time'):
            value = result.get(parameter)
            if value:
                item[parameter] = value

        # facilitate preview support for known mime types
        mime = result.get('mtype')
        if isinstance(mime, str) and '/' in mime:
            # maxsplit=1: anything after the first '/' belongs to the subtype
            mtype, subtype = mime.split('/', 1)
            item['mtype'] = mtype
            item['subtype'] = subtype

            if mtype in ('audio', 'video'):
                item['embedded'] = embedded_url.format(
                    ttype=mtype,
                    url=quote(url.encode('utf8'), '/:'),
                    mtype=mime)

            if mtype == 'image' and subtype in ('bmp', 'gif', 'jpeg', 'png'):
                item['img_src'] = url

        results.append(item)

    if 'nres' in response_json:
        results.append({'number_of_results': response_json['nres']})

    return results