From 4a36a3044d6e39bc60d026d99ed7a010f6505a5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Mon, 30 Nov 2020 08:35:15 +0100 Subject: Add recoll engine (#2325) recoll is a local search engine based on Xapian: http://www.lesbonscomptes.com/recoll/ By itself recoll does not offer web or API access, this can be achieved using recoll-webui: https://framagit.org/medoc92/recollwebui.git This engine uses a custom 'files' result template set `base_url` to the location where recoll-webui can be reached set `dl_prefix` to a location where the file hierarchy as indexed by recoll can be reached set `search_dir` to the part of the indexed file hierarchy to be searched, use an empty string to search the entire search domain --- searx/engines/recoll.py | 104 +++++++++++++++++++++ searx/settings.yml | 22 +++++ .../themes/oscar/less/logicodev/results.less | 19 ++++ searx/templates/oscar/macros.html | 26 ++++++ searx/templates/oscar/result_templates/files.html | 55 +++++++++++ 5 files changed, 226 insertions(+) create mode 100644 searx/engines/recoll.py create mode 100644 searx/templates/oscar/result_templates/files.html (limited to 'searx') diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py new file mode 100644 index 000000000..5a956b8bf --- /dev/null +++ b/searx/engines/recoll.py @@ -0,0 +1,104 @@ +""" + Recoll (local search engine) + + @using-api yes + @results JSON + @stable yes + @parse url, content, size, abstract, author, mtype, subtype, time, \ + filename, label, type, embedded +""" + +from datetime import date, timedelta +from json import loads +from urllib.parse import urlencode, quote + +# engine dependent config +time_range_support = True + +# parameters from settings.yml +base_url = None +search_dir = '' +mount_prefix = None +dl_prefix = None + +# embedded +embedded_url = '<{ttype} controls height="166px" ' +\ + 'src="{url}" type="{mtype}">' + + +# helper functions +def get_time_range(time_range): + sw = { + 'day': 1, + 'week': 7, + 'month': 30, + 'year': 365 + } + + offset = sw.get(time_range, 0) + if not offset: + return '' + + return (date.today() - timedelta(days=offset)).isoformat() + + +# do search-request +def request(query, params): + search_after = get_time_range(params['time_range']) + search_url = base_url + 'json?{query}&highlight=0' + params['url'] = search_url.format(query=urlencode({ + 'query': query, + 'after': search_after, + 'dir': search_dir})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + response_json = loads(resp.text) + + if not response_json: + return [] + + for result in response_json.get('results', []): + title = result['label'] + url = result['url'].replace('file://' + mount_prefix, dl_prefix) + content = '{}'.format(result['snippet']) + + # append result + item = {'url': url, + 'title': title, + 'content': content, + 'template': 'files.html'} + + if result['size']: + item['size'] = int(result['size']) + + for parameter in ['filename', 'abstract', 'author', 'mtype', 'time']: + if result[parameter]: + item[parameter] = result[parameter] + + # facilitate preview support for known mime types + if 'mtype' in result and '/' in result['mtype']: + (mtype, subtype) = result['mtype'].split('/') + item['mtype'] = mtype + item['subtype'] = subtype + + if mtype in ['audio', 'video']: + item['embedded'] = embedded_url.format( + ttype=mtype, + url=quote(url.encode('utf8'), '/:'), + mtype=result['mtype']) + + if mtype in ['image'] and subtype in ['bmp', 'gif', 'jpeg', 'png']: + item['img_src'] = url + + results.append(item) + + if 'nres' in response_json: + results.append({'number_of_results': response_json['nres']}) + + return results diff --git a/searx/settings.yml b/searx/settings.yml index 8af1a17f1..33ae234d5 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -650,6 +650,28 @@ engines: shortcut : qws categories : social media +# - name: library +# engine: recoll +# shortcut: lib +# base_url: 'https://recoll.example.org/' +# search_dir: '' +# mount_prefix: /export +# dl_prefix: 'https://download.example.org' +# timeout: 30.0 +# categories: files +# disabled: True + +# - name: recoll library reference +# engine: recoll +# base_url: 'https://recoll.example.org/' +# search_dir: reference +# mount_prefix: /export +# dl_prefix: 'https://download.example.org' +# shortcut: libr +# timeout: 30.0 +# categories: files +# disabled: True + - name : reddit engine : reddit shortcut : re diff --git a/searx/static/themes/oscar/less/logicodev/results.less b/searx/static/themes/oscar/less/logicodev/results.less index 9926d6e53..33965fb33 100644 --- a/searx/static/themes/oscar/less/logicodev/results.less +++ b/searx/static/themes/oscar/less/logicodev/results.less @@ -51,6 +51,11 @@ float: right; } +.result-abstract { + margin-top: 0.5em; + margin-bottom: 0.8em; +} + .external-link { color: @dark-green; font-size: 12px; @@ -124,6 +129,20 @@ } } +.result-metadata { + clear: both; + margin: 1em; + + td { + padding-right: 1em; + color: @gray; + } + + td:first-of-type { + color: @dark-gray; + } +} + // map formating of results .result-map { clear: both; diff --git a/searx/templates/oscar/macros.html b/searx/templates/oscar/macros.html index 57a90aaa2..2bc1e7805 100644 --- a/searx/templates/oscar/macros.html +++ b/searx/templates/oscar/macros.html @@ -47,6 +47,20 @@ {%- endif -%} {%- endmacro %} + +{% macro result_footer_nocache(result) -%} +
+
+ {% for engine in result.engines %} + {{ engine }} + {% endfor %} + {% if proxify %} + {{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info") }} + {% endif %} +
+ +{%- endmacro %} + {% macro result_footer_rtl(result, id) -%}
{{- "" -}} @@ -68,6 +82,18 @@ {%- endif %} {%- endmacro %} + +{% macro result_footer_nocache_rtl(result) -%} +
+ {% for engine in result.engines %} + {{ engine }} + {% endfor %} + {% if proxify %} + {{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info") }} + {% endif %} + +{%- endmacro %} + {% macro preferences_item_header(info, label, rtl, id) -%} {% if rtl %}
diff --git a/searx/templates/oscar/result_templates/files.html b/searx/templates/oscar/result_templates/files.html new file mode 100644 index 000000000..5e3894e0a --- /dev/null +++ b/searx/templates/oscar/result_templates/files.html @@ -0,0 +1,55 @@ +{% from 'oscar/macros.html' import result_header, result_sub_header, result_footer_nocache, result_footer_nocache_rtl, icon with context %} + +{{ result_header(result, favicons) }} +{{ result_sub_header(result) }} + +{% if result.embedded %} + +{% endif %} + +{% if result.embedded %} +
+ {{ result.embedded|safe }} +
+{% endif %} + +{% if result.abstract %}

{{ result.abstract|safe }}

{% endif %} + +{% if result.img_src %} +
+
+{{ result.title|striptags }} +{% if result.content %}

{{ result.content|safe }}

{% endif %} +
+
+{% else %} +{% if result.content %}

{{ result.content|safe }}

{% endif %} +{% endif %} + + +{% if result.author %}{% endif %} + +{% if result.filename %}{% endif %} + +{% if result.size %} +{% endif %} + +{% if result.time %}{% endif %} + +{% if result.mtype %}{% endif %} + + +{% if rtl %} +{{ result_footer_nocache_rtl(result) }} +{% else %} +{{ result_footer_nocache(result) }} +{% endif %} -- cgit v1.2.3