summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/admin/engines/index.rst1
-rw-r--r--docs/admin/engines/nosql-engines.rst136
-rw-r--r--requirements-dev.txt4
-rw-r--r--searx/engines/peertube.py2
-rw-r--r--searx/engines/qwant.py23
-rw-r--r--searx/engines/seznam.py4
-rw-r--r--searx/settings.yml17
-rw-r--r--searx/settings_defaults.py2
8 files changed, 169 insertions, 20 deletions
diff --git a/docs/admin/engines/index.rst b/docs/admin/engines/index.rst
index 80f4120a5..f488731ea 100644
--- a/docs/admin/engines/index.rst
+++ b/docs/admin/engines/index.rst
@@ -17,6 +17,7 @@ Engines & Settings
private-engines
recoll
sql-engines
+ nosql-engines
search-indexer-engines
command-line-engines
searx.engines.xpath
diff --git a/docs/admin/engines/nosql-engines.rst b/docs/admin/engines/nosql-engines.rst
new file mode 100644
index 000000000..19a723c4e
--- /dev/null
+++ b/docs/admin/engines/nosql-engines.rst
@@ -0,0 +1,136 @@
+===============
+NoSQL databases
+===============
+
+.. sidebar:: further read
+
+ - `NoSQL databases <https://en.wikipedia.org/wiki/NoSQL>`_
+ - `redis.io <https://redis.io/>`_
+ - `MongoDB <https://www.mongodb.com>`_
+
+The following `NoSQL databases`_ are supported:
+
+- :ref:`engine redis_server`
+- :ref:`engine mongodb`
+
+All of the engines above are just commented out in the :origin:`settings.yml
+<searx/settings.yml>`, as you have to set various options and install
+dependencies before using them.
+
+By default, the engines use the ``key-value`` template for displaying results /
+see :origin:`oscar <searx/templates/oscar/result_templates/key-value.html>` &
+:origin:`simple <searx/templates/simple/result_templates/key-value.html>`
+themes. If you are not satisfied with the original result layout, you can use
+your own template, set ``result_template`` attribute to ``{template_name}`` and
+place the templates at::
+
+ searx/templates/{theme_name}/result_templates/{template_name}
+
+Furthermore, if you do not wish to expose these engines on a public instance, you
+can still add them and limit the access by setting ``tokens`` as described in
+section :ref:`private engines`.
+
+
+Configure the engines
+=====================
+
+`NoSQL databases`_ are used for storing arbitrary data without first defining
+their structure.
+
+
+Extra Dependencies
+------------------
+
+For using :ref:`engine redis_server` or :ref:`engine mongodb` you need to
+install additional packages in Python's Virtual Environment of your SearxNG
+instance. To switch into the environment (:ref:`searx-src`) you can use
+:ref:`searx.sh`::
+
+ $ sudo utils/searx.sh shell
+ (searx-pyenv)$ pip install ...
+
+
+.. _engine redis_server:
+
+Redis Server
+------------
+
+.. _redis: https://github.com/andymccurdy/redis-py#installation
+
+.. sidebar:: info
+
+ - ``pip install`` redis_
+ - redis.io_
+ - :origin:`redis_server.py <searx/engines/redis_server.py>`
+
+
+Redis is an open source (BSD licensed), in-memory data structure (key value
+based) store. Before configuring the ``redis_server`` engine, you must install
+the dependency redis_.
+
+Select a database to search in and set its index in the option ``db``. You can
+either look for exact matches or use partial keywords to find what you are
+looking for by configuring ``exact_match_only``. You can find an example
+configuration below:
+
+.. code:: yaml
+
+ # Required dependency: redis
+
+ - name: myredis
+ shortcut : rds
+ engine: redis_server
+ exact_match_only: false
+ host: '127.0.0.1'
+ port: 6379
+ enable_http: true
+ password: ''
+ db: 0
+
+.. _engine mongodb:
+
+MongoDB
+-------
+
+.. _pymongo: https://github.com/mongodb/mongo-python-driver#installation
+
+.. sidebar:: info
+
+ - ``pip install`` pymongo_
+ - MongoDB_
+ - :origin:`mongodb.py <searx/engines/mongodb.py>`
+
+MongoDB_ is a document based database program that handles JSON like data.
+Before configuring the ``mongodb`` engine, you must install the dependency
+pymongo_.
+
+In order to query MongoDB_, you have to select a ``database`` and a
+``collection``. Furthermore, you have to select a ``key`` that is going to be
+searched. MongoDB_ also supports the option ``exact_match_only``, so configure
+it as you wish. Below is an example configuration for using a MongoDB
+collection:
+
+.. code:: yaml
+
+ # MongoDB engine
+ # Required dependency: pymongo
+
+ - name: mymongo
+ engine: mongodb
+ shortcut: md
+ exact_match_only: false
+ host: '127.0.0.1'
+ port: 27017
+ enable_http: true
+ results_per_page: 20
+ database: 'business'
+ collection: 'reviews' # name of the db collection
+ key: 'name' # key in the collection to search for
+
+
+Acknowledgment
+==============
+
+This development was sponsored by `Search and Discovery Fund
+<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_.
+
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 4b3527d0b..08641eef3 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -2,11 +2,11 @@ mock==4.0.3
nose2[coverage_plugin]==0.10.0
cov-core==1.15.0
pycodestyle==2.7.0
-pylint==2.9.3
+pylint==2.9.5
splinter==0.15.0
transifex-client==0.14.3
selenium==3.141.0
-twine==3.4.1
+twine==3.4.2
Pallets-Sphinx-Themes==2.0.1
Sphinx==4.1.1
sphinx-issues==1.2.0
diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py
index 86cd04f2d..058065c03 100644
--- a/searx/engines/peertube.py
+++ b/searx/engines/peertube.py
@@ -97,6 +97,6 @@ def _fetch_supported_languages(resp):
import re
# https://docs.python.org/3/howto/regex.html#greedy-versus-non-greedy
- videolanguages = re.search(r"videoLanguages \(\) \{(.*?)\]", resp.text, re.DOTALL)
+ videolanguages = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL)
peertube_languages = [m.group(1) for m in re.finditer(r"\{ id: '([a-z]+)', label:", videolanguages.group(1))]
return peertube_languages
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index 97e461177..8d03d8324 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -84,14 +84,16 @@ def request(query, params):
)
# add language tag
- if params['language'] != 'all':
+ if params['language'] == 'all':
+ params['url'] += '&locale=en_us'
+ else:
language = match_language(
params['language'],
# pylint: disable=undefined-variable
supported_languages,
language_aliases,
)
- params['url'] += '&locale=' + language.replace('-', '_')
+ params['url'] += '&locale=' + language.replace('-', '_').lower()
params['raise_for_httperror'] = False
return params
@@ -144,8 +146,8 @@ def response(resp):
mainline_items = row.get('items', [])
for item in mainline_items:
- title = item['title']
- res_url = item['url']
+ title = item.get('title', None)
+ res_url = item.get('url', None)
if mainline_type == 'web':
content = item['desc']
@@ -156,7 +158,10 @@ def response(resp):
})
elif mainline_type == 'news':
- pub_date = datetime.fromtimestamp(item['date'], None)
+
+ pub_date = item['date']
+ if pub_date is not None:
+ pub_date = datetime.fromtimestamp(pub_date)
news_media = item.get('media', [])
img_src = None
if news_media:
@@ -192,8 +197,12 @@ def response(resp):
if c:
content_parts.append("%s: %s " % (gettext("Channel"), c))
content = ' // '.join(content_parts)
- length = timedelta(seconds=item['duration'])
- pub_date = datetime.fromtimestamp(item['date'])
+ length = item['duration']
+ if length is not None:
+ length = timedelta(milliseconds=length)
+ pub_date = item['date']
+ if pub_date is not None:
+ pub_date = datetime.fromtimestamp(pub_date)
thumbnail = item['thumbnail']
# from some locations (DE and others?) the s2 link do
# response a 'Please wait ..' but does not deliver the thumbnail
diff --git a/searx/engines/seznam.py b/searx/engines/seznam.py
index 042088dbe..9cd50dfc0 100644
--- a/searx/engines/seznam.py
+++ b/searx/engines/seznam.py
@@ -53,14 +53,14 @@ def response(resp):
dom = html.fromstring(resp.content.decode())
for result_element in eval_xpath_list(dom, '//div[@data-dot="results"]/div'):
- result_data = eval_xpath_getindex(result_element, './/div[contains(@class, "Result")]', 0, default=None)
+ result_data = eval_xpath_getindex(result_element, './/div[contains(@class, "bec586")]', 0, default=None)
if result_data is None:
continue
title_element = eval_xpath_getindex(result_element, './/h3/a', 0)
results.append({
'url': title_element.get('href'),
'title': extract_text(title_element),
- 'content': extract_text(eval_xpath(result_data, './/p[@class="Result-description"]')),
+ 'content': extract_text(eval_xpath(result_data, './/div[@class="_3eded7"]')),
})
return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 5307631dd..ac9a14064 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -825,6 +825,7 @@ engines:
# exact_match_only: false
# host: '127.0.0.1'
# port: 27017
+ # enable_http: true
# results_per_page: 20
# database: 'business'
# collection: 'reviews' # name of the db collection
@@ -1053,13 +1054,15 @@ engines:
disabled: true
# Required dependency: redis
- # - name: myredis
- # engine: redis_server
- # exact_match_only: False
- # host: '127.0.0.1'
- # port: 6379
- # password: ''
- # db: 0
+ # - name: myredis
+ # shortcut : rds
+ # engine: redis_server
+ # exact_match_only: false
+ # host: '127.0.0.1'
+ # port: 6379
+ # enable_http: true
+ # password: ''
+ # db: 0
# tmp suspended: bad certificate
# - name: scanr structures
diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py
index ccf4df5cd..72d7dc588 100644
--- a/searx/settings_defaults.py
+++ b/searx/settings_defaults.py
@@ -30,7 +30,7 @@ CATEGORY_ORDER = [
'it',
'science',
'files',
- 'social medias',
+ 'social media',
]
STR_TO_BOOL = {
'0': False,