diff options
| -rw-r--r-- | docs/admin/engines/index.rst | 1 | ||||
| -rw-r--r-- | docs/admin/engines/nosql-engines.rst | 136 | ||||
| -rw-r--r-- | requirements-dev.txt | 4 | ||||
| -rw-r--r-- | searx/engines/peertube.py | 2 | ||||
| -rw-r--r-- | searx/engines/qwant.py | 23 | ||||
| -rw-r--r-- | searx/engines/seznam.py | 4 | ||||
| -rw-r--r-- | searx/settings.yml | 17 | ||||
| -rw-r--r-- | searx/settings_defaults.py | 2 |
8 files changed, 169 insertions, 20 deletions
diff --git a/docs/admin/engines/index.rst b/docs/admin/engines/index.rst index 80f4120a5..f488731ea 100644 --- a/docs/admin/engines/index.rst +++ b/docs/admin/engines/index.rst @@ -17,6 +17,7 @@ Engines & Settings private-engines recoll sql-engines + nosql-engines search-indexer-engines command-line-engines searx.engines.xpath diff --git a/docs/admin/engines/nosql-engines.rst b/docs/admin/engines/nosql-engines.rst new file mode 100644 index 000000000..19a723c4e --- /dev/null +++ b/docs/admin/engines/nosql-engines.rst @@ -0,0 +1,136 @@ +=============== +NoSQL databases +=============== + +.. sidebar:: further read + + - `NoSQL databases <https://en.wikipedia.org/wiki/NoSQL>`_ + - `redis.io <https://redis.io/>`_ + - `MongoDB <https://www.mongodb.com>`_ + +The following `NoSQL databases`_ are supported: + +- :ref:`engine redis_server` +- :ref:`engine mongodb` + +All of the engines above are just commented out in the :origin:`settings.yml +<searx/settings.yml>`, as you have to set various options and install +dependencies before using them. + +By default, the engines use the ``key-value`` template for displaying results / +see :origin:`oscar <searx/templates/oscar/result_templates/key-value.html>` & +:origin:`simple <searx/templates/simple/result_templates/key-value.html>` +themes. If you are not satisfied with the original result layout, you can use +your own template, set ``result_template`` attribute to ``{template_name}`` and +place the templates at:: + + searx/templates/{theme_name}/result_templates/{template_name} + +Furthermore, if you do not wish to expose these engines on a public instance, you +can still add them and limit the access by setting ``tokens`` as described in +section :ref:`private engines`. + + +Configure the engines +===================== + +`NoSQL databases`_ are used for storing arbitrary data without first defining +their structure. 
+ + +Extra Dependencies +------------------ + +For using :ref:`engine redis_server` or :ref:`engine mongodb` you need to +install additional packages in Python's Virtual Environment of your SearxNG +instance. To switch into the environment (:ref:`searx-src`) you can use +:ref:`searx.sh`:: + + $ sudo utils/searx.sh shell + (searx-pyenv)$ pip install ... + + +.. _engine redis_server: + +Redis Server +------------ + +.. _redis: https://github.com/andymccurdy/redis-py#installation + +.. sidebar:: info + + - ``pip install`` redis_ + - redis.io_ + - :origin:`redis_server.py <searx/engines/redis_server.py>` + + +Redis is an open source (BSD licensed), in-memory data structure (key value +based) store. Before configuring the ``redis_server`` engine, you must install +the dependency redis_. + +Select a database to search in and set its index in the option ``db``. You can +either look for exact matches or use partial keywords to find what you are +looking for by configuring ``exact_match_only``. You find an example +configuration below: + +.. code:: yaml + + # Required dependency: redis + + - name: myredis + shortcut : rds + engine: redis_server + exact_match_only: false + host: '127.0.0.1' + port: 6379 + enable_http: true + password: '' + db: 0 + +.. _engine mongodb: + +MongoDB +------- + +.. _pymongo: https://github.com/mongodb/mongo-python-driver#installation + +.. sidebar:: info + + - ``pip install`` pymongo_ + - MongoDB_ + - :origin:`mongodb.py <searx/engines/mongodb.py>` + +MongoDB_ is a document based database program that handles JSON like data. +Before configuring the ``mongodb`` engine, you must install the dependency +pymongo_. + +In order to query MongoDB_, you have to select a ``database`` and a +``collection``. Furthermore, you have to select a ``key`` that is going to be +searched. MongoDB_ also supports the option ``exact_match_only``, so configure +it as you wish. Below is an example configuration for using a MongoDB +collection: + +.. 
code:: yaml + + # MongoDB engine + # Required dependency: pymongo + + - name: mymongo + engine: mongodb + shortcut: md + exact_match_only: false + host: '127.0.0.1' + port: 27017 + enable_http: true + results_per_page: 20 + database: 'business' + collection: 'reviews' # name of the db collection + key: 'name' # key in the collection to search for + + +Acknowledgment +============== + +This development was sponsored by `Search and Discovery Fund +<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_. + diff --git a/requirements-dev.txt b/requirements-dev.txt index 4b3527d0b..08641eef3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,11 +2,11 @@ mock==4.0.3 nose2[coverage_plugin]==0.10.0 cov-core==1.15.0 pycodestyle==2.7.0 -pylint==2.9.3 +pylint==2.9.5 splinter==0.15.0 transifex-client==0.14.3 selenium==3.141.0 -twine==3.4.1 +twine==3.4.2 Pallets-Sphinx-Themes==2.0.1 Sphinx==4.1.1 sphinx-issues==1.2.0 diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py index 86cd04f2d..058065c03 100644 --- a/searx/engines/peertube.py +++ b/searx/engines/peertube.py @@ -97,6 +97,6 @@ def _fetch_supported_languages(resp): import re # https://docs.python.org/3/howto/regex.html#greedy-versus-non-greedy - videolanguages = re.search(r"videoLanguages \(\) \{(.*?)\]", resp.text, re.DOTALL) + videolanguages = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL) peertube_languages = [m.group(1) for m in re.finditer(r"\{ id: '([a-z]+)', label:", videolanguages.group(1))] return peertube_languages diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 97e461177..8d03d8324 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -84,14 +84,16 @@ def request(query, params): ) # add language tag - if params['language'] != 'all': + if params['language'] == 'all': + params['url'] += '&locale=en_us' + else: language = match_language( params['language'], # pylint: disable=undefined-variable supported_languages, 
language_aliases, ) - params['url'] += '&locale=' + language.replace('-', '_') + params['url'] += '&locale=' + language.replace('-', '_').lower() params['raise_for_httperror'] = False return params @@ -144,8 +146,8 @@ def response(resp): mainline_items = row.get('items', []) for item in mainline_items: - title = item['title'] - res_url = item['url'] + title = item.get('title', None) + res_url = item.get('url', None) if mainline_type == 'web': content = item['desc'] @@ -156,7 +158,10 @@ def response(resp): }) elif mainline_type == 'news': - pub_date = datetime.fromtimestamp(item['date'], None) + + pub_date = item['date'] + if pub_date is not None: + pub_date = datetime.fromtimestamp(pub_date) news_media = item.get('media', []) img_src = None if news_media: @@ -192,8 +197,12 @@ def response(resp): if c: content_parts.append("%s: %s " % (gettext("Channel"), c)) content = ' // '.join(content_parts) - length = timedelta(seconds=item['duration']) - pub_date = datetime.fromtimestamp(item['date']) + length = item['duration'] + if length is not None: + length = timedelta(milliseconds=length) + pub_date = item['date'] + if pub_date is not None: + pub_date = datetime.fromtimestamp(pub_date) thumbnail = item['thumbnail'] # from some locations (DE and others?) the s2 link do # response a 'Please wait ..' 
but does not deliver the thumbnail diff --git a/searx/engines/seznam.py b/searx/engines/seznam.py index 042088dbe..9cd50dfc0 100644 --- a/searx/engines/seznam.py +++ b/searx/engines/seznam.py @@ -53,14 +53,14 @@ def response(resp): dom = html.fromstring(resp.content.decode()) for result_element in eval_xpath_list(dom, '//div[@data-dot="results"]/div'): - result_data = eval_xpath_getindex(result_element, './/div[contains(@class, "Result")]', 0, default=None) + result_data = eval_xpath_getindex(result_element, './/div[contains(@class, "bec586")]', 0, default=None) if result_data is None: continue title_element = eval_xpath_getindex(result_element, './/h3/a', 0) results.append({ 'url': title_element.get('href'), 'title': extract_text(title_element), - 'content': extract_text(eval_xpath(result_data, './/p[@class="Result-description"]')), + 'content': extract_text(eval_xpath(result_data, './/div[@class="_3eded7"]')), }) return results diff --git a/searx/settings.yml b/searx/settings.yml index 5307631dd..ac9a14064 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -825,6 +825,7 @@ engines: # exact_match_only: false # host: '127.0.0.1' # port: 27017 + # enable_http: true # results_per_page: 20 # database: 'business' # collection: 'reviews' # name of the db collection @@ -1053,13 +1054,15 @@ engines: disabled: true # Required dependency: redis - # - name: myredis - # engine: redis_server - # exact_match_only: False - # host: '127.0.0.1' - # port: 6379 - # password: '' - # db: 0 + # - name: myredis + # shortcut : rds + # engine: redis_server + # exact_match_only: false + # host: '127.0.0.1' + # port: 6379 + # enable_http: true + # password: '' + # db: 0 # tmp suspended: bad certificate # - name: scanr structures diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py index ccf4df5cd..72d7dc588 100644 --- a/searx/settings_defaults.py +++ b/searx/settings_defaults.py @@ -30,7 +30,7 @@ CATEGORY_ORDER = [ 'it', 'science', 'files', - 'social medias', + 
'social media', ] STR_TO_BOOL = { '0': False, |