summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--requirements.txt4
-rw-r--r--searx/engines/mongodb.py65
-rw-r--r--searx/engines/qwant.py24
-rw-r--r--searx/settings.yml13
4 files changed, 100 insertions, 6 deletions
diff --git a/requirements.txt b/requirements.txt
index 4d14dc0cc..a8e0156c3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,11 +5,11 @@ flask==1.1.2
jinja2==2.11.3
lxml==4.6.3
pygments==2.9.0
-python-dateutil==2.8.1
+python-dateutil==2.8.2
pyyaml==5.4.1
httpx[http2]==0.17.1
Brotli==1.0.9
-uvloop==0.15.2; python_version >= '3.7'
+uvloop==0.15.3; python_version >= '3.7'
uvloop==0.14.0; python_version < '3.7'
httpx-socks[asyncio]==0.3.1
langdetect==1.0.9
diff --git a/searx/engines/mongodb.py b/searx/engines/mongodb.py
new file mode 100644
index 000000000..1f24c5acf
--- /dev/null
+++ b/searx/engines/mongodb.py
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+# pylint: disable=missing-function-docstring
+"""MongoDB engine (Offline)
+
+"""
+
+import re
+from pymongo import MongoClient # pylint: disable=import-error
+
+# engine type read by the searx engine loader
+# NOTE(review): presumably selects the offline (non-HTTP) processor -- confirm
+engine_type = 'offline'
+
+# mongodb connection variables
+# (module defaults; the commented example in settings.yml sets host, port,
+# database, collection and key per engine instance)
+host = '127.0.0.1'
+port = 27017
+# username / password are only passed to MongoClient when they are non-empty
+username = ''
+password = ''
+# names used by connect() to select the database and the collection; both
+# default to None, so they presumably have to be set in settings.yml
+database = None
+collection = None
+# name of the document field that search() matches the query against
+key = None
+
+# engine specific variables
+paging = True
+# page size used for skip() / limit() in search()
+results_per_page = 20
+# True: compare with $eq; False: case-insensitive $regex match built from
+# the escaped query
+exact_match_only = False
+# template name attached to every result row returned by search()
+result_template = 'key-value.html'
+
+# collection handle assigned by connect(); stays None until connect() has run
+_client = None
+
+def init(_):
+    """Engine start-up hook: open the MongoDB collection handle.
+
+    The single positional argument is ignored; all settings are read from
+    the module-level variables by ``connect()``.
+    """
+    connect()
+
+def connect():
+    """Bind the module-level ``_client`` to the configured collection.
+
+    ``port`` is always passed to ``MongoClient``; ``username`` and
+    ``password`` are only passed when they are non-empty.
+    """
+    global _client  # pylint: disable=global-statement
+    credentials = {'username': username, 'password': password}
+    options = {'port': port}
+    options.update({name: value for name, value in credentials.items() if value})
+    mongo = MongoClient(host, **options)
+    # despite its name, _client holds the collection, not the client object
+    _client = mongo[database][collection]
+
+def search(query, params):
+    """Return one page of documents whose ``key`` field matches ``query``.
+
+    The first entry of the returned list is ``{'number_of_results': <total>}``
+    where the total counts all matching documents, independent of paging.
+    Every further entry is one document with ``_id`` removed, all keys and
+    values converted to ``str`` and ``template`` set to ``result_template``.
+
+    :param query:  search term; compared with ``$eq`` when
+                   ``exact_match_only`` is set, otherwise matched as a
+                   case-insensitive ``$regex`` built from the escaped term.
+    :param params: request parameters; only ``params['pageno']`` is read
+                   (page 1 maps to offset 0).
+    """
+    if exact_match_only:
+        condition = {'$eq': query}
+    else:
+        # escape the term so that user input is matched literally
+        pattern = re.compile('.*{0}.*'.format(re.escape(query)), re.I | re.M)
+        condition = {'$regex': pattern}
+    mongo_filter = {key: condition}
+
+    # Cursor.count() is deprecated since PyMongo 3.7 and was removed in 4.0;
+    # Collection.count_documents() returns the same total (no skip / limit).
+    results = [{'number_of_results': _client.count_documents(mongo_filter)}]
+
+    offset = (params['pageno'] - 1) * results_per_page
+    cursor = _client.find(mongo_filter).skip(offset).limit(results_per_page)
+    for doc in cursor:
+        # the ObjectId is of no use in the result list; pop() also tolerates
+        # documents that do not carry an '_id'
+        doc.pop('_id', None)
+        row = {str(k): str(v) for k, v in doc.items()}
+        row['template'] = result_template
+        results.append(row)
+
+    return results
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index 00ecf7e83..97e461177 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -29,13 +29,12 @@ from datetime import (
)
from json import loads
from urllib.parse import urlencode
+from flask_babel import gettext
-# from searx import logger
from searx.utils import match_language
from searx.exceptions import SearxEngineAPIException
from searx.network import raise_for_httperror
-#logger = logger.getChild('qwant')
# about
about = {
@@ -100,6 +99,7 @@ def request(query, params):
def response(resp):
"""Get response from Qwant's search request"""
+ # pylint: disable=too-many-locals, too-many-branches, too-many-statements
keyword = category_to_keyword[categories[0]]
results = []
@@ -180,11 +180,27 @@ def response(resp):
})
elif mainline_type == 'videos':
- content = item['desc']
+ # Some videos have no description: qwant-video returns an empty
+ # string for them, whereas the same kind of video from a qwant-web
+ # query is missing the 'desc' key entirely.
+ d, s, c = item.get('desc'), item.get('source'), item.get('channel')
+ content_parts = []
+ if d:
+ content_parts.append(d)
+ if s:
+ content_parts.append("%s: %s " % (gettext("Source"), s))
+ if c:
+ content_parts.append("%s: %s " % (gettext("Channel"), c))
+ content = ' // '.join(content_parts)
length = timedelta(seconds=item['duration'])
pub_date = datetime.fromtimestamp(item['date'])
thumbnail = item['thumbnail']
-
+ # from some locations (DE and others?) the s2 link responds with
+ # a 'Please wait ..' message but does not deliver the thumbnail
+ thumbnail = thumbnail.replace(
+ 'https://s2.qwant.com',
+ 'https://s1.qwant.com', 1
+ )
results.append({
'title': title,
'url': res_url,
diff --git a/searx/settings.yml b/searx/settings.yml
index e12a39c1a..dbfffa438 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -811,6 +811,19 @@ engines:
engine: mixcloud
shortcut: mc
+ # MongoDB engine
+ # Required dependency: pymongo
+ # - name: mymongo
+ # engine: mongodb
+ # shortcut: md
+ # exact_match_only: false
+ # host: '127.0.0.1'
+ # port: 27017
+ # results_per_page: 20
+ # database: 'business'
+ # collection: 'reviews' # name of the db collection
+ # key: 'name' # key in the collection to search for
+
- name: npm
engine: json_engine
paging: true