diff options
| -rw-r--r-- | requirements.txt | 4 | ||||
| -rw-r--r-- | searx/engines/mongodb.py | 65 | ||||
| -rw-r--r-- | searx/engines/qwant.py | 24 | ||||
| -rw-r--r-- | searx/settings.yml | 13 |
4 files changed, 100 insertions, 6 deletions
diff --git a/requirements.txt b/requirements.txt
index 4d14dc0cc..a8e0156c3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,11 +5,11 @@ flask==1.1.2
 jinja2==2.11.3
 lxml==4.6.3
 pygments==2.9.0
-python-dateutil==2.8.1
+python-dateutil==2.8.2
 pyyaml==5.4.1
 httpx[http2]==0.17.1
 Brotli==1.0.9
-uvloop==0.15.2; python_version >= '3.7'
+uvloop==0.15.3; python_version >= '3.7'
 uvloop==0.14.0; python_version < '3.7'
 httpx-socks[asyncio]==0.3.1
 langdetect==1.0.9
diff --git a/searx/engines/mongodb.py b/searx/engines/mongodb.py
new file mode 100644
index 000000000..1f24c5acf
--- /dev/null
+++ b/searx/engines/mongodb.py
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""MongoDB engine (Offline)
+
+Looks up the search query in the field ``key`` of one MongoDB collection.
+"""
+
+import re
+from pymongo import MongoClient  # pylint: disable=import-error
+
+engine_type = 'offline'
+
+# mongodb connection variables
+host = '127.0.0.1'
+port = 27017
+username = ''
+password = ''
+database = None
+collection = None
+key = None
+
+# engine specific variables
+paging = True
+results_per_page = 20
+exact_match_only = False
+result_template = 'key-value.html'
+
+_client = None
+
+def init(_):
+    """Set up the engine by calling :py:func:`connect`."""
+    connect()
+
+def connect():
+    """Bind ``_client`` to the collection ``database[collection]``."""
+    global _client  # pylint: disable=global-statement
+    kwargs = {'port': port}
+    if username:
+        kwargs['username'] = username
+    if password:
+        kwargs['password'] = password
+    _client = MongoClient(host, **kwargs)[database][collection]
+
+def search(query, params):
+    """Return one page of documents whose ``key`` field matches ``query``.
+
+    The first result carries the total number of matches; every following
+    result is a document (without ``_id``) with keys and values as strings.
+    """
+    if exact_match_only:
+        condition = {'$eq': query}
+    else:
+        _re = re.compile('.*{0}.*'.format(re.escape(query)), re.I | re.M)
+        condition = {'$regex': _re}
+    mongo_filter = {key: condition}
+
+    # Cursor.count() was removed in PyMongo 4; count_documents() replaces it.
+    results = [{'number_of_results': _client.count_documents(mongo_filter)}]
+    offset = (params['pageno'] - 1) * results_per_page
+    for doc in _client.find(mongo_filter).skip(offset).limit(results_per_page):
+        doc.pop('_id', None)
+        row = {str(k): str(v) for k, v in doc.items()}
+        row['template'] = result_template
+        results.append(row)
+    return results
diff
--git a/searx/engines/qwant.py b/searx/engines/qwant.py index 00ecf7e83..97e461177 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -29,13 +29,12 @@ from datetime import ( ) from json import loads from urllib.parse import urlencode +from flask_babel import gettext -# from searx import logger from searx.utils import match_language from searx.exceptions import SearxEngineAPIException from searx.network import raise_for_httperror -#logger = logger.getChild('qwant') # about about = { @@ -100,6 +99,7 @@ def request(query, params): def response(resp): """Get response from Qwant's search request""" + # pylint: disable=too-many-locals, too-many-branches, too-many-statements keyword = category_to_keyword[categories[0]] results = [] @@ -180,11 +180,27 @@ def response(resp): }) elif mainline_type == 'videos': - content = item['desc'] + # some videos do not have a description: while qwant-video + # returns an empty string, such videos from a qwant-web query + # miss the 'desc' key. + d, s, c = item.get('desc'), item.get('source'), item.get('channel') + content_parts = [] + if d: + content_parts.append(d) + if s: + content_parts.append("%s: %s " % (gettext("Source"), s)) + if c: + content_parts.append("%s: %s " % (gettext("Channel"), c)) + content = ' // '.join(content_parts) length = timedelta(seconds=item['duration']) pub_date = datetime.fromtimestamp(item['date']) thumbnail = item['thumbnail'] - + # from some locations (DE and others?) the s2 link does + # respond with a 'Please wait ..' 
but does not deliver the thumbnail + thumbnail = thumbnail.replace( + 'https://s2.qwant.com', + 'https://s1.qwant.com', 1 + ) results.append({ 'title': title, 'url': res_url, diff --git a/searx/settings.yml b/searx/settings.yml index e12a39c1a..dbfffa438 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -811,6 +811,19 @@ engines: engine: mixcloud shortcut: mc + # MongoDB engine + # Required dependency: pymongo + # - name: mymongo + # engine: mongodb + # shortcut: md + # exact_match_only: false + # host: '127.0.0.1' + # port: 27017 + # results_per_page: 20 + # database: 'business' + # collection: 'reviews' # name of the db collection + # key: 'name' # key in the collection to search for + - name: npm engine: json_engine paging: true |