diff options
| -rw-r--r-- | searx/engines/genius.py | 60 | ||||
| -rw-r--r-- | searx/engines/sqlite.py | 75 | ||||
| -rw-r--r-- | searx/engines/unsplash.py | 50 | ||||
| -rw-r--r-- | searx/settings.yml | 21 |
4 files changed, 162 insertions, 44 deletions
diff --git a/searx/engines/genius.py b/searx/engines/genius.py index 9f6a8cd5f..9d701a8de 100644 --- a/searx/engines/genius.py +++ b/searx/engines/genius.py @@ -1,12 +1,17 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - Genius +# lint: pylint +# pylint: disable=invalid-name, missing-function-docstring +"""Genius + """ from json import loads from urllib.parse import urlencode from datetime import datetime +from searx import logger +logger = logger.getChild('genius engine') + # about about = { "website": 'https://genius.com/', @@ -27,49 +32,54 @@ search_url = url + 'search/{index}?{query}&page={pageno}&per_page={page_size}' def request(query, params): - params['url'] = search_url.format(query=urlencode({'q': query}), - index='multi', - page_size=page_size, - pageno=params['pageno']) + params['url'] = search_url.format( + query=urlencode({'q': query}), + index='multi', + page_size=page_size, + pageno=params['pageno'], + ) return params def parse_lyric(hit): try: content = hit['highlights'][0]['value'] - except: + except Exception as e: # pylint: disable=broad-except + logger.error(e, exc_info=True) content = '' timestamp = hit['result']['lyrics_updated_at'] - result = {'url': hit['result']['url'], - 'title': hit['result']['full_title'], - 'content': content, - 'thumbnail': hit['result']['song_art_image_thumbnail_url'], - 'template': 'videos.html'} + result = { + 'url': hit['result']['url'], + 'title': hit['result']['full_title'], + 'content': content, + 'thumbnail': hit['result']['song_art_image_thumbnail_url'], + } if timestamp: result.update({'publishedDate': datetime.fromtimestamp(timestamp)}) return result def parse_artist(hit): - result = {'url': hit['result']['url'], - 'title': hit['result']['name'], - 'content': '', - 'thumbnail': hit['result']['image_url'], - 'template': 'videos.html'} + result = { + 'url': hit['result']['url'], + 'title': hit['result']['name'], + 'content': '', + 'thumbnail': hit['result']['image_url'], + } return result def parse_album(hit): - result = {'url': hit['result']['url'], - 'title': hit['result']['full_title'], - 'thumbnail': hit['result']['cover_art_url'], - 'content': '', - # 'thumbnail': hit['result']['cover_art_thumbnail_url'], - 'template': 'videos.html'} + result = { + 'url': hit['result']['url'], + 'title': hit['result']['full_title'], + 'thumbnail': hit['result']['cover_art_url'], + 'content': '', + } try: year = hit['result']['release_date_components']['year'] - except: - pass + except Exception as e: # pylint: disable=broad-except + logger.error(e, exc_info=True) else: if year: result.update({'content': 'Released: {}'.format(year)}) diff --git a/searx/engines/sqlite.py b/searx/engines/sqlite.py new file mode 100644 index 000000000..84db74d62 --- /dev/null +++ b/searx/engines/sqlite.py @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +# pylint: disable=missing-function-docstring + +"""SQLite database (Offline) + +""" + +import sqlite3 +import contextlib + +from searx import logger + + +logger = logger.getChild('SQLite engine') + +engine_type = 'offline' +database = "" +query_str = "" +limit = 10 +paging = True +result_template = 'key-value.html' + + +def init(engine_settings): + if 'query_str' not in engine_settings: + raise ValueError('query_str cannot be empty') + + if not engine_settings['query_str'].lower().startswith('select '): + raise ValueError('only SELECT query is supported') + + +@contextlib.contextmanager +def sqlite_cursor(): + """Implements a `Context Manager`_ for a :py:obj:`sqlite3.Cursor`. + + Open database in read only mode: if the database doesn't exist. + The default mode creates an empty file on the file system. + + see: + * https://docs.python.org/3/library/sqlite3.html#sqlite3.connect + * https://www.sqlite.org/uri.html + """ + global database # pylint: disable=global-statement + uri = 'file:' + database + '?mode=ro' + with contextlib.closing(sqlite3.connect(uri, uri=True)) as connect: + connect.row_factory = sqlite3.Row + with contextlib.closing(connect.cursor()) as cursor: + yield cursor + + +def search(query, params): + global query_str, result_template # pylint: disable=global-statement + results = [] + + query_params = { + 'query': query, + 'wildcard': r'%' + query.replace(' ', r'%') + r'%', + 'limit': limit, + 'offset': (params['pageno'] - 1) * limit + } + query_to_run = query_str + ' LIMIT :limit OFFSET :offset' + + with sqlite_cursor() as cur: + + cur.execute(query_to_run, query_params) + col_names = [cn[0] for cn in cur.description] + + for row in cur.fetchall(): + item = dict( zip(col_names, map(str, row)) ) + item['template'] = result_template + logger.debug("append result --> %s", item) + results.append(item) + + return results diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py index 3bbdf630d..834bc917c 100644 --- a/searx/engines/unsplash.py +++ b/searx/engines/unsplash.py @@ -1,11 +1,16 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - Unsplash +# lint: pylint +# pylint: disable=missing-function-docstring +"""Unsplash + """ from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl from json import loads +from searx import logger + +logger = logger.getChild('unsplash engine') # about about = { "website": 'https://unsplash.com', @@ -16,8 +21,8 @@ about = { "results": 'JSON', } -url = 'https://unsplash.com/' -search_url = url + 'napi/search/photos?' +base_url = 'https://unsplash.com/' +search_url = base_url + 'napi/search/photos?' categories = ['images'] page_size = 20 paging = True @@ -25,18 +30,24 @@ paging = True def clean_url(url): parsed = urlparse(url) - query = [(k, v) for (k, v) in parse_qsl(parsed.query) if k not in ['ixid', 's']] + query = [(k, v) for (k, v) + in parse_qsl(parsed.query) if k not in ['ixid', 's']] - return urlunparse((parsed.scheme, - parsed.netloc, - parsed.path, - parsed.params, - urlencode(query), - parsed.fragment)) + return urlunparse(( + parsed.scheme, + parsed.netloc, + parsed.path, + parsed.params, + urlencode(query), + parsed.fragment + )) def request(query, params): - params['url'] = search_url + urlencode({'query': query, 'page': params['pageno'], 'per_page': page_size}) + params['url'] = search_url + urlencode({ + 'query': query, 'page': params['pageno'], 'per_page': page_size + }) + logger.debug("query_url --> %s", params['url']) return params @@ -46,10 +57,13 @@ def response(resp): if 'results' in json_data: for result in json_data['results']: - results.append({'template': 'images.html', - 'url': clean_url(result['links']['html']), - 'thumbnail_src': clean_url(result['urls']['thumb']), - 'img_src': clean_url(result['urls']['raw']), - 'title': result['description'], - 'content': ''}) + results.append({ + 'template': 'images.html', + 'url': clean_url(result['links']['html']), + 'thumbnail_src': clean_url(result['urls']['thumb']), + 'img_src': clean_url(result['urls']['raw']), + 'title': result.get('alt_description') or 'unknown', + 'content': result.get('description') or '' + }) + return results diff --git a/searx/settings.yml b/searx/settings.yml index 57ed93b28..94d9e185a 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1004,6 +1004,26 @@ engines: timeout : 3.0 disabled : True + # For this demo of the sqlite engine download: + # https://liste.mediathekview.de/filmliste-v2.db.bz2 + # and unpack into searx/data/filmliste-v2.db + # Query to test: "!demo concert" + # + # - name : demo + # engine : sqlite + # shortcut: demo + # categories: general + # result_template: default.html + # database : searx/data/filmliste-v2.db + # query_str : >- + # SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, + # COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, + # description AS content + # FROM film + # WHERE title LIKE :wildcard OR description LIKE :wildcard + # ORDER BY duration DESC + # disabled : False + - name : torrentz engine : torrentz shortcut : tor @@ -1041,7 +1061,6 @@ engines: - name : unsplash engine : unsplash - disabled: True shortcut : us - name : yahoo |