"""
 Unsplash

 @website     https://unsplash.com
 @provide-api yes (https://unsplash.com/developers)

 @using-api   no
 @results     JSON (using search portal's infiniscroll API)
 @stable      no (JSON format could change any time)
 @parse       url, title, img_src, thumbnail_src

 To enable the engine, register it in searx/settings.yml:

   - name : unsplash
     engine : unsplash
     disabled: True
     shortcut : us
"""

from json import loads

try:
    from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl
except ImportError:
    # NOTE(review): searx.url_utils is presumably a thin re-export of the
    # standard URL helpers -- confirm. Falling back to the stdlib keeps the
    # module importable when the searx package is not on the path.
    from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl

url = 'https://unsplash.com/'
search_url = url + 'napi/search/photos?'
categories = ['images']
page_size = 20
paging = True

# Query parameters stripped from every result URL; they look like tracking
# parameters and are not needed to display the result.
_TRACKING_PARAMS = frozenset(('ixid', 's'))


def clean_url(url):
    """Return ``url`` without the ``ixid`` and ``s`` query parameters.

    Scheme, host, path, params and fragment are passed through untouched.
    The remaining query parameters keep their order and are re-encoded
    with ``urlencode``.

    :param url: absolute or relative URL string
    :returns: the URL with the tracking parameters removed
    """
    parsed = urlparse(url)
    # keep_blank_values=True: parse_qsl drops ``key=`` pairs by default,
    # which would remove more than the tracking parameters.
    query = [(key, value)
             for key, value in parse_qsl(parsed.query, keep_blank_values=True)
             if key not in _TRACKING_PARAMS]
    return urlunparse(parsed._replace(query=urlencode(query)))


def request(query, params):
    """Fill ``params['url']`` with the search URL for ``query``.

    :param query: search terms
    :param params: request parameters; ``params['pageno']`` selects the page
    :returns: the same ``params`` dict, with ``'url'`` set
    """
    params['url'] = search_url + urlencode({'query': query,
                                            'page': params['pageno'],
                                            'per_page': page_size})
    return params


def response(resp):
    """Convert the JSON search response into a list of image results.

    :param resp: response object whose ``text`` attribute holds the JSON body
    :returns: list of result dicts for the ``images.html`` template; empty
              when the payload carries no ``results`` list
    :raises ValueError: if ``resp.text`` is not valid JSON
    """
    results = []
    json_data = loads(resp.text)

    # The JSON format is not stable: tolerate a non-object payload as well
    # as a missing or null 'results' entry.
    if not isinstance(json_data, dict):
        return results

    for result in json_data.get('results') or []:
        try:
            page_url = result['links']['html']
            thumbnail_url = result['urls']['thumb']
            image_url = result['urls']['raw']
        except (KeyError, TypeError):
            # One malformed entry must not discard the rest of the page.
            continue

        # NOTE(review): 'description' can be JSON null for photos without a
        # caption; fall back to 'alt_description' so the title is always a
        # string -- confirm field names against the current API payload.
        title = result.get('description') or result.get('alt_description') or ''

        results.append({'template': 'images.html',
                        'url': clean_url(page_url),
                        'thumbnail_src': clean_url(thumbnail_url),
                        'img_src': clean_url(image_url),
                        'title': title,
                        'content': ''})
    return results