diff options
| author | Bnyro <bnyro@tutanota.com> | 2025-05-31 21:10:23 +0200 |
|---|---|---|
| committer | Bnyro <bnyro@tutanota.com> | 2025-07-05 13:45:19 +0200 |
| commit | 6ff40356352e9f5b9c7f9c5d58ac2fc787455036 (patch) | |
| tree | f26a828798c518b449edc053736dedd402a50b64 /searx/engines/pixabay.py | |
| parent | 5926d737e3c93fcaafd806c013b207a2d1813b0b (diff) | |
[feat] engines: add pixabay for royalty free images/videos
What's changed?
- this PR adds Pixabay, a collection of royalty free images
- additionally, it seems to have some videos, so there's an engine for those too
Author Notes
- when using SearXNG's transport, all our requests will get blocked, probably due to fingerprinting
- we should find an alternative solution, because this is just a hacky change to make things work for now, but I don't know how yet ...
Diffstat (limited to 'searx/engines/pixabay.py')
| -rw-r--r-- | searx/engines/pixabay.py | 98 |
1 files changed, 98 insertions, 0 deletions
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Pixabay provides royalty-free media (images, videos)"""

from datetime import timedelta
from urllib.parse import quote_plus, urlencode
from dateutil import parser
from searx.utils import gen_useragent

# about
about = {
    "website": 'https://pixabay.com',
    "wikidata_id": 'Q1746538',
    "official_api_documentation": 'https://pixabay.com/api/docs/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

base_url = 'https://pixabay.com'
categories = ['images']
pixabay_type = "images"  # alternative: 'videos'

paging = True
safesearch = True
time_range_support = True

# maps the safesearch level to the value of the ``g_rated`` cookie
safesearch_map = {0: 'off', 1: '1', 2: '1'}
# maps the time range to the value of the ``date`` URL argument
time_range_map = {'day': '1d', 'week': '1w', 'month': '1m', 'year': '1y'}

# using http2 returns forbidden errors
enable_http2 = False


def request(query, params):
    """Build the search request for the configured ``pixabay_type``.

    Sets ``url``, ``headers``, the ``g_rated`` cookie and ``allow_redirects``
    on ``params`` and returns ``params``.
    """
    args = {
        'pagi': params['pageno'],
    }
    if params['time_range']:
        args['date'] = time_range_map[params['time_range']]

    params['url'] = f"{base_url}/{pixabay_type}/search/{quote_plus(query)}/?{urlencode(args)}"
    # NOTE(review): the custom User-Agent suffix and the x-* headers look like
    # the workaround needed to get a JSON answer without being blocked --
    # their exact meaning on Pixabay's side is not documented here; confirm
    # before changing them.
    params['headers'] = {
        'User-Agent': gen_useragent() + " Pixabay",
        'Accept': 'application/json',
        'x-bootstrap-cache-miss': '1',
        'x-fetch-bootstrap': '1',
    }
    params['cookies']['g_rated'] = safesearch_map[params['safesearch']]

    # prevent automatic redirects to first page on pagination
    params['allow_redirects'] = False

    return params


def _image_result(result):
    """Convert one image entry of the JSON response into an ``images.html``
    result.

    Returns ``None`` when the entry has no ``href`` or no image sources, since
    such an entry can't be turned into a usable result.
    """
    href = result.get('href')
    # images are sorted in ascending quality
    sources = list((result.get('sources') or {}).values())
    if not href or not sources:
        return None

    return {
        'template': 'images.html',
        'url': base_url + href,
        'thumbnail_src': sources[0],
        'img_src': sources[-1],
        'title': result.get('name'),
        'content': result.get('description', ''),
    }


def _video_result(result):
    """Convert one video entry of the JSON response into a ``videos.html``
    result.

    Returns ``None`` when the entry has no ``href``.  ``length`` and
    ``publishedDate`` are only set when the entry provides a usable
    ``duration`` / ``uploadDate``.
    """
    href = result.get('href')
    if not href:
        return None

    sources = result.get('sources') or {}
    item = {
        'template': 'videos.html',
        'url': base_url + href,
        'thumbnail': sources.get('thumbnail'),
        'iframe_src': sources.get('embed'),
        'title': result.get('name'),
        'content': result.get('description', ''),
    }

    duration = result.get('duration')
    if duration is not None:
        item['length'] = timedelta(seconds=duration)

    upload_date = result.get('uploadDate')
    if upload_date:
        try:
            item['publishedDate'] = parser.parse(upload_date)
        except (ValueError, OverflowError):
            # an unparsable date should not drop the whole result (or the
            # whole response); the result is simply returned without a date
            pass

    return item


def response(resp):
    """Parse the JSON response into a list of image / video results.

    Entries with an unknown or missing ``mediaType`` and entries that can't be
    converted into a result are skipped.
    """
    results = []

    # if there are no results on this page, we get a redirect
    # to the first page
    if resp.status_code == 302:
        return results

    json_data = resp.json()
    page = json_data.get('page') or {}

    for result in page.get('results') or []:
        media_type = result.get('mediaType')
        if media_type in ('photo', 'illustration', 'vector'):
            item = _image_result(result)
        elif media_type == 'video':
            item = _video_result(result)
        else:
            continue

        if item is not None:
            results.append(item)

    return results