summary refs log tree commit diff
path: root/searx/engines/duckduckgo_extra.py
diff options
context:
space:
mode:
author Bnyro <bnyro@tutanota.com> 2023-10-07 10:26:04 +0200
committer Markus Heiser <markus.heiser@darmarIT.de> 2023-10-09 06:53:43 +0200
commit 48cb58bd2ec4eb9cb4ba416f7ece75c3c6c41e55 (patch)
tree 76f9f4e619a3391bc571e9f23ae3f6308c8df54c /searx/engines/duckduckgo_extra.py
parent c3ab49cd903d27905d2da6f70699a55c9a74593e (diff)
[feat] duckduckgo: support for videos and news
Diffstat (limited to 'searx/engines/duckduckgo_extra.py')
-rw-r--r-- searx/engines/duckduckgo_extra.py 135
1 files changed, 135 insertions, 0 deletions
diff --git a/searx/engines/duckduckgo_extra.py b/searx/engines/duckduckgo_extra.py
new file mode 100644
index 000000000..7e3a3282d
--- /dev/null
+++ b/searx/engines/duckduckgo_extra.py
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+DuckDuckGo Extra (images, videos, news)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""
+
from datetime import datetime, timezone
from typing import TYPE_CHECKING
from urllib.parse import urlencode

from searx.engines.duckduckgo import fetch_traits  # pylint: disable=unused-import
from searx.engines.duckduckgo import (
    get_ddg_lang,
    get_vqd,
)
from searx.enginelib.traits import EngineTraits
+
if TYPE_CHECKING:
    # This branch is only seen by static type checkers; it is skipped at runtime.
    import logging

    logger: logging.Logger

# Annotation only: no value is assigned to ``traits`` in this module
# (presumably it is injected by the engine loader -- confirm).
traits: EngineTraits
+
# about
about = dict(
    website='https://duckduckgo.com/',
    wikidata_id='Q12805',
    use_official_api=False,
    require_api_key=False,
    results='JSON (site requires js to get images)',
)

# engine dependent config
categories = ['images', 'web']
ddg_category = 'images'
"""The category must be any of ``images``, ``videos`` and ``news``
"""
paging = True
safesearch = True
send_accept_language_header = True

# Safesearch level (``params['safesearch']``) -> value of the ``p`` cookie and
# of the ``p`` URL argument.  ``None`` means the cookie / argument is not set.
safesearch_cookies = {
    0: '-2',
    1: None,
    2: '1',
}
safesearch_args = {
    0: '1',
    1: None,
    2: '1',
}

# ``ddg_category`` -> name of the ``<name>.js`` endpoint used to build the URL.
search_path_map = {
    'images': 'i',
    'videos': 'v',
    'news': 'news',
}
+
+
def request(query, params):
    """Assemble the DuckDuckGo request for ``query``.

    Fills ``params['cookies']`` (region, language and, depending on the
    safesearch level, the ``p`` cookie) and stores the final URL in
    ``params['url']``.  The ``.js`` endpoint is selected by the module level
    ``ddg_category``.

    :param query: the search term
    :param params: request parameters; ``searxng_locale``, ``pageno``,
        ``safesearch`` and ``cookies`` are read
    :returns: the same ``params`` object, updated in place
    """
    locale = params['searxng_locale']
    region = traits.get_region(locale, traits.all_locale)
    language = get_ddg_lang(traits, locale)

    # The insertion order below is the order of the arguments in the URL.
    query_args = {
        'q': query,
        'o': 'json',
        # 'u': 'bing',
        'l': region,
        'f': ',,,,,',
        'vqd': get_vqd(query),
    }

    page = params['pageno']
    if page > 1:
        # presumably the result offset, with 100 results per page -- confirm
        query_args['s'] = (page - 1) * 100

    cookies = params['cookies']
    cookies['ad'] = language  # zh_CN
    cookies['ah'] = region  # "us-en,de-de"
    cookies['l'] = region  # "hk-tzh"

    # A ``None`` entry in the safesearch maps means "do not send anything".
    level = params['safesearch']
    cookie_value = safesearch_cookies.get(level)
    if cookie_value is not None:
        cookies['p'] = cookie_value  # "-2", "1"
    arg_value = safesearch_args.get(level)
    if arg_value is not None:
        # NOTE(review): safesearch_args maps level 0 to "1", not "-1" -- confirm
        query_args['p'] = arg_value  # "-1", "1"

    logger.debug("cookies: %s", cookies)

    endpoint = search_path_map[ddg_category]
    params['url'] = f'https://duckduckgo.com/{endpoint}.js?{urlencode(query_args)}'

    return params
+
+
+def _image_result(result):
+ return {
+ 'template': 'images.html',
+ 'url': result['url'],
+ 'title': result['title'],
+ 'content': '',
+ 'thumbnail_src': result['thumbnail'],
+ 'img_src': result['image'],
+ 'img_format': '%s x %s' % (result['width'], result['height']),
+ 'source': result['source'],
+ }
+
+
+def _video_result(result):
+ return {
+ 'template': 'videos.html',
+ 'url': result['content'],
+ 'title': result['title'],
+ 'content': result['description'],
+ 'thumbnail': result['images'].get('small') or result['images'].get('medium'),
+ 'iframe_src': result['embed_url'],
+ 'source': result['provider'],
+ 'length': result['duration'],
+ 'metadata': result.get('uploader'),
+ }
+
+
+def _news_result(result):
+ return {
+ 'url': result['url'],
+ 'title': result['title'],
+ 'content': result['excerpt'],
+ 'source': result['source'],
+ 'publishedDate': datetime.utcfromtimestamp(result['date']),
+ }
+
+
def response(resp):
    """Turn the JSON reply of DuckDuckGo into a list of result dicts.

    Every item of the ``results`` list in the reply is converted by the helper
    that belongs to the module level ``ddg_category``.

    :param resp: response object; only its ``json()`` method is used
    :returns: list of result dicts, in the order of the reply
    :raises ValueError: if ``ddg_category`` is none of ``images``, ``videos``
        or ``news`` (only raised when the reply contains at least one item)
    """
    converters = {
        'images': _image_result,
        'videos': _video_result,
        'news': _news_result,
    }

    parsed = []
    for item in resp.json()['results']:
        # looked up per item, so an empty reply never raises
        convert = converters.get(ddg_category)
        if convert is None:
            raise ValueError(f"Invalid duckduckgo category: {ddg_category}")
        parsed.append(convert(item))

    return parsed