summaryrefslogtreecommitdiff
path: root/searx/engines
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2022-04-08 11:17:45 +0200
committerMarkus Heiser <markus.heiser@darmarit.de>2022-04-16 09:27:34 +0200
commit3bb62823ec3af0e67bd2d959bec20c4791ee3bac (patch)
tree7a4d9dc863b76a9d57902c101cd3712b64add7b8 /searx/engines
parent27f8fa6fe066c15dfc60bc2c50533ef87663ce29 (diff)
[fix] dailymotion engine: filter by language & country
- fix the issue of fetching more than 7000 *languages* - improve the request function and filter by language & country - implement time_range_support & safesearch - add more fields to the response from dailymotion (allow_embed, length) - better clean up of HTML tags in the 'content' field. This is more or less a complete rework based on the '/videos' API from [1]. This patch cleans up the language list in SearXNG that has been polluted by the ISO-639-3 2- and 3-letter codes from dailymotion languages which have never been used. [1] https://developers.dailymotion.com/tools/ Closes: https://github.com/searxng/searxng/issues/1065 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines')
-rw-r--r--searx/engines/dailymotion.py167
1 files changed, 117 insertions, 50 deletions
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index d71cdc114..5dc0d9dbe 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -1,12 +1,17 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Dailymotion (Videos)
+"""Dailymotion (Videos)
+
"""
-from json import loads
-from datetime import datetime
+from typing import Set
+from datetime import datetime, timedelta
from urllib.parse import urlencode
-from searx.utils import match_language, html_to_text
+import time
+import babel
+
+from searx.exceptions import SearxEngineAPIException
+from searx.network import raise_for_httperror
+from searx.utils import html_to_text
# about
about = {
@@ -21,23 +26,78 @@ about = {
# engine dependent config
categories = ['videos']
paging = True
+number_of_results = 10
+
+time_range_support = True
+time_delta_dict = {
+ "day": timedelta(days=1),
+ "week": timedelta(days=7),
+ "month": timedelta(days=31),
+ "year": timedelta(days=365),
+}
-# search-url
-# see http://www.dailymotion.com/doc/api/obj-video.html
-search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,description,duration,url,thumbnail_360_url,id&sort=relevance&limit=5&page={pageno}&{query}' # noqa
-supported_languages_url = 'https://api.dailymotion.com/languages'
+safesearch = True
+safesearch_params = {2: '&is_created_for_kids=true', 1: '&is_created_for_kids=true', 0: ''}
+# search-url
+# - https://developers.dailymotion.com/tools/
+# - https://www.dailymotion.com/doc/api/obj-video.html
+
+result_fields = [
+ 'allow_embed',
+ 'description',
+ 'title',
+ 'created_time',
+ 'duration',
+ 'url',
+ 'thumbnail_360_url',
+ 'id',
+]
+search_url = (
+ 'https://api.dailymotion.com/videos?'
+ 'fields={fields}&password_protected={password_protected}&private={private}&sort={sort}&limit={limit}'
+).format(
+ fields=','.join(result_fields),
+ password_protected= 'false',
+ private='false',
+ sort='relevance',
+ limit=number_of_results,
+)
+iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
+
+# The request query filters by 'languages' & 'country', therefore instead of
+# fetching only languages we need to fetch locales.
+supported_languages_url = 'https://api.dailymotion.com/locales'
-# do search-request
def request(query, params):
- if params['language'] == 'all':
- locale = 'en-US'
- else:
- locale = match_language(params['language'], supported_languages)
- params['url'] = search_url.format(
- query=urlencode({'search': query, 'localization': locale}), pageno=params['pageno']
- )
+ if not query:
+ return False
+
+ language = params['language']
+ if language == 'all':
+ language = 'en-US'
+ locale = babel.Locale.parse(language, sep='-')
+
+ query_args = {
+ 'search': query,
+ 'languages': locale.language,
+ 'page': params['pageno'],
+ }
+
+ if locale.territory:
+ localization = locale.language + '_' + locale.territory
+ if localization in supported_languages:
+ query_args['country'] = locale.territory
+
+ time_delta = time_delta_dict.get(params["time_range"])
+ if time_delta:
+ created_after = datetime.now() - time_delta
+ query_args['created_after'] = datetime.timestamp(created_after)
+
+ query_str = urlencode(query_args)
+ params['url'] = search_url + '&' + query_str + safesearch_params.get(params['safesearch'], '')
+ params['raise_for_httperror'] = False
return params
@@ -46,34 +106,51 @@ def request(query, params):
def response(resp):
results = []
- search_res = loads(resp.text)
+ search_res = resp.json()
+
+ # check for an API error
+ if 'error' in search_res:
+ raise SearxEngineAPIException(search_res['error'].get('message'))
- # return empty array if there are no results
- if 'list' not in search_res:
- return []
+ raise_for_httperror(resp)
# parse results
- for res in search_res['list']:
+ for res in search_res.get('list', []):
+
title = res['title']
url = res['url']
+
content = html_to_text(res['description'])
- thumbnail = res['thumbnail_360_url']
+ if len(content) > 300:
+ content = content[:300] + '...'
+
publishedDate = datetime.fromtimestamp(res['created_time'], None)
- # http to https
+ length = time.gmtime(res.get('duration'))
+ if length.tm_hour:
+ length = time.strftime("%H:%M:%S", length)
+ else:
+ length = time.strftime("%M:%S", length)
+
+ thumbnail = res['thumbnail_360_url']
thumbnail = thumbnail.replace("http://", "https://")
- results.append(
- {
- 'template': 'videos.html',
- 'url': url,
- 'title': title,
- 'content': content,
- 'publishedDate': publishedDate,
- 'iframe_src': "https://www.dailymotion.com/embed/video/" + res['id'],
- 'thumbnail': thumbnail,
- }
- )
+ item = {
+ 'template': 'videos.html',
+ 'url': url,
+ 'title': title,
+ 'content': content,
+ 'publishedDate': publishedDate,
+ 'length': length,
+ 'thumbnail': thumbnail,
+ }
+
+ # HINT: no matter what the value is, without an API token videos can't be
+ # shown embedded
+ if res['allow_embed']:
+ item['iframe_src'] = iframe_src.format(video_id=res['id'])
+
+ results.append(item)
# return results
return results
@@ -81,18 +158,8 @@ def response(resp):
# get supported languages from their site
def _fetch_supported_languages(resp):
- supported_languages = {}
-
- response_json = loads(resp.text)
-
- for language in response_json['list']:
- supported_languages[language['code']] = {}
-
- name = language['native_name']
- if name:
- supported_languages[language['code']]['name'] = name
- english_name = language['name']
- if english_name:
- supported_languages[language['code']]['english_name'] = english_name
-
- return supported_languages
+ response_json = resp.json()
+ return [
+ item['locale']
+ for item in response_json['list']
+ ]