# searx/engines/senscritique.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""SensCritique (movies)
"""
from __future__ import annotations

from json import dumps, loads
from typing import Any, Optional
from searx.result_types import EngineResults, MainResult

# Engine metadata describing the upstream service.
about = {
    "website": "https://www.senscritique.com/",
    "wikidata_id": "Q16676060",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
    "language": "fr",
}

# Engine settings.
categories = ["movies"]
paging = True
# Number of items requested per result page; also used to derive the offset.
page_size = 16
# Endpoint that receives the GraphQL POST request.
graphql_url = "https://apollo.senscritique.com/"

# GraphQL document sent by ``request()`` as the ``query`` entry of the JSON
# payload.  It declares the ``$query``, ``$offset``, ``$limit`` and ``$sortBy``
# variables and selects, among others, the item fields that ``parse_item()``
# and ``build_content_parts()`` read.
graphql_query = """query SearchProductExplorer($query: String, $offset: Int, $limit: Int,
                    $sortBy: SearchProductExplorerSort) {
  searchProductExplorer(
    query: $query
    filters: []
    sortBy: $sortBy
    offset: $offset
    limit: $limit
  ) {
    items {
      category
      dateRelease
      duration
      id
      originalTitle
      rating
      title
      url
      yearOfProduction
      medias {
        picture
      }
      countries {
        name
      }
      genresInfos {
        label
      }
      directors {
        name
      }
      stats {
        ratingCount
      }
    }
  }
}"""


def request(query: str, params: dict[str, Any]) -> dict[str, Any]:
    """Fill *params* with the GraphQL POST request for one result page.

    The offset is derived from ``params['pageno']`` (1-based) and the
    module-level ``page_size``.  The payload is serialized to JSON and stored
    in ``params['data']``; *params* is mutated in place and returned.
    """
    variables = {
        "offset": page_size * (params['pageno'] - 1),
        "limit": page_size,
        "query": query,
        "sortBy": "RELEVANCE",
    }
    payload = {
        "operationName": "SearchProductExplorer",
        "variables": variables,
        "query": graphql_query,
    }

    params.update(url=graphql_url, method='POST')
    params['headers']['Content-Type'] = 'application/json'
    params['data'] = dumps(payload)

    return params


def response(resp) -> EngineResults:
    """Convert the GraphQL JSON answer into engine results.

    ``resp.text`` is decoded as JSON and the list found under
    ``data.searchProductExplorer.items`` is walked; every entry that
    ``parse_item()`` can turn into a result is added to the returned
    ``EngineResults``.  An answer without usable items yields an empty
    result set.
    """
    res = EngineResults()
    response_data = loads(resp.text)

    # A GraphQL server may answer with ``"data": null`` (typically next to an
    # ``errors`` entry).  ``dict.get(key, {})`` would hand back that ``None``
    # and the chained lookup would raise, so every level is normalised with
    # ``or`` instead of relying on the default argument.
    data = response_data.get('data') or {}
    search = data.get('searchProductExplorer') or {}
    items = search.get('items') or []

    for item in items:
        # Null (or otherwise malformed) list entries cannot be parsed.
        if not isinstance(item, dict):
            continue
        result = parse_item(item)
        if result:
            res.add(result=result)

    return res


def parse_item(item: dict[str, Any]) -> MainResult | None:
    """Parse a single item from the SensCritique API response.

    Returns ``None`` when the item has no title or no URL path, so the caller
    can skip it.  Otherwise returns a ``MainResult`` whose title is suffixed
    with the production year (when present), whose content is the
    ``' | '``-joined output of ``build_content_parts()`` and whose thumbnail is
    the item's ``medias.picture`` value (empty string when absent).
    """
    title = item.get('title', '')
    path = item.get('url')
    # Without a title or a path no meaningful link can be built; indexing
    # ``item['url']`` directly would raise on a missing key and produce
    # ".../senscritique.comNone" on a null value.
    if not title or not path:
        return None

    year = item.get('yearOfProduction')
    original_title = item.get('originalTitle')

    # ``medias`` may be missing or null; treat both as "no picture".
    medias = item.get('medias') or {}
    thumbnail: str = medias.get('picture') or ""

    content_parts = build_content_parts(item, title, original_title)

    return MainResult(
        url=f"https://www.senscritique.com{path}",
        title=title + (f' ({year})' if year else ''),
        content=' | '.join(content_parts),
        thumbnail=thumbnail,
    )


def _collect_values(entries: Any, key: str) -> list[str]:
    """Return the non-empty string values stored under *key* in a list of dicts."""
    if not isinstance(entries, list):
        return []
    values = (entry.get(key) for entry in entries if isinstance(entry, dict))
    return [value for value in values if isinstance(value, str) and value]


def build_content_parts(item: dict[str, Any], title: str, original_title: Optional[str]) -> list[str]:
    """Build the content parts for an item.

    Each available piece of information (category, original title, directors,
    countries, genres, duration in minutes, rating) becomes one string of the
    returned list, in that order.  Missing, null or empty fields are skipped
    rather than raising, so a partially filled item still yields the parts
    that are present.

    :param item: one entry of the ``items`` list of the API answer.
    :param title: title of the item; the original title is only reported when
        it differs from this value.
    :param original_title: original title of the item, if any.
    """
    content_parts = []

    if item.get('category'):
        content_parts.append(item['category'])

    if original_title and original_title != title:
        content_parts.append(f"Original title: {original_title}")

    # Entries with a null/missing name would break ``str.join``; they are
    # filtered out and the whole part is dropped when nothing is left.
    directors = _collect_values(item.get('directors'), 'name')
    if directors:
        content_parts.append(f"Director(s): {', '.join(directors)}")

    countries = _collect_values(item.get('countries'), 'name')
    if countries:
        content_parts.append(f"Country: {', '.join(countries)}")

    genres = _collect_values(item.get('genresInfos'), 'label')
    if genres:
        content_parts.append(f"Genre(s): {', '.join(genres)}")

    if item.get('duration'):
        # The floor division by 60 turns the duration into whole minutes
        # (presumably the API reports seconds -- TODO confirm).
        minutes = item['duration'] // 60
        if minutes > 0:
            content_parts.append(f"Duration: {minutes} min")

    # ``stats`` may be null in the answer; ``item.get('stats', {})`` would then
    # return ``None`` and the chained ``.get`` would raise.
    rating = item.get('rating')
    stats = item.get('stats') or {}
    rating_count = stats.get('ratingCount') if isinstance(stats, dict) else None
    if rating and rating_count:
        content_parts.append(f"Rating: {rating}/10 ({rating_count} votes)")

    return content_parts