summaryrefslogtreecommitdiff
path: root/searx/metrics/error_recorder.py
blob: 1d0d6e7a3da8edc367a7a035717f71a4f8750169 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import typing
import inspect
from json import JSONDecodeError
from urllib.parse import urlparse
from httpx import HTTPError, HTTPStatusError
from searx.exceptions import (
    SearxXPathSyntaxException,
    SearxEngineXPathException,
    SearxEngineAPIException,
    SearxEngineAccessDeniedException,
)
from searx import searx_parent_dir, settings
from searx.engines import engines


# Error registry filled by add_error_context(): maps an engine name to a dict
# whose keys are ErrorContext instances and whose values are occurrence counts.
errors_per_engines = {}


class ErrorContext:
    """Hashable record describing where and why an engine error happened.

    Instances compare equal (and hash identically) when all their fields are
    equal, which lets them be used as dictionary keys for counting
    occurrences of the same error.
    """

    # The order below is the order used for comparison and hashing.
    __slots__ = (
        'filename',
        'function',
        'line_no',
        'code',
        'exception_classname',
        'log_message',
        'log_parameters',
        'secondary',
    )

    def __init__(self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary):
        """Store every argument on the attribute of the same name."""
        self.filename = filename
        self.function = function
        self.line_no = line_no
        self.code = code
        self.exception_classname = exception_classname
        self.log_message = log_message
        self.log_parameters = log_parameters
        self.secondary = secondary

    def __eq__(self, o) -> bool:
        """Return True only for another ErrorContext whose fields all match."""
        if not isinstance(o, ErrorContext):
            return False
        return all(getattr(self, name) == getattr(o, name) for name in ErrorContext.__slots__)

    def __hash__(self):
        """Hash the tuple of all fields, consistent with ``__eq__``."""
        return hash(tuple(getattr(self, name) for name in ErrorContext.__slots__))

    def __repr__(self):
        """Return a one-line description; ``function`` is not part of it."""
        return (
            f"ErrorContext({self.filename!r}, {self.line_no!r}, {self.code!r}, "
            f"{self.exception_classname!r}, {self.log_message!r}, {self.log_parameters!r}) "
            f"{self.secondary!r}"
        )


def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
    """Count one occurrence of *error_context* for *engine_name* and log it.

    The counter lives in the module-level ``errors_per_engines`` registry;
    the context is also emitted as a warning on the engine's own logger.
    """
    counters = errors_per_engines.setdefault(engine_name, {})
    previous_count = counters.get(error_context, 0)
    counters[error_context] = previous_count + 1
    engine_logger = engines[engine_name].logger
    engine_logger.warning('%s', str(error_context))


def get_trace(traces):
    """Pick the most relevant frame record out of *traces*.

    The records are scanned from the last to the first; the first one whose
    file sits directly inside a ``searx/engines`` or
    ``searx/search/processors`` directory is returned. When none matches,
    the last record of *traces* is returned.
    """

    def _belongs_to_searx(record):
        # Compare the directories that immediately contain the file.
        parts = record.filename.split('/')
        in_engines = '/'.join(parts[-3:-1]) == 'searx/engines'
        return in_engines or '/'.join(parts[-4:-1]) == 'searx/search/processors'

    return next((record for record in reversed(traces) if _belongs_to_searx(record)), traces[-1])


def get_hostname(exc: HTTPError) -> typing.Optional[str]:
    """Return the network location (``host[:port]``) of the URL tied to *exc*.

    The URL of the request is preferred; the URL of the response is used as
    a fallback.

    :param exc: the HTTP error to inspect.
    :returns: the ``netloc`` part of the URL, or ``None`` when neither a
        request nor a response URL is available.
    """
    url = None
    try:
        url = exc.request.url
    except RuntimeError:
        # ``exc.request`` is a property that raises when no request has been
        # attached to the exception; fall back to the response below.
        pass
    if url is None:
        # Not every HTTPError carries a ``response`` attribute.
        response = getattr(exc, 'response', None)
        if response is not None:
            url = response.url
    if url is None:
        return None
    # urlparse() only accepts str / bytes, while the URL here is a URL object.
    return urlparse(str(url)).netloc


def get_request_exception_messages(
    exc: HTTPError,
) -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]:
    """Extract ``(status_code, reason, hostname)`` from an HTTP error.

    Each element is ``None`` when it cannot be determined: the status code
    and reason are only filled for ``HTTPStatusError``; the hostname needs a
    request or response URL.
    """
    url = None
    # exc.request is a property that raises when exc._request is not set,
    # so the private attribute is checked before touching the property.
    if getattr(exc, '_request', None) is not None:
        url = exc.request.url
    if url is None:
        response = getattr(exc, 'response', None)
        if response is not None:
            url = response.url

    hostname = None if url is None else url.host

    if isinstance(exc, HTTPStatusError):
        status_code = str(exc.response.status_code)
        reason = exc.response.reason_phrase
    else:
        status_code = None
        reason = None
    return (status_code, reason, hostname)


def get_messages(exc, filename) -> typing.Tuple:
    """Return the tuple of log parameters that describes *exc*.

    The checks run in a fixed order and the first matching one wins;
    *filename* is only used to recognise ``ValueError`` raised from a file
    whose path contains ``lxml``. Unrecognised exceptions give ``()``.
    """
    if isinstance(exc, JSONDecodeError):
        return (exc.msg,)
    if isinstance(exc, TypeError) or (isinstance(exc, ValueError) and 'lxml' in filename):
        return (str(exc),)
    if isinstance(exc, HTTPError):
        return get_request_exception_messages(exc)
    # Both XPath exception types are reported the same way.
    if isinstance(exc, (SearxXPathSyntaxException, SearxEngineXPathException)):
        return (exc.xpath_str, exc.message)
    if isinstance(exc, SearxEngineAPIException):
        first_arg = exc.args[0]
        return (str(first_arg),)
    if isinstance(exc, SearxEngineAccessDeniedException):
        return (exc.message,)
    return ()


def get_exception_classname(exc: Exception) -> str:
    """Return the class name of *exc*, qualified by its module.

    The module prefix is left out for builtin exceptions and for classes
    whose ``__module__`` is ``None``.
    """
    klass = exc.__class__
    module_name = klass.__module__
    qualified_name = klass.__qualname__
    # str.__module__ is the name of the builtins module.
    if module_name is not None and module_name != str.__module__:
        return '.'.join((module_name, qualified_name))
    return qualified_name


def get_error_context(framerecords, exception_classname, log_message, log_parameters, secondary) -> ErrorContext:
    """Build the ErrorContext for the most relevant frame of *framerecords*.

    :param framerecords: frame records as returned by ``inspect.trace()`` /
        ``inspect.stack()``; the frame to report is chosen by ``get_trace``.
    :param exception_classname: name of the exception class, or ``None``.
    :param log_message: free-form message, or ``None``.
    :param log_parameters: tuple of parameters attached to the error.
    :param secondary: flag stored as-is on the resulting context.
    :returns: an ErrorContext holding the file name (with the leading
        ``searx_parent_dir`` prefix removed when present), the function
        name, the line number and the stripped source line of that frame.
    """
    searx_frame = get_trace(framerecords)
    filename = searx_frame.filename
    if filename.startswith(searx_parent_dir):
        # "+ 1" also drops the character following the prefix
        # (presumably the path separator).
        filename = filename[len(searx_parent_dir) + 1 :]
    function = searx_frame.function
    line_no = searx_frame.lineno
    # code_context is None when the source lines of the frame are not
    # available; report an empty code line instead of failing while an
    # error is being recorded.
    code_context = searx_frame.code_context
    code = code_context[0].strip() if code_context else ''
    # Drop the local reference to the frame records as soon as possible.
    del framerecords
    return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary)


def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -> None:
    """Record *exc* as an error of *engine_name*.

    Does nothing when ``settings['general']['enable_metrics']`` is falsy.
    The frames come from ``inspect.trace()``, i.e. from the exception
    currently being handled.
    """
    metrics_enabled = settings['general']['enable_metrics']
    if not metrics_enabled:
        return
    framerecords = inspect.trace()
    try:
        error_context = get_error_context(
            framerecords,
            get_exception_classname(exc),
            None,
            # Index 1 of the last frame record is its file name.
            get_messages(exc, framerecords[-1][1]),
            secondary,
        )
        add_error_context(engine_name, error_context)
    finally:
        # Do not keep the frame records referenced longer than needed.
        del framerecords


def count_error(
    engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, secondary: bool = False
) -> None:
    """Record an error of *engine_name* that is described by a message.

    Does nothing when ``settings['general']['enable_metrics']`` is falsy.
    A missing or empty *log_parameters* is stored as ``()``.
    """
    metrics_enabled = settings['general']['enable_metrics']
    if not metrics_enabled:
        return
    # Skip this function's own frame, then reverse the order of the
    # remaining records so the direct caller ends up last.
    framerecords = inspect.stack()[1:]
    framerecords.reverse()
    try:
        parameters = log_parameters or ()
        error_context = get_error_context(framerecords, None, log_message, parameters, secondary)
        add_error_context(engine_name, error_context)
    finally:
        # Do not keep the frame records referenced longer than needed.
        del framerecords