1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
|
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, invalid-name
import typing as t
import inspect
from json import JSONDecodeError
from urllib.parse import urlparse
from httpx import HTTPError, HTTPStatusError
from searx.exceptions import (
SearxXPathSyntaxException,
SearxEngineXPathException,
SearxEngineAPIException,
SearxEngineAccessDeniedException,
)
from searx import searx_parent_dir, settings
from searx.engines import engines
# Error counters per engine: maps an engine name to a dict whose keys are
# ErrorContext objects and whose values are the number of times that context
# has been recorded (the dict is filled by add_error_context()).
errors_per_engines: dict[str, t.Any] = {}
# Type of the message parameters attached to an error context: a tuple of
# strings of any length.
LogParametersType = tuple[str, ...]
class ErrorContext:
    """Hashable description of where and why an error was recorded.

    Two contexts are equal (and hash alike) when all of their fields are
    equal, which lets instances be used as dictionary keys for counting
    identical errors.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        filename: str,
        function: str,
        line_no: int,
        code: str,
        exception_classname: str,
        log_message: str,
        log_parameters: LogParametersType,
        secondary: bool,
    ):
        # location of the error
        self.filename: str = filename
        self.function: str = function
        self.line_no: int = line_no
        self.code: str = code
        # nature of the error
        self.exception_classname: str = exception_classname
        self.log_message: str = log_message
        self.log_parameters: LogParametersType = log_parameters
        self.secondary: bool = secondary

    def _identity(self) -> tuple:
        """Return every field as one tuple; basis of equality and hashing."""
        return (
            self.filename,
            self.function,
            self.line_no,
            self.code,
            self.exception_classname,
            self.log_message,
            self.log_parameters,
            self.secondary,
        )

    def __eq__(self, o) -> bool:  # pylint: disable=invalid-name
        if isinstance(o, ErrorContext):
            return self._identity() == o._identity()  # pylint: disable=protected-access
        return False

    def __hash__(self):
        return hash(self._identity())

    def __repr__(self):
        # NOTE: ``function`` is not part of the representation and
        # ``secondary`` is printed after the closing parenthesis.
        return (
            f"ErrorContext({self.filename!r}, {self.line_no!r}, {self.code!r}, "
            f"{self.exception_classname!r}, {self.log_message!r}, {self.log_parameters!r}) "
            f"{self.secondary!r}"
        )
def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
    """Count one occurrence of ``error_context`` for ``engine_name`` and log it.

    The counter lives in the module-level ``errors_per_engines`` mapping; the
    context is also written as a warning to the logger of the engine.
    """
    engine_errors = errors_per_engines.setdefault(engine_name, {})
    if error_context in engine_errors:
        engine_errors[error_context] += 1
    else:
        engine_errors[error_context] = 1
    engine_logger = engines[engine_name].logger
    engine_logger.warning('%s', str(error_context))
def get_trace(traces):
    """Return the most relevant frame record of ``traces``.

    The records are scanned from the last one to the first one; the first
    record whose file sits directly in a ``searx/engines`` or
    ``searx/search/processors`` directory is returned.  When no record
    matches, the last record of ``traces`` is returned.
    """
    engines_dir = ['searx', 'engines']
    processors_dir = ['searx', 'search', 'processors']
    for frame_record in reversed(traces):
        # directories of the path, without the file name itself
        parent_dirs = frame_record.filename.split('/')[:-1]
        if parent_dirs[-2:] == engines_dir or parent_dirs[-3:] == processors_dir:
            return frame_record
    return traces[-1]
def get_hostname(exc: HTTPError) -> str | None:
    """Return the network location (``host[:port]``) of the URL tied to ``exc``.

    The URL of the request is preferred; the URL of the response is used as a
    fallback.  ``None`` is returned when the exception carries no URL at all.
    """
    try:
        # ``exc.request`` is a property that raises a RuntimeError when no
        # request has been attached to the exception.
        url = exc.request.url
    except RuntimeError:
        url = None
    if url is None:
        # only some HTTP errors carry a response, hence the getattr()
        response = getattr(exc, 'response', None)
        if response is not None:
            url = response.url
    if url is None:
        return None
    # urlparse() only accepts str / bytes: URL objects have to be converted.
    return urlparse(str(url)).netloc
def get_request_exception_messages(
    exc: HTTPError,
) -> tuple[str | None, str | None, str | None]:
    """Return ``(status_code, reason, hostname)`` for a HTTP error.

    ``hostname`` comes from the URL of the request or, failing that, from the
    URL of the response.  ``status_code`` and ``reason`` are only set for a
    ``HTTPStatusError``; every item that is unknown is ``None``.
    """
    url = None
    # ``exc.request`` is a property that raises a RuntimeError when
    # ``exc._request`` is not set, so the private attribute is tested first.
    if getattr(exc, '_request', None) is not None:
        url = exc.request.url
    if url is None and getattr(exc, 'response', None) is not None:
        url = exc.response.url
    hostname = None if url is None else url.host
    if isinstance(exc, HTTPStatusError):
        response = exc.response
        return (str(response.status_code), response.reason_phrase, hostname)
    return (None, None, hostname)
def get_messages(exc, filename) -> tuple[str, ...]:
    """Return the message parameters to store for the exception ``exc``.

    ``filename`` is only used to recognise a ``ValueError`` raised from a
    file whose name contains ``lxml``.  An empty tuple is returned for
    exception types that are not handled here.
    """
    # The order of the tests matters: JSONDecodeError is a ValueError.
    if isinstance(exc, JSONDecodeError):
        messages = (exc.msg,)
    elif isinstance(exc, TypeError) or (isinstance(exc, ValueError) and 'lxml' in filename):
        messages = (str(exc),)
    elif isinstance(exc, HTTPError):
        messages = get_request_exception_messages(exc)
    elif isinstance(exc, (SearxXPathSyntaxException, SearxEngineXPathException)):
        messages = (exc.xpath_str, exc.message)
    elif isinstance(exc, SearxEngineAPIException):
        messages = (str(exc.args[0]),)
    elif isinstance(exc, SearxEngineAccessDeniedException):
        messages = (exc.message,)
    else:
        messages = ()
    return messages
def get_exception_classname(exc: BaseException) -> str:
    """Return the name of the class of ``exc``, qualified by its module.

    The module prefix is left out when the class has no module or when it
    comes from the module of the builtins.
    """
    klass = exc.__class__
    qualname = klass.__qualname__
    module_name = klass.__module__
    # ``str.__class__`` is ``type``: its module is the module of the builtins
    builtins_module = str.__class__.__module__
    if module_name is None or module_name == builtins_module:
        return qualname
    return '.'.join((module_name, qualname))
def get_error_context(
    framerecords, exception_classname, log_message, log_parameters: LogParametersType, secondary: bool
) -> ErrorContext:
    """Build the ``ErrorContext`` that describes where an error happened.

    The frame record is selected by ``get_trace()`` among ``framerecords``;
    its file name, function name, line number and source line are stored in
    the context together with the other arguments, which are passed through
    unchanged.
    """
    searx_frame = get_trace(framerecords)
    filename = searx_frame.filename
    if filename.startswith(searx_parent_dir):
        # Strip the parent directory and one more character (presumably the
        # path separator) so that the stored path is relative.
        filename = filename[len(searx_parent_dir) + 1 :]
    function = searx_frame.function
    line_no = searx_frame.lineno
    # ``code_context`` is None when the source of the frame can't be
    # retrieved: fall back to an empty string instead of raising a TypeError
    # while an error is being recorded.
    code_context = searx_frame.code_context
    code = code_context[0].strip() if code_context else ''
    del framerecords
    return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary)
def count_exception(engine_name: str, exc: BaseException, secondary: bool = False) -> None:
    """Record the exception ``exc`` in the error counters of ``engine_name``.

    Nothing is recorded when ``settings['general']['enable_metrics']`` is
    false.  The location of the error is taken from ``inspect.trace()``.
    """
    if settings['general']['enable_metrics']:
        trace_records = inspect.trace()
        try:
            classname = get_exception_classname(exc)
            # file name of the last frame record
            parameters = get_messages(exc, trace_records[-1][1])
            context = get_error_context(trace_records, classname, None, parameters, secondary)
            add_error_context(engine_name, context)
        finally:
            # drop the reference to the frame records
            del trace_records
def count_error(
    engine_name: str,
    log_message: str,
    log_parameters: LogParametersType | None = None,
    secondary: bool = False,
) -> None:
    """Record the error ``log_message`` in the error counters of ``engine_name``.

    Nothing is recorded when ``settings['general']['enable_metrics']`` is
    false.  The location of the error is looked up in the call stack, the
    frame of this function excluded; a missing ``log_parameters`` is stored
    as an empty tuple.
    """
    if settings['general']['enable_metrics']:
        # the call stack without the first entry (this function), outermost
        # call first
        caller_records = inspect.stack()[:0:-1]
        try:
            add_error_context(
                engine_name,
                get_error_context(caller_records, None, log_message, log_parameters or (), secondary),
            )
        finally:
            # drop the reference to the frame records
            del caller_records
|