From 65c29081cc3debad9b77194cb917f5b04ea4ac72 Mon Sep 17 00:00:00 2001
From: Alexandre Flament <alex@al-f.net>
Date: Thu, 22 Apr 2021 12:14:11 +0200
Subject: [mod] update /stats

---
 searx/metrics/__init__.py | 56 +++++++++++++++--------------------------------
 1 file changed, 18 insertions(+), 38 deletions(-)

(limited to 'searx/metrics/__init__.py')

diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py
index bae62c915..063554f1a 100644
--- a/searx/metrics/__init__.py
+++ b/searx/metrics/__init__.py
@@ -135,27 +135,13 @@ def to_percentage(stats, maxvalue):
 
 
 def get_engines_stats(engine_list):
-    global counter_storage, histogram_storage
-
     assert counter_storage is not None
     assert histogram_storage is not None
 
     list_time = []
-    list_time_http = []
-    list_time_total = []
-    list_result_count = []
-    list_error_count = []
-    list_scores = []
-    list_scores_per_result = []
-
-    max_error_count = max_http_time = max_time_total = max_result_count = max_score = None  # noqa
-    for engine_name in engine_list:
-        error_count = counter('engine', engine_name, 'search', 'count', 'error')
-
-        if counter('engine', engine_name, 'search', 'count', 'sent') > 0:
-            list_error_count.append({'avg': error_count, 'name': engine_name})
-            max_error_count = max(error_count, max_error_count or 0)
 
+    max_time_total = max_result_count = None  # noqa
+    for engine_name in engine_list:
         successful_count = counter('engine', engine_name, 'search', 'count', 'successful')
         if successful_count == 0:
             continue
@@ -163,6 +149,10 @@ def get_engines_stats(engine_list):
         result_count_sum = histogram('engine', engine_name, 'result', 'count').sum
         time_total = histogram('engine', engine_name, 'time', 'total').percentage(50)
         time_http = histogram('engine', engine_name, 'time', 'http').percentage(50)
+        time_total_p80 = histogram('engine', engine_name, 'time', 'total').percentage(80)
+        time_http_p80 = histogram('engine', engine_name, 'time', 'http').percentage(80)
+        time_total_p95 = histogram('engine', engine_name, 'time', 'total').percentage(95)
+        time_http_p95 = histogram('engine', engine_name, 'time', 'http').percentage(95)
         result_count = result_count_sum / float(successful_count)
 
         if result_count:
@@ -172,35 +162,25 @@ def get_engines_stats(engine_list):
             score = score_per_result = 0.0
 
         max_time_total = max(time_total, max_time_total or 0)
-        max_http_time = max(time_http, max_http_time or 0)
         max_result_count = max(result_count, max_result_count or 0)
-        max_score = max(score, max_score or 0)
 
         list_time.append({'total': round(time_total, 1),
+                          'total_p80': round(time_total_p80, 1),
+                          'total_p95': round(time_total_p95, 1),
                           'http': round(time_http, 1),
+                          'http_p80': round(time_http_p80, 1),
+                          'http_p95': round(time_http_p95, 1),
                           'name': engine_name,
-                          'processing': round(time_total - time_http, 1)})
-        list_time_total.append({'avg': time_total, 'name': engine_name})
-        list_time_http.append({'avg': time_http, 'name': engine_name})
-        list_result_count.append({'avg': result_count, 'name': engine_name})
-        list_scores.append({'avg': score, 'name': engine_name})
-        list_scores_per_result.append({'avg': score_per_result, 'name': engine_name})
-
-    list_time = sorted(list_time, key=itemgetter('total'))
-    list_time_total = sorted(to_percentage(list_time_total, max_time_total), key=itemgetter('avg'))
-    list_time_http = sorted(to_percentage(list_time_http, max_http_time), key=itemgetter('avg'))
-    list_result_count = sorted(to_percentage(list_result_count, max_result_count), key=itemgetter('avg'), reverse=True)
-    list_scores = sorted(list_scores, key=itemgetter('avg'), reverse=True)
-    list_scores_per_result = sorted(list_scores_per_result, key=itemgetter('avg'), reverse=True)
-    list_error_count = sorted(to_percentage(list_error_count, max_error_count), key=itemgetter('avg'), reverse=True)
+                          'processing': round(time_total - time_http, 1),
+                          'processing_p80': round(time_total_p80 - time_http_p80, 1),
+                          'processing_p95': round(time_total_p95 - time_http_p95, 1),
+                          'score': score,
+                          'score_per_result': score_per_result,
+                          'result_count': result_count,
+                          })
 
     return {
         'time': list_time,
         'max_time': math.ceil(max_time_total or 0),
-        'time_total': list_time_total,
-        'time_http': list_time_http,
-        'result_count': list_result_count,
-        'scores': list_scores,
-        'scores_per_result': list_scores_per_result,
-        'error_count': list_error_count,
+        'max_result_count': math.ceil(max_result_count or 0),
     }
-- 
cgit v1.2.3


From c54bf42cb9699501ccbd54a2df0d7fa73be8dcdb Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Thu, 22 Apr 2021 16:05:39 +0200
Subject: [mod] metrics: add to pylint and justify indentations

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/metrics/__init__.py | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'searx/metrics/__init__.py')

diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py
index 063554f1a..9648e6215 100644
--- a/searx/metrics/__init__.py
+++ b/searx/metrics/__init__.py
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
+# pylint: disable=missing-module-docstring, missing-function-docstring
 
 import typing
 import math
@@ -63,7 +64,7 @@ def initialize(engine_names=None):
     """
     Initialize metrics
     """
-    global counter_storage, histogram_storage
+    global counter_storage, histogram_storage # pylint: disable=global-statement
 
     counter_storage = CounterStorage()
     histogram_storage = HistogramStorage()
@@ -164,20 +165,21 @@ def get_engines_stats(engine_list):
         max_time_total = max(time_total, max_time_total or 0)
         max_result_count = max(result_count, max_result_count or 0)
 
-        list_time.append({'total': round(time_total, 1),
-                          'total_p80': round(time_total_p80, 1),
-                          'total_p95': round(time_total_p95, 1),
-                          'http': round(time_http, 1),
-                          'http_p80': round(time_http_p80, 1),
-                          'http_p95': round(time_http_p95, 1),
-                          'name': engine_name,
-                          'processing': round(time_total - time_http, 1),
-                          'processing_p80': round(time_total_p80 - time_http_p80, 1),
-                          'processing_p95': round(time_total_p95 - time_http_p95, 1),
-                          'score': score,
-                          'score_per_result': score_per_result,
-                          'result_count': result_count,
-                          })
+        list_time.append({
+            'total': round(time_total, 1),
+            'total_p80': round(time_total_p80, 1),
+            'total_p95': round(time_total_p95, 1),
+            'http': round(time_http, 1),
+            'http_p80': round(time_http_p80, 1),
+            'http_p95': round(time_http_p95, 1),
+            'name': engine_name,
+            'processing': round(time_total - time_http, 1),
+            'processing_p80': round(time_total_p80 - time_http_p80, 1),
+            'processing_p95': round(time_total_p95 - time_http_p95, 1),
+            'score': score,
+            'score_per_result': score_per_result,
+            'result_count': result_count,
+        })
 
     return {
         'time': list_time,
-- 
cgit v1.2.3


From 09e7ecdce22733be82bb141355c646ddec538714 Mon Sep 17 00:00:00 2001
From: Alexandre Flament <alex@al-f.net>
Date: Thu, 22 Apr 2021 17:47:53 +0200
Subject: [mod] /stats : add reliability column and sort by column links

---
 searx/metrics/__init__.py | 79 +++++++++++++++++++++++++++++++----------------
 1 file changed, 52 insertions(+), 27 deletions(-)

(limited to 'searx/metrics/__init__.py')

diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py
index 9648e6215..5665ca63c 100644
--- a/searx/metrics/__init__.py
+++ b/searx/metrics/__init__.py
@@ -97,12 +97,12 @@ def initialize(engine_names=None):
         histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total')
 
 
-def get_engine_errors(engline_list):
+def get_engine_errors(engline_name_list):
     result = {}
     engine_names = list(errors_per_engines.keys())
     engine_names.sort()
     for engine_name in engine_names:
-        if engine_name not in engline_list:
+        if engine_name not in engline_name_list:
             continue
 
         error_stats = errors_per_engines[engine_name]
@@ -126,61 +126,86 @@ def get_engine_errors(engline_list):
     return result
 
 
-def to_percentage(stats, maxvalue):
-    for engine_stat in stats:
-        if maxvalue:
-            engine_stat['percentage'] = int(engine_stat['avg'] / maxvalue * 100)
+def get_reliabilities(engline_name_list, checker_results):
+    reliabilities = {}
+
+    engine_errors = get_engine_errors(engline_name_list)
+
+    for engine_name in engline_name_list:
+        checker_result = checker_results.get(engine_name, {})
+        checker_success = checker_result.get('success', True)
+        errors = engine_errors.get(engine_name) or []
+        if counter('engine', engine_name, 'search', 'count', 'sent') == 0:
+            # no request
+            reliablity = None
+        elif checker_success and not errors:
+            reliablity = 100
+        elif 'simple' in checker_result.get('errors', {}):
+            # the basic (simple) test doesn't work: the engine is broken according to the checker
+            # even if there is no exception
+            reliablity = 0
         else:
-            engine_stat['percentage'] = 0
-    return stats
+            reliablity = 100 - sum([error['percentage'] for error in errors if not error.get('secondary')])
 
+        reliabilities[engine_name] = {
+            'reliablity': reliablity,
+            'errors': errors,
+            'checker': checker_results.get(engine_name, {}).get('errors', {}).keys(),
+        }
+    return reliabilities
 
-def get_engines_stats(engine_list):
+
+def round_or_none(number, digits):
+    return round(number, digits) if number else number
+
+
+def get_engines_stats(engine_name_list):
     assert counter_storage is not None
     assert histogram_storage is not None
 
     list_time = []
 
     max_time_total = max_result_count = None  # noqa
-    for engine_name in engine_list:
-        successful_count = counter('engine', engine_name, 'search', 'count', 'successful')
-        if successful_count == 0:
+    for engine_name in engine_name_list:
+        sent_count = counter('engine', engine_name, 'search', 'count', 'sent')
+        if sent_count == 0:
             continue
 
-        result_count_sum = histogram('engine', engine_name, 'result', 'count').sum
+        successful_count = counter('engine', engine_name, 'search', 'count', 'successful')
+
         time_total = histogram('engine', engine_name, 'time', 'total').percentage(50)
         time_http = histogram('engine', engine_name, 'time', 'http').percentage(50)
         time_total_p80 = histogram('engine', engine_name, 'time', 'total').percentage(80)
         time_http_p80 = histogram('engine', engine_name, 'time', 'http').percentage(80)
         time_total_p95 = histogram('engine', engine_name, 'time', 'total').percentage(95)
         time_http_p95 = histogram('engine', engine_name, 'time', 'http').percentage(95)
-        result_count = result_count_sum / float(successful_count)
 
-        if result_count:
+        result_count = histogram('engine', engine_name, 'result', 'count').percentage(50)
+        result_count_sum = histogram('engine', engine_name, 'result', 'count').sum
+        if successful_count and result_count_sum:
             score = counter('engine', engine_name, 'score')  # noqa
             score_per_result = score / float(result_count_sum)
         else:
             score = score_per_result = 0.0
 
-        max_time_total = max(time_total, max_time_total or 0)
-        max_result_count = max(result_count, max_result_count or 0)
+        max_time_total = max(time_total or 0, max_time_total or 0)
+        max_result_count = max(result_count or 0, max_result_count or 0)
 
         list_time.append({
-            'total': round(time_total, 1),
-            'total_p80': round(time_total_p80, 1),
-            'total_p95': round(time_total_p95, 1),
-            'http': round(time_http, 1),
-            'http_p80': round(time_http_p80, 1),
-            'http_p95': round(time_http_p95, 1),
             'name': engine_name,
-            'processing': round(time_total - time_http, 1),
-            'processing_p80': round(time_total_p80 - time_http_p80, 1),
-            'processing_p95': round(time_total_p95 - time_http_p95, 1),
+            'total': round_or_none(time_total, 1),
+            'total_p80': round_or_none(time_total_p80, 1),
+            'total_p95': round_or_none(time_total_p95, 1),
+            'http': round_or_none(time_http, 1),
+            'http_p80': round_or_none(time_http_p80, 1),
+            'http_p95': round_or_none(time_http_p95, 1),
+            'processing': round(time_total - time_http, 1) if time_total else None,
+            'processing_p80': round(time_total_p80 - time_http_p80, 1) if time_total else None,
+            'processing_p95': round(time_total_p95 - time_http_p95, 1) if time_total else None,
             'score': score,
             'score_per_result': score_per_result,
             'result_count': result_count,
         })
-
     return {
         'time': list_time,
         'max_time': math.ceil(max_time_total or 0),
-- 
cgit v1.2.3