From 3a9f513521d006a7939538cce368d7b799e32c30 Mon Sep 17 00:00:00 2001
From: Alexandre Flament
Date: Tue, 5 Jan 2021 11:24:39 +0100
Subject: [enh] checker: background check

See settings.yml for the options
SIGUSR1 signal starts the checker.
The result is available at /stats/checker
---
 searx/search/checker/background.py | 106 +++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 searx/search/checker/background.py

(limited to 'searx/search/checker/background.py')

diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py
new file mode 100644
index 000000000..45188ab38
--- /dev/null
+++ b/searx/search/checker/background.py
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import json
+import random
+import time
+import threading
+import os
+import signal
+
+from searx import logger, settings, searx_debug
+from searx.exceptions import SearxSettingsException
+from searx.search.processors import processors
+from searx.search.checker import Checker
+from searx.shared import schedule, storage
+
+
+CHECKER_RESULT = 'CHECKER_RESULT'
+running = threading.Lock()
+
+
+def _get_interval(every, error_msg):
+    if isinstance(every, int):
+        every = (every, every)
+    if not isinstance(every, (tuple, list))\
+       or len(every) != 2\
+       or not isinstance(every[0], int)\
+       or not isinstance(every[1], int):
+        raise SearxSettingsException(error_msg, None)
+    return every
+
+
+def _get_every():
+    every = settings.get('checker', {}).get('scheduling', {}).get('every', (300, 1800))
+    return _get_interval(every, 'checker.scheduling.every is not a int or list')
+
+
+def get_result():
+    serialized_result = storage.get_str('CHECKER_RESULT')
+    if serialized_result is not None:
+        return json.loads(serialized_result)
+
+
+def run():
+    if not running.acquire(blocking=False):
+        return
+    try:
+        logger.info('Starting checker')
+        result = {}
+        for name, processor in processors.items():
+            logger.debug('Checking %s engine', name)
+            checker = Checker(processor)
+            checker.run()
+            if checker.test_results.succesfull:
+                result[name] = {'status': True}
+            else:
+                result[name] = {'status': False, 'errors': checker.test_results.errors}
+
+        storage.set_str('CHECKER_RESULT', json.dumps(result))
+        logger.info('Check done')
+    finally:
+        running.release()
+
+
+def _run_with_delay():
+    every = _get_every()
+    delay = random.randint(0, every[1] - every[0])
+    logger.debug('Start checker in %i seconds', delay)
+    time.sleep(delay)
+    run()
+
+
+def _start_scheduling():
+    every = _get_every()
+    schedule(every[0], _run_with_delay)
+    run()
+
+
+def _signal_handler(signum, frame):
+    t = threading.Thread(target=run)
+    t.daemon = True
+    t.start()
+
+
+def initialize():
+    logger.info('Send SIGUSR1 signal to pid %i to start the checker', os.getpid())
+    signal.signal(signal.SIGUSR1, _signal_handler)
+
+    # special case when debug is activate
+    if searx_debug and settings.get('checker', {}).get('off_when_debug', True):
+        logger.info('debug mode: checker is disabled')
+        return
+
+    # check value of checker.scheduling.every now
+    scheduling = settings.get('checker', {}).get('scheduling', None)
+    if scheduling is None or not scheduling:
+        logger.info('Checker scheduler is disabled')
+        return
+
+    #
+    start_after = scheduling.get('start_after', (300, 1800))
+    start_after = _get_interval(start_after, 'checker.scheduling.start_after is not a int or list')
+    delay = random.randint(start_after[0], start_after[1])
+    logger.info('Start checker in %i seconds', delay)
+    t = threading.Timer(delay, _start_scheduling)
+    t.daemon = True
+    t.start()
-- 
cgit v1.2.3


From 87bafbc32b34ef7f3033bdea6a4bfa966a6068c1 Mon Sep 17 00:00:00 2001
From: Alexandre Flament
Date: Mon, 11 Jan 2021 18:43:12 +0100
Subject: [mod] checker: add status and timestamp to the result

for each engine: replace status by success
---
 searx/search/checker/background.py | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

(limited to 'searx/search/checker/background.py')

diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py
index 45188ab38..0fc13ddb6 100644
--- a/searx/search/checker/background.py
+++ b/searx/search/checker/background.py
@@ -35,28 +35,39 @@ def _get_every():
 
 
 def get_result():
-    serialized_result = storage.get_str('CHECKER_RESULT')
+    serialized_result = storage.get_str(CHECKER_RESULT)
     if serialized_result is not None:
         return json.loads(serialized_result)
 
 
+def _set_result(result):
+    result['timestamp'] = int(time.time() / 3600) * 3600
+    storage.set_str(CHECKER_RESULT, json.dumps(result))
+
+
 def run():
     if not running.acquire(blocking=False):
         return
     try:
         logger.info('Starting checker')
-        result = {}
+        result = {
+            'status': 'ok',
+            'engines': {}
+        }
         for name, processor in processors.items():
             logger.debug('Checking %s engine', name)
             checker = Checker(processor)
             checker.run()
             if checker.test_results.succesfull:
-                result[name] = {'status': True}
+                result['engines'][name] = {'success': True}
             else:
-                result[name] = {'status': False, 'errors': checker.test_results.errors}
+                result['engines'][name] = {'success': False, 'errors': checker.test_results.errors}
 
-        storage.set_str('CHECKER_RESULT', json.dumps(result))
+        _set_result(result)
         logger.info('Check done')
+    except Exception:
+        _set_result({'status': 'error'})
+        logger.exception('Error while running the checker')
     finally:
         running.release()
 
@@ -85,6 +96,9 @@ def initialize():
     logger.info('Send SIGUSR1 signal to pid %i to start the checker', os.getpid())
     signal.signal(signal.SIGUSR1, _signal_handler)
 
+    # disabled by default
+    _set_result({'status': 'disabled'})
+
     # special case when debug is activate
     if searx_debug and settings.get('checker', {}).get('off_when_debug', True):
         logger.info('debug mode: checker is disabled')
         return
@@ -97,6 +111,8 @@ def initialize():
         return
 
     #
+    _set_result({'status': 'unknow'})
+
     start_after = scheduling.get('start_after', (300, 1800))
     start_after = _get_interval(start_after, 'checker.scheduling.start_after is not a int or list')
     delay = random.randint(start_after[0], start_after[1])
-- 
cgit v1.2.3


From 7f0c508598cc2197e53b877dcf4c76e25a097c4f Mon Sep 17 00:00:00 2001
From: Alexandre Flament
Date: Tue, 12 Jan 2021 09:33:58 +0100
Subject: [fix] checker: fix typo

unknown instead of unknow
---
 searx/search/checker/background.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'searx/search/checker/background.py')

diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py
index 0fc13ddb6..be30897bc 100644
--- a/searx/search/checker/background.py
+++ b/searx/search/checker/background.py
@@ -111,7 +111,7 @@ def initialize():
         return
 
     #
-    _set_result({'status': 'unknow'})
+    _set_result({'status': 'unknown'})
 
     start_after = scheduling.get('start_after', (300, 1800))
     start_after = _get_interval(start_after, 'checker.scheduling.start_after is not a int or list')
-- 
cgit v1.2.3


From 912c7e975c3943db798d748fa48d460467b66d30 Mon Sep 17 00:00:00 2001
From: Alexandre Flament
Date: Wed, 13 Jan 2021 14:07:39 +0100
Subject: [fix] checker: don't run the checker when uwsgi is not properly configured

Before this commit, even with the scheduler disabled, the checker was running
at least once for each uwsgi worker.
---
 searx/search/checker/background.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'searx/search/checker/background.py')

diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py
index be30897bc..e41bff5f5 100644
--- a/searx/search/checker/background.py
+++ b/searx/search/checker/background.py
@@ -40,8 +40,9 @@ def get_result():
         return json.loads(serialized_result)
 
 
-def _set_result(result):
-    result['timestamp'] = int(time.time() / 3600) * 3600
+def _set_result(result, include_timestamp=True):
+    if include_timestamp:
+        result['timestamp'] = int(time.time() / 3600) * 3600
     storage.set_str(CHECKER_RESULT, json.dumps(result))
 
 
@@ -82,8 +83,8 @@ def _run_with_delay():
 
 def _start_scheduling():
     every = _get_every()
-    schedule(every[0], _run_with_delay)
-    run()
+    if schedule(every[0], _run_with_delay):
+        run()
 
 
 def _signal_handler(signum, frame):
@@ -111,7 +112,7 @@ def initialize():
         return
 
     #
-    _set_result({'status': 'unknown'})
+    _set_result({'status': 'unknown'}, include_timestamp=False)
 
     start_after = scheduling.get('start_after', (300, 1800))
     start_after = _get_interval(start_after, 'checker.scheduling.start_after is not a int or list')
-- 
cgit v1.2.3