summaryrefslogtreecommitdiff
path: root/searx/search/checker
diff options
context:
space:
mode:
Diffstat (limited to 'searx/search/checker')
-rw-r--r--searx/search/checker/__init__.py3
-rw-r--r--searx/search/checker/__main__.py30
-rw-r--r--searx/search/checker/background.py106
-rw-r--r--searx/search/checker/impl.py12
4 files changed, 143 insertions, 8 deletions
diff --git a/searx/search/checker/__init__.py b/searx/search/checker/__init__.py
index 442d5a09d..85b9178df 100644
--- a/searx/search/checker/__init__.py
+++ b/searx/search/checker/__init__.py
@@ -1 +1,4 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
from .impl import Checker
+from .background import initialize, get_result
diff --git a/searx/search/checker/__main__.py b/searx/search/checker/__main__.py
index 2f808237a..37b7e6cda 100644
--- a/searx/search/checker/__main__.py
+++ b/searx/search/checker/__main__.py
@@ -1,9 +1,13 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
import sys
import os
+import argparse
import searx.search
-import searx.search.processors
import searx.search.checker
+from searx.search import processors
+from searx.engines import engine_shortcuts
if sys.stdout.isatty() and os.environ.get('TERM') not in ['dumb', 'unknown']:
@@ -18,20 +22,24 @@ else:
BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = "", "", "", "", "", "", "", ""
-def iter_processor():
- if len(sys.argv) > 1:
- for name, processor in searx.search.processors.items():
- if name in sys.argv:
def iter_processor(engine_name_list):
    """Yield ``(name, processor)`` pairs for the engines to check.

    :param engine_name_list: engine names or shortcuts. When it is empty
        (or ``None``), every registered processor is yielded.

    A name found in ``engine_shortcuts`` is replaced by the value it maps to.
    A name without a matching processor is reported on stdout and skipped.
    """
    if not engine_name_list:
        yield from processors.items()
        return
    for name in engine_name_list:
        # accept a shortcut as well as a full engine name
        name = engine_shortcuts.get(name, name)
        processor = processors.get(name)
        if processor is None:
            print(BOLD_SEQ, 'Engine ', '%-30s' % name, RESET_SEQ, RED, ' Not found ', RESET_SEQ)
            continue
        yield name, processor
-def main():
+def run(engine_name_list):
searx.search.initialize()
broken_urls = []
- for name, processor in iter_processor():
+ for name, processor in iter_processor(engine_name_list):
if sys.stdout.isatty():
print(BOLD_SEQ, 'Engine ', '%-30s' % name, RESET_SEQ, WHITE, ' Checking', RESET_SEQ)
checker = searx.search.checker.Checker(processor)
@@ -48,5 +56,13 @@ def main():
print('Error fetching', url)
def main():
    """Parse the command line and check the requested engines.

    Positional arguments are engine names or shortcuts; without any argument
    the empty list is passed on to ``run``.
    """
    arg_parser = argparse.ArgumentParser(description='Check searx engines.')
    arg_parser.add_argument(
        'engine_name_list',
        metavar='engine name',
        type=str,
        nargs='*',
        help='engines name or shortcut list. Empty for all engines.',
    )
    run(arg_parser.parse_args().engine_name_list)
+
+
if __name__ == '__main__':
main()
diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py
new file mode 100644
index 000000000..45188ab38
--- /dev/null
+++ b/searx/search/checker/background.py
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import json
+import random
+import time
+import threading
+import os
+import signal
+
+from searx import logger, settings, searx_debug
+from searx.exceptions import SearxSettingsException
+from searx.search.processors import processors
+from searx.search.checker import Checker
+from searx.shared import schedule, storage
+
+
# Key of the serialized checker result in the shared storage.
CHECKER_RESULT = 'CHECKER_RESULT'
# Held for the duration of a check; run() gives up when it cannot take it.
running = threading.Lock()
+
+
+def _get_interval(every, error_msg):
+ if isinstance(every, int):
+ every = (every, every)
+ if not isinstance(every, (tuple, list))\
+ or len(every) != 2\
+ or not isinstance(every[0], int)\
+ or not isinstance(every[1], int):
+ raise SearxSettingsException(error_msg, None)
+ return every
+
+
def _get_every():
    """Return the validated ``checker.scheduling.every`` setting.

    ``(300, 1800)`` is used when the setting is missing.
    """
    checker_settings = settings.get('checker', {})
    scheduling_settings = checker_settings.get('scheduling', {})
    raw_value = scheduling_settings.get('every', (300, 1800))
    return _get_interval(raw_value, 'checker.scheduling.every is not a int or list')
+
+
def get_result():
    """Return the last stored checker result, or ``None`` when there is none.

    The result is read from the shared storage, where it is kept as a JSON string.
    """
    serialized_result = storage.get_str(CHECKER_RESULT)
    if serialized_result is None:
        return None
    return json.loads(serialized_result)
+
+
def run():
    """Check every engine processor and store the outcome in the shared storage.

    Returns immediately when another check is already in progress.  The stored
    value is a JSON object mapping each engine name to ``{'status': True}`` or
    to ``{'status': False, 'errors': ...}``.  Nothing is stored when a check
    raises; the exception is logged and propagated.
    """
    # non-blocking: a concurrent caller skips the run instead of waiting for the lock
    if not running.acquire(blocking=False):
        return
    try:
        logger.info('Starting checker')
        result = {}
        for name, processor in processors.items():
            logger.debug('Checking %s engine', name)
            checker = Checker(processor)
            checker.run()
            # NOTE(review): "succesfull" is kept as spelled here; presumably it
            # matches the attribute defined by the checker implementation - confirm
            if checker.test_results.succesfull:
                result[name] = {'status': True}
            else:
                result[name] = {'status': False, 'errors': checker.test_results.errors}

        storage.set_str(CHECKER_RESULT, json.dumps(result))
        logger.info('Check done')
    except Exception:
        # run() is started from background threads: make the failure visible in the log
        logger.exception('Error while running the checker')
        raise
    finally:
        running.release()
+
+
def _run_with_delay():
    """Sleep for a random part of the ``every`` window, then run the checker.

    The delay is drawn between 0 and the width of the interval returned by
    ``_get_every()``.
    """
    lower, upper = _get_every()
    delay = random.randint(0, upper - lower)
    logger.debug('Start checker in %i seconds', delay)
    time.sleep(delay)
    run()
+
+
def _start_scheduling():
    """Register ``_run_with_delay`` with the shared scheduler, then run one check.

    The first bound of the ``every`` interval is handed to ``schedule``.
    """
    period = _get_every()[0]
    schedule(period, _run_with_delay)
    run()
+
+
def _signal_handler(signum, frame):
    """Signal handler: start ``run`` in a daemon thread.

    Both arguments are ignored.
    """
    threading.Thread(target=run, daemon=True).start()
+
+
def initialize():
    """Install the SIGUSR1 handler and, unless disabled, schedule the checker.

    Scheduling is skipped when debug mode is on and ``checker.off_when_debug``
    is true (the default), or when ``checker.scheduling`` is missing or empty.
    Otherwise a daemon timer calls ``_start_scheduling`` after a random delay
    drawn from ``checker.scheduling.start_after`` (default ``(300, 1800)``).

    :raises SearxSettingsException: when ``checker.scheduling.every`` or
        ``checker.scheduling.start_after`` is invalid.
    """
    logger.info('Send SIGUSR1 signal to pid %i to start the checker', os.getpid())
    signal.signal(signal.SIGUSR1, _signal_handler)

    checker_settings = settings.get('checker', {})

    # special case when debug is activated
    if searx_debug and checker_settings.get('off_when_debug', True):
        logger.info('debug mode: checker is disabled')
        return

    scheduling = checker_settings.get('scheduling', None)
    if not scheduling:
        logger.info('Checker scheduler is disabled')
        return

    # check the value of checker.scheduling.every now: otherwise an invalid value
    # is only detected in the timer thread, once the initial delay has elapsed
    _get_every()

    # start the scheduling after a random delay taken from checker.scheduling.start_after
    start_after = scheduling.get('start_after', (300, 1800))
    start_after = _get_interval(start_after, 'checker.scheduling.start_after is not a int or list')
    delay = random.randint(start_after[0], start_after[1])
    logger.info('Start checker in %i seconds', delay)
    t = threading.Timer(delay, _start_scheduling)
    t.daemon = True
    t.start()
diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py
index f55b6d0f5..abef5f8e9 100644
--- a/searx/search/checker/impl.py
+++ b/searx/search/checker/impl.py
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
import typing
import types
import functools
@@ -11,7 +13,7 @@ import requests.exceptions
from searx import poolrequests, logger
from searx.results import ResultContainer
-from searx.search import SearchQuery, EngineRef
+from searx.search.models import SearchQuery, EngineRef
from searx.search.processors import EngineProcessor
@@ -240,18 +242,24 @@ class ResultContainerTests:
self._check_infoboxes(self.result_container.infoboxes)
def has_infobox(self):
    """Record the error 'No infobox' when the ResultContainer has no infobox."""
    if not self.result_container.infoboxes:
        self._record_error('No infobox')
def has_answer(self):
    """Record the error 'No answer' when the ResultContainer has no answer."""
    if not self.result_container.answers:
        self._record_error('No answer')
def has_language(self, lang):
    """Record an error when `lang` is not among the detected languages.

    The expectation is that at least one title or content of the results is
    written in `lang`.  Detection relies on pycld3 and may not be accurate.
    """
    if lang in self.languages:
        return
    self._record_error(lang + ' not found')
def not_empty(self):
+ """Check the ResultContainer has at least one answer or infobox or result"""
result_types = set()
results = self.result_container.get_ordered_results()
if len(results) > 0:
@@ -267,6 +275,7 @@ class ResultContainerTests:
self._record_error('No result')
def one_title_contains(self, title: str):
+ """Check that one of the titles contains `title` (case-insensitive comparison)"""
title = title.lower()
for result in self.result_container.get_ordered_results():
if title in result['title'].lower():
@@ -287,6 +296,7 @@ class CheckerTests:
self.result_container_tests_list = result_container_tests_list
def unique_results(self):
+ """Check that the results of each ResultContainer are unique"""
urls_list = [rct.result_urls for rct in self.result_container_tests_list]
if len(urls_list[0]) > 0:
# results on the first page