diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2021-01-20 18:48:29 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-01-20 18:48:29 +0000 |
| commit | f310305c54c3cd1d9fc74f09453294edbd2b5486 (patch) | |
| tree | 2f42e532b22e945c97813b8fc1a5fd440eb8569c /searx | |
| parent | 0495e15df4b5e88adef24a9b5c3dbb35e4fac072 (diff) | |
| parent | 73c86f9bf233aa4f265d1c01ea94d01563e299f8 (diff) | |
Merge pull request #2481 from dalf/mod-check
Mod check
Diffstat (limited to 'searx')
| -rw-r--r-- | searx/search/checker/__main__.py | 1 | ||||
| -rw-r--r-- | searx/search/checker/impl.py | 15 | ||||
| -rw-r--r-- | searx/search/processors/online.py | 6 | ||||
| -rw-r--r-- | searx/settings.yml | 12 |
4 files changed, 23 insertions, 11 deletions
diff --git a/searx/search/checker/__main__.py b/searx/search/checker/__main__.py index 75b37e6c5..0d7d1b8ed 100644 --- a/searx/search/checker/__main__.py +++ b/searx/search/checker/__main__.py @@ -74,6 +74,7 @@ def run(engine_name_list, verbose): stdout.write(f' {"found languages":15}: {" ".join(sorted(list(checker.test_results.languages)))}\n') for test_name, logs in checker.test_results.logs.items(): for log in logs: + log = map(lambda l: l if isinstance(l, str) else repr(l), log) stdout.write(f' {test_name:15}: {RED}{" ".join(log)}{RESET_SEQ}\n') diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py index 244536f1b..25887b0f4 100644 --- a/searx/search/checker/impl.py +++ b/searx/search/checker/impl.py @@ -9,7 +9,8 @@ from time import time from urllib.parse import urlparse import re -import cld3 +from langdetect import detect_langs +from langdetect.lang_detect_exception import LangDetectException import requests.exceptions from searx import poolrequests, logger @@ -181,10 +182,14 @@ class ResultContainerTests: self.test_results.add_error(self.test_name, message, *args, '(' + sqstr + ')') def _add_language(self, text: str) -> typing.Optional[str]: - r = cld3.get_language(str(text)) # pylint: disable=E1101 - if r is not None and r.probability >= 0.98 and r.is_reliable: - self.languages.add(r.language) - self.test_results.add_language(r.language) + try: + r = detect_langs(str(text)) # pylint: disable=E1101 + except LangDetectException: + return None + + if len(r) > 0 and r[0].prob > 0.95: + self.languages.add(r[0].lang) + self.test_results.add_language(r[0].lang) return None def _check_result(self, result): diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index 0ceb0adf2..d79edd542 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -239,14 +239,14 @@ class OnlineProcessor(EngineProcessor): 'test': ['unique_results'] } - if getattr(self.engine, 'lang', False): + if getattr(self.engine, 'supported_languages', []): tests['lang_fr'] = { 'matrix': {'query': 'paris', 'lang': 'fr'}, - 'result_container': ['not_empty', ('has_lang', 'fr')], + 'result_container': ['not_empty', ('has_language', 'fr')], } tests['lang_en'] = { 'matrix': {'query': 'paris', 'lang': 'en'}, - 'result_container': ['not_empty', ('has_lang', 'en')], + 'result_container': ['not_empty', ('has_language', 'en')], } if getattr(self.engine, 'safesearch', False): diff --git a/searx/settings.yml b/searx/settings.yml index 767bf6d82..d7149ad7c 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -105,11 +105,17 @@ outgoing: # communication with search engines checker: # disable checker when in debug mode off_when_debug: True + # scheduling: interval or int # use "scheduling: False" to disable scheduling - scheduling: - start_after: [300, 1800] # delay to start the first run of the checker - every: [86400, 90000] # how often the checker runs + # to activate the scheduler: + # * uncomment "scheduling" section + # * add "cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1" to your uwsgi.ini + + # scheduling: + # start_after: [300, 1800] # delay to start the first run of the checker + # every: [86400, 90000] # how often the checker runs + # additional tests: only for the YAML anchors (see the engines section) additional_tests: rosebud: &test_rosebud |