summary refs log tree commit diff
diff options
context:
space:
mode:
author Adam Tauber <asciimoo@gmail.com> 2021-01-20 18:48:29 +0000
committer GitHub <noreply@github.com> 2021-01-20 18:48:29 +0000
commit f310305c54c3cd1d9fc74f09453294edbd2b5486 (patch)
tree 2f42e532b22e945c97813b8fc1a5fd440eb8569c
parent 0495e15df4b5e88adef24a9b5c3dbb35e4fac072 (diff)
parent 73c86f9bf233aa4f265d1c01ea94d01563e299f8 (diff)
Merge pull request #2481 from dalf/mod-check
Mod check
-rw-r--r-- requirements.txt 2
-rw-r--r-- searx/search/checker/__main__.py 1
-rw-r--r-- searx/search/checker/impl.py 15
-rw-r--r-- searx/search/processors/online.py 6
-rw-r--r-- searx/settings.yml 12
-rwxr-xr-x utils/searx.sh 6
6 files changed, 26 insertions, 16 deletions
diff --git a/requirements.txt b/requirements.txt
index 776bbc20b..a8d9b3f20 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,4 @@ pygments==2.1.3
python-dateutil==2.8.1
pyyaml==5.3.1
requests[socks]==2.25.1
-pycld3==0.20
+langdetect==1.0.8
diff --git a/searx/search/checker/__main__.py b/searx/search/checker/__main__.py
index 75b37e6c5..0d7d1b8ed 100644
--- a/searx/search/checker/__main__.py
+++ b/searx/search/checker/__main__.py
@@ -74,6 +74,7 @@ def run(engine_name_list, verbose):
stdout.write(f' {"found languages":15}: {" ".join(sorted(list(checker.test_results.languages)))}\n')
for test_name, logs in checker.test_results.logs.items():
for log in logs:
+ log = map(lambda l: l if isinstance(l, str) else repr(l), log)
stdout.write(f' {test_name:15}: {RED}{" ".join(log)}{RESET_SEQ}\n')
diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py
index 244536f1b..25887b0f4 100644
--- a/searx/search/checker/impl.py
+++ b/searx/search/checker/impl.py
@@ -9,7 +9,8 @@ from time import time
from urllib.parse import urlparse
import re
-import cld3
+from langdetect import detect_langs
+from langdetect.lang_detect_exception import LangDetectException
import requests.exceptions
from searx import poolrequests, logger
@@ -181,10 +182,14 @@ class ResultContainerTests:
self.test_results.add_error(self.test_name, message, *args, '(' + sqstr + ')')
def _add_language(self, text: str) -> typing.Optional[str]:
- r = cld3.get_language(str(text)) # pylint: disable=E1101
- if r is not None and r.probability >= 0.98 and r.is_reliable:
- self.languages.add(r.language)
- self.test_results.add_language(r.language)
+ try:
+ r = detect_langs(str(text)) # pylint: disable=E1101
+ except LangDetectException:
+ return None
+
+ if len(r) > 0 and r[0].prob > 0.95:
+ self.languages.add(r[0].lang)
+ self.test_results.add_language(r[0].lang)
return None
def _check_result(self, result):
diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py
index 0ceb0adf2..d79edd542 100644
--- a/searx/search/processors/online.py
+++ b/searx/search/processors/online.py
@@ -239,14 +239,14 @@ class OnlineProcessor(EngineProcessor):
'test': ['unique_results']
}
- if getattr(self.engine, 'lang', False):
+ if getattr(self.engine, 'supported_languages', []):
tests['lang_fr'] = {
'matrix': {'query': 'paris', 'lang': 'fr'},
- 'result_container': ['not_empty', ('has_lang', 'fr')],
+ 'result_container': ['not_empty', ('has_language', 'fr')],
}
tests['lang_en'] = {
'matrix': {'query': 'paris', 'lang': 'en'},
- 'result_container': ['not_empty', ('has_lang', 'en')],
+ 'result_container': ['not_empty', ('has_language', 'en')],
}
if getattr(self.engine, 'safesearch', False):
diff --git a/searx/settings.yml b/searx/settings.yml
index 767bf6d82..d7149ad7c 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -105,11 +105,17 @@ outgoing: # communication with search engines
checker:
# disable checker when in debug mode
off_when_debug: True
+
# scheduling: interval or int
# use "scheduling: False" to disable scheduling
- scheduling:
- start_after: [300, 1800] # delay to start the first run of the checker
- every: [86400, 90000] # how often the checker runs
+ # to activate the scheduler:
+ # * uncomment "scheduling" section
+ # * add "cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1" to your uwsgi.ini
+
+ # scheduling:
+ # start_after: [300, 1800] # delay to start the first run of the checker
+ # every: [86400, 90000] # how often the checker runs
+
# additional tests: only for the YAML anchors (see the engines section)
additional_tests:
rosebud: &test_rosebud
diff --git a/utils/searx.sh b/utils/searx.sh
index f85935fa2..a33642ee4 100755
--- a/utils/searx.sh
+++ b/utils/searx.sh
@@ -46,7 +46,6 @@ SEARX_PACKAGES_debian="\
python3-dev python3-babel python3-venv
uwsgi uwsgi-plugin-python3
git build-essential libxslt-dev zlib1g-dev libffi-dev libssl-dev
-libprotobuf-dev protobuf-compiler
shellcheck"
BUILD_PACKAGES_debian="\
@@ -59,7 +58,6 @@ SEARX_PACKAGES_arch="\
python python-pip python-lxml python-babel
uwsgi uwsgi-plugin-python
git base-devel libxml2
-protobuf
shellcheck"
BUILD_PACKAGES_arch="\
@@ -71,7 +69,7 @@ SEARX_PACKAGES_fedora="\
python python-pip python-lxml python-babel
uwsgi uwsgi-plugin-python3
git @development-tools libxml2
-ShellCheck protobuf-compiler protobuf-devel"
+ShellCheck"
BUILD_PACKAGES_fedora="\
firefox graphviz graphviz-gd ImageMagick librsvg2-tools
@@ -84,7 +82,7 @@ SEARX_PACKAGES_centos="\
python36 python36-pip python36-lxml python-babel
uwsgi uwsgi-plugin-python3
git @development-tools libxml2
-ShellCheck protobuf-compiler protobuf-devel"
+ShellCheck"
BUILD_PACKAGES_centos="\
firefox graphviz graphviz-gd ImageMagick librsvg2-tools