summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/integration.yml12
-rw-r--r--Dockerfile3
-rw-r--r--Makefile36
-rw-r--r--dockerfiles/uwsgi.ini3
-rw-r--r--docs/admin/installation-searx.rst13
-rw-r--r--docs/admin/installation.rst2
-rw-r--r--docs/admin/settings.rst168
-rw-r--r--docs/blog/lxcdev-202006.rst4
-rw-r--r--docs/build-templates/searx.rst1
-rw-r--r--docs/conf.py38
-rw-r--r--docs/dev/makefile.rst40
-rw-r--r--docs/dev/search_api.rst2
-rw-r--r--docs/utils/index.rst4
-rwxr-xr-xmanage.sh4
-rw-r--r--requirements-dev.txt17
-rw-r--r--requirements.txt2
-rw-r--r--searx/__init__.py46
-rw-r--r--searx/brand.py7
-rw-r--r--searx/data/__init__.py3
-rw-r--r--searx/engines/1337x.py14
-rw-r--r--searx/engines/acgsou.py18
-rw-r--r--searx/engines/ahmia.py19
-rw-r--r--searx/engines/apkmirror.py17
-rw-r--r--searx/engines/archlinux.py20
-rw-r--r--searx/engines/arxiv.py19
-rwxr-xr-xsearx/engines/base.py21
-rw-r--r--searx/engines/bing.py22
-rw-r--r--searx/engines/bing_images.py21
-rw-r--r--searx/engines/bing_news.py20
-rw-r--r--searx/engines/bing_videos.py19
-rw-r--r--searx/engines/btdigg.py22
-rw-r--r--searx/engines/command.py19
-rw-r--r--searx/engines/currency_convert.py14
-rw-r--r--searx/engines/dailymotion.py21
-rw-r--r--searx/engines/deezer.py19
-rw-r--r--searx/engines/deviantart.py21
-rw-r--r--searx/engines/dictzone.py17
-rw-r--r--searx/engines/digbt.py18
-rw-r--r--searx/engines/digg.py19
-rw-r--r--searx/engines/doku.py24
-rw-r--r--searx/engines/duckduckgo.py22
-rw-r--r--searx/engines/duckduckgo_definitions.py20
-rw-r--r--searx/engines/duckduckgo_images.py25
-rw-r--r--searx/engines/duden.py17
-rw-r--r--searx/engines/dummy-offline.py14
-rw-r--r--searx/engines/dummy.py14
-rw-r--r--searx/engines/ebay.py23
-rw-r--r--searx/engines/elasticsearch.py5
-rw-r--r--searx/engines/etools.py18
-rw-r--r--searx/engines/fdroid.py18
-rw-r--r--searx/engines/flickr.py20
-rw-r--r--searx/engines/flickr_noapi.py23
-rw-r--r--searx/engines/framalibre.py19
-rw-r--r--searx/engines/frinkiac.py23
-rw-r--r--searx/engines/genius.py21
-rw-r--r--searx/engines/gentoo.py20
-rw-r--r--searx/engines/gigablast.py18
-rw-r--r--searx/engines/github.py21
-rw-r--r--searx/engines/google.py26
-rw-r--r--searx/engines/google_images.py22
-rw-r--r--searx/engines/google_news.py19
-rw-r--r--searx/engines/google_videos.py19
-rw-r--r--searx/engines/ina.py25
-rw-r--r--searx/engines/invidious.py42
-rw-r--r--searx/engines/json_engine.py2
-rw-r--r--searx/engines/kickass.py19
-rw-r--r--searx/engines/mediawiki.py23
-rw-r--r--searx/engines/microsoft_academic.py20
-rw-r--r--searx/engines/mixcloud.py19
-rw-r--r--searx/engines/not_evil.py19
-rw-r--r--searx/engines/nyaa.py18
-rw-r--r--searx/engines/opensemantic.py22
-rw-r--r--searx/engines/openstreetmap.py19
-rw-r--r--searx/engines/pdbe.py19
-rw-r--r--searx/engines/peertube.py21
-rw-r--r--searx/engines/photon.py19
-rw-r--r--searx/engines/piratebay.py23
-rw-r--r--searx/engines/pubmed.py22
-rw-r--r--searx/engines/qwant.py18
-rw-r--r--searx/engines/recoll.py17
-rw-r--r--searx/engines/reddit.py19
-rw-r--r--searx/engines/scanr_structures.py19
-rw-r--r--searx/engines/searchcode_code.py20
-rw-r--r--searx/engines/searx_engine.py18
-rw-r--r--searx/engines/sepiasearch.py22
-rw-r--r--searx/engines/soundcloud.py18
-rw-r--r--searx/engines/spotify.py19
-rw-r--r--searx/engines/stackoverflow.py21
-rw-r--r--searx/engines/startpage.py25
-rw-r--r--searx/engines/tokyotoshokan.py19
-rw-r--r--searx/engines/torrentz.py20
-rw-r--r--searx/engines/translated.py18
-rw-r--r--searx/engines/unsplash.py19
-rw-r--r--searx/engines/vimeo.py27
-rw-r--r--searx/engines/wikidata.py20
-rw-r--r--searx/engines/wikipedia.py19
-rw-r--r--searx/engines/wolframalpha_api.py23
-rw-r--r--searx/engines/wolframalpha_noapi.py23
-rw-r--r--searx/engines/www1x.py19
-rw-r--r--searx/engines/xpath.py2
-rw-r--r--searx/engines/yacy.py27
-rw-r--r--searx/engines/yahoo.py20
-rw-r--r--searx/engines/yahoo_news.py24
-rw-r--r--searx/engines/yandex.py18
-rw-r--r--searx/engines/yggtorrent.py23
-rw-r--r--searx/engines/youtube_api.py23
-rw-r--r--searx/engines/youtube_noapi.py23
-rw-r--r--searx/query.py2
-rw-r--r--searx/search/__init__.py65
-rw-r--r--searx/search/checker/__init__.py4
-rw-r--r--searx/search/checker/__main__.py94
-rw-r--r--searx/search/checker/background.py123
-rw-r--r--searx/search/checker/impl.py406
-rw-r--r--searx/search/models.py69
-rw-r--r--searx/search/processors/abstract.py12
-rw-r--r--searx/search/processors/online.py52
-rw-r--r--searx/search/processors/online_currency.py10
-rw-r--r--searx/search/processors/online_dictionary.py18
-rw-r--r--searx/settings.yml267
-rw-r--r--searx/shared/__init__.py31
-rw-r--r--searx/shared/shared_abstract.py21
-rw-r--r--searx/shared/shared_simple.py39
-rw-r--r--searx/shared/shared_uwsgi.py64
-rw-r--r--searx/templates/oscar/results.html2
-rwxr-xr-xsearx/webapp.py13
-rw-r--r--setup.py9
-rw-r--r--tests/unit/test_query.py9
-rw-r--r--utils/brand.env4
-rw-r--r--utils/build_env.py38
-rw-r--r--utils/fetch_engine_descriptions.py206
-rw-r--r--utils/makefile.python4
-rwxr-xr-xutils/searx.sh8
-rw-r--r--utils/templates/etc/uwsgi/apps-archlinux/searx.ini5
-rw-r--r--utils/templates/etc/uwsgi/apps-available/searx.ini3
134 files changed, 2738 insertions, 991 deletions
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index cff8854b9..4f3b8e9c8 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -5,11 +5,11 @@ on: [push, pull_request]
jobs:
python:
name: Python ${{ matrix.python-version }}
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
strategy:
matrix:
- os: [ubuntu-latest]
- python-version: [3.5, 3.6, 3.7, 3.8]
+ os: [ubuntu-20.04]
+ python-version: [3.6, 3.7, 3.8, 3.9]
steps:
- name: Checkout
uses: actions/checkout@v2
@@ -46,7 +46,7 @@ jobs:
themes:
name: Themes
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v2
@@ -59,7 +59,7 @@ jobs:
documentation:
name: Documentation
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v2
@@ -92,7 +92,7 @@ jobs:
- documentation
env:
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
- runs-on: ubuntu-latest
+ runs-on: ubuntu-18.04
steps:
- name: Checkout
if: env.DOCKERHUB_USERNAME != null
diff --git a/Dockerfile b/Dockerfile
index 3894aa968..f251d06ea 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -41,6 +41,8 @@ RUN apk upgrade --no-cache \
openssl-dev \
tar \
git \
+ protoc \
+ protobuf-dev \
&& apk add --no-cache \
ca-certificates \
su-exec \
@@ -53,6 +55,7 @@ RUN apk upgrade --no-cache \
uwsgi \
uwsgi-python3 \
brotli \
+ protobuf \
&& pip3 install --upgrade pip \
&& pip3 install --no-cache -r requirements.txt \
&& apk del build-dependencies \
diff --git a/Makefile b/Makefile
index ff3bc2252..9d8e3a199 100644
--- a/Makefile
+++ b/Makefile
@@ -1,20 +1,12 @@
# -*- coding: utf-8; mode: makefile-gmake -*-
.DEFAULT_GOAL=help
-# START Makefile setup
-export GIT_URL=https://github.com/searx/searx
-export GIT_BRANCH=master
-export SEARX_URL=https://searx.me
-export DOCS_URL=https://searx.github.io/searx
-# export CONTACT_URL=mailto:contact@example.com
-# END Makefile setup
-
include utils/makefile.include
PYOBJECTS = searx
DOC = docs
PY_SETUP_EXTRAS ?= \[test\]
-PYLINT_SEARX_DISABLE_OPTION := I,C,R,W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401
+PYLINT_SEARX_DISABLE_OPTION := I,C,R,W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,E1136
PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES := supported_languages,language_aliases
include utils/makefile.python
@@ -43,12 +35,6 @@ help-min:
@echo ' docker - build Docker image'
@echo ' node.env - download & install npm dependencies locally'
@echo ''
- @echo 'environment'
- @echo ' SEARX_URL = $(SEARX_URL)'
- @echo ' GIT_URL = $(GIT_URL)'
- @echo ' DOCS_URL = $(DOCS_URL)'
- @echo ' CONTACT_URL = $(CONTACT_URL)'
- @echo ''
@$(MAKE) -e -s make-help
help-all: help-min
@@ -118,24 +104,8 @@ useragents.update: pyenvinstall
$(Q)echo "Update searx/data/useragents.json with the most recent versions of Firefox."
$(Q)$(PY_ENV_ACT); python utils/fetch_firefox_version.py
-buildenv:
- $(Q)echo "build searx/brand.py"
- $(Q)echo "GIT_URL = '$(GIT_URL)'" > searx/brand.py
- $(Q)echo "GIT_BRANCH = '$(GIT_BRANCH)'" >> searx/brand.py
- $(Q)echo "ISSUE_URL = 'https://github.com/searx/searx/issues'" >> searx/brand.py
- $(Q)echo "SEARX_URL = '$(SEARX_URL)'" >> searx/brand.py
- $(Q)echo "DOCS_URL = '$(DOCS_URL)'" >> searx/brand.py
- $(Q)echo "PUBLIC_INSTANCES = 'https://searx.space'" >> searx/brand.py
- $(Q)echo "CONTACT_URL = '$(CONTACT_URL)'" >> searx/brand.py
- $(Q)echo "build utils/brand.env"
- $(Q)echo "export GIT_URL='$(GIT_URL)'" > utils/brand.env
- $(Q)echo "export GIT_BRANCH='$(GIT_BRANCH)'" >> utils/brand.env
- $(Q)echo "export ISSUE_URL='https://github.com/searx/searx/issues'" >> utils/brand.env
- $(Q)echo "export SEARX_URL='$(SEARX_URL)'" >> utils/brand.env
- $(Q)echo "export DOCS_URL='$(DOCS_URL)'" >> utils/brand.env
- $(Q)echo "export PUBLIC_INSTANCES='https://searx.space'" >> utils/brand.env
- $(Q)echo "export CONTACT_URL='$(CONTACT_URL)'" >> utils/brand.env
-
+buildenv: pyenv
+ $(Q)$(PY_ENV_ACT); SEARX_DEBUG=1 python utils/build_env.py
# node / npm
# ----------
diff --git a/dockerfiles/uwsgi.ini b/dockerfiles/uwsgi.ini
index 398a440d9..818a99cc0 100644
--- a/dockerfiles/uwsgi.ini
+++ b/dockerfiles/uwsgi.ini
@@ -42,3 +42,6 @@ static-map = /static=/usr/local/searx/searx/static
static-expires = /* 864000
static-gzip-all = True
offload-threads = %k
+
+# Cache
+cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1
diff --git a/docs/admin/installation-searx.rst b/docs/admin/installation-searx.rst
index 3f8904a1d..512a185a7 100644
--- a/docs/admin/installation-searx.rst
+++ b/docs/admin/installation-searx.rst
@@ -70,13 +70,20 @@ from the login (*~/.profile*):
Configuration
=============
+.. sidebar:: ``use_default_settings: True``
+
+ - :ref:`settings global`
+ - :ref:`settings location`
+ - :ref:`settings use_default_settings`
+ - :origin:`/etc/searx/settings.yml <utils/templates/etc/searx/use_default_settings.yml>`
+
To create a initial ``/etc/searx/settings.yml`` you can start with a copy of the
file :origin:`utils/templates/etc/searx/use_default_settings.yml`. This setup
-:option:ref:`use default settings <settings use_default_settings>` from
+:ref:`use default settings <settings use_default_settings>` from
:origin:`searx/settings.yml` and is recommended since :pull:`2291` is merged.
-For minimal Setup, configure like shown below – replace ``searx@\$(uname -n)``
-with a name of your choice, set ``ultrasecretkey`` -- *and/or* edit
+For a *minimal setup*, configure as shown below – replace ``searx@$(uname
+-n)`` with a name of your choice, set ``ultrasecretkey`` -- *and/or* edit
``/etc/searx/settings.yml`` to your needs.
.. kernel-include:: $DOCS_BUILD/includes/searx.rst
diff --git a/docs/admin/installation.rst b/docs/admin/installation.rst
index 4a301ecf8..8a066dec7 100644
--- a/docs/admin/installation.rst
+++ b/docs/admin/installation.rst
@@ -76,6 +76,6 @@ If all services are running fine, you can add it to your HTTP server:
.. tip::
About script's installation options have a look at chapter :ref:`toolboxing
- setup`. How to brand your instance see chapter :ref:`makefile setup`. To
+ setup`. How to brand your instance see chapter :ref:`settings global`. To
*stash* your instance's setup, `git stash`_ your clone's :origin:`Makefile`
and :origin:`.config.sh` file .
diff --git a/docs/admin/settings.rst b/docs/admin/settings.rst
index 985c16f85..7cf055dbf 100644
--- a/docs/admin/settings.rst
+++ b/docs/admin/settings.rst
@@ -27,7 +27,8 @@ First, searx will try to load settings.yml from these locations:
1. the full path specified in the ``SEARX_SETTINGS_PATH`` environment variable.
2. ``/etc/searx/settings.yml``
-If these files don't exist (or are empty or can't be read), searx uses the :origin:`searx/settings.yml` file.
+If these files don't exist (or are empty or can't be read), searx uses the
+:origin:`searx/settings.yml` file.
.. _settings global:
@@ -35,16 +36,46 @@ If these files don't exist (or are empty or can't be read), searx uses the :orig
Global Settings
===============
+``general:``
+------------
+
.. code:: yaml
general:
debug : False # Debug mode, only for development
instance_name : "searx" # displayed name
+ git_url: https://github.com/searx/searx
+ git_branch: master
+ issue_url: https://github.com/searx/searx/issues
+ docs_url: https://searx.github.io/searx
+ public_instances: https://searx.space
+ contact_url: False # mailto:contact@example.com
+ wiki_url: https://github.com/searx/searx/wiki
+ twitter_url: https://twitter.com/Searx_engine
``debug`` :
Allow a more detailed log if you run searx directly. Display *detailed* error
messages in the browser too, so this must be deactivated in production.
+``contact_url``:
+ Contact ``mailto:`` address or WEB form.
+
+``git_url`` and ``git_branch``:
+ Change this to point to your searx fork (branch).
+
+``docs_url``:
+ If you host your own documentation, change this URL.
+
+``wiki_url``:
+ Link to your wiki (or ``False``)
+
+``twitter_url``:
+ Link to your tweets (or ``False``)
+
+
+``server:``
+-----------
+
.. code:: yaml
server:
@@ -90,6 +121,8 @@ Global Settings
``default_http_headers``:
Set additional HTTP headers, see `#755 <https://github.com/searx/searx/issues/715>`__
+``outgoing:``
+-------------
.. code:: yaml
@@ -139,6 +172,10 @@ Global Settings
If you use multiple network interfaces, define from which IP the requests must
be made. This parameter is ignored when ``proxies`` is set.
+
+``locales:``
+------------
+
.. code:: yaml
locales:
@@ -244,61 +281,76 @@ Engine settings
use_default_settings
====================
-.. note::
-
- If searx is cloned from a git repository, most probably there is no need to have an user settings.
-
-The user defined settings.yml can relied on the default configuration :origin:`searx/settings.yml` using ``use_default_settings: True``.
+.. sidebar:: ``use_default_settings: True``
-In the following example, the actual settings are the default settings defined in :origin:`searx/settings.yml` with the exception of the ``secret_key`` and the ``bind_address``:
-
-.. code-block:: yaml
-
- use_default_settings: True
- server:
- secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA"
- bind_address: "0.0.0.0"
-
-With ``use_default_settings: True``, each settings can be override in a similar way, the ``engines`` section is merged according to the engine ``name``.
-
-In this example, searx will load all the engine and the arch linux wiki engine has a :ref:`token<private engines>`:
-
-.. code-block:: yaml
-
- use_default_settings: True
- server:
- secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA"
- engines:
- - name: arch linux wiki
- tokens: ['$ecretValue']
-
-It is possible to remove some engines from the default settings. The following example is similar to the above one, but searx doesn't load the the google engine:
-
-.. code-block:: yaml
-
- use_default_settings:
- engines:
- remove:
- - google
- server:
- secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA"
- engines:
- - name: arch linux wiki
- tokens: ['$ecretValue']
-
-As an alternative, it is possible to specify the engines to keep. In the following example, searx has only two engines:
-
-.. code-block:: yaml
-
- use_default_settings:
- engines:
- keep_only:
- - google
- - duckduckgo
- server:
- secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA"
- engines:
- - name: google
- tokens: ['$ecretValue']
- - name: duckduckgo
- tokens: ['$ecretValue']
+ - :ref:`settings location`
+ - :ref:`use_default_settings.yml`
+ - :origin:`/etc/searx/settings.yml <utils/templates/etc/searx/use_default_settings.yml>`
+
+The user defined ``settings.yml`` is loaded from the :ref:`settings location`
+and can rely on the default configuration :origin:`searx/settings.yml` using:
+
+ ``use_default_settings: True``
+
+``server:``
+ In the following example, the actual settings are the default settings defined
+ in :origin:`searx/settings.yml` with the exception of the ``secret_key`` and
+ the ``bind_address``:
+
+ .. code-block:: yaml
+
+ use_default_settings: True
+ server:
+ secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA"
+ bind_address: "0.0.0.0"
+
+``engines:``
+ With ``use_default_settings: True``, each setting can be overridden in a
+ similar way; the ``engines`` section is merged according to the engine
+ ``name``. In this example, searx will load all the engines and the arch linux
+ wiki engine has a :ref:`token<private engines>`:
+
+ .. code-block:: yaml
+
+ use_default_settings: True
+ server:
+ secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA"
+ engines:
+ - name: arch linux wiki
+ tokens: ['$ecretValue']
+
+``engines:`` / ``remove:``
+ It is possible to remove some engines from the default settings. The following
+ example is similar to the above one, but searx doesn't load the google
+ engine:
+
+ .. code-block:: yaml
+
+ use_default_settings:
+ engines:
+ remove:
+ - google
+ server:
+ secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA"
+ engines:
+ - name: arch linux wiki
+ tokens: ['$ecretValue']
+
+``engines:`` / ``keep_only:``
+ As an alternative, it is possible to specify the engines to keep. In the
+ following example, searx has only two engines:
+
+ .. code-block:: yaml
+
+ use_default_settings:
+ engines:
+ keep_only:
+ - google
+ - duckduckgo
+ server:
+ secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA"
+ engines:
+ - name: google
+ tokens: ['$ecretValue']
+ - name: duckduckgo
+ tokens: ['$ecretValue']
diff --git a/docs/blog/lxcdev-202006.rst b/docs/blog/lxcdev-202006.rst
index b8d470d4e..2bea19807 100644
--- a/docs/blog/lxcdev-202006.rst
+++ b/docs/blog/lxcdev-202006.rst
@@ -259,8 +259,8 @@ suite. For this, we have to keep an eye on the :ref:`installation basic`:
- virtualenv in: ``/usr/local/searx/searx-pyenv``
- searx software in: ``/usr/local/searx/searx-src``
-The searx software is a clone of the ``GIT_URL`` (see :ref:`makefile setup`) and
-the working tree is checked out from the ``GIT_BRANCH``. With the use of the
+The searx software is a clone of the ``git_url`` (see :ref:`settings global`) and
+the working tree is checked out from the ``git_branch``. With the use of the
:ref:`searx.sh` the searx service was installed as :ref:`uWSGI application
<searx uwsgi>`. To maintain this service, we can use ``systemctl`` (compare
:ref:`service architectures on distributions <uwsgi configuration>`).
diff --git a/docs/build-templates/searx.rst b/docs/build-templates/searx.rst
index fe82ec3d0..e06bc2c6a 100644
--- a/docs/build-templates/searx.rst
+++ b/docs/build-templates/searx.rst
@@ -116,6 +116,7 @@ ${fedora_build}
pip install -U pip
pip install -U setuptools
pip install -U wheel
+ pip install -U pyyaml
# jump to searx's working tree and install searx into virtualenv
(${SERVICE_USER})$ cd \"$SEARX_SRC\"
diff --git a/docs/conf.py b/docs/conf.py
index 0c07761a8..e467c6262 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -2,14 +2,10 @@
import sys, os
from sphinx_build_tools import load_sphinx_config
-from searx.version import VERSION_STRING
from pallets_sphinx_themes import ProjectLink
-from searx.brand import GIT_URL
-GIT_BRANCH = os.environ.get("GIT_BRANCH", "master")
-from searx.brand import SEARX_URL
-from searx.brand import DOCS_URL
-
+from searx import brand
+from searx.version import VERSION_STRING
# Project --------------------------------------------------------------
@@ -46,10 +42,10 @@ extlinks['wiki'] = ('https://github.com/searx/searx/wiki/%s', ' ')
extlinks['pull'] = ('https://github.com/searx/searx/pull/%s', 'PR ')
# links to custom brand
-extlinks['origin'] = (GIT_URL + '/blob/' + GIT_BRANCH + '/%s', 'git://')
-extlinks['patch'] = (GIT_URL + '/commit/%s', '#')
-extlinks['search'] = (SEARX_URL + '/%s', '#')
-extlinks['docs'] = (DOCS_URL + '/%s', 'docs: ')
+extlinks['origin'] = (brand.GIT_URL + '/blob/' + brand.GIT_BRANCH + '/%s', 'git://')
+extlinks['patch'] = (brand.GIT_URL + '/commit/%s', '#')
+extlinks['search'] = (brand.SEARX_URL + '/%s', '#')
+extlinks['docs'] = (brand.DOCS_URL + '/%s', 'docs: ')
extlinks['pypi'] = ('https://pypi.org/project/%s', 'PyPi: ')
extlinks['man'] = ('https://manpages.debian.org/jump?q=%s', '')
#extlinks['role'] = (
@@ -104,14 +100,20 @@ imgmath_font_size = 14
# sphinx.ext.imgmath setup END
html_theme_options = {"index_sidebar_logo": True}
-html_context = {
- "project_links": [
- ProjectLink("Source", GIT_URL),
- ProjectLink("Wiki", "https://github.com/searx/searx/wiki"),
- ProjectLink("Public instances", "https://searx.space/"),
- ProjectLink("Twitter", "https://twitter.com/Searx_engine"),
- ]
-}
+html_context = {"project_links": [] }
+if brand.GIT_URL:
+ html_context["project_links"].append(ProjectLink("Source", brand.GIT_URL))
+if brand.WIKI_URL:
+ html_context["project_links"].append(ProjectLink("Wiki", brand.WIKI_URL))
+if brand.PUBLIC_INSTANCES:
+ html_context["project_links"].append(ProjectLink("Public instances", brand.PUBLIC_INSTANCES))
+if brand.TWITTER_URL:
+ html_context["project_links"].append(ProjectLink("Twitter", brand.TWITTER_URL))
+if brand.ISSUE_URL:
+ html_context["project_links"].append(ProjectLink("Issue Tracker", brand.ISSUE_URL))
+if brand.CONTACT_URL:
+ html_context["project_links"].append(ProjectLink("Contact", brand.CONTACT_URL))
+
html_sidebars = {
"**": ["project.html", "relations.html", "searchbox.html"],
}
diff --git a/docs/dev/makefile.rst b/docs/dev/makefile.rst
index c43855617..f93855927 100644
--- a/docs/dev/makefile.rst
+++ b/docs/dev/makefile.rst
@@ -8,8 +8,7 @@ Makefile Targets
.. sidebar:: build environment
- Before looking deeper at the targets, first read about :ref:`makefile setup`
- and :ref:`make pyenv`.
+ Before looking deeper at the targets, first read about :ref:`make pyenv`.
To install system requirements follow :ref:`buildhosts`.
@@ -28,37 +27,6 @@ Calling the ``help`` target gives a first overview (``make help``):
:local:
:backlinks: entry
-
-.. _makefile setup:
-
-Makefile setup
-==============
-
-.. _git stash: https://git-scm.com/docs/git-stash
-
-.. sidebar:: fork & upstream
-
- Commit changes in your (local) branch, fork or whatever, but do not push them
- upstream / `git stash`_ is your friend.
-
-The main setup is done in the :origin:`Makefile`.
-
-.. literalinclude:: ../../Makefile
- :start-after: START Makefile setup
- :end-before: END Makefile setup
-
-:GIT_URL: Changes this, to point to your searx fork.
-:GIT_BRANCH: Changes this, to point to your searx branch.
-:SEARX_URL: Changes this, to point to your searx instance.
-:DOCS_URL: If you host your own (*brand*) documentation, change this URL.
-
-If you change any of this build environment variables, you have to run ``make
-buildenv``::
-
- $ make buildenv
- build searx/brand.py
- build utils/brand.env
-
.. _make pyenv:
Python environment
@@ -148,7 +116,7 @@ clean`` stop all processes using :ref:`make pyenv`.
We describe the usage of the ``doc*`` targets in the :ref:`How to contribute /
Documentation <contrib docs>` section. If you want to edit the documentation
read our :ref:`make docs-live` section. If you are working in your own brand,
-adjust your :ref:`Makefile setup <makefile setup>`.
+adjust your :ref:`settings global`.
.. _make books:
@@ -185,8 +153,8 @@ Use ``make docs-help`` to see which books available:
``make gh-pages``
=================
-To deploy on github.io first adjust your :ref:`Makefile setup <makefile
-setup>`. For any further read :ref:`deploy on github.io`.
+To deploy on github.io first adjust your :ref:`settings global`. For any
+further read :ref:`deploy on github.io`.
.. _make test:
diff --git a/docs/dev/search_api.rst b/docs/dev/search_api.rst
index b63891f53..68fee94bf 100644
--- a/docs/dev/search_api.rst
+++ b/docs/dev/search_api.rst
@@ -6,7 +6,7 @@ Search API
The search supports both ``GET`` and ``POST``.
-Furthermore, two enpoints ``/`` and ``/search`` are available for querying.
+Furthermore, two endpoints ``/`` and ``/search`` are available for querying.
``GET /``
diff --git a/docs/utils/index.rst b/docs/utils/index.rst
index ada78cef3..28515318f 100644
--- a/docs/utils/index.rst
+++ b/docs/utils/index.rst
@@ -47,8 +47,8 @@ Scripts to maintain services often dispose of common commands and environments.
Tooling box setup
=================
-The main setup is done in the :origin:`.config.sh` (read also :ref:`makefile
-setup`).
+The main setup is done in the :origin:`.config.sh` (read also :ref:`settings
+global`).
.. literalinclude:: ../../.config.sh
:language: bash
diff --git a/manage.sh b/manage.sh
index bda74545c..bb86329f9 100755
--- a/manage.sh
+++ b/manage.sh
@@ -123,9 +123,9 @@ docker_build() {
SEARX_GIT_VERSION=$(git describe --match "v[0-9]*\.[0-9]*\.[0-9]*" HEAD 2>/dev/null | awk -F'-' '{OFS="-"; $1=substr($1, 2); if ($3) { $3=substr($3, 2); } print}')
# add the suffix "-dirty" if the repository has uncommited change
- # /!\ HACK for searx/searx: ignore searx/brand.py and utils/brand.env
+ # /!\ HACK for searx/searx: ignore utils/brand.env
git update-index -q --refresh
- if [ ! -z "$(git diff-index --name-only HEAD -- | grep -v 'searx/brand.py' | grep -v 'utils/brand.env')" ]; then
+ if [ ! -z "$(git diff-index --name-only HEAD -- | grep -v 'utils/brand.env')" ]; then
SEARX_GIT_VERSION="${SEARX_GIT_VERSION}-dirty"
fi
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 170f8c1d9..483532723 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,4 @@
-mock==4.0.3; python_version >= "3.6"
-mock==2.0.0; python_version < "3.6"
+mock==4.0.3
nose2[coverage_plugin]==0.9.2
cov-core==1.15.0
pycodestyle==2.6.0
@@ -8,16 +7,12 @@ splinter==0.14.0
transifex-client==0.14.2
unittest2==1.1.0
selenium==3.141.0
-twine==3.3.0; python_version >= "3.6"
-twine==1.15.0; python_version < "3.6"
+twine==3.3.0
Pallets-Sphinx-Themes==1.2.3
-Sphinx==3.4.1; python_version >= '3.6'
-Sphinx==3.0.1; python_version < '3.6'
+Sphinx==3.4.1
sphinx-issues==1.2.0
sphinx-jinja==1.1.1
-sphinx-tabs==1.3.0; python_version >= '3.6'
-sphinx-tabs==1.1.13; python_version < '3.6'
+sphinx-tabs==1.3.0
sphinxcontrib-programoutput==0.16
-sphinx-autobuild==2020.9.1; python_version >= '3.6'
-sphinx-autobuild==0.7.1; python_version < '3.6'
-linuxdoc @ git+http://github.com/return42/linuxdoc.git@70673dcf69e705e08d81f53794895dc15c4920b3#egg=linuxdoc
+sphinx-autobuild==2020.9.1
+linuxdoc==20210110
diff --git a/requirements.txt b/requirements.txt
index e0c48ca94..776bbc20b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ idna==2.10
jinja2==2.11.2
lxml==4.6.2
pygments==2.1.3
-pyopenssl==20.0.1
python-dateutil==2.8.1
pyyaml==5.3.1
requests[socks]==2.25.1
+pycld3==0.20
diff --git a/searx/__init__.py b/searx/__init__.py
index 08e67f69d..11adbba73 100644
--- a/searx/__init__.py
+++ b/searx/__init__.py
@@ -60,3 +60,49 @@ if 'SEARX_SECRET' in environ:
settings['server']['secret_key'] = environ['SEARX_SECRET']
if 'SEARX_BIND_ADDRESS' in environ:
settings['server']['bind_address'] = environ['SEARX_BIND_ADDRESS']
+
+
+class _brand_namespace:
+
+ @classmethod
+ def get_val(cls, group, name, default=''):
+ return settings.get(group, {}).get(name) or default
+
+ @property
+ def SEARX_URL(self):
+ return self.get_val('server', 'base_url')
+
+ @property
+ def CONTACT_URL(self):
+ return self.get_val('general', 'contact_url')
+
+ @property
+ def GIT_URL(self):
+ return self.get_val('brand', 'git_url')
+
+ @property
+ def GIT_BRANCH(self):
+ return self.get_val('brand', 'git_branch')
+
+ @property
+ def ISSUE_URL(self):
+ return self.get_val('brand', 'issue_url')
+
+ @property
+ def DOCS_URL(self):
+ return self.get_val('brand', 'docs_url')
+
+ @property
+ def PUBLIC_INSTANCES(self):
+ return self.get_val('brand', 'public_instances')
+
+ @property
+ def WIKI_URL(self):
+ return self.get_val('brand', 'wiki_url')
+
+ @property
+ def TWITTER_URL(self):
+ return self.get_val('brand', 'twitter_url')
+
+
+brand = _brand_namespace()
diff --git a/searx/brand.py b/searx/brand.py
deleted file mode 100644
index 7fcab6fad..000000000
--- a/searx/brand.py
+++ /dev/null
@@ -1,7 +0,0 @@
-GIT_URL = 'https://github.com/searx/searx'
-GIT_BRANCH = 'master'
-ISSUE_URL = 'https://github.com/searx/searx/issues'
-SEARX_URL = 'https://searx.me'
-DOCS_URL = 'https://searx.github.io/searx'
-PUBLIC_INSTANCES = 'https://searx.space'
-CONTACT_URL = ''
diff --git a/searx/data/__init__.py b/searx/data/__init__.py
index 55a254b13..29ac5b7a2 100644
--- a/searx/data/__init__.py
+++ b/searx/data/__init__.py
@@ -8,8 +8,7 @@ data_dir = Path(__file__).parent
def load(filename):
- # add str(...) for Python 3.5
- with open(str(data_dir / filename), encoding='utf-8') as fd:
+ with open(data_dir / filename, encoding='utf-8') as fd:
return json.load(fd)
diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py
index 18478876a..9cc7c1b79 100644
--- a/searx/engines/1337x.py
+++ b/searx/engines/1337x.py
@@ -1,7 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ 1337x
+"""
+
from urllib.parse import quote, urljoin
from lxml import html
from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex
+# about
+about = {
+ "website": 'https://1337x.to/',
+ "wikidata_id": 'Q28134166',
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
url = 'https://1337x.to/'
search_url = url + 'search/{search_term}/{pageno}/'
diff --git a/searx/engines/acgsou.py b/searx/engines/acgsou.py
index 637443edc..ea9793f10 100644
--- a/searx/engines/acgsou.py
+++ b/searx/engines/acgsou.py
@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Acgsou (Japanese Animation/Music/Comics Bittorrent tracker)
-
- @website https://www.acgsou.com/
- @provide-api no
- @using-api no
- @results HTML
- @stable no (HTML can change)
- @parse url, title, content, seed, leech, torrentfile
"""
from urllib.parse import urlencode
from lxml import html
from searx.utils import extract_text, get_torrent_size, eval_xpath_list, eval_xpath_getindex
+# about
+about = {
+ "website": 'https://www.acgsou.com/',
+ "wikidata_id": None,
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['files', 'images', 'videos', 'music']
paging = True
diff --git a/searx/engines/ahmia.py b/searx/engines/ahmia.py
index 7a2ae0075..6c502bb40 100644
--- a/searx/engines/ahmia.py
+++ b/searx/engines/ahmia.py
@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Ahmia (Onions)
-
- @website http://msydqstlz2kzerdg.onion
- @provides-api no
-
- @using-api no
- @results HTML
- @stable no
- @parse url, title, content
"""
from urllib.parse import urlencode, urlparse, parse_qs
from lxml.html import fromstring
from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval_xpath
+# about
+about = {
+ "website": 'http://msydqstlz2kzerdg.onion',
+ "wikidata_id": 'Q18693938',
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine config
categories = ['onions']
paging = True
diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py
index 3a948dcb4..a4c66e891 100644
--- a/searx/engines/apkmirror.py
+++ b/searx/engines/apkmirror.py
@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
APK Mirror
-
- @website https://www.apkmirror.com
-
- @using-api no
- @results HTML
- @stable no (HTML can change)
- @parse url, title, thumbnail_src
"""
from urllib.parse import urlencode
from lxml import html
from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
+# about
+about = {
+ "website": 'https://www.apkmirror.com',
+ "wikidata_id": None,
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
# engine dependent config
categories = ['it']
diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
index 04117c07d..d29d65ba3 100644
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@@ -1,20 +1,24 @@
-# -*- coding: utf-8 -*-
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Arch Linux Wiki
- @website https://wiki.archlinux.org
- @provide-api no (Mediawiki provides API, but Arch Wiki blocks access to it
- @using-api no
- @results HTML
- @stable no (HTML can change)
- @parse url, title
+ API: Mediawiki provides API, but Arch Wiki blocks access to it
"""
from urllib.parse import urlencode, urljoin
from lxml import html
from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
+# about
+about = {
+ "website": 'https://wiki.archlinux.org/',
+ "wikidata_id": 'Q101445877',
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['it']
language_support = True
diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py
index 1190de363..09ea07ea5 100644
--- a/searx/engines/arxiv.py
+++ b/searx/engines/arxiv.py
@@ -1,20 +1,21 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
ArXiV (Scientific preprints)
- @website https://arxiv.org
- @provide-api yes (export.arxiv.org/api/query)
- @using-api yes
- @results XML-RSS
- @stable yes
- @parse url, title, publishedDate, content
- More info on api: https://arxiv.org/help/api/user-manual
"""
from lxml import html
from datetime import datetime
from searx.utils import eval_xpath_list, eval_xpath_getindex
+# about
+about = {
+ "website": 'https://arxiv.org',
+ "wikidata_id": 'Q118398',
+ "official_api_documentation": 'https://arxiv.org/help/api',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'XML-RSS',
+}
categories = ['science']
paging = True
diff --git a/searx/engines/base.py b/searx/engines/base.py
index 3648d7ed0..463274681 100755
--- a/searx/engines/base.py
+++ b/searx/engines/base.py
@@ -1,16 +1,6 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
BASE (Scholar publications)
-
- @website https://base-search.net
- @provide-api yes with authorization (https://api.base-search.net/)
-
- @using-api yes
- @results XML
- @stable ?
- @parse url, title, publishedDate, content
- More info on api: http://base-search.net/about/download/base_interface.pdf
"""
from urllib.parse import urlencode
@@ -19,6 +9,15 @@ from datetime import datetime
import re
from searx.utils import searx_useragent
+# about
+about = {
+ "website": 'https://base-search.net',
+ "wikidata_id": 'Q448335',
+ "official_api_documentation": 'https://api.base-search.net/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'XML',
+}
categories = ['science']
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index f0882fcc9..edf6baef9 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -1,16 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Bing (Web)
-
- @website https://www.bing.com
- @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
- max. 5000 query/month
-
- @using-api no (because of query limit)
- @results HTML (using search portal)
- @stable no (HTML can change)
- @parse url, title, content
-
- @todo publishedDate
"""
import re
@@ -21,6 +11,16 @@ from searx.utils import eval_xpath, extract_text, match_language
logger = logger.getChild('bing engine')
+# about
+about = {
+ "website": 'https://www.bing.com',
+ "wikidata_id": 'Q182496',
+ "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['general']
paging = True
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 2bcf82b84..b4ca57f4b 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -1,15 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Bing (Images)
-
- @website https://www.bing.com/images
- @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
- max. 5000 query/month
-
- @using-api no (because of query limit)
- @results HTML (using search portal)
- @stable no (HTML can change)
- @parse url, title, img_src
-
"""
from urllib.parse import urlencode
@@ -20,6 +11,16 @@ from searx.utils import match_language
from searx.engines.bing import language_aliases
from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
+# about
+about = {
+ "website": 'https://www.bing.com/images',
+ "wikidata_id": 'Q182496',
+ "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['images']
paging = True
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index b95def48b..2e4b78278 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -1,14 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Bing (News)
-
- @website https://www.bing.com/news
- @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
- max. 5000 query/month
-
- @using-api no (because of query limit)
- @results RSS (using search portal)
- @stable yes (except perhaps for the images)
- @parse url, title, content, publishedDate, thumbnail
"""
from datetime import datetime
@@ -20,6 +12,16 @@ from searx.utils import match_language, eval_xpath_getindex
from searx.engines.bing import language_aliases
from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
+# about
+about = {
+ "website": 'https://www.bing.com/news',
+ "wikidata_id": 'Q2878637',
+ "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-news-search-api',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'RSS',
+}
+
# engine dependent config
categories = ['news']
paging = True
diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py
index 143c71a3e..b4584bb37 100644
--- a/searx/engines/bing_videos.py
+++ b/searx/engines/bing_videos.py
@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Bing (Videos)
-
- @website https://www.bing.com/videos
- @provide-api yes (http://datamarket.azure.com/dataset/bing/search)
-
- @using-api no
- @results HTML
- @stable no
- @parse url, title, content, thumbnail
"""
from json import loads
@@ -18,6 +11,16 @@ from searx.utils import match_language
from searx.engines.bing import language_aliases
from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
+# about
+about = {
+ "website": 'https://www.bing.com/videos',
+ "wikidata_id": 'Q4914152',
+ "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-video-search-api',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
categories = ['videos']
paging = True
safesearch = True
diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py
index 72bda8d20..863396f6e 100644
--- a/searx/engines/btdigg.py
+++ b/searx/engines/btdigg.py
@@ -1,19 +1,25 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
BTDigg (Videos, Music, Files)
-
- @website https://btdig.com
- @provide-api yes (on demand)
-
- @using-api no
- @results HTML (using search portal)
- @stable no (HTML can change)
- @parse url, title, content, seed, leech, magnetlink
"""
from lxml import html
from urllib.parse import quote, urljoin
from searx.utils import extract_text, get_torrent_size
+# about
+about = {
+ "website": 'https://btdig.com',
+ "wikidata_id": 'Q4836698',
+ "official_api_documentation": {
+ 'url': 'https://btdig.com/contacts',
+ 'comment': 'on demand'
+ },
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['videos', 'music', 'files']
paging = True
diff --git a/searx/engines/command.py b/searx/engines/command.py
index 6321e0004..33270d245 100644
--- a/searx/engines/command.py
+++ b/searx/engines/command.py
@@ -1,18 +1,7 @@
-'''
-searx is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-searx is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with searx. If not, see < http://www.gnu.org/licenses/ >.
-'''
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Command (offline)
+"""
import re
from os.path import expanduser, isabs, realpath, commonprefix
diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py
index 7098dd3c7..d4c3b5f81 100644
--- a/searx/engines/currency_convert.py
+++ b/searx/engines/currency_convert.py
@@ -1,5 +1,19 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ currency convert (DuckDuckGo)
+"""
+
import json
+# about
+about = {
+ "website": 'https://duckduckgo.com/',
+ "wikidata_id": 'Q12805',
+ "official_api_documentation": 'https://duckduckgo.com/api',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'JSONP',
+}
engine_type = 'online_currency'
categories = []
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 1e24e41da..874e0f42a 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -1,15 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Dailymotion (Videos)
-
- @website https://www.dailymotion.com
- @provide-api yes (http://www.dailymotion.com/developer)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, thumbnail, publishedDate, embedded
-
- @todo set content-parameter with correct data
"""
from json import loads
@@ -17,6 +8,16 @@ from datetime import datetime
from urllib.parse import urlencode
from searx.utils import match_language, html_to_text
+# about
+about = {
+ "website": 'https://www.dailymotion.com',
+ "wikidata_id": 'Q769222',
+ "official_api_documentation": 'https://www.dailymotion.com/developer',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['videos']
paging = True
diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py
index 48c0429a7..946bd3ebe 100644
--- a/searx/engines/deezer.py
+++ b/searx/engines/deezer.py
@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Deezer (Music)
-
- @website https://deezer.com
- @provide-api yes (http://developers.deezer.com/api/)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, content, embedded
"""
from json import loads
from urllib.parse import urlencode
+# about
+about = {
+ "website": 'https://deezer.com',
+ "wikidata_id": 'Q602243',
+ "official_api_documentation": 'https://developers.deezer.com/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['music']
paging = True
diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py
index 0378929b2..7840495e1 100644
--- a/searx/engines/deviantart.py
+++ b/searx/engines/deviantart.py
@@ -1,21 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Deviantart (Images)
-
- @website https://www.deviantart.com/
- @provide-api yes (https://www.deviantart.com/developers/) (RSS)
-
- @using-api no (TODO, rewrite to api)
- @results HTML
- @stable no (HTML can change)
- @parse url, title, img_src
-
- @todo rewrite to api
"""
# pylint: disable=missing-function-docstring
from urllib.parse import urlencode
from lxml import html
+# about
+about = {
+ "website": 'https://www.deviantart.com/',
+ "wikidata_id": 'Q46523',
+ "official_api_documentation": 'https://www.deviantart.com/developers/',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['images']
paging = True
diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py
index 5e6f688a1..2483c0805 100644
--- a/searx/engines/dictzone.py
+++ b/searx/engines/dictzone.py
@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Dictzone
-
- @website https://dictzone.com/
- @provide-api no
- @using-api no
- @results HTML (using search portal)
- @stable no (HTML can change)
- @parse url, title, content
"""
from urllib.parse import urljoin
from lxml import html
from searx.utils import eval_xpath
+# about
+about = {
+ "website": 'https://dictzone.com/',
+ "wikidata_id": None,
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
engine_type = 'online_dictionnary'
categories = ['general']
diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py
index b1a90fb2f..109662a49 100644
--- a/searx/engines/digbt.py
+++ b/searx/engines/digbt.py
@@ -1,19 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
DigBT (Videos, Music, Files)
-
- @website https://digbt.org
- @provide-api no
-
- @using-api no
- @results HTML (using search portal)
- @stable no (HTML can change)
- @parse url, title, content, magnetlink
"""
from urllib.parse import urljoin
from lxml import html
from searx.utils import extract_text, get_torrent_size
+# about
+about = {
+ "website": 'https://digbt.org',
+ "wikidata_id": None,
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
categories = ['videos', 'music', 'files']
paging = True
diff --git a/searx/engines/digg.py b/searx/engines/digg.py
index 85f727f0d..defcacd20 100644
--- a/searx/engines/digg.py
+++ b/searx/engines/digg.py
@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Digg (News, Social media)
-
- @website https://digg.com
- @provide-api no
-
- @using-api no
- @results HTML (using search portal)
- @stable no (HTML can change)
- @parse url, title, content, publishedDate, thumbnail
"""
# pylint: disable=missing-function-docstring
@@ -17,6 +10,16 @@ from datetime import datetime
from lxml import html
+# about
+about = {
+ "website": 'https://digg.com',
+ "wikidata_id": 'Q270478',
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['news', 'social media']
paging = True
diff --git a/searx/engines/doku.py b/searx/engines/doku.py
index e1b10d664..ed1eab388 100644
--- a/searx/engines/doku.py
+++ b/searx/engines/doku.py
@@ -1,18 +1,22 @@
-# Doku Wiki
-#
-# @website https://www.dokuwiki.org/
-# @provide-api yes
-# (https://www.dokuwiki.org/devel:xmlrpc)
-#
-# @using-api no
-# @results HTML
-# @stable yes
-# @parse (general) url, title, content
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Doku Wiki
+"""
from urllib.parse import urlencode
from lxml.html import fromstring
from searx.utils import extract_text, eval_xpath
+# about
+about = {
+ "website": 'https://www.dokuwiki.org/',
+ "wikidata_id": 'Q851864',
+ "official_api_documentation": 'https://www.dokuwiki.org/devel:xmlrpc',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'
paging = False
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index c1c984623..fc20de239 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -1,22 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
DuckDuckGo (Web)
-
- @website https://duckduckgo.com/
- @provide-api yes (https://duckduckgo.com/api),
- but not all results from search-site
-
- @using-api no
- @results HTML (using search portal)
- @stable no (HTML can change)
- @parse url, title, content
-
- @todo rewrite to api
"""
from lxml.html import fromstring
from json import loads
from searx.utils import extract_text, match_language, eval_xpath
+# about
+about = {
+ "website": 'https://duckduckgo.com/',
+ "wikidata_id": 'Q12805',
+ "official_api_documentation": 'https://duckduckgo.com/api',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['general']
paging = False
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index 1d1c84b4b..0473b0a95 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -1,12 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
-DuckDuckGo (definitions)
-
-- `Instant Answer API`_
-- `DuckDuckGo query`_
-
-.. _Instant Answer API: https://duckduckgo.com/api
-.. _DuckDuckGo query: https://api.duckduckgo.com/?q=DuckDuckGo&format=json&pretty=1
-
+ DuckDuckGo (Instant Answer API)
"""
import json
@@ -22,6 +16,16 @@ from searx.external_urls import get_external_url, get_earth_coordinates_url, are
logger = logger.getChild('duckduckgo_definitions')
+# about
+about = {
+ "website": 'https://duckduckgo.com/',
+ "wikidata_id": 'Q12805',
+ "official_api_documentation": 'https://duckduckgo.com/api',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
URL = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'
diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py
index 009f81cca..b5c2d4506 100644
--- a/searx/engines/duckduckgo_images.py
+++ b/searx/engines/duckduckgo_images.py
@@ -1,16 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
DuckDuckGo (Images)
-
- @website https://duckduckgo.com/
- @provide-api yes (https://duckduckgo.com/api),
- but images are not supported
-
- @using-api no
- @results JSON (site requires js to get images)
- @stable no (JSON can change)
- @parse url, title, img_src
-
- @todo avoid extra request
"""
from json import loads
@@ -20,6 +10,19 @@ from searx.engines.duckduckgo import get_region_code
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
from searx.poolrequests import get
+# about
+about = {
+ "website": 'https://duckduckgo.com/',
+ "wikidata_id": 'Q12805',
+ "official_api_documentation": {
+ 'url': 'https://duckduckgo.com/api',
+ 'comment': 'but images are not supported',
+ },
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'JSON (site requires js to get images)',
+}
+
# engine dependent config
categories = ['images']
paging = True
diff --git a/searx/engines/duden.py b/searx/engines/duden.py
index 1475fb846..f1c9efd3f 100644
--- a/searx/engines/duden.py
+++ b/searx/engines/duden.py
@@ -1,11 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Duden
- @website https://www.duden.de
- @provide-api no
- @using-api no
- @results HTML (using search portal)
- @stable no (HTML can change)
- @parse url, title, content
"""
import re
@@ -13,6 +8,16 @@ from urllib.parse import quote, urljoin
from lxml import html
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
+# about
+about = {
+ "website": 'https://www.duden.de',
+ "wikidata_id": 'Q73624591',
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
categories = ['general']
paging = True
language_support = False
diff --git a/searx/engines/dummy-offline.py b/searx/engines/dummy-offline.py
index 13a9ecc01..cf2f75312 100644
--- a/searx/engines/dummy-offline.py
+++ b/searx/engines/dummy-offline.py
@@ -1,11 +1,19 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Dummy Offline
-
- @results one result
- @stable yes
"""
+# about
+about = {
+ "wikidata_id": None,
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
+
def search(query, request_params):
return [{
'result': 'this is what you get',
diff --git a/searx/engines/dummy.py b/searx/engines/dummy.py
index 50b56ef78..1a1b57d8c 100644
--- a/searx/engines/dummy.py
+++ b/searx/engines/dummy.py
@@ -1,10 +1,18 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Dummy
-
- @results empty array
- @stable yes
"""
+# about
+about = {
+ "website": None,
+ "wikidata_id": None,
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'empty array',
+}
+
# do search-request
def request(query, params):
diff --git a/searx/engines/ebay.py b/searx/engines/ebay.py
index e2e5ded6a..45c633b42 100644
--- a/searx/engines/ebay.py
+++ b/searx/engines/ebay.py
@@ -1,17 +1,22 @@
-# Ebay (Videos, Music, Files)
-#
-# @website https://www.ebay.com
-# @provide-api no (nothing found)
-#
-# @using-api no
-# @results HTML (using search portal)
-# @stable yes (HTML can change)
-# @parse url, title, content, price, shipping, source
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Ebay (Videos, Music, Files)
+"""
from lxml import html
from searx.engines.xpath import extract_text
from urllib.parse import quote
+# about
+about = {
+ "website": 'https://www.ebay.com',
+ "wikidata_id": 'Q58024',
+ "official_api_documentation": 'https://developer.ebay.com/',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
categories = ['shopping']
paging = True
diff --git a/searx/engines/elasticsearch.py b/searx/engines/elasticsearch.py
index 0e2d35756..da7f98074 100644
--- a/searx/engines/elasticsearch.py
+++ b/searx/engines/elasticsearch.py
@@ -1,3 +1,8 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Elasticsearch
+"""
+
from json import loads, dumps
from requests.auth import HTTPBasicAuth
from searx.exceptions import SearxEngineAPIException
diff --git a/searx/engines/etools.py b/searx/engines/etools.py
index a0762d1c7..77d7e71c6 100644
--- a/searx/engines/etools.py
+++ b/searx/engines/etools.py
@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
eTools (Web)
-
- @website https://www.etools.ch
- @provide-api no
- @using-api no
- @results HTML
- @stable no (HTML can change)
- @parse url, title, content
"""
from lxml import html
from urllib.parse import quote
from searx.utils import extract_text, eval_xpath
+# about
+about = {
+ "website": 'https://www.etools.ch',
+ "wikidata_id": None,
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
categories = ['general']
paging = False
language_support = False
diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py
index 3d37db44e..8fff2e384 100644
--- a/searx/engines/fdroid.py
+++ b/searx/engines/fdroid.py
@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
F-Droid (a repository of FOSS applications for Android)
-
- @website https://f-droid.org/
- @provide-api no
- @using-api no
- @results HTML
- @stable no (HTML can change)
- @parse url, title, content
"""
from urllib.parse import urlencode
from lxml import html
from searx.utils import extract_text
+# about
+about = {
+ "website": 'https://f-droid.org/',
+ "wikidata_id": 'Q1386210',
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['files']
paging = True
diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py
index b23c447b8..b0ddf6224 100644
--- a/searx/engines/flickr.py
+++ b/searx/engines/flickr.py
@@ -1,21 +1,23 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Flickr (Images)
- @website https://www.flickr.com
- @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, thumbnail, img_src
More info on api-key : https://www.flickr.com/services/apps/create/
"""
from json import loads
from urllib.parse import urlencode
+# about
+about = {
+ "website": 'https://www.flickr.com',
+ "wikidata_id": 'Q103204',
+ "official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html',
+ "use_official_api": True,
+ "require_api_key": True,
+ "results": 'JSON',
+}
+
categories = ['images']
nb_per_page = 15
diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py
index 4bcf837cb..a07aad51e 100644
--- a/searx/engines/flickr_noapi.py
+++ b/searx/engines/flickr_noapi.py
@@ -1,15 +1,6 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
- Flickr (Images)
-
- @website https://www.flickr.com
- @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
-
- @using-api no
- @results HTML
- @stable no
- @parse url, title, thumbnail, img_src
+ Flickr (Images)
"""
from json import loads
@@ -21,6 +12,16 @@ from searx.utils import ecma_unescape, html_to_text
logger = logger.getChild('flickr-noapi')
+# about
+about = {
+ "website": 'https://www.flickr.com',
+ "wikidata_id": 'Q103204',
+ "official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
categories = ['images']
url = 'https://www.flickr.com/'
diff --git a/searx/engines/framalibre.py b/searx/engines/framalibre.py
index e3d056425..42c08cf95 100644
--- a/searx/engines/framalibre.py
+++ b/searx/engines/framalibre.py
@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
FramaLibre (It)
-
- @website https://framalibre.org/
- @provide-api no
-
- @using-api no
- @results HTML
- @stable no (HTML can change)
- @parse url, title, content, thumbnail, img_src
"""
from html import escape
@@ -15,6 +8,16 @@ from urllib.parse import urljoin, urlencode
from lxml import html
from searx.utils import extract_text
+# about
+about = {
+ "website": 'https://framalibre.org/',
+ "wikidata_id": 'Q30213882',
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['it']
paging = True
diff --git a/searx/engines/frinkiac.py b/searx/engines/frinkiac.py
index 5b174a687..f43bb6e20 100644
--- a/searx/engines/frinkiac.py
+++ b/searx/engines/frinkiac.py
@@ -1,17 +1,24 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
-Frinkiac (Images)
-
-@website https://www.frinkiac.com
-@provide-api no
-@using-api no
-@results JSON
-@stable no
-@parse url, title, img_src
+ Frinkiac (Images)
"""
from json import loads
from urllib.parse import urlencode
+# about
+about = {
+ "website": 'https://frinkiac.com',
+ "wikidata_id": 'Q24882614',
+ "official_api_documentation": {
+ 'url': None,
+ 'comment': 'see https://github.com/MitchellAW/CompuGlobal'
+ },
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
categories = ['images']
BASE = 'https://frinkiac.com/'
diff --git a/searx/engines/genius.py b/searx/engines/genius.py
index 2bfbfddf5..1667d529d 100644
--- a/searx/engines/genius.py
+++ b/searx/engines/genius.py
@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
-Genius
-
- @website https://www.genius.com/
- @provide-api yes (https://docs.genius.com/)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, content, thumbnail, publishedDate
+ Genius
"""
from json import loads
from urllib.parse import urlencode
from datetime import datetime
+# about
+about = {
+ "website": 'https://genius.com/',
+ "wikidata_id": 'Q3419343',
+ "official_api_documentation": 'https://docs.genius.com/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['music']
paging = True
diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py
index 16b3e692d..55f15576e 100644
--- a/searx/engines/gentoo.py
+++ b/searx/engines/gentoo.py
@@ -1,20 +1,22 @@
-# -*- coding: utf-8 -*-
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Gentoo Wiki
-
- @website https://wiki.gentoo.org
- @provide-api yes
- @using-api no
- @results HTML
- @stable no (HTML can change)
- @parse url, title
"""
from urllib.parse import urlencode, urljoin
from lxml import html
from searx.utils import extract_text
+# about
+about = {
+ "website": 'https://wiki.gentoo.org/',
+ "wikidata_id": 'Q1050637',
+ "official_api_documentation": 'https://wiki.gentoo.org/api.php',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['it']
language_support = True
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index 1d71b18e9..f5f89a736 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -1,14 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Gigablast (Web)
-
- @website https://gigablast.com
- @provide-api yes (https://gigablast.com/api.html)
-
- @using-api yes
- @results XML
- @stable yes
- @parse url, title, content
"""
# pylint: disable=missing-function-docstring, invalid-name
@@ -18,6 +10,16 @@ from urllib.parse import urlencode
# from searx import logger
from searx.poolrequests import get
+# about
+about = {
+ "website": 'https://www.gigablast.com',
+ "wikidata_id": 'Q3105449',
+ "official_api_documentation": 'https://gigablast.com/api.html',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['general']
# gigablast's pagination is totally damaged, don't use it
diff --git a/searx/engines/github.py b/searx/engines/github.py
index 80b50ceda..b68caa350 100644
--- a/searx/engines/github.py
+++ b/searx/engines/github.py
@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
- Github (It)
-
- @website https://github.com/
- @provide-api yes (https://developer.github.com/v3/)
-
- @using-api yes
- @results JSON
- @stable yes (using api)
- @parse url, title, content
+ Github (IT)
"""
from json import loads
from urllib.parse import urlencode
+# about
+about = {
+ "website": 'https://github.com/',
+ "wikidata_id": 'Q364',
+ "official_api_documentation": 'https://developer.github.com/v3/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['it']
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 17ab21f6a..4198de640 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -1,19 +1,11 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Google (Web)
-:website: https://www.google.com
-:provide-api: yes (https://developers.google.com/custom-search/)
-:using-api: not the offical, since it needs registration to another service
-:results: HTML
-:stable: no
-:parse: url, title, content, number_of_results, answer, suggestion, correction
-
-For detailed description of the *REST-full* API see: `Query Parameter
-Definitions`_.
-
-.. _Query Parameter Definitions:
- https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
+ For detailed description of the *REST-full* API see: `Query Parameter
+ Definitions`_.
+ .. _Query Parameter Definitions:
+ https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
"""
# pylint: disable=invalid-name, missing-function-docstring
@@ -27,6 +19,16 @@ from searx.exceptions import SearxEngineCaptchaException
logger = logger.getChild('google engine')
+# about
+about = {
+ "website": 'https://www.google.com',
+ "wikidata_id": 'Q9366',
+ "official_api_documentation": 'https://developers.google.com/custom-search/',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['general']
paging = True
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index 9ef1be753..8c2cb9d2a 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -1,14 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Google (Images)
-:website: https://images.google.com (redirected to subdomain www.)
-:provide-api: yes (https://developers.google.com/custom-search/)
-:using-api: not the offical, since it needs registration to another service
-:results: HTML
-:stable: no
-:template: images.html
-:parse: url, title, content, source, thumbnail_src, img_src
-
For detailed description of the *REST-full* API see: `Query Parameter
Definitions`_.
@@ -18,10 +10,6 @@ Definitions`_.
``data:` scheme).::
Header set Content-Security-Policy "img-src 'self' data: ;"
-
-.. _Query Parameter Definitions:
- https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
-
"""
from urllib.parse import urlencode, urlparse, unquote
@@ -39,6 +27,16 @@ from searx.engines.google import (
logger = logger.getChild('google images')
+# about
+about = {
+ "website": 'https://images.google.com/',
+ "wikidata_id": 'Q521550',
+ "official_api_documentation": 'https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions', # NOQA
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['images']
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index f1b7cfa79..63fef6696 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Google (News)
-
- @website https://news.google.com
- @provide-api no
-
- @using-api no
- @results HTML
- @stable no
- @parse url, title, content, publishedDate
"""
from urllib.parse import urlencode
@@ -15,6 +8,16 @@ from lxml import html
from searx.utils import match_language
from searx.engines.google import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
+# about
+about = {
+ "website": 'https://news.google.com',
+ "wikidata_id": 'Q12020',
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# search-url
categories = ['news']
paging = True
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index eedefbf45..61e01ca7b 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Google (Videos)
-
- @website https://www.google.com
- @provide-api yes (https://developers.google.com/custom-search/)
-
- @using-api no
- @results HTML
- @stable no
- @parse url, title, content, thumbnail
"""
from datetime import date, timedelta
@@ -16,6 +9,16 @@ from lxml import html
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
import re
+# about
+about = {
+ "website": 'https://www.google.com',
+ "wikidata_id": 'Q219885',
+ "official_api_documentation": 'https://developers.google.com/custom-search/',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['videos']
paging = True
diff --git a/searx/engines/ina.py b/searx/engines/ina.py
index ce241d409..1a47ca51e 100644
--- a/searx/engines/ina.py
+++ b/searx/engines/ina.py
@@ -1,15 +1,7 @@
-# INA (Videos)
-#
-# @website https://www.ina.fr/
-# @provide-api no
-#
-# @using-api no
-# @results HTML (using search portal)
-# @stable no (HTML can change)
-# @parse url, title, content, publishedDate, thumbnail
-#
-# @todo set content-parameter with correct data
-# @todo embedded (needs some md5 from video page)
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ INA (Videos)
+"""
from json import loads
from html import unescape
@@ -18,6 +10,15 @@ from lxml import html
from dateutil import parser
from searx.utils import extract_text
+# about
+about = {
+ "website": 'https://www.ina.fr/',
+ "wikidata_id": 'Q1665109',
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
# engine dependent config
categories = ['videos']
diff --git a/searx/engines/invidious.py b/searx/engines/invidious.py
index 6ea942699..61a6e5a19 100644
--- a/searx/engines/invidious.py
+++ b/searx/engines/invidious.py
@@ -1,16 +1,22 @@
-# Invidious (Videos)
-#
-# @website https://invidio.us/
-# @provide-api yes (https://github.com/omarroth/invidious/wiki/API)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title, content, publishedDate, thumbnail, embedded, author, length
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Invidious (Videos)
+"""
from urllib.parse import quote_plus
from dateutil import parser
import time
+import random
+
+# about
+about = {
+ "website": 'https://instances.invidio.us/',
+ "wikidata_id": 'Q79343316',
+ "official_api_documentation": 'https://github.com/omarroth/invidious/wiki/API',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
# engine dependent config
categories = ["videos", "music"]
@@ -18,19 +24,29 @@ paging = True
language_support = True
time_range_support = True
+
# search-url
-base_url = "https://invidio.us/"
+
+base_url = ''
+base_url_rand = ''
# do search-request
def request(query, params):
+ global base_url_rand
time_range_dict = {
"day": "today",
"week": "week",
"month": "month",
"year": "year",
}
- search_url = base_url + "api/v1/search?q={query}"
+
+ if isinstance(base_url, list):
+ base_url_rand = random.choice(base_url)
+ else:
+ base_url_rand = base_url
+
+ search_url = base_url_rand + "api/v1/search?q={query}"
params["url"] = search_url.format(
query=quote_plus(query)
) + "&page={pageno}".format(pageno=params["pageno"])
@@ -56,12 +72,12 @@ def response(resp):
embedded_url = (
'<iframe width="540" height="304" '
+ 'data-src="'
- + base_url
+ + base_url_rand
+ 'embed/{videoid}" '
+ 'frameborder="0" allowfullscreen></iframe>'
)
- base_invidious_url = base_url + "watch?v="
+ base_invidious_url = base_url_rand + "watch?v="
for result in search_results:
rtype = result.get("type", None)
diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py
index e2aa436cc..f4a5ff6d2 100644
--- a/searx/engines/json_engine.py
+++ b/searx/engines/json_engine.py
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
from collections.abc import Iterable
from json import loads
from urllib.parse import urlencode
diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py
index 90bd33063..6a44e2fd7 100644
--- a/searx/engines/kickass.py
+++ b/searx/engines/kickass.py
@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Kickass Torrent (Videos, Music, Files)
-
- @website https://kickass.so
- @provide-api no (nothing found)
-
- @using-api no
- @results HTML (using search portal)
- @stable yes (HTML can change)
- @parse url, title, content, seed, leech, magnetlink
"""
from lxml import html
@@ -15,6 +8,16 @@ from operator import itemgetter
from urllib.parse import quote, urljoin
from searx.utils import extract_text, get_torrent_size, convert_str_to_int
+# about
+about = {
+ "website": 'https://kickass.so',
+ "wikidata_id": 'Q17062285',
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['videos', 'music', 'files']
paging = True
diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py
index 50ba74efc..21abff86e 100644
--- a/searx/engines/mediawiki.py
+++ b/searx/engines/mediawiki.py
@@ -1,21 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
- general mediawiki-engine (Web)
-
- @website websites built on mediawiki (https://www.mediawiki.org)
- @provide-api yes (http://www.mediawiki.org/wiki/API:Search)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title
-
- @todo content
+ General mediawiki-engine (Web)
"""
from json import loads
from string import Formatter
from urllib.parse import urlencode, quote
+# about
+about = {
+ "website": None,
+ "wikidata_id": None,
+ "official_api_documentation": 'http://www.mediawiki.org/wiki/API:Search',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['general']
language_support = True
diff --git a/searx/engines/microsoft_academic.py b/searx/engines/microsoft_academic.py
index 7426eef7e..14de4ac9a 100644
--- a/searx/engines/microsoft_academic.py
+++ b/searx/engines/microsoft_academic.py
@@ -1,12 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
-Microsoft Academic (Science)
-
-@website https://academic.microsoft.com
-@provide-api yes
-@using-api no
-@results JSON
-@stable no
-@parse url, title, content
+ Microsoft Academic (Science)
"""
from datetime import datetime
@@ -15,6 +9,16 @@ from uuid import uuid4
from urllib.parse import urlencode
from searx.utils import html_to_text
+# about
+about = {
+ "website": 'https://academic.microsoft.com',
+ "wikidata_id": 'Q28136779',
+ "official_api_documentation": 'http://ma-graph.org/',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
categories = ['images']
paging = True
result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'
diff --git a/searx/engines/mixcloud.py b/searx/engines/mixcloud.py
index 0606350a9..a6fd1c0a1 100644
--- a/searx/engines/mixcloud.py
+++ b/searx/engines/mixcloud.py
@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Mixcloud (Music)
-
- @website https://http://www.mixcloud.com/
- @provide-api yes (http://www.mixcloud.com/developers/
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, content, embedded, publishedDate
"""
from json import loads
from dateutil import parser
from urllib.parse import urlencode
+# about
+about = {
+ "website": 'https://www.mixcloud.com/',
+ "wikidata_id": 'Q6883832',
+ "official_api_documentation": 'http://www.mixcloud.com/developers/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['music']
paging = True
diff --git a/searx/engines/not_evil.py b/searx/engines/not_evil.py
index e84f153bd..df41c0941 100644
--- a/searx/engines/not_evil.py
+++ b/searx/engines/not_evil.py
@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
not Evil (Onions)
-
- @website http://hss3uro2hsxfogfq.onion
- @provide-api yes (http://hss3uro2hsxfogfq.onion/api.htm)
-
- @using-api no
- @results HTML
- @stable no
- @parse url, title, content
"""
from urllib.parse import urlencode
from lxml import html
from searx.engines.xpath import extract_text
+# about
+about = {
+ "website": 'http://hss3uro2hsxfogfq.onion',
+ "wikidata_id": None,
+ "official_api_documentation": 'http://hss3uro2hsxfogfq.onion/api.htm',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['onions']
paging = True
diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py
index e0a91494f..f8178d637 100644
--- a/searx/engines/nyaa.py
+++ b/searx/engines/nyaa.py
@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Nyaa.si (Anime Bittorrent tracker)
-
- @website https://nyaa.si/
- @provide-api no
- @using-api no
- @results HTML
- @stable no (HTML can change)
- @parse url, title, content, seed, leech, torrentfile
"""
from lxml import html
from urllib.parse import urlencode
from searx.utils import extract_text, get_torrent_size, int_or_zero
+# about
+about = {
+ "website": 'https://nyaa.si/',
+ "wikidata_id": None,
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['files', 'images', 'videos', 'music']
paging = True
diff --git a/searx/engines/opensemantic.py b/searx/engines/opensemantic.py
index 9364bab41..64bc321f1 100644
--- a/searx/engines/opensemantic.py
+++ b/searx/engines/opensemantic.py
@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
-Open Semantic Search
-
- @website https://www.opensemanticsearch.org/
- @provide-api yes (https://www.opensemanticsearch.org/dev)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, content, publishedDate
+ Open Semantic Search
"""
+
from dateutil import parser
from json import loads
from urllib.parse import quote
+# about
+about = {
+ "website": 'https://www.opensemanticsearch.org/',
+ "wikidata_id": None,
+ "official_api_documentation": 'https://www.opensemanticsearch.org/dev',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
base_url = 'http://localhost:8983/solr/opensemanticsearch/'
search_string = 'query?q={query}'
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index 5475c7a6d..f11aa5f8c 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
OpenStreetMap (Map)
-
- @website https://openstreetmap.org/
- @provide-api yes (http://wiki.openstreetmap.org/wiki/Nominatim)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title
"""
import re
from json import loads
from flask_babel import gettext
+# about
+about = {
+ "website": 'https://www.openstreetmap.org/',
+ "wikidata_id": 'Q936',
+ "official_api_documentation": 'http://wiki.openstreetmap.org/wiki/Nominatim',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['map']
paging = False
diff --git a/searx/engines/pdbe.py b/searx/engines/pdbe.py
index 2db92868a..b9bbfaf1b 100644
--- a/searx/engines/pdbe.py
+++ b/searx/engines/pdbe.py
@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
PDBe (Protein Data Bank in Europe)
-
- @website https://www.ebi.ac.uk/pdbe
- @provide-api yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html),
- unlimited
- @using-api yes
- @results python dictionary (from json)
- @stable yes
- @parse url, title, content, img_src
"""
from json import loads
from flask_babel import gettext
+# about
+about = {
+ "website": 'https://www.ebi.ac.uk/pdbe',
+ "wikidata_id": 'Q55823905',
+ "official_api_documentation": 'https://www.ebi.ac.uk/pdbe/api/doc/search.html',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
categories = ['science']
hide_obsolete = False
diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py
index e43b2a6b7..549141079 100644
--- a/searx/engines/peertube.py
+++ b/searx/engines/peertube.py
@@ -1,15 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
peertube (Videos)
-
- @website https://www.peertube.live
- @provide-api yes (https://docs.joinpeertube.org/api-rest-reference.html)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, thumbnail, publishedDate, embedded
-
- @todo implement time range support
"""
from json import loads
@@ -17,6 +8,16 @@ from datetime import datetime
from urllib.parse import urlencode
from searx.utils import html_to_text
+# about
+about = {
+ "website": 'https://joinpeertube.org',
+ "wikidata_id": 'Q50938515',
+ "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ["videos"]
paging = True
diff --git a/searx/engines/photon.py b/searx/engines/photon.py
index 7a6fc8321..f12bcd22a 100644
--- a/searx/engines/photon.py
+++ b/searx/engines/photon.py
@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Photon (Map)
-
- @website https://photon.komoot.de
- @provide-api yes (https://photon.komoot.de/)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title
"""
from json import loads
from urllib.parse import urlencode
from searx.utils import searx_useragent
+# about
+about = {
+ "website": 'https://photon.komoot.de',
+ "wikidata_id": None,
+ "official_api_documentation": 'https://photon.komoot.de/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['map']
paging = False
diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py
index 828241ece..98a2dd9f2 100644
--- a/searx/engines/piratebay.py
+++ b/searx/engines/piratebay.py
@@ -1,12 +1,7 @@
-# Piratebay (Videos, Music, Files)
-#
-# @website https://thepiratebay.org
-# @provide-api yes (https://apibay.org/)
-#
-# @using-api yes
-# @results JSON
-# @stable no (the API is not documented nor versioned)
-# @parse url, title, seed, leech, magnetlink, filesize, publishedDate
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Piratebay (Videos, Music, Files)
+"""
from json import loads
from datetime import datetime
@@ -15,6 +10,16 @@ from operator import itemgetter
from urllib.parse import quote
from searx.utils import get_torrent_size
+# about
+about = {
+ "website": 'https://thepiratebay.org',
+ "wikidata_id": 'Q22663',
+ "official_api_documentation": 'https://apibay.org/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ["videos", "music", "files"]
diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py
index 07c45709e..da02f91ca 100644
--- a/searx/engines/pubmed.py
+++ b/searx/engines/pubmed.py
@@ -1,14 +1,6 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
PubMed (Scholar publications)
- @website https://www.ncbi.nlm.nih.gov/pubmed/
- @provide-api yes (https://www.ncbi.nlm.nih.gov/home/develop/api/)
- @using-api yes
- @results XML
- @stable yes
- @parse url, title, publishedDate, content
- More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/
"""
from flask_babel import gettext
@@ -17,6 +9,18 @@ from datetime import datetime
from urllib.parse import urlencode
from searx.poolrequests import get
+# about
+about = {
+ "website": 'https://www.ncbi.nlm.nih.gov/pubmed/',
+ "wikidata_id": 'Q1540899',
+ "official_api_documentation": {
+ 'url': 'https://www.ncbi.nlm.nih.gov/home/develop/api/',
+ 'comment': 'More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/'
+ },
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'XML',
+}
categories = ['science']
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index b785719d9..87499c8ad 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Qwant (Web, Images, News, Social)
-
- @website https://qwant.com/
- @provide-api not officially (https://api.qwant.com/api/search/)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, content
"""
from datetime import datetime
@@ -17,6 +10,15 @@ from searx.utils import html_to_text, match_language
from searx.exceptions import SearxEngineAPIException, SearxEngineCaptchaException
from searx.raise_for_httperror import raise_for_httperror
+# about
+about = {
+ "website": 'https://www.qwant.com/',
+ "wikidata_id": 'Q14657870',
+ "official_api_documentation": None,
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
# engine dependent config
categories = []
diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py
index 5a956b8bf..d90005a95 100644
--- a/searx/engines/recoll.py
+++ b/searx/engines/recoll.py
@@ -1,17 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Recoll (local search engine)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, content, size, abstract, author, mtype, subtype, time, \
- filename, label, type, embedded
"""
from datetime import date, timedelta
from json import loads
from urllib.parse import urlencode, quote
+# about
+about = {
+ "website": None,
+ "wikidata_id": 'Q15735774',
+ "official_api_documentation": 'https://www.lesbonscomptes.com/recoll/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
time_range_support = True
diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py
index e732875cb..ee734ace2 100644
--- a/searx/engines/reddit.py
+++ b/searx/engines/reddit.py
@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Reddit
-
- @website https://www.reddit.com/
- @provide-api yes (https://www.reddit.com/dev/api)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, content, thumbnail, publishedDate
"""
import json
from datetime import datetime
from urllib.parse import urlencode, urljoin, urlparse
+# about
+about = {
+ "website": 'https://www.reddit.com/',
+ "wikidata_id": 'Q1136',
+ "official_api_documentation": 'https://www.reddit.com/dev/api',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['general', 'images', 'news', 'social media']
page_size = 25
diff --git a/searx/engines/scanr_structures.py b/searx/engines/scanr_structures.py
index 72fd2b3c9..51c925247 100644
--- a/searx/engines/scanr_structures.py
+++ b/searx/engines/scanr_structures.py
@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
ScanR Structures (Science)
-
- @website https://scanr.enseignementsup-recherche.gouv.fr
- @provide-api yes (https://scanr.enseignementsup-recherche.gouv.fr/api/swagger-ui.html)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, content, img_src
"""
from json import loads, dumps
from searx.utils import html_to_text
+# about
+about = {
+ "website": 'https://scanr.enseignementsup-recherche.gouv.fr',
+ "wikidata_id": 'Q44105684',
+ "official_api_documentation": 'https://scanr.enseignementsup-recherche.gouv.fr/opendata',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['science']
paging = True
diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py
index 706285814..8c1330d98 100644
--- a/searx/engines/searchcode_code.py
+++ b/searx/engines/searchcode_code.py
@@ -1,18 +1,20 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
- Searchcode (It)
-
- @website https://searchcode.com/
- @provide-api yes (https://searchcode.com/api/)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, content
+ Searchcode (IT)
"""
from json import loads
from urllib.parse import urlencode
+# about
+about = {
+ "website": 'https://searchcode.com/',
+ "wikidata_id": None,
+ "official_api_documentation": 'https://searchcode.com/api/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
# engine dependent config
categories = ['it']
diff --git a/searx/engines/searx_engine.py b/searx/engines/searx_engine.py
index 87e5e05c2..c4f016adc 100644
--- a/searx/engines/searx_engine.py
+++ b/searx/engines/searx_engine.py
@@ -1,18 +1,20 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Searx (all)
-
- @website https://github.com/searx/searx
- @provide-api yes (https://searx.github.io/searx/dev/search_api.html)
-
- @using-api yes
- @results JSON
- @stable yes (using api)
- @parse url, title, content
"""
from json import loads
from searx.engines import categories as searx_categories
+# about
+about = {
+ "website": 'https://github.com/searx/searx',
+ "wikidata_id": 'Q17639196',
+ "official_api_documentation": 'https://searx.github.io/searx/dev/search_api.html',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
categories = searx_categories.keys()
diff --git a/searx/engines/sepiasearch.py b/searx/engines/sepiasearch.py
index 0b7c1ba6e..3433c897a 100644
--- a/searx/engines/sepiasearch.py
+++ b/searx/engines/sepiasearch.py
@@ -1,17 +1,23 @@
-# SepiaSearch (Videos)
-#
-# @website https://sepiasearch.org
-# @provide-api https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title, content, publishedDate, thumbnail
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ SepiaSearch (Videos)
+"""
from json import loads
from dateutil import parser, relativedelta
from urllib.parse import urlencode
from datetime import datetime
+# about
+about = {
+ "website": 'https://sepiasearch.org',
+ "wikidata_id": None,
+ "official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api", # NOQA
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
categories = ['videos']
paging = True
language_support = True
diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py
index 84ff21a88..9e414746f 100644
--- a/searx/engines/soundcloud.py
+++ b/searx/engines/soundcloud.py
@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Soundcloud (Music)
-
- @website https://soundcloud.com
- @provide-api yes (https://developers.soundcloud.com/)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, content, publishedDate, embedded
"""
import re
@@ -18,6 +11,15 @@ from urllib.parse import quote_plus, urlencode
from searx import logger
from searx.poolrequests import get as http_get
+# about
+about = {
+ "website": 'https://soundcloud.com',
+ "wikidata_id": 'Q568769',
+ "official_api_documentation": 'https://developers.soundcloud.com/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
# engine dependent config
categories = ['music']
diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py
index 74942326e..0ad8bfe32 100644
--- a/searx/engines/spotify.py
+++ b/searx/engines/spotify.py
@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Spotify (Music)
-
- @website https://spotify.com
- @provide-api yes (https://developer.spotify.com/web-api/search-item/)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, content, embedded
"""
from json import loads
@@ -15,6 +8,16 @@ from urllib.parse import urlencode
import requests
import base64
+# about
+about = {
+ "website": 'https://www.spotify.com',
+ "wikidata_id": 'Q689141',
+ "official_api_documentation": 'https://developer.spotify.com/web-api/search-item/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['music']
paging = True
diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
index f730264e2..91eaa68e9 100644
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
- Stackoverflow (It)
-
- @website https://stackoverflow.com/
- @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
-
- @using-api no
- @results HTML
- @stable no (HTML can change)
- @parse url, title, content
+ Stackoverflow (IT)
"""
from urllib.parse import urlencode, urljoin, urlparse
@@ -15,6 +8,16 @@ from lxml import html
from searx.utils import extract_text
from searx.exceptions import SearxEngineCaptchaException
+# about
+about = {
+ "website": 'https://stackoverflow.com/',
+ "wikidata_id": 'Q549037',
+ "official_api_documentation": 'https://api.stackexchange.com/docs',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['it']
paging = True
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index cd8b132f9..68157971d 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -1,14 +1,7 @@
-# Startpage (Web)
-#
-# @website https://startpage.com
-# @provide-api no (nothing found)
-#
-# @using-api no
-# @results HTML
-# @stable no (HTML can change)
-# @parse url, title, content
-#
-# @todo paging
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Startpage (Web)
+"""
from lxml import html
from dateutil import parser
@@ -19,6 +12,16 @@ from babel import Locale
from babel.localedata import locale_identifiers
from searx.utils import extract_text, eval_xpath, match_language
+# about
+about = {
+ "website": 'https://startpage.com',
+ "wikidata_id": 'Q2333295',
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['general']
# there is a mechanism to block "bot" search
diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py
index 9fffba8a6..91d1f01d5 100644
--- a/searx/engines/tokyotoshokan.py
+++ b/searx/engines/tokyotoshokan.py
@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Tokyo Toshokan (A BitTorrent Library for Japanese Media)
-
- @website https://www.tokyotosho.info/
- @provide-api no
- @using-api no
- @results HTML
- @stable no (HTML can change)
- @parse url, title, publishedDate, seed, leech,
- filesize, magnetlink, content
"""
import re
@@ -16,6 +9,16 @@ from lxml import html
from datetime import datetime
from searx.utils import extract_text, get_torrent_size, int_or_zero
+# about
+about = {
+ "website": 'https://www.tokyotosho.info/',
+ "wikidata_id": None,
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['files', 'videos', 'music']
paging = True
diff --git a/searx/engines/torrentz.py b/searx/engines/torrentz.py
index 4d3e6fdd7..94a7a5343 100644
--- a/searx/engines/torrentz.py
+++ b/searx/engines/torrentz.py
@@ -1,14 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Torrentz2.is (BitTorrent meta-search engine)
-
- @website https://torrentz2.is/
- @provide-api no
-
- @using-api no
- @results HTML
- @stable no (HTML can change, although unlikely,
- see https://torrentz.is/torrentz.btsearch)
- @parse url, title, publishedDate, seed, leech, filesize, magnetlink
"""
import re
@@ -17,6 +9,16 @@ from lxml import html
from datetime import datetime
from searx.utils import extract_text, get_torrent_size
+# about
+about = {
+ "website": 'https://torrentz2.is/',
+ "wikidata_id": 'Q1156687',
+ "official_api_documentation": 'https://torrentz.is/torrentz.btsearch',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['files', 'videos', 'music']
paging = True
diff --git a/searx/engines/translated.py b/searx/engines/translated.py
index 2706e3617..9c53d70ad 100644
--- a/searx/engines/translated.py
+++ b/searx/engines/translated.py
@@ -1,14 +1,18 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
MyMemory Translated
-
- @website https://mymemory.translated.net/
- @provide-api yes (https://mymemory.translated.net/doc/spec.php)
- @using-api yes
- @results JSON
- @stable yes
- @parse url, title, content
"""
+# about
+about = {
+ "website": 'https://mymemory.translated.net/',
+ "wikidata_id": None,
+ "official_api_documentation": 'https://mymemory.translated.net/doc/spec.php',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
engine_type = 'online_dictionnary'
categories = ['general']
url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py
index 45c6b30da..3bbdf630d 100644
--- a/searx/engines/unsplash.py
+++ b/searx/engines/unsplash.py
@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Unsplash
-
- @website https://unsplash.com
- @provide-api yes (https://unsplash.com/developers)
-
- @using-api no
- @results JSON (using search portal's infiniscroll API)
- @stable no (JSON format could change any time)
- @parse url, title, img_src, thumbnail_src
"""
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
from json import loads
+# about
+about = {
+ "website": 'https://unsplash.com',
+ "wikidata_id": 'Q28233552',
+ "official_api_documentation": 'https://unsplash.com/developers',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
url = 'https://unsplash.com/'
search_url = url + 'napi/search/photos?'
categories = ['images']
diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py
index fd3abc858..824579256 100644
--- a/searx/engines/vimeo.py
+++ b/searx/engines/vimeo.py
@@ -1,21 +1,22 @@
-# Vimeo (Videos)
-#
-# @website https://vimeo.com/
-# @provide-api yes (http://developer.vimeo.com/api),
-# they have a maximum count of queries/hour
-#
-# @using-api no (TODO, rewrite to api)
-# @results HTML (using search portal)
-# @stable no (HTML can change)
-# @parse url, title, publishedDate, thumbnail, embedded
-#
-# @todo rewrite to api
-# @todo set content-parameter with correct data
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Vimeo (Videos)
+"""
from urllib.parse import urlencode
from json import loads
from dateutil import parser
+# about
+about = {
+ "website": 'https://vimeo.com/',
+ "wikidata_id": 'Q156376',
+ "official_api_documentation": 'http://developer.vimeo.com/api',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['videos']
paging = True
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index 8d787caac..c8e4cfae6 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -1,14 +1,6 @@
-# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Wikidata
-
- @website https://wikidata.org
- @provide-api yes (https://query.wikidata.org/)
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, infobox
"""
@@ -27,6 +19,16 @@ from searx.engines.wikipedia import _fetch_supported_languages, supported_langua
logger = logger.getChild('wikidata')
+# about
+about = {
+ "website": 'https://wikidata.org/',
+ "wikidata_id": 'Q2013',
+ "official_api_documentation": 'https://query.wikidata.org/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# SPARQL
SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql'
SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index 54d75108e..eff301145 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Wikipedia (Web)
-
- @website https://en.wikipedia.org/api/rest_v1/
- @provide-api yes
-
- @using-api yes
- @results JSON
- @stable yes
- @parse url, infobox
"""
from urllib.parse import quote
@@ -16,6 +9,16 @@ from lxml.html import fromstring
from searx.utils import match_language, searx_useragent
from searx.raise_for_httperror import raise_for_httperror
+# about
+about = {
+ "website": 'https://www.wikipedia.org/',
+ "wikidata_id": 'Q52',
+ "official_api_documentation": 'https://en.wikipedia.org/api/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# search-url
search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py
index 520eaa209..9c84e2809 100644
--- a/searx/engines/wolframalpha_api.py
+++ b/searx/engines/wolframalpha_api.py
@@ -1,16 +1,21 @@
-# Wolfram Alpha (Science)
-#
-# @website https://www.wolframalpha.com
-# @provide-api yes (https://api.wolframalpha.com/v2/)
-#
-# @using-api yes
-# @results XML
-# @stable yes
-# @parse url, infobox
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Wolfram|Alpha (Science)
+"""
from lxml import etree
from urllib.parse import urlencode
+# about
+about = {
+ "website": 'https://www.wolframalpha.com',
+ "wikidata_id": 'Q207006',
+ "official_api_documentation": 'https://products.wolframalpha.com/api/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'XML',
+}
+
# search-url
search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
site_url = 'https://www.wolframalpha.com/input/?{query}'
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 943d4f3fb..8e427d575 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -1,12 +1,7 @@
-# Wolfram|Alpha (Science)
-#
-# @website https://www.wolframalpha.com/
-# @provide-api yes (https://api.wolframalpha.com/v2/)
-#
-# @using-api no
-# @results JSON
-# @stable no
-# @parse url, infobox
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Wolfram|Alpha (Science)
+"""
from json import loads
from time import time
@@ -14,6 +9,16 @@ from urllib.parse import urlencode
from searx.poolrequests import get as http_get
+# about
+about = {
+ "website": 'https://www.wolframalpha.com/',
+ "wikidata_id": 'Q207006',
+ "official_api_documentation": 'https://products.wolframalpha.com/api/',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# search-url
url = 'https://www.wolframalpha.com/'
diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
index b8f111a50..96b8d680c 100644
--- a/searx/engines/www1x.py
+++ b/searx/engines/www1x.py
@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
1x (Images)
-
- @website http://1x.com/
- @provide-api no
-
- @using-api no
- @results HTML
- @stable no (HTML can change)
- @parse url, title, thumbnail
"""
from lxml import html, etree
from urllib.parse import urlencode, urljoin
from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
+# about
+about = {
+ "website": 'https://1x.com/',
+ "wikidata_id": None,
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['images']
paging = False
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 1507176ec..612f69abd 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
from lxml import html
from urllib.parse import urlencode
from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
index 6f7ab759f..afd59cd49 100644
--- a/searx/engines/yacy.py
+++ b/searx/engines/yacy.py
@@ -1,16 +1,7 @@
-# Yacy (Web, Images, Videos, Music, Files)
-#
-# @website http://yacy.net
-# @provide-api yes
-# (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse (general) url, title, content, publishedDate
-# @parse (images) url, title, img_src
-#
-# @todo parse video, audio and file results
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Yacy (Web, Images, Videos, Music, Files)
+"""
from json import loads
from dateutil import parser
@@ -20,6 +11,16 @@ from requests.auth import HTTPDigestAuth
from searx.utils import html_to_text
+# about
+about = {
+ "website": 'https://yacy.net/',
+ "wikidata_id": 'Q1759675',
+ "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['general', 'images'] # TODO , 'music', 'videos', 'files'
paging = True
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index 3420aa6d5..eb07a45fc 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -1,20 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Yahoo (Web)
-
- @website https://search.yahoo.com/web
- @provide-api yes (https://developer.yahoo.com/boss/search/),
- $0.80/1000 queries
-
- @using-api no (because pricing)
- @results HTML (using search portal)
- @stable no (HTML can change)
- @parse url, title, content, suggestion
"""
from urllib.parse import unquote, urlencode
from lxml import html
from searx.utils import extract_text, extract_url, match_language, eval_xpath
+# about
+about = {
+ "website": 'https://search.yahoo.com/',
+ "wikidata_id": None,
+ "official_api_documentation": 'https://developer.yahoo.com/api/',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['general']
paging = True
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index 793d1104a..b324ecdf3 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -1,13 +1,7 @@
-# Yahoo (News)
-#
-# @website https://news.yahoo.com
-# @provide-api yes (https://developer.yahoo.com/boss/search/)
-# $0.80/1000 queries
-#
-# @using-api no (because pricing)
-# @results HTML (using search portal)
-# @stable no (HTML can change)
-# @parse url, title, content, publishedDate
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Yahoo (News)
+"""
import re
from datetime import datetime, timedelta
@@ -18,6 +12,16 @@ from searx.engines.yahoo import _fetch_supported_languages, supported_languages_
from dateutil import parser
from searx.utils import extract_text, extract_url, match_language
+# about
+about = {
+ "website": 'https://news.yahoo.com',
+ "wikidata_id": 'Q3044717',
+ "official_api_documentation": 'https://developer.yahoo.com/api/',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['news']
paging = True
diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py
index b4a6a54cf..57a2f4b79 100644
--- a/searx/engines/yandex.py
+++ b/searx/engines/yandex.py
@@ -1,12 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Yahoo (Web)
-
- @website https://yandex.ru/
- @provide-api ?
- @using-api no
- @results HTML (using search portal)
- @stable no (HTML can change)
- @parse url, title, content
"""
from urllib.parse import urlencode, urlparse
@@ -16,6 +10,16 @@ from searx.exceptions import SearxEngineCaptchaException
logger = logger.getChild('yandex engine')
+# about
+about = {
+ "website": 'https://yandex.ru/',
+ "wikidata_id": 'Q5281',
+ "official_api_documentation": "?",
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['general']
paging = True
diff --git a/searx/engines/yggtorrent.py b/searx/engines/yggtorrent.py
index ec84d2c6b..cad2de52b 100644
--- a/searx/engines/yggtorrent.py
+++ b/searx/engines/yggtorrent.py
@@ -1,12 +1,7 @@
-# Yggtorrent (Videos, Music, Files)
-#
-# @website https://www2.yggtorrent.si
-# @provide-api no (nothing found)
-#
-# @using-api no
-# @results HTML (using search portal)
-# @stable no (HTML can change)
-# @parse url, title, seed, leech, publishedDate, filesize
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Yggtorrent (Videos, Music, Files)
+"""
from lxml import html
from operator import itemgetter
@@ -15,6 +10,16 @@ from urllib.parse import quote
from searx.utils import extract_text, get_torrent_size
from searx.poolrequests import get as http_get
+# about
+about = {
+ "website": 'https://www2.yggtorrent.si',
+ "wikidata_id": None,
+ "official_api_documentation": None,
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['videos', 'music', 'files']
paging = True
diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py
index 8c12ac4d2..b3dcb4907 100644
--- a/searx/engines/youtube_api.py
+++ b/searx/engines/youtube_api.py
@@ -1,18 +1,23 @@
-# Youtube (Videos)
-#
-# @website https://www.youtube.com/
-# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title, content, publishedDate, thumbnail, embedded
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Youtube (Videos)
+"""
from json import loads
from dateutil import parser
from urllib.parse import urlencode
from searx.exceptions import SearxEngineAPIException
+# about
+about = {
+ "website": 'https://www.youtube.com/',
+ "wikidata_id": 'Q866',
+ "official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
# engine dependent config
categories = ['videos', 'music']
paging = False
diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py
index 36fc72e36..4a6df57c4 100644
--- a/searx/engines/youtube_noapi.py
+++ b/searx/engines/youtube_noapi.py
@@ -1,17 +1,22 @@
-# Youtube (Videos)
-#
-# @website https://www.youtube.com/
-# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
-#
-# @using-api no
-# @results HTML
-# @stable no
-# @parse url, title, content, publishedDate, thumbnail, embedded
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Youtube (Videos)
+"""
from functools import reduce
from json import loads
from urllib.parse import quote_plus
+# about
+about = {
+ "website": 'https://www.youtube.com/',
+ "wikidata_id": 'Q866',
+ "official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['videos', 'music']
paging = True
diff --git a/searx/query.py b/searx/query.py
index e61e24f2c..38cb03ffe 100644
--- a/searx/query.py
+++ b/searx/query.py
@@ -77,7 +77,7 @@ class RawTextQuery:
pass
# this force a language
- if query_part[0] == ':':
+ if query_part[0] == ':' and len(query_part) > 1:
lang = query_part[1:].lower().replace('_', '-')
# check if any language-code is equal with
diff --git a/searx/search/__init__.py b/searx/search/__init__.py
index 0d45f0b7c..f777e8595 100644
--- a/searx/search/__init__.py
+++ b/searx/search/__init__.py
@@ -28,7 +28,9 @@ from searx.external_bang import get_bang_url
from searx.results import ResultContainer
from searx import logger
from searx.plugins import plugins
+from searx.search.models import EngineRef, SearchQuery
from searx.search.processors import processors, initialize as initialize_processors
+from searx.search.checker import initialize as initialize_checker
logger = logger.getChild('search')
@@ -45,68 +47,11 @@ else:
sys.exit(1)
-def initialize(settings_engines=None):
+def initialize(settings_engines=None, enable_checker=False):
settings_engines = settings_engines or settings['engines']
initialize_processors(settings_engines)
-
-
-class EngineRef:
-
- __slots__ = 'name', 'category'
-
- def __init__(self, name: str, category: str):
- self.name = name
- self.category = category
-
- def __repr__(self):
- return "EngineRef({!r}, {!r})".format(self.name, self.category)
-
- def __eq__(self, other):
- return self.name == other.name and self.category == other.category
-
-
-class SearchQuery:
- """container for all the search parameters (query, language, etc...)"""
-
- __slots__ = 'query', 'engineref_list', 'lang', 'safesearch', 'pageno', 'time_range',\
- 'timeout_limit', 'external_bang'
-
- def __init__(self,
- query: str,
- engineref_list: typing.List[EngineRef],
- lang: str='all',
- safesearch: int=0,
- pageno: int=1,
- time_range: typing.Optional[str]=None,
- timeout_limit: typing.Optional[float]=None,
- external_bang: typing.Optional[str]=None):
- self.query = query
- self.engineref_list = engineref_list
- self.lang = lang
- self.safesearch = safesearch
- self.pageno = pageno
- self.time_range = time_range
- self.timeout_limit = timeout_limit
- self.external_bang = external_bang
-
- @property
- def categories(self):
- return list(set(map(lambda engineref: engineref.category, self.engineref_list)))
-
- def __repr__(self):
- return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\
- format(self.query, self.engineref_list, self.lang, self.safesearch,
- self.pageno, self.time_range, self.timeout_limit, self.external_bang)
-
- def __eq__(self, other):
- return self.query == other.query\
- and self.engineref_list == other.engineref_list\
- and self.lang == other.lang\
- and self.safesearch == other.safesearch\
- and self.pageno == other.pageno\
- and self.time_range == other.time_range\
- and self.timeout_limit == other.timeout_limit\
- and self.external_bang == other.external_bang
+ if enable_checker:
+ initialize_checker()
class Search:
diff --git a/searx/search/checker/__init__.py b/searx/search/checker/__init__.py
new file mode 100644
index 000000000..85b9178df
--- /dev/null
+++ b/searx/search/checker/__init__.py
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+from .impl import Checker
+from .background import initialize, get_result
diff --git a/searx/search/checker/__main__.py b/searx/search/checker/__main__.py
new file mode 100644
index 000000000..75b37e6c5
--- /dev/null
+++ b/searx/search/checker/__main__.py
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import sys
+import io
+import os
+import argparse
+import logging
+
+import searx.search
+import searx.search.checker
+from searx.search import processors
+from searx.engines import engine_shortcuts
+
+
+# configure logging
+root = logging.getLogger()
+handler = logging.StreamHandler(sys.stdout)
+for h in root.handlers:
+ root.removeHandler(h)
+root.addHandler(handler)
+
+# color only for a valid terminal
+if sys.stdout.isatty() and os.environ.get('TERM') not in ['dumb', 'unknown']:
+ RESET_SEQ = "\033[0m"
+ COLOR_SEQ = "\033[1;%dm"
+ BOLD_SEQ = "\033[1m"
+ BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = map(lambda i: COLOR_SEQ % (30 + i), range(8))
+else:
+ RESET_SEQ = ""
+ COLOR_SEQ = ""
+ BOLD_SEQ = ""
+ BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = "", "", "", "", "", "", "", ""
+
+# equivalent of 'python -u' (unbuffered stdout, stderr)
+stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True)
+stderr = io.TextIOWrapper(open(sys.stderr.fileno(), 'wb', 0), write_through=True)
+
+
+# iterator of processors
+def iter_processor(engine_name_list):
+ if len(engine_name_list) > 0:
+ for name in engine_name_list:
+ name = engine_shortcuts.get(name, name)
+ processor = processors.get(name)
+ if processor is not None:
+ yield name, processor
+ else:
+ stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{RED}Engine does not exist{RESET_SEQ}')
+ else:
+ for name, processor in searx.search.processors.items():
+ yield name, processor
+
+
+# actual check & display
+def run(engine_name_list, verbose):
+ searx.search.initialize()
+ for name, processor in iter_processor(engine_name_list):
+ stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}Checking\n')
+ if not sys.stdout.isatty():
+ stderr.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}Checking\n')
+ checker = searx.search.checker.Checker(processor)
+ checker.run()
+ if checker.test_results.succesfull:
+ stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{GREEN}OK{RESET_SEQ}\n')
+ if verbose:
+ stdout.write(f' {"found languages":15}: {" ".join(sorted(list(checker.test_results.languages)))}\n')
+ else:
+ stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{RESET_SEQ}{RED}Error{RESET_SEQ}')
+ if not verbose:
+ errors = [test_name + ': ' + error for test_name, error in checker.test_results]
+ stdout.write(f'{RED}Error {str(errors)}{RESET_SEQ}\n')
+ else:
+ stdout.write('\n')
+ stdout.write(f' {"found languages":15}: {" ".join(sorted(list(checker.test_results.languages)))}\n')
+ for test_name, logs in checker.test_results.logs.items():
+ for log in logs:
+ stdout.write(f' {test_name:15}: {RED}{" ".join(log)}{RESET_SEQ}\n')
+
+
+# called by setup.py
+def main():
+ parser = argparse.ArgumentParser(description='Check searx engines.')
+ parser.add_argument('engine_name_list', metavar='engine name', type=str, nargs='*',
+ help='engines name or shortcut list. Empty for all engines.')
+ parser.add_argument('--verbose', '-v',
+ action='store_true', dest='verbose',
+ help='Display details about the test results',
+ default=False)
+ args = parser.parse_args()
+ run(args.engine_name_list, args.verbose)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py
new file mode 100644
index 000000000..e41bff5f5
--- /dev/null
+++ b/searx/search/checker/background.py
@@ -0,0 +1,123 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import json
+import random
+import time
+import threading
+import os
+import signal
+
+from searx import logger, settings, searx_debug
+from searx.exceptions import SearxSettingsException
+from searx.search.processors import processors
+from searx.search.checker import Checker
+from searx.shared import schedule, storage
+
+
+CHECKER_RESULT = 'CHECKER_RESULT'
+running = threading.Lock()
+
+
+def _get_interval(every, error_msg):
+ if isinstance(every, int):
+ every = (every, every)
+ if not isinstance(every, (tuple, list))\
+ or len(every) != 2\
+ or not isinstance(every[0], int)\
+ or not isinstance(every[1], int):
+ raise SearxSettingsException(error_msg, None)
+ return every
+
+
+def _get_every():
+ every = settings.get('checker', {}).get('scheduling', {}).get('every', (300, 1800))
+ return _get_interval(every, 'checker.scheduling.every is not a int or list')
+
+
+def get_result():
+ serialized_result = storage.get_str(CHECKER_RESULT)
+ if serialized_result is not None:
+ return json.loads(serialized_result)
+
+
+def _set_result(result, include_timestamp=True):
+ if include_timestamp:
+ result['timestamp'] = int(time.time() / 3600) * 3600
+ storage.set_str(CHECKER_RESULT, json.dumps(result))
+
+
+def run():
+ if not running.acquire(blocking=False):
+ return
+ try:
+ logger.info('Starting checker')
+ result = {
+ 'status': 'ok',
+ 'engines': {}
+ }
+ for name, processor in processors.items():
+ logger.debug('Checking %s engine', name)
+ checker = Checker(processor)
+ checker.run()
+ if checker.test_results.succesfull:
+ result['engines'][name] = {'success': True}
+ else:
+ result['engines'][name] = {'success': False, 'errors': checker.test_results.errors}
+
+ _set_result(result)
+ logger.info('Check done')
+ except Exception:
+ _set_result({'status': 'error'})
+ logger.exception('Error while running the checker')
+ finally:
+ running.release()
+
+
+def _run_with_delay():
+ every = _get_every()
+ delay = random.randint(0, every[1] - every[0])
+ logger.debug('Start checker in %i seconds', delay)
+ time.sleep(delay)
+ run()
+
+
+def _start_scheduling():
+ every = _get_every()
+ if schedule(every[0], _run_with_delay):
+ run()
+
+
+def _signal_handler(signum, frame):
+ t = threading.Thread(target=run)
+ t.daemon = True
+ t.start()
+
+
+def initialize():
+ logger.info('Send SIGUSR1 signal to pid %i to start the checker', os.getpid())
+ signal.signal(signal.SIGUSR1, _signal_handler)
+
+ # disabled by default
+ _set_result({'status': 'disabled'})
+
+ # special case when debug is activated
+ if searx_debug and settings.get('checker', {}).get('off_when_debug', True):
+ logger.info('debug mode: checker is disabled')
+ return
+
+ # check value of checker.scheduling.every now
+ scheduling = settings.get('checker', {}).get('scheduling', None)
+ if scheduling is None or not scheduling:
+ logger.info('Checker scheduler is disabled')
+ return
+
+ # scheduling is enabled: report status 'unknown' until a check has run
+ _set_result({'status': 'unknown'}, include_timestamp=False)
+
+ start_after = scheduling.get('start_after', (300, 1800))
+ start_after = _get_interval(start_after, 'checker.scheduling.start_after is not a int or list')
+ delay = random.randint(start_after[0], start_after[1])
+ logger.info('Start checker in %i seconds', delay)
+ t = threading.Timer(delay, _start_scheduling)
+ t.daemon = True
+ t.start()
diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py
new file mode 100644
index 000000000..71a941f73
--- /dev/null
+++ b/searx/search/checker/impl.py
@@ -0,0 +1,406 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import typing
+import types
+import functools
+import itertools
+from time import time
+from urllib.parse import urlparse
+
+import re
+import cld3
+import requests.exceptions
+
+from searx import poolrequests, logger
+from searx.results import ResultContainer
+from searx.search.models import SearchQuery, EngineRef
+from searx.search.processors import EngineProcessor
+
+
+logger = logger.getChild('searx.search.checker')
+
+HTML_TAGS = [
+ 'embed', 'iframe', 'object', 'param', 'picture', 'source', 'svg', 'math', 'canvas', 'noscript', 'script',
+ 'del', 'ins', 'area', 'audio', 'img', 'map', 'track', 'video', 'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite',
+ 'code', 'data', 'dfn', 'em', 'i', 'kdb', 'mark', 'q', 'rb', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'small',
+ 'span', 'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr', 'style', 'blockquote', 'dd', 'div', 'dl', 'dt',
+ 'figcaption', 'figure', 'hr', 'li', 'ol', 'p', 'pre', 'ul', 'button', 'datalist', 'fieldset', 'form', 'input',
+ 'label', 'legend', 'meter', 'optgroup', 'option', 'output', 'progress', 'select', 'textarea', 'applet',
+ 'frame', 'frameset'
+]
+
+
+def get_check_no_html():
+ rep = ['<' + tag + '[^\>]*>' for tag in HTML_TAGS]
+ rep += ['</' + tag + '>' for tag in HTML_TAGS]
+ pattern = re.compile('|'.join(rep))
+
+ def f(text):
+ return pattern.search(text.lower()) is None
+
+ return f
+
+
+_check_no_html = get_check_no_html()
+
+
+def _is_url(url):
+ try:
+ result = urlparse(url)
+ except ValueError:
+ return False
+ if result.scheme not in ('http', 'https'):
+ return False
+ return True
+
+
+@functools.lru_cache(maxsize=8192)
+def _is_url_image(image_url):
+ if not isinstance(image_url, str):
+ return False
+
+ if image_url.startswith('//'):
+ image_url = 'https:' + image_url
+
+ if image_url.startswith('data:'):
+ return image_url.startswith('data:image/')
+
+ if not _is_url(image_url):
+ return False
+
+ retry = 2
+
+ while retry > 0:
+ a = time()
+ try:
+ poolrequests.set_timeout_for_thread(10.0, time())
+ r = poolrequests.get(image_url, timeout=10.0, allow_redirects=True, headers={
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+ 'Accept-Language': 'en-US;q=0.5,en;q=0.3',
+ 'Accept-Encoding': 'gzip, deflate, br',
+ 'DNT': '1',
+ 'Connection': 'keep-alive',
+ 'Upgrade-Insecure-Requests': '1',
+ 'Sec-GPC': '1',
+ 'Cache-Control': 'max-age=0'
+ })
+ if r.headers["content-type"].startswith('image/'):
+ return True
+ return False
+ except requests.exceptions.Timeout:
+ logger.error('Timeout for %s: %i', image_url, int(time() - a))
+ retry -= 1
+ except requests.exceptions.RequestException:
+ logger.exception('Exception for %s', image_url)
+ return False
+
+
+def _search_query_to_dict(search_query: SearchQuery) -> typing.Dict[str, typing.Any]:
+ return {
+ 'query': search_query.query,
+ 'lang': search_query.lang,
+ 'pageno': search_query.pageno,
+ 'safesearch': search_query.safesearch,
+ 'time_range': search_query.time_range,
+ }
+
+
+def _search_query_diff(sq1: SearchQuery, sq2: SearchQuery)\
+ -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]:
+ param1 = _search_query_to_dict(sq1)
+ param2 = _search_query_to_dict(sq2)
+ common = {}
+ diff = {}
+ for k, value1 in param1.items():
+ value2 = param2[k]
+ if value1 == value2:
+ common[k] = value1
+ else:
+ diff[k] = (value1, value2)
+ return (common, diff)
+
+
+class TestResults:
+
+ __slots__ = 'errors', 'logs', 'languages'
+
+ def __init__(self):
+ self.errors: typing.Dict[str, typing.List[str]] = {}
+ self.logs: typing.Dict[str, typing.List[typing.Any]] = {}
+ self.languages: typing.Set[str] = set()
+
+ def add_error(self, test, message, *args):
+ # message to self.errors
+ errors_for_test = self.errors.setdefault(test, [])
+ if message not in errors_for_test:
+ errors_for_test.append(message)
+ # (message, *args) to self.logs
+ logs_for_test = self.logs.setdefault(test, [])
+ if (message, *args) not in logs_for_test:
+ logs_for_test.append((message, *args))
+
+ def add_language(self, language):
+ self.languages.add(language)
+
+ @property
+ def succesfull(self):
+ return len(self.errors) == 0
+
+ def __iter__(self):
+ for test_name, errors in self.errors.items():
+ for error in sorted(errors):
+ yield (test_name, error)
+
+
+class ResultContainerTests:
+
+ __slots__ = 'test_name', 'search_query', 'result_container', 'languages', 'stop_test', 'test_results'
+
+ def __init__(self,
+ test_results: TestResults,
+ test_name: str,
+ search_query: SearchQuery,
+ result_container: ResultContainer):
+ self.test_name = test_name
+ self.search_query = search_query
+ self.result_container = result_container
+ self.languages: typing.Set[str] = set()
+ self.test_results = test_results
+ self.stop_test = False
+
+ @property
+ def result_urls(self):
+ results = self.result_container.get_ordered_results()
+ return [result['url'] for result in results]
+
+ def _record_error(self, message: str, *args) -> None:
+ sq = _search_query_to_dict(self.search_query)
+ sqstr = ' '.join(['{}={!r}'.format(k, v) for k, v in sq.items()])
+ self.test_results.add_error(self.test_name, message, *args, '(' + sqstr + ')')
+
+ def _add_language(self, text: str) -> typing.Optional[str]:
+ r = cld3.get_language(str(text)) # pylint: disable=E1101
+ if r is not None and r.probability >= 0.98 and r.is_reliable:
+ self.languages.add(r.language)
+ self.test_results.add_language(r.language)
+ return None
+
+ def _check_result(self, result):
+ if not _check_no_html(result.get('title', '')):
+ self._record_error('HTML in title', repr(result.get('title', '')))
+ if not _check_no_html(result.get('content', '')):
+ self._record_error('HTML in content', repr(result.get('content', '')))
+
+ self._add_language(result.get('title', ''))
+ self._add_language(result.get('content', ''))
+
+ template = result.get('template', 'default.html')
+ if template == 'default.html':
+ return
+ if template == 'code.html':
+ return
+ if template == 'torrent.html':
+ return
+ if template == 'map.html':
+ return
+ if template == 'images.html':
+ thumbnail_src = result.get('thumbnail_src')
+ if thumbnail_src is not None:
+ if not _is_url_image(thumbnail_src):
+ self._record_error('thumbnail_src URL is invalid', thumbnail_src)
+ elif not _is_url_image(result.get('img_src')):
+ self._record_error('img_src URL is invalid', result.get('img_src'))
+ if template == 'videos.html' and not _is_url_image(result.get('thumbnail')):
+ self._record_error('thumbnail URL is invalid', result.get('img_src'))
+
+ def _check_results(self, results: list):
+ for result in results:
+ self._check_result(result)
+
+ def _check_answers(self, answers):
+ for answer in answers:
+ if not _check_no_html(answer):
+ self._record_error('HTML in answer', answer)
+
+ def _check_infoboxes(self, infoboxes):
+ for infobox in infoboxes:
+ if not _check_no_html(infobox.get('content', '')):
+ self._record_error('HTML in infobox content', infobox.get('content', ''))
+ self._add_language(infobox.get('content', ''))
+ for attribute in infobox.get('attributes', {}):
+ if not _check_no_html(attribute.get('value', '')):
+ self._record_error('HTML in infobox attribute value', attribute.get('value', ''))
+
+ def check_basic(self):
+ if len(self.result_container.unresponsive_engines) > 0:
+ for message in self.result_container.unresponsive_engines:
+ self._record_error(message[1] + ' ' + (message[2] or ''))
+ self.stop_test = True
+ return
+
+ results = self.result_container.get_ordered_results()
+ if len(results) > 0:
+ self._check_results(results)
+
+ if len(self.result_container.answers) > 0:
+ self._check_answers(self.result_container.answers)
+
+ if len(self.result_container.infoboxes) > 0:
+ self._check_infoboxes(self.result_container.infoboxes)
+
def has_infobox(self):
    """Record a 'No infobox' error when the ResultContainer holds no infobox."""
    infobox_count = len(self.result_container.infoboxes)
    if infobox_count == 0:
        self._record_error('No infobox')
+
def has_answer(self):
    """Record a 'No answer' error when the ResultContainer holds no answer."""
    answer_count = len(self.result_container.answers)
    if answer_count == 0:
        self._record_error('No answer')
+
def has_language(self, lang):
    """Record an error when `lang` is not one of ``self.languages``.

    According to the original note the languages are detected with pycld3,
    so the detection may not be accurate.
    """
    if lang in self.languages:
        return
    self._record_error(lang + ' not found')
+
def not_empty(self):
    """Record a 'No result' error when the ResultContainer has no result, no answer and no infobox."""
    container = self.result_container
    # evaluated in the same order as before: results, answers, infoboxes
    counts = (
        len(container.get_ordered_results()),
        len(container.answers),
        len(container.infoboxes),
    )
    if not any(counts):
        self._record_error('No result')
+
def one_title_contains(self, title: str):
    """Check that at least one result title contains `title` (case-insensitive comparison).

    An error is recorded when no title of the ordered results matches.
    """
    needle = title.lower()
    ordered = self.result_container.get_ordered_results()
    # any() stops at the first matching title, like the original early return
    if any(needle in entry['title'].lower() for entry in ordered):
        return
    self._record_error(('{!r} not found in the title'.format(needle)))
+
+
class CheckerTests:
    """Checks that compare several result containers of the same test.

    Attributes:
        test_results: object on which ``add_error(test_name, message)`` is called.
        test_name: name of the test the errors are reported under.
        result_container_tests_list: one ``ResultContainerTests`` per search query.
    """

    __slots__ = 'test_results', 'test_name', 'result_container_tests_list'

    def __init__(self,
                 test_results: TestResults,
                 test_name: str,
                 result_container_tests_list: typing.List[ResultContainerTests]):
        self.test_results = test_results
        self.test_name = test_name
        self.result_container_tests_list = result_container_tests_list

    def unique_results(self):
        """Check the results of each ResultContainer are unique.

        Every pair of containers is compared once; an error is added for each
        pair whose ``result_urls`` are equal.  Nothing is checked when there is
        no container at all or when the first container has no URL.
        """
        rct_list = self.result_container_tests_list
        urls_list = [rct.result_urls for rct in rct_list]
        # guard the empty list: urls_list[0] would raise IndexError
        if not urls_list or len(urls_list[0]) == 0:
            return
        # results on the first page
        for (i, urls_i), (j, urls_j) in itertools.combinations(enumerate(urls_list), 2):
            if urls_i == urls_j:
                common, diff = _search_query_diff(rct_list[i].search_query,
                                                  rct_list[j].search_query)
                common_str = ' '.join(['{}={!r}'.format(k, v) for k, v in common.items()])
                diff1_str = ', '.join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()])
                diff2_str = ', '.join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()])
                self.test_results.add_error(self.test_name,
                                            'results are identical for {} and {} ({})'
                                            .format(diff1_str, diff2_str, common_str))
+
+
class Checker:
    """Run the tests declared by an engine processor and collect the outcome.

    Attributes:
        processor: the engine processor under test.
        tests: mapping test name -> test parameters, from ``processor.get_tests()``.
        test_results: ``TestResults`` instance receiving the errors.
    """

    __slots__ = 'processor', 'tests', 'test_results'

    def __init__(self, processor: EngineProcessor):
        self.processor = processor
        self.tests = self.processor.get_tests()
        self.test_results = TestResults()

    @property
    def engineref_list(self):
        """One-item list: the engine name paired with its first category."""
        name = self.processor.engine_name
        category = self.processor.engine.categories[0]
        return [EngineRef(name, category)]

    @staticmethod
    def search_query_matrix_iterator(engineref_list, matrix):
        """Yield one SearchQuery per combination of the values in `matrix`.

        A tuple or list value contributes one alternative per item, any other
        value contributes a single alternative.  The ``query`` entry becomes
        the query of the SearchQuery, all other entries are passed as keyword
        arguments.
        """
        axes = []
        for name, values in matrix.items():
            if isinstance(values, (tuple, list)):
                axis = [(name, value) for value in values]
            else:
                axis = [(name, values)]
            axes.append(axis)

        for combination in itertools.product(*axes):
            params = dict(combination)
            query = params.pop('query')
            yield SearchQuery(query, engineref_list, **params)

    def call_test(self, obj, test_description):
        """Run one test on `obj`.

        `test_description` is either a method (name or function) or a
        tuple/list whose first item is the method and whose remaining items
        are the arguments.  A string is looked up on `obj`; a plain function
        is called directly; anything else is reported as an error.
        """
        if isinstance(test_description, (tuple, list)):
            method = test_description[0]
            args = test_description[1:]
        else:
            method, args = test_description, ()

        if isinstance(method, str) and hasattr(obj, method):
            getattr(obj, method)(*args)
            return
        if isinstance(method, types.FunctionType):
            method(*args)
            return
        self.test_results.add_error(obj.test_name,
                                    'method {!r} ({}) not found for {}'
                                    .format(method, method.__class__.__name__, obj.__class__.__name__))

    def call_tests(self, obj, test_descriptions):
        """Run every test of `test_descriptions` on `obj`."""
        for description in test_descriptions:
            self.call_test(obj, description)

    def search(self, search_query: SearchQuery) -> ResultContainer:
        """Run `search_query` through the processor and return the filled container.

        The container is returned empty when the processor gives no params.
        """
        container = ResultContainer()
        category = search_query.engineref_list[0].category
        params = self.processor.get_params(search_query, category)
        if params is not None:
            # start time is now, the timeout limit is fixed to 5
            self.processor.search(search_query.query, params, container, time(), 5)
        return container

    def get_result_container_tests(self, test_name: str, search_query: SearchQuery) -> ResultContainerTests:
        """Search, wrap the container in ResultContainerTests and run its basic checks."""
        container = self.search(search_query)
        container_tests = ResultContainerTests(self.test_results, test_name, search_query, container)
        container_tests.check_basic()
        return container_tests

    def run_test(self, test_name):
        """Run the test `test_name`.

        All the search queries of the matrix are built and executed first.
        The ``result_container`` tests then run on every container that did
        not ask to stop; the ``test`` tests run on the whole list unless a
        container asked to stop during the ``result_container`` stage.
        """
        test_parameters = self.tests[test_name]
        search_query_list = list(Checker.search_query_matrix_iterator(self.engineref_list, test_parameters['matrix']))
        rct_list = [self.get_result_container_tests(test_name, search_query) for search_query in search_query_list]

        stop_test = False
        if 'result_container' in test_parameters:
            for rct in rct_list:
                if rct.stop_test:
                    stop_test = True
                else:
                    self.call_tests(rct, test_parameters['result_container'])

        if stop_test or 'test' not in test_parameters:
            return
        checker_tests = CheckerTests(self.test_results, test_name, rct_list)
        self.call_tests(checker_tests, test_parameters['test'])

    def run(self):
        """Run all the tests, in the iteration order of ``self.tests``."""
        for test_name in self.tests:
            self.run_test(test_name)
diff --git a/searx/search/models.py b/searx/search/models.py
new file mode 100644
index 000000000..80ceaa223
--- /dev/null
+++ b/searx/search/models.py
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import typing
+
+
class EngineRef:
    """Reference to an engine: its name and the category it is used in.

    Instances compare equal when both name and category are equal, and are
    hashable consistently with that equality.
    """

    __slots__ = 'name', 'category'

    def __init__(self, name: str, category: str):
        self.name = name
        self.category = category

    def __repr__(self):
        return "EngineRef({!r}, {!r})".format(self.name, self.category)

    def __eq__(self, other):
        # Let Python handle foreign types instead of raising AttributeError
        # on the missing .name / .category attributes.
        if not isinstance(other, EngineRef):
            return NotImplemented
        return self.name == other.name and self.category == other.category

    def __hash__(self):
        return hash((self.name, self.category))
+
+
class SearchQuery:
    """container for all the search parameters (query, language, etc...)

    Two instances are equal when all their parameters are equal; the hash is
    computed from the same parameters.
    """

    __slots__ = ('query', 'engineref_list', 'lang', 'safesearch', 'pageno', 'time_range',
                 'timeout_limit', 'external_bang')

    def __init__(self,
                 query: str,
                 engineref_list: typing.List[EngineRef],
                 lang: str = 'all',
                 safesearch: int = 0,
                 pageno: int = 1,
                 time_range: typing.Optional[str] = None,
                 timeout_limit: typing.Optional[float] = None,
                 external_bang: typing.Optional[str] = None):
        self.query = query
        self.engineref_list = engineref_list
        self.lang = lang
        self.safesearch = safesearch
        self.pageno = pageno
        self.time_range = time_range
        self.timeout_limit = timeout_limit
        self.external_bang = external_bang

    @property
    def categories(self):
        """Distinct categories of the referenced engines (order not guaranteed)."""
        return list(set(map(lambda engineref: engineref.category, self.engineref_list)))

    def __repr__(self):
        return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\
               format(self.query, self.engineref_list, self.lang, self.safesearch,
                      self.pageno, self.time_range, self.timeout_limit, self.external_bang)

    def __eq__(self, other):
        # Let Python handle foreign types instead of raising AttributeError
        # on the first missing attribute.
        if not isinstance(other, SearchQuery):
            return NotImplemented
        return (self.query == other.query
                and self.engineref_list == other.engineref_list
                and self.lang == other.lang
                and self.safesearch == other.safesearch
                and self.pageno == other.pageno
                and self.time_range == other.time_range
                and self.timeout_limit == other.timeout_limit
                and self.external_bang == other.external_bang)

    def __hash__(self):
        # the list of engine references is converted to a tuple to be hashable
        return hash((self.query, tuple(self.engineref_list), self.lang, self.safesearch, self.pageno, self.time_range,
                     self.timeout_limit, self.external_bang))
diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py
index cf3fd7236..eb8d296ec 100644
--- a/searx/search/processors/abstract.py
+++ b/searx/search/processors/abstract.py
@@ -37,3 +37,15 @@ class EngineProcessor:
@abstractmethod
def search(self, query, params, result_container, start_time, timeout_limit):
pass
+
def get_tests(self):
    """Return the checker tests of the engine.

    When the engine defines ``tests`` that value is returned as is.
    Otherwise a new dict is built from the engine's ``additional_tests``
    (if any) updated with ``get_default_tests()``: on a name clash the
    default test wins, as before.  The engine's ``additional_tests`` dict
    itself is left untouched.
    """
    tests = getattr(self.engine, 'tests', None)
    if tests is not None:
        return tests
    # copy: never write the defaults into the engine's own configuration;
    # "or {}" also covers an attribute explicitly set to None
    merged = dict(getattr(self.engine, 'additional_tests', None) or {})
    merged.update(self.get_default_tests())
    return merged
+
def get_default_tests(self):
    """Return the default tests of the processor: none in this base implementation."""
    return dict()
diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py
index b62f8059e..0ceb0adf2 100644
--- a/searx/search/processors/online.py
+++ b/searx/search/processors/online.py
@@ -179,15 +179,15 @@ class OnlineProcessor(EngineProcessor):
requests_exception = True
elif (issubclass(e.__class__, SearxEngineCaptchaException)):
result_container.add_unresponsive_engine(self.engine_name, 'CAPTCHA required')
- logger.exception('engine {0} : CAPTCHA')
+ logger.exception('engine {0} : CAPTCHA'.format(self.engine_name))
suspended_time = e.suspended_time # pylint: disable=no-member
elif (issubclass(e.__class__, SearxEngineTooManyRequestsException)):
result_container.add_unresponsive_engine(self.engine_name, 'too many requests')
- logger.exception('engine {0} : Too many requests')
+ logger.exception('engine {0} : Too many requests'.format(self.engine_name))
suspended_time = e.suspended_time # pylint: disable=no-member
elif (issubclass(e.__class__, SearxEngineAccessDeniedException)):
result_container.add_unresponsive_engine(self.engine_name, 'blocked')
- logger.exception('engine {0} : Searx is blocked')
+ logger.exception('engine {0} : Searx is blocked'.format(self.engine_name))
suspended_time = e.suspended_time # pylint: disable=no-member
else:
result_container.add_unresponsive_engine(self.engine_name, 'unexpected crash')
@@ -211,3 +211,49 @@ class OnlineProcessor(EngineProcessor):
# reset the suspend variables
self.engine.continuous_errors = 0
self.engine.suspend_end_time = 0
+
def get_default_tests(self):
    """Build the default checker tests from the attributes of the engine.

    A ``simple`` test is always present.  ``paging``, ``time_range``,
    ``lang_fr`` / ``lang_en`` and ``safesearch`` tests are added when the
    engine attribute of the matching name (``paging``, ``time_range``,
    ``lang``, ``safesearch``) is set to a true value.

    Each test is a dict with a ``matrix`` of search parameters and the
    names of the checks to run (``result_container`` and / or ``test``).
    """
    tests = {}

    tests['simple'] = {
        'matrix': {'query': ('life', 'computer')},
        'result_container': ['not_empty'],
    }

    if getattr(self.engine, 'paging', False):
        tests['paging'] = {
            'matrix': {'query': 'time',
                       'pageno': (1, 2, 3)},
            'result_container': ['not_empty'],
            'test': ['unique_results']
        }
        if 'general' in self.engine.categories:
            # avoid documentation about HTML tags (<time> and <input type="time">)
            tests['paging']['matrix']['query'] = 'news'

    if getattr(self.engine, 'time_range', False):
        tests['time_range'] = {
            'matrix': {'query': 'news',
                       'time_range': (None, 'day')},
            'result_container': ['not_empty'],
            'test': ['unique_results']
        }

    if getattr(self.engine, 'lang', False):
        # the check is named has_language: 'has_lang' matches no method and
        # would only produce a "method not found" error
        tests['lang_fr'] = {
            'matrix': {'query': 'paris', 'lang': 'fr'},
            'result_container': ['not_empty', ('has_language', 'fr')],
        }
        tests['lang_en'] = {
            'matrix': {'query': 'paris', 'lang': 'en'},
            'result_container': ['not_empty', ('has_language', 'en')],
        }

    if getattr(self.engine, 'safesearch', False):
        tests['safesearch'] = {
            'matrix': {'query': 'porn',
                       'safesearch': (0, 2)},
            'test': ['unique_results']
        }

    return tests
diff --git a/searx/search/processors/online_currency.py b/searx/search/processors/online_currency.py
index f0e919c03..132c10594 100644
--- a/searx/search/processors/online_currency.py
+++ b/searx/search/processors/online_currency.py
@@ -55,3 +55,13 @@ class OnlineCurrencyProcessor(OnlineProcessor):
params['from_name'] = iso4217_to_name(from_currency, 'en')
params['to_name'] = iso4217_to_name(to_currency, 'en')
return params
+
def get_default_tests(self):
    """Return the default test: a currency conversion query that must give an answer."""
    return {
        'currency': {
            'matrix': {'query': '1337 usd in rmb'},
            'result_container': ['has_answer'],
        },
    }
diff --git a/searx/search/processors/online_dictionary.py b/searx/search/processors/online_dictionary.py
index 8e9ef1620..987c710a1 100644
--- a/searx/search/processors/online_dictionary.py
+++ b/searx/search/processors/online_dictionary.py
@@ -35,3 +35,21 @@ class OnlineDictionaryProcessor(OnlineProcessor):
params['query'] = query
return params
+
def get_default_tests(self):
    """Return the default translation test.

    Engines whose ``paging`` attribute is true get ``translation_paging``
    (pages 1 to 3, results must differ between pages); the others get the
    single-page ``translation`` test.  Both expect a non-empty result with
    'house' in one of the titles.
    """
    if getattr(self.engine, 'paging', False):
        return {
            'translation_paging': {
                'matrix': {'query': 'en-es house',
                           'pageno': (1, 2, 3)},
                'result_container': ['not_empty', ('one_title_contains', 'house')],
                'test': ['unique_results']
            },
        }
    return {
        'translation': {
            'matrix': {'query': 'en-es house'},
            'result_container': ['not_empty', ('one_title_contains', 'house')],
        },
    }
diff --git a/searx/settings.yml b/searx/settings.yml
index e85b4939a..767bf6d82 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1,6 +1,16 @@
general:
debug : False # Debug mode, only for development
instance_name : "searx" # displayed name
+ contact_url: False # mailto:contact@example.com
+
+brand:
+ git_url: https://github.com/searx/searx
+ git_branch: master
+ issue_url: https://github.com/searx/searx/issues
+ docs_url: https://searx.github.io/searx
+ public_instances: https://searx.space
+ wiki_url: https://github.com/searx/searx/wiki
+ twitter_url: https://twitter.com/Searx_engine
search:
safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict
@@ -92,6 +102,34 @@ outgoing: # communication with search engines
# - "HTTPS rewrite"
# - ...
+checker:
+ # disable checker when in debug mode
+ off_when_debug: True
+ # scheduling: interval or int
+ # use "scheduling: False" to disable scheduling
+ scheduling:
+ start_after: [300, 1800] # delay to start the first run of the checker
+ every: [86400, 90000] # how often the checker runs
+ # additional tests: only for the YAML anchors (see the engines section)
+ additional_tests:
+ rosebud: &test_rosebud
+ matrix:
+ query: rosebud
+ lang: en
+ result_container:
+ - not_empty
+ - ['one_title_contains', 'citizen kane']
+ test:
+ - unique_results
+ # tests: only for the YAML anchors (see the engines section)
+ tests:
+ infobox: &tests_infobox
+ infobox:
+ matrix:
+ query: ["linux", "new york", "bbc"]
+ result_container:
+ - has_infobox
+
engines:
- name: apk mirror
engine: apkmirror
@@ -119,6 +157,13 @@ engines:
timeout : 7.0
disabled : True
shortcut : ai
+ about:
+ website: https://archive.is/
+ wikidata_id: Q13515725
+ official_api_documentation: http://mementoweb.org/depot/native/archiveis/
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : arxiv
engine : arxiv
@@ -163,6 +208,13 @@ engines:
timeout : 4.0
disabled : True
shortcut : bb
+ about:
+ website: https://bitbucket.org/
+ wikidata_id: Q2493781
+ official_api_documentation: https://developer.atlassian.com/bitbucket
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : btdigg
engine : btdigg
@@ -178,6 +230,13 @@ engines:
categories : videos
disabled : True
shortcut : c3tv
+ about:
+ website: https://media.ccc.de/
+ wikidata_id: Q80729951
+ official_api_documentation: https://github.com/voc/voctoweb
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : crossref
engine : json_engine
@@ -188,6 +247,13 @@ engines:
content_query : fullCitation
categories : science
shortcut : cr
+ about:
+ website: https://www.crossref.org/
+ wikidata_id: Q5188229
+ official_api_documentation: https://github.com/CrossRef/rest-api-doc
+ use_official_api: false
+ require_api_key: false
+ results: JSON
- name : currency
engine : currency_convert
@@ -208,6 +274,7 @@ engines:
shortcut : ddd
weight : 2
disabled : True
+ tests: *tests_infobox
# cloudflare protected
# - name : digbt
@@ -232,6 +299,13 @@ engines:
categories : general
shortcut : ew
disabled : True
+ about:
+ website: https://www.erowid.org/
+ wikidata_id: Q1430691
+ official_api_documentation:
+ use_official_api: false
+ require_api_key: false
+ results: HTML
# - name : elasticsearch
# shortcut : es
@@ -252,6 +326,7 @@ engines:
shortcut : wd
timeout : 3.0
weight : 2
+ tests: *tests_infobox
- name : duckduckgo
engine : duckduckgo
@@ -268,6 +343,8 @@ engines:
engine : etools
shortcut : eto
disabled : True
+ additional_tests:
+ rosebud: *test_rosebud
- name : etymonline
engine : xpath
@@ -279,6 +356,13 @@ engines:
first_page_num : 1
shortcut : et
disabled : True
+ about:
+ website: https://www.etymonline.com/
+ wikidata_id: Q1188617
+ official_api_documentation:
+ use_official_api: false
+ require_api_key: false
+ results: HTML
# - name : ebay
# engine : ebay
@@ -318,6 +402,9 @@ engines:
search_type : title
timeout : 5.0
disabled : True
+ about:
+ website: https://directory.fsf.org/
+ wikidata_id: Q2470288
- name : frinkiac
engine : frinkiac
@@ -333,6 +420,8 @@ engines:
shortcut : gb
timeout : 3.0
disabled: True
+ additional_tests:
+ rosebud: *test_rosebud
- name : gentoo
engine : gentoo
@@ -350,6 +439,13 @@ engines:
shortcut : gl
timeout : 10.0
disabled : True
+ about:
+ website: https://about.gitlab.com/
+ wikidata_id: Q16639197
+ official_api_documentation: https://docs.gitlab.com/ee/api/
+ use_official_api: false
+ require_api_key: false
+ results: JSON
- name : github
engine : github
@@ -367,6 +463,13 @@ engines:
categories : it
shortcut : cb
disabled : True
+ about:
+ website: https://codeberg.org/
+ wikidata_id:
+ official_api_documentation: https://try.gitea.io/api/swagger
+ use_official_api: false
+ require_api_key: false
+ results: JSON
- name : google
engine : google
@@ -397,6 +500,13 @@ engines:
first_page_num : 0
categories : science
shortcut : gos
+ about:
+ website: https://scholar.google.com/
+ wikidata_id: Q494817
+ official_api_documentation:
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : google play apps
engine : xpath
@@ -409,6 +519,13 @@ engines:
categories : files
shortcut : gpa
disabled : True
+ about:
+ website: https://play.google.com/
+ wikidata_id: Q79576
+ official_api_documentation:
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : google play movies
engine : xpath
@@ -421,6 +538,13 @@ engines:
categories : videos
shortcut : gpm
disabled : True
+ about:
+ website: https://play.google.com/
+ wikidata_id: Q79576
+ official_api_documentation:
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : google play music
engine : xpath
@@ -433,6 +557,13 @@ engines:
categories : music
shortcut : gps
disabled : True
+ about:
+ website: https://play.google.com/
+ wikidata_id: Q79576
+ official_api_documentation:
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : geektimes
engine : xpath
@@ -445,6 +576,13 @@ engines:
timeout : 4.0
disabled : True
shortcut : gt
+ about:
+ website: https://geektimes.ru/
+ wikidata_id: Q50572423
+ official_api_documentation:
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : habrahabr
engine : xpath
@@ -457,6 +595,13 @@ engines:
timeout : 4.0
disabled : True
shortcut : habr
+ about:
+ website: https://habr.com/
+ wikidata_id: Q4494434
+ official_api_documentation: https://habr.com/en/docs/help/api/
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : hoogle
engine : json_engine
@@ -469,6 +614,13 @@ engines:
page_size : 20
categories : it
shortcut : ho
+ about:
+ website: https://www.haskell.org/
+ wikidata_id: Q34010
+ official_api_documentation: https://hackage.haskell.org/api
+ use_official_api: false
+ require_api_key: false
+ results: JSON
- name : ina
engine : ina
@@ -478,7 +630,9 @@ engines:
- name : invidious
engine : invidious
- base_url : 'https://invidio.us/'
+ base_url :
+ - https://invidious.tube/
+ - https://invidious.snopyta.org/
shortcut: iv
timeout : 5.0
disabled : True
@@ -499,6 +653,13 @@ engines:
timeout : 7.0
disabled : True
shortcut : lg
+ about:
+ website: http://libgen.rs/
+ wikidata_id: Q22017206
+ official_api_documentation:
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : lobste.rs
engine : xpath
@@ -511,6 +672,13 @@ engines:
shortcut : lo
timeout : 3.0
disabled: True
+ about:
+ website: https://lobste.rs/
+ wikidata_id: Q60762874
+ official_api_documentation:
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : metager
engine : xpath
@@ -522,6 +690,13 @@ engines:
categories : general
shortcut : mg
disabled : True
+ about:
+ website: https://metager.org/
+ wikidata_id: Q1924645
+ official_api_documentation:
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : microsoft academic
engine : microsoft_academic
@@ -545,6 +720,13 @@ engines:
disabled: True
timeout: 5.0
shortcut : npm
+ about:
+ website: https://npms.io/
+ wikidata_id: Q7067518
+ official_api_documentation: https://api-docs.npms.io/
+ use_official_api: false
+ require_api_key: false
+ results: JSON
# Requires Tor
- name : not evil
@@ -573,6 +755,13 @@ engines:
categories : science
shortcut : oad
timeout: 5.0
+ about:
+ website: https://www.openaire.eu/
+ wikidata_id: Q25106053
+ official_api_documentation: https://api.openaire.eu/
+ use_official_api: false
+ require_api_key: false
+ results: JSON
- name : openairepublications
engine : json_engine
@@ -585,6 +774,13 @@ engines:
categories : science
shortcut : oap
timeout: 5.0
+ about:
+ website: https://www.openaire.eu/
+ wikidata_id: Q25106053
+ official_api_documentation: https://api.openaire.eu/
+ use_official_api: false
+ require_api_key: false
+ results: JSON
# - name : opensemanticsearch
# engine : opensemantic
@@ -606,6 +802,13 @@ engines:
timeout : 4.0
disabled : True
shortcut : or
+ about:
+ website: https://openrepos.net/
+ wikidata_id:
+ official_api_documentation:
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : pdbe
engine : pdbe
@@ -636,6 +839,8 @@ engines:
shortcut : qw
categories : general
disabled : True
+ additional_tests:
+ rosebud: *test_rosebud
- name : qwant images
engine : qwant
@@ -722,6 +927,13 @@ engines:
content_xpath : .//div[@class="search-result-abstract"]
shortcut : se
categories : science
+ about:
+ website: https://www.semanticscholar.org/
+ wikidata_id: Q22908627
+ official_api_documentation: https://api.semanticscholar.org/
+ use_official_api: false
+ require_api_key: false
+ results: HTML
# Spotify needs API credentials
# - name : spotify
@@ -735,6 +947,8 @@ engines:
shortcut : sp
timeout : 6.0
disabled : True
+ additional_tests:
+ rosebud: *test_rosebud
- name : tokyotoshokan
engine : tokyotoshokan
@@ -838,6 +1052,9 @@ engines:
number_of_results : 5
search_type : text
disabled : True
+ about:
+ website: https://www.wikibooks.org/
+ wikidata_id: Q367
- name : wikinews
engine : mediawiki
@@ -847,6 +1064,9 @@ engines:
number_of_results : 5
search_type : text
disabled : True
+ about:
+ website: https://www.wikinews.org/
+ wikidata_id: Q964
- name : wikiquote
engine : mediawiki
@@ -856,6 +1076,11 @@ engines:
number_of_results : 5
search_type : text
disabled : True
+ additional_tests:
+ rosebud: *test_rosebud
+ about:
+ website: https://www.wikiquote.org/
+ wikidata_id: Q369
- name : wikisource
engine : mediawiki
@@ -865,6 +1090,9 @@ engines:
number_of_results : 5
search_type : text
disabled : True
+ about:
+ website: https://www.wikisource.org/
+ wikidata_id: Q263
- name : wiktionary
engine : mediawiki
@@ -874,6 +1102,9 @@ engines:
number_of_results : 5
search_type : text
disabled : True
+ about:
+ website: https://www.wiktionary.org/
+ wikidata_id: Q151
- name : wikiversity
engine : mediawiki
@@ -883,6 +1114,9 @@ engines:
number_of_results : 5
search_type : text
disabled : True
+ about:
+ website: https://www.wikiversity.org/
+ wikidata_id: Q370
- name : wikivoyage
engine : mediawiki
@@ -892,6 +1126,9 @@ engines:
number_of_results : 5
search_type : text
disabled : True
+ about:
+ website: https://www.wikivoyage.org/
+ wikidata_id: Q373
- name : wolframalpha
shortcut : wa
@@ -939,6 +1176,13 @@ engines:
first_page_num : 0
page_size : 10
disabled : True
+ about:
+ website: https://www.seznam.cz/
+ wikidata_id: Q3490485
+ official_api_documentation: https://api.sklik.cz/
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : mojeek
shortcut: mjk
@@ -953,6 +1197,13 @@ engines:
first_page_num : 0
page_size : 10
disabled : True
+ about:
+ website: https://www.mojeek.com/
+ wikidata_id: Q60747299
+ official_api_documentation: https://www.mojeek.com/services/api.html/
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : naver
shortcut: nvr
@@ -967,6 +1218,13 @@ engines:
first_page_num : 1
page_size : 10
disabled : True
+ about:
+ website: https://www.naver.com/
+ wikidata_id: Q485639
+ official_api_documentation: https://developers.naver.com/docs/nmt/examples/
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : rubygems
shortcut: rbg
@@ -981,6 +1239,13 @@ engines:
first_page_num : 1
categories: it
disabled : True
+ about:
+ website: https://rubygems.org/
+ wikidata_id: Q1853420
+ official_api_documentation: https://guides.rubygems.org/rubygems-org-api/
+ use_official_api: false
+ require_api_key: false
+ results: HTML
- name : peertube
engine: peertube
diff --git a/searx/shared/__init__.py b/searx/shared/__init__.py
new file mode 100644
index 000000000..cbe24d239
--- /dev/null
+++ b/searx/shared/__init__.py
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import logging
+
+logger = logging.getLogger('searx.shared')
+
# Select the SharedDict / schedule implementation:
# * uwsgi module missing                -> shared_simple
# * uwsgi present but cache not usable  -> shared_simple storage, scheduling disabled
# * uwsgi present and cache usable      -> shared_uwsgi
try:
    import uwsgi
except ImportError:
    # no uwsgi
    from .shared_simple import SimpleSharedDict as SharedDict, schedule
    logger.info('Use shared_simple implementation')
else:
    try:
        # round trip through the uwsgi cache to verify it is configured
        uwsgi.cache_update('dummy', b'dummy')
        if uwsgi.cache_get('dummy') != b'dummy':
            raise Exception()
    except Exception:  # not a bare except: let KeyboardInterrupt / SystemExit propagate
        # uwsgi.ini configuration problem: disable all scheduling
        logger.error('uwsgi.ini configuration error, add this line to your uwsgi.ini\n'
                     'cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1')
        from .shared_simple import SimpleSharedDict as SharedDict

        def schedule(delay, func, *args):
            """Scheduling is disabled: nothing is scheduled, always return False."""
            return False
    else:
        # uwsgi
        from .shared_uwsgi import UwsgiCacheSharedDict as SharedDict, schedule
        logger.info('Use shared_uwsgi implementation')

storage = SharedDict()
diff --git a/searx/shared/shared_abstract.py b/searx/shared/shared_abstract.py
new file mode 100644
index 000000000..b1c72aabe
--- /dev/null
+++ b/searx/shared/shared_abstract.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+from abc import ABC, abstractmethod
+
+
class SharedDict(ABC):
    """Abstract key / value storage with integer and string accessors."""

    @abstractmethod
    def get_int(self, key):
        """Return the integer stored under `key`."""

    @abstractmethod
    def set_int(self, key, value):
        """Store the integer `value` under `key`."""

    @abstractmethod
    def get_str(self, key):
        """Return the string stored under `key`."""

    @abstractmethod
    def set_str(self, key, value):
        """Store the string `value` under `key`."""
diff --git a/searx/shared/shared_simple.py b/searx/shared/shared_simple.py
new file mode 100644
index 000000000..48d8cb822
--- /dev/null
+++ b/searx/shared/shared_simple.py
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import threading
+
+from . import shared_abstract
+
+
class SimpleSharedDict(shared_abstract.SharedDict):
    """SharedDict backed by a plain dict held in the attribute ``d``.

    Integers and strings share the same dict; a missing key reads as None.
    """

    __slots__ = 'd',

    def __init__(self):
        self.d = dict()

    def get_int(self, key):
        return self.d.get(key)

    def set_int(self, key, value):
        self.d[key] = value

    def get_str(self, key):
        return self.d.get(key)

    def set_str(self, key, value):
        self.d[key] = value
+
+
def schedule(delay, func, *args):
    """Call ``func(*args)`` every `delay` seconds using daemon timers.

    The first call happens `delay` seconds after this function returns.
    Each run arms the next timer before calling `func`.  Always returns True.
    """
    def arm():
        timer = threading.Timer(delay, tick)
        timer.daemon = True
        timer.start()

    def tick():
        # re-arm first, then run the job
        arm()
        func(*args)

    arm()
    return True
diff --git a/searx/shared/shared_uwsgi.py b/searx/shared/shared_uwsgi.py
new file mode 100644
index 000000000..a6dba9f59
--- /dev/null
+++ b/searx/shared/shared_uwsgi.py
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import time
+import uwsgi # pylint: disable=E0401
+from . import shared_abstract
+
+
+_last_signal = 10
+
+
class UwsgiCacheSharedDict(shared_abstract.SharedDict):
    """SharedDict stored in the uwsgi cache.

    Integers are written as 4 big-endian bytes, strings as UTF-8 bytes.
    A key missing from the cache reads as None.
    """

    def get_int(self, key):
        raw = uwsgi.cache_get(key)
        return raw if raw is None else int.from_bytes(raw, 'big')

    def set_int(self, key, value):
        uwsgi.cache_update(key, value.to_bytes(4, 'big'))

    def get_str(self, key):
        raw = uwsgi.cache_get(key)
        return raw if raw is None else raw.decode('utf-8')

    def set_str(self, key, value):
        uwsgi.cache_update(key, value.encode('utf-8'))
+
+
def schedule(delay, func, *args):
    """
    Register a uwsgi timer that calls ``func(*args)`` every `delay` seconds.

    Can be implemented using a spooler.
    https://uwsgi-docs.readthedocs.io/en/latest/PythonDecorators.html

    To make the uwsgi configuration simple, use the alternative implementation:
    a signal handler that records the time of its last run in the uwsgi cache
    and returns without calling `func` when the previous run is less than
    `delay` seconds old.  Always returns True.
    """
    global _last_signal

    def sighandler(signum):
        timestamp = int(time.time())
        cache_key = 'scheduler_call_time_signal_' + str(signum)
        uwsgi.lock()
        try:
            previous = uwsgi.cache_get(cache_key)
            # skip this run when the last recorded one is less than `delay` old
            if previous is not None and timestamp - int.from_bytes(previous, 'big') < delay:
                return
            uwsgi.cache_update(cache_key, timestamp.to_bytes(4, 'big'))
        finally:
            # the lock is released on both the early return and the normal path
            uwsgi.unlock()
        func(*args)

    # each scheduled function gets its own signal number
    signal_num = _last_signal
    _last_signal = signal_num + 1
    uwsgi.register_signal(signal_num, 'worker', sighandler)
    uwsgi.add_timer(signal_num, delay)
    return True
diff --git a/searx/templates/oscar/results.html b/searx/templates/oscar/results.html
index 7f6071374..eada8cd26 100644
--- a/searx/templates/oscar/results.html
+++ b/searx/templates/oscar/results.html
@@ -57,7 +57,7 @@
</div>
{%- endif %}
- <div class="panel panel-default">
+ <div class="panel panel-default hidden-xs">
<div class="panel-heading">{{- "" -}}
<h4 class="panel-title">{{ _('Links') }}</h4>{{- "" -}}
</div>
diff --git a/searx/webapp.py b/searx/webapp.py
index 49750d210..985eced18 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -71,7 +71,8 @@ from searx.webadapter import get_search_query_from_webapp, get_selected_categori
from searx.utils import html_to_text, gen_useragent, dict_subset, match_language
from searx.version import VERSION_STRING
from searx.languages import language_codes as languages
-from searx.search import SearchWithPlugins, initialize
+from searx.search import SearchWithPlugins, initialize as search_initialize
+from searx.search.checker import get_result as checker_get_result
from searx.query import RawTextQuery
from searx.autocomplete import searx_bang, backends as autocomplete_backends
from searx.plugins import plugins
@@ -81,7 +82,6 @@ from searx.answerers import answerers
from searx.poolrequests import get_global_proxies
from searx.metrology.error_recorder import errors_per_engines
-
# serve pages with HTTP/1.1
from werkzeug.serving import WSGIRequestHandler
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
@@ -136,7 +136,7 @@ werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__mai
# initialize the engines except on the first run of the werkzeug server.
if not werkzeug_reloader\
or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_MAIN") == "true"):
- initialize()
+ search_initialize(enable_checker=True)
babel = Babel(app)
@@ -977,6 +977,12 @@ def stats_errors():
return jsonify(result)
@app.route('/stats/checker', methods=['GET'])
def stats_checker():
    """Return the checker result as a JSON response."""
    return jsonify(checker_get_result())
+
+
@app.route('/robots.txt', methods=['GET'])
def robots():
return Response("""User-agent: *
@@ -1071,6 +1077,7 @@ def config():
'default_theme': settings['ui']['default_theme'],
'version': VERSION_STRING,
'brand': {
+ 'CONTACT_URL': brand.CONTACT_URL,
'GIT_URL': brand.GIT_URL,
'DOCS_URL': brand.DOCS_URL
},
diff --git a/setup.py b/setup.py
index 6a78f61b4..09a3021ee 100644
--- a/setup.py
+++ b/setup.py
@@ -7,10 +7,8 @@ from setuptools import find_packages
import os
import sys
-# required to load VERSION_STRING constant
-sys.path.insert(0, './searx')
-from version import VERSION_STRING
-import brand
+from searx.version import VERSION_STRING
+from searx import brand
with open('README.rst', encoding='utf-8') as f:
long_description = f.read()
@@ -51,7 +49,8 @@ setup(
},
entry_points={
'console_scripts': [
- 'searx-run = searx.webapp:run'
+ 'searx-run = searx.webapp:run',
+ 'searx-checker = searx.search.checker.__main__:main'
]
},
package_data={
diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py
index eff4d643c..0176f735e 100644
--- a/tests/unit/test_query.py
+++ b/tests/unit/test_query.py
@@ -59,6 +59,15 @@ class TestQuery(SearxTestCase):
self.assertEqual(len(query.languages), 0)
self.assertFalse(query.specific)
+ def test_empty_colon_in_query(self):
+ # A lone ':' surrounded by spaces must leave the query untouched: the
+ # full query equals the input, no query part and no language is
+ # extracted, and the query is not flagged as "specific".
+ query_text = 'the : query'
+ query = RawTextQuery(query_text, [])
+
+ self.assertEqual(query.getFullQuery(), query_text)
+ self.assertEqual(len(query.query_parts), 0)
+ self.assertEqual(len(query.languages), 0)
+ self.assertFalse(query.specific)
+
def test_timeout_below100(self):
query_text = '<3 the query'
query = RawTextQuery(query_text, [])
diff --git a/utils/brand.env b/utils/brand.env
index 2e763159d..2136d278f 100644
--- a/utils/brand.env
+++ b/utils/brand.env
@@ -1,7 +1,9 @@
+export SEARX_URL=''
export GIT_URL='https://github.com/searx/searx'
export GIT_BRANCH='master'
export ISSUE_URL='https://github.com/searx/searx/issues'
-export SEARX_URL='https://searx.me'
export DOCS_URL='https://searx.github.io/searx'
export PUBLIC_INSTANCES='https://searx.space'
export CONTACT_URL=''
+export WIKI_URL='https://github.com/searx/searx/wiki'
+export TWITTER_URL='https://twitter.com/Searx_engine'
diff --git a/utils/build_env.py b/utils/build_env.py
new file mode 100644
index 000000000..ffb2689e9
--- /dev/null
+++ b/utils/build_env.py
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""build environment used by shell scripts
+"""
+
+# set path
+import sys
+import os
+from os.path import realpath, dirname, join, sep, abspath
+
+# Make the repository root importable so that ``searx`` resolves to the
+# working tree this script lives in.
+repo_root = realpath(dirname(realpath(__file__)) + sep + '..')
+sys.path.insert(0, repo_root)
+
+# Under the assumption that a brand is always a fork, ensure that the settings
+# file from the repository's working tree is used to generate the build_env,
+# not the one from /etc/searx/settings.yml.  The variable is set before the
+# ``searx`` import below (presumably searx reads it at import time -- confirm).
+os.environ['SEARX_SETTINGS_PATH'] = abspath(dirname(__file__) + sep + 'settings.yml')
+
+from searx import brand
+
+# (environment variable name, value) pairs exported to the shell scripts.
+name_val = [
+    ('SEARX_URL', brand.SEARX_URL),
+    ('GIT_URL', brand.GIT_URL),
+    ('GIT_BRANCH', brand.GIT_BRANCH),
+    ('ISSUE_URL', brand.ISSUE_URL),
+    ('DOCS_URL', brand.DOCS_URL),
+    ('PUBLIC_INSTANCES', brand.PUBLIC_INSTANCES),
+    ('CONTACT_URL', brand.CONTACT_URL),
+    ('WIKI_URL', brand.WIKI_URL),
+    ('TWITTER_URL', brand.TWITTER_URL),
+]
+
+# Path of the generated file, relative to the repository root.
+brand_env = 'utils' + sep + 'brand.env'
+
+print('build %s' % brand_env)
+with open(join(repo_root, brand_env), 'w', encoding='utf-8') as f:
+    for name, val in name_val:
+        # NOTE(review): values are wrapped in single quotes without escaping;
+        # a value containing "'" would produce an invalid shell line.
+        print("export %s='%s'" % (name, val), file=f)
diff --git a/utils/fetch_engine_descriptions.py b/utils/fetch_engine_descriptions.py
new file mode 100644
index 000000000..9ca001d45
--- /dev/null
+++ b/utils/fetch_engine_descriptions.py
@@ -0,0 +1,206 @@
+#!/usr/bin/env python
+
+import sys
+import json
+from urllib.parse import quote, urlparse
+from os.path import realpath, dirname
+import cld3
+from lxml.html import fromstring
+
+# set path
+sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
+
+from searx.engines.wikidata import send_wikidata_query
+from searx.utils import extract_text
+import searx
+import searx.search
+import searx.poolrequests
+
+# SPARQL template: for every Wikidata item listed in %IDS%, select the names
+# of the Wikipedia articles about that item whose language is one of
+# %LANGUAGES_SPARQL%; names containing ':' are filtered out.  Both
+# placeholders are substituted with str.replace() before the query is sent.
+SPARQL_WIKIPEDIA_ARTICLE = """
+SELECT DISTINCT ?item ?name
+WHERE {
+ VALUES ?item { %IDS% }
+ ?article schema:about ?item ;
+ schema:inLanguage ?lang ;
+ schema:name ?name ;
+ schema:isPartOf [ wikibase:wikiGroup "wikipedia" ] .
+ FILTER(?lang in (%LANGUAGES_SPARQL%)) .
+ FILTER (!CONTAINS(?name, ':')) .
+}
+"""
+
+# SPARQL template: for every Wikidata item listed in %IDS%, select the item
+# descriptions whose language is one of %LANGUAGES_SPARQL%.
+# NOTE(review): ``ORDER BY ?itemLang`` refers to a variable that is not bound
+# anywhere in this query -- confirm whether ordering by the description
+# language was intended.
+SPARQL_DESCRIPTION = """
+SELECT DISTINCT ?item ?itemDescription
+WHERE {
+ VALUES ?item { %IDS% }
+ ?item schema:description ?itemDescription .
+ FILTER (lang(?itemDescription) in (%LANGUAGES_SPARQL%))
+}
+ORDER BY ?itemLang
+"""
+
+# Locale codes configured in the searx settings.
+LANGUAGES = searx.settings['locales'].keys()
+# Comma separated, quoted (via repr) and de-duplicated language codes: only
+# the part of each locale before '_' is kept.
+LANGUAGES_SPARQL = ', '.join(set(map(lambda l: repr(l.split('_')[0]), LANGUAGES)))
+# Space separated 'wd:<id>' list; assigned by initialize().
+IDS = None
+
+# engine name -> {language: [description, source]}
+descriptions = {}
+# Wikidata id -> set of engine names declaring that id
+wd_to_engine_name = {}
+
+
+def normalize_description(description):
+    """Return ``description`` as a single, whitespace-normalized line.
+
+    The characters chr(0) .. chr(30) are turned into spaces, then every run
+    of whitespace is collapsed into one space and the ends are trimmed.
+    """
+    control_to_space = dict.fromkeys(range(0, 31), ' ')
+    return ' '.join(description.translate(control_to_space).split())
+
+
+def update_description(engine_name, lang, description, source, replace=True):
+    """Store ``[normalized description, source]`` for an engine and language.
+
+    When ``replace`` is false, an entry already recorded for ``lang`` is
+    kept as is.
+    """
+    per_language = descriptions[engine_name]
+    if not replace and lang in per_language:
+        return
+    per_language[lang] = [normalize_description(description), source]
+
+
+def get_wikipedia_summary(language, pageid):
+    """Return the 'extract' field of a Wikipedia page summary.
+
+    The summary is requested from the REST API of the ``language`` edition
+    of Wikipedia; ``pageid`` is URL-quoted and used as the page title.
+
+    Returns None when the request or the decoding of the response fails, or
+    when the response has no 'extract' key.
+    """
+    search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
+    url = search_url.format(title=quote(pageid), language=language)
+    try:
+        response = searx.poolrequests.get(url)
+        response.raise_for_status()
+        api_result = json.loads(response.text)
+        return api_result.get('extract')
+    except Exception:
+        # Best effort: a missing summary is not fatal.  ``Exception`` (not a
+        # bare ``except``) so that KeyboardInterrupt / SystemExit still
+        # propagate and the script can be interrupted.
+        return None
+
+
+def detect_language(text):
+    """Return the language cld3 detects for ``text``, or None.
+
+    A detection is accepted only with a probability of at least 0.98 and
+    when cld3 flags it as reliable.
+    """
+    prediction = cld3.get_language(str(text))  # pylint: disable=E1101
+    if prediction is None:
+        return None
+    if prediction.probability >= 0.98 and prediction.is_reliable:
+        return prediction.language
+    return None
+
+
+def get_website_description(url, lang1, lang2=None):
+    """Fetch ``url`` and extract a description of the website from its HTML.
+
+    ``lang1`` and ``lang2`` (both optional) are sent in the Accept-Language
+    header.  The description is taken from the first non-empty value of
+    <meta name="description">, <meta property="og:description"> and <title>.
+
+    Returns a ``(lang, description)`` tuple, or ``(None, None)`` when the
+    request fails.  ``lang`` is the language detected from the description,
+    else the language found for the page, else 'en'; it is reduced to the
+    part before '_' or '-'.
+    """
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+        'DNT': '1',
+        'Upgrade-Insecure-Requests': '1',
+        'Sec-GPC': '1',
+        'Cache-Control': 'max-age=0',
+    }
+    if lang1 is not None:
+        lang_list = [lang1]
+        if lang2 is not None:
+            lang_list.append(lang2)
+        headers['Accept-Language'] = f'{",".join(lang_list)};q=0.8'
+    try:
+        response = searx.poolrequests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+    except Exception:
+        return (None, None)
+
+    try:
+        html = fromstring(response.text)
+    except ValueError:
+        # lxml refuses some str inputs; retry with the raw bytes.
+        html = fromstring(response.content)
+
+    description = extract_text(html.xpath('/html/head/meta[@name="description"]/@content'))
+    if not description:
+        description = extract_text(html.xpath('/html/head/meta[@property="og:description"]/@content'))
+    if not description:
+        description = extract_text(html.xpath('/html/head/title'))
+    lang = extract_text(html.xpath('/html/@lang'))
+    # ``lang1`` is None when no language was requested; a truthiness test
+    # avoids the TypeError that ``len(None)`` would raise in that case.
+    if lang is None and lang1:
+        lang = lang1
+    lang = detect_language(description) or lang or 'en'
+    lang = lang.split('_')[0]
+    lang = lang.split('-')[0]
+    return (lang, description)
+
+
+def initialize():
+    """Initialize searx and build the lookup tables used by the fetchers.
+
+    Creates an empty entry in ``descriptions`` for every engine, maps each
+    declared Wikidata id to the engines using it, and sets ``IDS`` to the
+    space separated 'wd:<id>' list of those ids.
+    """
+    global IDS
+    searx.search.initialize()
+    for engine_name, engine in searx.engines.engines.items():
+        descriptions[engine_name] = {}
+        wikidata_id = getattr(engine, "about", {}).get('wikidata_id')
+        if wikidata_id is None:
+            continue
+        wd_to_engine_name.setdefault(wikidata_id, set()).add(engine_name)
+
+    IDS = ' '.join('wd:' + wd_id for wd_id in wd_to_engine_name)
+
+
+def fetch_wikidata_descriptions():
+    """Record the Wikidata description of every engine with a Wikidata id.
+
+    Descriptions without any space (a single word) are ignored.
+    """
+    query = SPARQL_DESCRIPTION.replace('%IDS%', IDS)
+    query = query.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)
+    result = send_wikidata_query(query)
+    if result is None:
+        return
+    for binding in result['results']['bindings']:
+        wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
+        lang = binding['itemDescription']['xml:lang']
+        description = binding['itemDescription']['value']
+        if ' ' not in description:  # skip unique word description (like "website")
+            continue
+        for engine_name in wd_to_engine_name[wikidata_id]:
+            update_description(engine_name, lang, description, 'wikidata')
+
+
+def fetch_wikipedia_descriptions():
+    """Record the Wikipedia summary of every engine with a Wikidata id.
+
+    The article names come from a Wikidata query; each summary is fetched
+    with get_wikipedia_summary().  Missing summaries and summaries without
+    any space are ignored.
+    """
+    query = SPARQL_WIKIPEDIA_ARTICLE.replace('%IDS%', IDS)
+    query = query.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)
+    result = send_wikidata_query(query)
+    if result is None:
+        return
+    for binding in result['results']['bindings']:
+        wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
+        lang = binding['name']['xml:lang']
+        pageid = binding['name']['value']
+        description = get_wikipedia_summary(lang, pageid)
+        if description is None or ' ' not in description:
+            continue
+        for engine_name in wd_to_engine_name[wikidata_id]:
+            update_description(engine_name, lang, description, 'wikipedia')
+
+
+def normalize_url(url):
+    """Reduce an engine URL to the root URL of its website.
+
+    '{language}' is replaced by 'en', the path is set to '/', the params,
+    query and fragment are dropped, and a leading 'https://api.' becomes
+    'https://'.
+    """
+    parsed = urlparse(url.replace('{language}', 'en'))
+    site_root = parsed._replace(path='/', params='', query='', fragment='').geturl()
+    return site_root.replace('https://api.', 'https://')
+
+
+def fetch_website_description(engine_name, website):
+    """Record descriptions scraped from ``website`` for ``engine_name``.
+
+    The page is first requested without a language preference.  It is then
+    requested once per language of a fixed list, skipping languages that
+    already have a description.  The loop stops at the first request that
+    yields no description or the same description as the default one.
+    Existing descriptions are never replaced.
+    """
+    default_lang, default_description = get_website_description(website, None, None)
+    if default_lang is None or default_description is None:
+        return
+    update_description(engine_name, default_lang, default_description, website, replace=False)
+    for request_lang in ('en-US', 'es-US', 'fr-FR', 'zh', 'ja', 'ru', 'ar', 'ko'):
+        short_lang = request_lang.split('-')[0]
+        if short_lang in descriptions[engine_name]:
+            continue
+        lang, desc = get_website_description(website, request_lang, short_lang)
+        if desc is None or desc == default_description:
+            break
+        update_description(engine_name, lang, desc, website, replace=False)
+
+
+def fetch_website_descriptions():
+    """Scrape a website description for every engine.
+
+    The website is ``about['website']`` when the engine declares it;
+    otherwise it is derived from the engine's ``search_url`` and then from
+    its ``base_url``.  Engines providing none of them are skipped.
+    """
+    for engine_name, engine in searx.engines.engines.items():
+        website = getattr(engine, "about", {}).get('website')
+        if website is None:
+            for attr_name in ('search_url', 'base_url'):
+                # A default is required here: getattr() without one raises
+                # AttributeError instead of falling through to the next
+                # candidate.  Non-string values cannot be normalized.
+                candidate = getattr(engine, attr_name, None)
+                if isinstance(candidate, str):
+                    website = normalize_url(candidate)
+                    break
+        if website is not None:
+            fetch_website_description(engine_name, website)
+
+
+def main():
+    """Collect all engine descriptions and write them to stdout as JSON."""
+    initialize()
+    # Order matters: with the default replace=True a later source overwrites
+    # an earlier one for the same language, while website descriptions only
+    # fill the gaps.
+    for fetch in (fetch_wikidata_descriptions,
+                  fetch_wikipedia_descriptions,
+                  fetch_website_descriptions):
+        fetch()
+
+    serialized = json.dumps(descriptions, indent=1, separators=(',', ':'), ensure_ascii=False)
+    sys.stdout.write(serialized)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/utils/makefile.python b/utils/makefile.python
index 5d0837e00..345434384 100644
--- a/utils/makefile.python
+++ b/utils/makefile.python
@@ -243,8 +243,10 @@ pyenv-python: pyenvinstall
# [2] https://github.com/pypa/pip/pull/1519
# https://github.com/pypa/twine
-PHONY += upload-pypi
+PHONY += upload-pypi upload-pypi-test
upload-pypi: pyclean pyenvinstall pybuild
@$(PY_ENV_BIN)/twine upload $(PYDIST)/*
+upload-pypi-test: pyclean pyenvinstall pybuild
+ @$(PY_ENV_BIN)/twine upload -r testpypi $(PYDIST)/*
.PHONY: $(PHONY)
diff --git a/utils/searx.sh b/utils/searx.sh
index b7d3b8e1c..f85935fa2 100755
--- a/utils/searx.sh
+++ b/utils/searx.sh
@@ -46,6 +46,7 @@ SEARX_PACKAGES_debian="\
python3-dev python3-babel python3-venv
uwsgi uwsgi-plugin-python3
git build-essential libxslt-dev zlib1g-dev libffi-dev libssl-dev
+libprotobuf-dev protobuf-compiler
shellcheck"
BUILD_PACKAGES_debian="\
@@ -58,6 +59,7 @@ SEARX_PACKAGES_arch="\
python python-pip python-lxml python-babel
uwsgi uwsgi-plugin-python
git base-devel libxml2
+protobuf
shellcheck"
BUILD_PACKAGES_arch="\
@@ -69,7 +71,7 @@ SEARX_PACKAGES_fedora="\
python python-pip python-lxml python-babel
uwsgi uwsgi-plugin-python3
git @development-tools libxml2
-ShellCheck"
+ShellCheck protobuf-compiler protobuf-devel"
BUILD_PACKAGES_fedora="\
firefox graphviz graphviz-gd ImageMagick librsvg2-tools
@@ -82,7 +84,7 @@ SEARX_PACKAGES_centos="\
python36 python36-pip python36-lxml python-babel
uwsgi uwsgi-plugin-python3
git @development-tools libxml2
-ShellCheck"
+ShellCheck protobuf-compiler protobuf-devel"
BUILD_PACKAGES_centos="\
firefox graphviz graphviz-gd ImageMagick librsvg2-tools
@@ -331,6 +333,7 @@ git pull
pip install -U pip
pip install -U setuptools
pip install -U wheel
+pip install -U pyyaml
pip install -U -e .
EOF
install_settings
@@ -501,6 +504,7 @@ EOF
pip install -U pip
pip install -U setuptools
pip install -U wheel
+pip install -U pyyaml
pip install -U -e .
cd ${SEARX_SRC}
pip install -e .
diff --git a/utils/templates/etc/uwsgi/apps-archlinux/searx.ini b/utils/templates/etc/uwsgi/apps-archlinux/searx.ini
index 9dd2e6f2f..71cece3c4 100644
--- a/utils/templates/etc/uwsgi/apps-archlinux/searx.ini
+++ b/utils/templates/etc/uwsgi/apps-archlinux/searx.ini
@@ -82,4 +82,7 @@ http = ${SEARX_INTERNAL_HTTP}
# mkdir -p /run/uwsgi/app/searx
# chown -R ${SERVICE_USER}:${SERVICE_GROUP} /run/uwsgi/app/searx
#
-# socket = /run/uwsgi/app/searx/socket \ No newline at end of file
+# socket = /run/uwsgi/app/searx/socket
+
+# Cache
+cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1
diff --git a/utils/templates/etc/uwsgi/apps-available/searx.ini b/utils/templates/etc/uwsgi/apps-available/searx.ini
index 4d69da0cf..45214ef13 100644
--- a/utils/templates/etc/uwsgi/apps-available/searx.ini
+++ b/utils/templates/etc/uwsgi/apps-available/searx.ini
@@ -82,3 +82,6 @@ http = ${SEARX_INTERNAL_HTTP}
# chown -R ${SERVICE_USER}:${SERVICE_GROUP} /run/uwsgi/app/searx
#
# socket = /run/uwsgi/app/searx/socket
+
+# Cache
+cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1