diff options
134 files changed, 2738 insertions, 991 deletions
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index cff8854b9..4f3b8e9c8 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -5,11 +5,11 @@ on: [push, pull_request] jobs: python: name: Python ${{ matrix.python-version }} - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 strategy: matrix: - os: [ubuntu-latest] - python-version: [3.5, 3.6, 3.7, 3.8] + os: [ubuntu-20.04] + python-version: [3.6, 3.7, 3.8, 3.9] steps: - name: Checkout uses: actions/checkout@v2 @@ -46,7 +46,7 @@ jobs: themes: name: Themes - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Checkout uses: actions/checkout@v2 @@ -59,7 +59,7 @@ jobs: documentation: name: Documentation - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Checkout uses: actions/checkout@v2 @@ -92,7 +92,7 @@ jobs: - documentation env: DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - runs-on: ubuntu-latest + runs-on: ubuntu-18.04 steps: - name: Checkout if: env.DOCKERHUB_USERNAME != null diff --git a/Dockerfile b/Dockerfile index 3894aa968..f251d06ea 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,6 +41,8 @@ RUN apk upgrade --no-cache \ openssl-dev \ tar \ git \ + protoc \ + protobuf-dev \ && apk add --no-cache \ ca-certificates \ su-exec \ @@ -53,6 +55,7 @@ RUN apk upgrade --no-cache \ uwsgi \ uwsgi-python3 \ brotli \ + protobuf \ && pip3 install --upgrade pip \ && pip3 install --no-cache -r requirements.txt \ && apk del build-dependencies \ @@ -1,20 +1,12 @@ # -*- coding: utf-8; mode: makefile-gmake -*- .DEFAULT_GOAL=help -# START Makefile setup -export GIT_URL=https://github.com/searx/searx -export GIT_BRANCH=master -export SEARX_URL=https://searx.me -export DOCS_URL=https://searx.github.io/searx -# export CONTACT_URL=mailto:contact@example.com -# END Makefile setup - include utils/makefile.include PYOBJECTS = searx DOC = docs PY_SETUP_EXTRAS ?= \[test\] -PYLINT_SEARX_DISABLE_OPTION := 
I,C,R,W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401 +PYLINT_SEARX_DISABLE_OPTION := I,C,R,W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,E1136 PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES := supported_languages,language_aliases include utils/makefile.python @@ -43,12 +35,6 @@ help-min: @echo ' docker - build Docker image' @echo ' node.env - download & install npm dependencies locally' @echo '' - @echo 'environment' - @echo ' SEARX_URL = $(SEARX_URL)' - @echo ' GIT_URL = $(GIT_URL)' - @echo ' DOCS_URL = $(DOCS_URL)' - @echo ' CONTACT_URL = $(CONTACT_URL)' - @echo '' @$(MAKE) -e -s make-help help-all: help-min @@ -118,24 +104,8 @@ useragents.update: pyenvinstall $(Q)echo "Update searx/data/useragents.json with the most recent versions of Firefox." $(Q)$(PY_ENV_ACT); python utils/fetch_firefox_version.py -buildenv: - $(Q)echo "build searx/brand.py" - $(Q)echo "GIT_URL = '$(GIT_URL)'" > searx/brand.py - $(Q)echo "GIT_BRANCH = '$(GIT_BRANCH)'" >> searx/brand.py - $(Q)echo "ISSUE_URL = 'https://github.com/searx/searx/issues'" >> searx/brand.py - $(Q)echo "SEARX_URL = '$(SEARX_URL)'" >> searx/brand.py - $(Q)echo "DOCS_URL = '$(DOCS_URL)'" >> searx/brand.py - $(Q)echo "PUBLIC_INSTANCES = 'https://searx.space'" >> searx/brand.py - $(Q)echo "CONTACT_URL = '$(CONTACT_URL)'" >> searx/brand.py - $(Q)echo "build utils/brand.env" - $(Q)echo "export GIT_URL='$(GIT_URL)'" > utils/brand.env - $(Q)echo "export GIT_BRANCH='$(GIT_BRANCH)'" >> utils/brand.env - $(Q)echo "export ISSUE_URL='https://github.com/searx/searx/issues'" >> utils/brand.env - $(Q)echo "export SEARX_URL='$(SEARX_URL)'" >> utils/brand.env - $(Q)echo "export DOCS_URL='$(DOCS_URL)'" >> utils/brand.env - $(Q)echo "export PUBLIC_INSTANCES='https://searx.space'" >> utils/brand.env - $(Q)echo "export CONTACT_URL='$(CONTACT_URL)'" >> utils/brand.env - +buildenv: pyenv + $(Q)$(PY_ENV_ACT); SEARX_DEBUG=1 python utils/build_env.py # node / npm # ---------- diff --git a/dockerfiles/uwsgi.ini b/dockerfiles/uwsgi.ini 
index 398a440d9..818a99cc0 100644 --- a/dockerfiles/uwsgi.ini +++ b/dockerfiles/uwsgi.ini @@ -42,3 +42,6 @@ static-map = /static=/usr/local/searx/searx/static static-expires = /* 864000 static-gzip-all = True offload-threads = %k + +# Cache +cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1 diff --git a/docs/admin/installation-searx.rst b/docs/admin/installation-searx.rst index 3f8904a1d..512a185a7 100644 --- a/docs/admin/installation-searx.rst +++ b/docs/admin/installation-searx.rst @@ -70,13 +70,20 @@ from the login (*~/.profile*): Configuration ============= +.. sidebar:: ``use_default_settings: True`` + + - :ref:`settings global` + - :ref:`settings location` + - :ref:`settings use_default_settings` + - :origin:`/etc/searx/settings.yml <utils/templates/etc/searx/use_default_settings.yml>` + To create a initial ``/etc/searx/settings.yml`` you can start with a copy of the file :origin:`utils/templates/etc/searx/use_default_settings.yml`. This setup -:option:ref:`use default settings <settings use_default_settings>` from +:ref:`use default settings <settings use_default_settings>` from :origin:`searx/settings.yml` and is recommended since :pull:`2291` is merged. -For minimal Setup, configure like shown below – replace ``searx@\$(uname -n)`` -with a name of your choice, set ``ultrasecretkey`` -- *and/or* edit +For a *minimal setup*, configure like shown below – replace ``searx@$(uname +-n)`` with a name of your choice, set ``ultrasecretkey`` -- *and/or* edit ``/etc/searx/settings.yml`` to your needs. .. kernel-include:: $DOCS_BUILD/includes/searx.rst diff --git a/docs/admin/installation.rst b/docs/admin/installation.rst index 4a301ecf8..8a066dec7 100644 --- a/docs/admin/installation.rst +++ b/docs/admin/installation.rst @@ -76,6 +76,6 @@ If all services are running fine, you can add it to your HTTP server: .. tip:: About script's installation options have a look at chapter :ref:`toolboxing - setup`. 
How to brand your instance see chapter :ref:`makefile setup`. To + setup`. How to brand your instance see chapter :ref:`settings global`. To *stash* your instance's setup, `git stash`_ your clone's :origin:`Makefile` and :origin:`.config.sh` file . diff --git a/docs/admin/settings.rst b/docs/admin/settings.rst index 985c16f85..7cf055dbf 100644 --- a/docs/admin/settings.rst +++ b/docs/admin/settings.rst @@ -27,7 +27,8 @@ First, searx will try to load settings.yml from these locations: 1. the full path specified in the ``SEARX_SETTINGS_PATH`` environment variable. 2. ``/etc/searx/settings.yml`` -If these files don't exist (or are empty or can't be read), searx uses the :origin:`searx/settings.yml` file. +If these files don't exist (or are empty or can't be read), searx uses the +:origin:`searx/settings.yml` file. .. _settings global: @@ -35,16 +36,46 @@ If these files don't exist (or are empty or can't be read), searx uses the :orig Global Settings =============== +``general:`` +------------ + .. code:: yaml general: debug : False # Debug mode, only for development instance_name : "searx" # displayed name + git_url: https://github.com/searx/searx + git_branch: master + issue_url: https://github.com/searx/searx/issues + docs_url: https://searx.github.io/searx + public_instances: https://searx.space + contact_url: False # mailto:contact@example.com + wiki_url: https://github.com/searx/searx/wiki + twitter_url: https://twitter.com/Searx_engine ``debug`` : Allow a more detailed log if you run searx directly. Display *detailed* error messages in the browser too, so this must be deactivated in production. +``contact_url``: + Contact ``mailto:`` address or WEB form. + +``git_url`` and ``git_branch``: + Changes this, to point to your searx fork (branch). + +``docs_url`` + If you host your own documentation, change this URL. + +``wiki_url``: + Link to your wiki (or ``False``) + +``twitter_url``: + Link to your tweets (or ``False``) + + +``server:`` +----------- + .. 
code:: yaml server: @@ -90,6 +121,8 @@ Global Settings ``default_http_headers``: Set additional HTTP headers, see `#755 <https://github.com/searx/searx/issues/715>`__ +``outgoing:`` +------------- .. code:: yaml @@ -139,6 +172,10 @@ Global Settings If you use multiple network interfaces, define from which IP the requests must be made. This parameter is ignored when ``proxies`` is set. + +``locales:`` +------------ + .. code:: yaml locales: @@ -244,61 +281,76 @@ Engine settings use_default_settings ==================== -.. note:: - - If searx is cloned from a git repository, most probably there is no need to have an user settings. - -The user defined settings.yml can relied on the default configuration :origin:`searx/settings.yml` using ``use_default_settings: True``. +.. sidebar:: ``use_default_settings: True`` -In the following example, the actual settings are the default settings defined in :origin:`searx/settings.yml` with the exception of the ``secret_key`` and the ``bind_address``: - -.. code-block:: yaml - - use_default_settings: True - server: - secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA" - bind_address: "0.0.0.0" - -With ``use_default_settings: True``, each settings can be override in a similar way, the ``engines`` section is merged according to the engine ``name``. - -In this example, searx will load all the engine and the arch linux wiki engine has a :ref:`token<private engines>`: - -.. code-block:: yaml - - use_default_settings: True - server: - secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA" - engines: - - name: arch linux wiki - tokens: ['$ecretValue'] - -It is possible to remove some engines from the default settings. The following example is similar to the above one, but searx doesn't load the the google engine: - -.. 
code-block:: yaml - - use_default_settings: - engines: - remove: - - google - server: - secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA" - engines: - - name: arch linux wiki - tokens: ['$ecretValue'] - -As an alternative, it is possible to specify the engines to keep. In the following example, searx has only two engines: - -.. code-block:: yaml - - use_default_settings: - engines: - keep_only: - - google - - duckduckgo - server: - secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA" - engines: - - name: google - tokens: ['$ecretValue'] - - name: duckduckgo - tokens: ['$ecretValue'] + - :ref:`settings location` + - :ref:`use_default_settings.yml` + - :origin:`/etc/searx/settings.yml <utils/templates/etc/searx/use_default_settings.yml>` + +The user defined ``settings.yml`` is loaded from the :ref:`settings location` +and can relied on the default configuration :origin:`searx/settings.yml` using: + + ``use_default_settings: True`` + +``server:`` + In the following example, the actual settings are the default settings defined + in :origin:`searx/settings.yml` with the exception of the ``secret_key`` and + the ``bind_address``: + + .. code-block:: yaml + + use_default_settings: True + server: + secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA" + bind_address: "0.0.0.0" + +``engines:`` + With ``use_default_settings: True``, each settings can be override in a + similar way, the ``engines`` section is merged according to the engine + ``name``. In this example, searx will load all the engine and the arch linux + wiki engine has a :ref:`token<private engines>`: + + .. code-block:: yaml + + use_default_settings: True + server: + secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA" + engines: + - name: arch linux wiki + tokens: ['$ecretValue'] + +``engines:`` / ``remove:`` + It is possible to remove some engines from the default settings. 
The following + example is similar to the above one, but searx doesn't load the the google + engine: + + .. code-block:: yaml + + use_default_settings: + engines: + remove: + - google + server: + secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA" + engines: + - name: arch linux wiki + tokens: ['$ecretValue'] + +``engines:`` / ``keep_only:`` + As an alternative, it is possible to specify the engines to keep. In the + following example, searx has only two engines: + + .. code-block:: yaml + + use_default_settings: + engines: + keep_only: + - google + - duckduckgo + server: + secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA" + engines: + - name: google + tokens: ['$ecretValue'] + - name: duckduckgo + tokens: ['$ecretValue'] diff --git a/docs/blog/lxcdev-202006.rst b/docs/blog/lxcdev-202006.rst index b8d470d4e..2bea19807 100644 --- a/docs/blog/lxcdev-202006.rst +++ b/docs/blog/lxcdev-202006.rst @@ -259,8 +259,8 @@ suite. For this, we have to keep an eye on the :ref:`installation basic`: - virtualenv in: ``/usr/local/searx/searx-pyenv`` - searx software in: ``/usr/local/searx/searx-src`` -The searx software is a clone of the ``GIT_URL`` (see :ref:`makefile setup`) and -the working tree is checked out from the ``GIT_BRANCH``. With the use of the +The searx software is a clone of the ``git_url`` (see :ref:`settings global`) and +the working tree is checked out from the ``git_branch``. With the use of the :ref:`searx.sh` the searx service was installed as :ref:`uWSGI application <searx uwsgi>`. To maintain this service, we can use ``systemctl`` (compare :ref:`service architectures on distributions <uwsgi configuration>`). 
diff --git a/docs/build-templates/searx.rst b/docs/build-templates/searx.rst index fe82ec3d0..e06bc2c6a 100644 --- a/docs/build-templates/searx.rst +++ b/docs/build-templates/searx.rst @@ -116,6 +116,7 @@ ${fedora_build} pip install -U pip pip install -U setuptools pip install -U wheel + pip install -U pyyaml # jump to searx's working tree and install searx into virtualenv (${SERVICE_USER})$ cd \"$SEARX_SRC\" diff --git a/docs/conf.py b/docs/conf.py index 0c07761a8..e467c6262 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -2,14 +2,10 @@ import sys, os from sphinx_build_tools import load_sphinx_config -from searx.version import VERSION_STRING from pallets_sphinx_themes import ProjectLink -from searx.brand import GIT_URL -GIT_BRANCH = os.environ.get("GIT_BRANCH", "master") -from searx.brand import SEARX_URL -from searx.brand import DOCS_URL - +from searx import brand +from searx.version import VERSION_STRING # Project -------------------------------------------------------------- @@ -46,10 +42,10 @@ extlinks['wiki'] = ('https://github.com/searx/searx/wiki/%s', ' ') extlinks['pull'] = ('https://github.com/searx/searx/pull/%s', 'PR ') # links to custom brand -extlinks['origin'] = (GIT_URL + '/blob/' + GIT_BRANCH + '/%s', 'git://') -extlinks['patch'] = (GIT_URL + '/commit/%s', '#') -extlinks['search'] = (SEARX_URL + '/%s', '#') -extlinks['docs'] = (DOCS_URL + '/%s', 'docs: ') +extlinks['origin'] = (brand.GIT_URL + '/blob/' + brand.GIT_BRANCH + '/%s', 'git://') +extlinks['patch'] = (brand.GIT_URL + '/commit/%s', '#') +extlinks['search'] = (brand.SEARX_URL + '/%s', '#') +extlinks['docs'] = (brand.DOCS_URL + '/%s', 'docs: ') extlinks['pypi'] = ('https://pypi.org/project/%s', 'PyPi: ') extlinks['man'] = ('https://manpages.debian.org/jump?q=%s', '') #extlinks['role'] = ( @@ -104,14 +100,20 @@ imgmath_font_size = 14 # sphinx.ext.imgmath setup END html_theme_options = {"index_sidebar_logo": True} -html_context = { - "project_links": [ - ProjectLink("Source", GIT_URL), - 
ProjectLink("Wiki", "https://github.com/searx/searx/wiki"), - ProjectLink("Public instances", "https://searx.space/"), - ProjectLink("Twitter", "https://twitter.com/Searx_engine"), - ] -} +html_context = {"project_links": [] } +if brand.GIT_URL: + html_context["project_links"].append(ProjectLink("Source", brand.GIT_URL)) +if brand.WIKI_URL: + html_context["project_links"].append(ProjectLink("Wiki", brand.WIKI_URL)) +if brand.PUBLIC_INSTANCES: + html_context["project_links"].append(ProjectLink("Public instances", brand.PUBLIC_INSTANCES)) +if brand.TWITTER_URL: + html_context["project_links"].append(ProjectLink("Twitter", brand.TWITTER_URL)) +if brand.ISSUE_URL: + html_context["project_links"].append(ProjectLink("Issue Tracker", brand.ISSUE_URL)) +if brand.CONTACT_URL: + html_context["project_links"].append(ProjectLink("Contact", brand.CONTACT_URL)) + html_sidebars = { "**": ["project.html", "relations.html", "searchbox.html"], } diff --git a/docs/dev/makefile.rst b/docs/dev/makefile.rst index c43855617..f93855927 100644 --- a/docs/dev/makefile.rst +++ b/docs/dev/makefile.rst @@ -8,8 +8,7 @@ Makefile Targets .. sidebar:: build environment - Before looking deeper at the targets, first read about :ref:`makefile setup` - and :ref:`make pyenv`. + Before looking deeper at the targets, first read about :ref:`make pyenv`. To install system requirements follow :ref:`buildhosts`. @@ -28,37 +27,6 @@ Calling the ``help`` target gives a first overview (``make help``): :local: :backlinks: entry - -.. _makefile setup: - -Makefile setup -============== - -.. _git stash: https://git-scm.com/docs/git-stash - -.. sidebar:: fork & upstream - - Commit changes in your (local) branch, fork or whatever, but do not push them - upstream / `git stash`_ is your friend. - -The main setup is done in the :origin:`Makefile`. - -.. literalinclude:: ../../Makefile - :start-after: START Makefile setup - :end-before: END Makefile setup - -:GIT_URL: Changes this, to point to your searx fork. 
-:GIT_BRANCH: Changes this, to point to your searx branch. -:SEARX_URL: Changes this, to point to your searx instance. -:DOCS_URL: If you host your own (*brand*) documentation, change this URL. - -If you change any of this build environment variables, you have to run ``make -buildenv``:: - - $ make buildenv - build searx/brand.py - build utils/brand.env - .. _make pyenv: Python environment @@ -148,7 +116,7 @@ clean`` stop all processes using :ref:`make pyenv`. We describe the usage of the ``doc*`` targets in the :ref:`How to contribute / Documentation <contrib docs>` section. If you want to edit the documentation read our :ref:`make docs-live` section. If you are working in your own brand, -adjust your :ref:`Makefile setup <makefile setup>`. +adjust your :ref:`settings global`. .. _make books: @@ -185,8 +153,8 @@ Use ``make docs-help`` to see which books available: ``make gh-pages`` ================= -To deploy on github.io first adjust your :ref:`Makefile setup <makefile -setup>`. For any further read :ref:`deploy on github.io`. +To deploy on github.io first adjust your :ref:`settings global`. For any +further read :ref:`deploy on github.io`. .. _make test: diff --git a/docs/dev/search_api.rst b/docs/dev/search_api.rst index b63891f53..68fee94bf 100644 --- a/docs/dev/search_api.rst +++ b/docs/dev/search_api.rst @@ -6,7 +6,7 @@ Search API The search supports both ``GET`` and ``POST``. -Furthermore, two enpoints ``/`` and ``/search`` are available for querying. +Furthermore, two endpoints ``/`` and ``/search`` are available for querying. ``GET /`` diff --git a/docs/utils/index.rst b/docs/utils/index.rst index ada78cef3..28515318f 100644 --- a/docs/utils/index.rst +++ b/docs/utils/index.rst @@ -47,8 +47,8 @@ Scripts to maintain services often dispose of common commands and environments. Tooling box setup ================= -The main setup is done in the :origin:`.config.sh` (read also :ref:`makefile -setup`). 
+The main setup is done in the :origin:`.config.sh` (read also :ref:`settings +global`). .. literalinclude:: ../../.config.sh :language: bash @@ -123,9 +123,9 @@ docker_build() { SEARX_GIT_VERSION=$(git describe --match "v[0-9]*\.[0-9]*\.[0-9]*" HEAD 2>/dev/null | awk -F'-' '{OFS="-"; $1=substr($1, 2); if ($3) { $3=substr($3, 2); } print}') # add the suffix "-dirty" if the repository has uncommited change - # /!\ HACK for searx/searx: ignore searx/brand.py and utils/brand.env + # /!\ HACK for searx/searx: ignore utils/brand.env git update-index -q --refresh - if [ ! -z "$(git diff-index --name-only HEAD -- | grep -v 'searx/brand.py' | grep -v 'utils/brand.env')" ]; then + if [ ! -z "$(git diff-index --name-only HEAD -- | grep -v 'utils/brand.env')" ]; then SEARX_GIT_VERSION="${SEARX_GIT_VERSION}-dirty" fi diff --git a/requirements-dev.txt b/requirements-dev.txt index 170f8c1d9..483532723 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,4 @@ -mock==4.0.3; python_version >= "3.6" -mock==2.0.0; python_version < "3.6" +mock==4.0.3 nose2[coverage_plugin]==0.9.2 cov-core==1.15.0 pycodestyle==2.6.0 @@ -8,16 +7,12 @@ splinter==0.14.0 transifex-client==0.14.2 unittest2==1.1.0 selenium==3.141.0 -twine==3.3.0; python_version >= "3.6" -twine==1.15.0; python_version < "3.6" +twine==3.3.0 Pallets-Sphinx-Themes==1.2.3 -Sphinx==3.4.1; python_version >= '3.6' -Sphinx==3.0.1; python_version < '3.6' +Sphinx==3.4.1 sphinx-issues==1.2.0 sphinx-jinja==1.1.1 -sphinx-tabs==1.3.0; python_version >= '3.6' -sphinx-tabs==1.1.13; python_version < '3.6' +sphinx-tabs==1.3.0 sphinxcontrib-programoutput==0.16 -sphinx-autobuild==2020.9.1; python_version >= '3.6' -sphinx-autobuild==0.7.1; python_version < '3.6' -linuxdoc @ git+http://github.com/return42/linuxdoc.git@70673dcf69e705e08d81f53794895dc15c4920b3#egg=linuxdoc +sphinx-autobuild==2020.9.1 +linuxdoc==20210110 diff --git a/requirements.txt b/requirements.txt index e0c48ca94..776bbc20b 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -6,7 +6,7 @@ idna==2.10 jinja2==2.11.2 lxml==4.6.2 pygments==2.1.3 -pyopenssl==20.0.1 python-dateutil==2.8.1 pyyaml==5.3.1 requests[socks]==2.25.1 +pycld3==0.20 diff --git a/searx/__init__.py b/searx/__init__.py index 08e67f69d..11adbba73 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -60,3 +60,49 @@ if 'SEARX_SECRET' in environ: settings['server']['secret_key'] = environ['SEARX_SECRET'] if 'SEARX_BIND_ADDRESS' in environ: settings['server']['bind_address'] = environ['SEARX_BIND_ADDRESS'] + + +class _brand_namespace: + + @classmethod + def get_val(cls, group, name, default=''): + return settings.get(group, {}).get(name) or default + + @property + def SEARX_URL(self): + return self.get_val('server', 'base_url') + + @property + def CONTACT_URL(self): + return self.get_val('general', 'contact_url') + + @property + def GIT_URL(self): + return self.get_val('brand', 'git_url') + + @property + def GIT_BRANCH(self): + return self.get_val('brand', 'git_branch') + + @property + def ISSUE_URL(self): + return self.get_val('brand', 'issue_url') + + @property + def DOCS_URL(self): + return self.get_val('brand', 'docs_url') + + @property + def PUBLIC_INSTANCES(self): + return self.get_val('brand', 'public_instances') + + @property + def WIKI_URL(self): + return self.get_val('brand', 'wiki_url') + + @property + def TWITTER_URL(self): + return self.get_val('brand', 'twitter_url') + + +brand = _brand_namespace() diff --git a/searx/brand.py b/searx/brand.py deleted file mode 100644 index 7fcab6fad..000000000 --- a/searx/brand.py +++ /dev/null @@ -1,7 +0,0 @@ -GIT_URL = 'https://github.com/searx/searx' -GIT_BRANCH = 'master' -ISSUE_URL = 'https://github.com/searx/searx/issues' -SEARX_URL = 'https://searx.me' -DOCS_URL = 'https://searx.github.io/searx' -PUBLIC_INSTANCES = 'https://searx.space' -CONTACT_URL = '' diff --git a/searx/data/__init__.py b/searx/data/__init__.py index 55a254b13..29ac5b7a2 100644 --- a/searx/data/__init__.py +++ 
b/searx/data/__init__.py @@ -8,8 +8,7 @@ data_dir = Path(__file__).parent def load(filename): - # add str(...) for Python 3.5 - with open(str(data_dir / filename), encoding='utf-8') as fd: + with open(data_dir / filename, encoding='utf-8') as fd: return json.load(fd) diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py index 18478876a..9cc7c1b79 100644 --- a/searx/engines/1337x.py +++ b/searx/engines/1337x.py @@ -1,7 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + 1337x +""" + from urllib.parse import quote, urljoin from lxml import html from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://1337x.to/', + "wikidata_id": 'Q28134166', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} url = 'https://1337x.to/' search_url = url + 'search/{search_term}/{pageno}/' diff --git a/searx/engines/acgsou.py b/searx/engines/acgsou.py index 637443edc..ea9793f10 100644 --- a/searx/engines/acgsou.py +++ b/searx/engines/acgsou.py @@ -1,18 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Acgsou (Japanese Animation/Music/Comics Bittorrent tracker) - - @website https://www.acgsou.com/ - @provide-api no - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, content, seed, leech, torrentfile """ from urllib.parse import urlencode from lxml import html from searx.utils import extract_text, get_torrent_size, eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://www.acgsou.com/', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['files', 'images', 'videos', 'music'] paging = True diff --git a/searx/engines/ahmia.py b/searx/engines/ahmia.py index 7a2ae0075..6c502bb40 100644 --- a/searx/engines/ahmia.py +++ 
b/searx/engines/ahmia.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Ahmia (Onions) - - @website http://msydqstlz2kzerdg.onion - @provides-api no - - @using-api no - @results HTML - @stable no - @parse url, title, content """ from urllib.parse import urlencode, urlparse, parse_qs from lxml.html import fromstring from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval_xpath +# about +about = { + "website": 'http://msydqstlz2kzerdg.onion', + "wikidata_id": 'Q18693938', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine config categories = ['onions'] paging = True diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py index 3a948dcb4..a4c66e891 100644 --- a/searx/engines/apkmirror.py +++ b/searx/engines/apkmirror.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ APK Mirror - - @website https://www.apkmirror.com - - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, thumbnail_src """ from urllib.parse import urlencode from lxml import html from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://www.apkmirror.com', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} # engine dependent config categories = ['it'] diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index 04117c07d..d29d65ba3 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -1,20 +1,24 @@ -# -*- coding: utf-8 -*- - +# SPDX-License-Identifier: AGPL-3.0-or-later """ Arch Linux Wiki - @website https://wiki.archlinux.org - @provide-api no (Mediawiki provides API, but Arch Wiki blocks access to it - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title + API: Mediawiki provides API, but Arch Wiki blocks 
access to it """ from urllib.parse import urlencode, urljoin from lxml import html from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://wiki.archlinux.org/', + "wikidata_id": 'Q101445877', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['it'] language_support = True diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py index 1190de363..09ea07ea5 100644 --- a/searx/engines/arxiv.py +++ b/searx/engines/arxiv.py @@ -1,20 +1,21 @@ -#!/usr/bin/env python - +# SPDX-License-Identifier: AGPL-3.0-or-later """ ArXiV (Scientific preprints) - @website https://arxiv.org - @provide-api yes (export.arxiv.org/api/query) - @using-api yes - @results XML-RSS - @stable yes - @parse url, title, publishedDate, content - More info on api: https://arxiv.org/help/api/user-manual """ from lxml import html from datetime import datetime from searx.utils import eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://arxiv.org', + "wikidata_id": 'Q118398', + "official_api_documentation": 'https://arxiv.org/help/api', + "use_official_api": True, + "require_api_key": False, + "results": 'XML-RSS', +} categories = ['science'] paging = True diff --git a/searx/engines/base.py b/searx/engines/base.py index 3648d7ed0..463274681 100755 --- a/searx/engines/base.py +++ b/searx/engines/base.py @@ -1,16 +1,6 @@ -#!/usr/bin/env python - +# SPDX-License-Identifier: AGPL-3.0-or-later """ BASE (Scholar publications) - - @website https://base-search.net - @provide-api yes with authorization (https://api.base-search.net/) - - @using-api yes - @results XML - @stable ? 
- @parse url, title, publishedDate, content - More info on api: http://base-search.net/about/download/base_interface.pdf """ from urllib.parse import urlencode @@ -19,6 +9,15 @@ from datetime import datetime import re from searx.utils import searx_useragent +# about +about = { + "website": 'https://base-search.net', + "wikidata_id": 'Q448335', + "official_api_documentation": 'https://api.base-search.net/', + "use_official_api": True, + "require_api_key": False, + "results": 'XML', +} categories = ['science'] diff --git a/searx/engines/bing.py b/searx/engines/bing.py index f0882fcc9..edf6baef9 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -1,16 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Bing (Web) - - @website https://www.bing.com - @provide-api yes (http://datamarket.azure.com/dataset/bing/search), - max. 5000 query/month - - @using-api no (because of query limit) - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content - - @todo publishedDate """ import re @@ -21,6 +11,16 @@ from searx.utils import eval_xpath, extract_text, match_language logger = logger.getChild('bing engine') +# about +about = { + "website": 'https://www.bing.com', + "wikidata_id": 'Q182496', + "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] paging = True diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 2bcf82b84..b4ca57f4b 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -1,15 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Bing (Images) - - @website https://www.bing.com/images - @provide-api yes (http://datamarket.azure.com/dataset/bing/search), - max. 
5000 query/month - - @using-api no (because of query limit) - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, img_src - """ from urllib.parse import urlencode @@ -20,6 +11,16 @@ from searx.utils import match_language from searx.engines.bing import language_aliases from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +# about +about = { + "website": 'https://www.bing.com/images', + "wikidata_id": 'Q182496', + "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['images'] paging = True diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index b95def48b..2e4b78278 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -1,14 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Bing (News) - - @website https://www.bing.com/news - @provide-api yes (http://datamarket.azure.com/dataset/bing/search), - max. 
5000 query/month - - @using-api no (because of query limit) - @results RSS (using search portal) - @stable yes (except perhaps for the images) - @parse url, title, content, publishedDate, thumbnail """ from datetime import datetime @@ -20,6 +12,16 @@ from searx.utils import match_language, eval_xpath_getindex from searx.engines.bing import language_aliases from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +# about +about = { + "website": 'https://www.bing.com/news', + "wikidata_id": 'Q2878637', + "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-news-search-api', + "use_official_api": False, + "require_api_key": False, + "results": 'RSS', +} + # engine dependent config categories = ['news'] paging = True diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index 143c71a3e..b4584bb37 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Bing (Videos) - - @website https://www.bing.com/videos - @provide-api yes (http://datamarket.azure.com/dataset/bing/search) - - @using-api no - @results HTML - @stable no - @parse url, title, content, thumbnail """ from json import loads @@ -18,6 +11,16 @@ from searx.utils import match_language from searx.engines.bing import language_aliases from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +# about +about = { + "website": 'https://www.bing.com/videos', + "wikidata_id": 'Q4914152', + "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-video-search-api', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + categories = ['videos'] paging = True safesearch = True diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index 72bda8d20..863396f6e 100644 --- a/searx/engines/btdigg.py +++ 
b/searx/engines/btdigg.py @@ -1,19 +1,25 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ BTDigg (Videos, Music, Files) - - @website https://btdig.com - @provide-api yes (on demand) - - @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content, seed, leech, magnetlink """ from lxml import html from urllib.parse import quote, urljoin from searx.utils import extract_text, get_torrent_size +# about +about = { + "website": 'https://btdig.com', + "wikidata_id": 'Q4836698', + "official_api_documentation": { + 'url': 'https://btdig.com/contacts', + 'comment': 'on demand' + }, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['videos', 'music', 'files'] paging = True diff --git a/searx/engines/command.py b/searx/engines/command.py index 6321e0004..33270d245 100644 --- a/searx/engines/command.py +++ b/searx/engines/command.py @@ -1,18 +1,7 @@ -''' -searx is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -searx is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with searx. If not, see < http://www.gnu.org/licenses/ >. 
-''' - +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Command (offline) +""" import re from os.path import expanduser, isabs, realpath, commonprefix diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index 7098dd3c7..d4c3b5f81 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -1,5 +1,19 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + currency convert (DuckDuckGo) +""" + import json +# about +about = { + "website": 'https://duckduckgo.com/', + "wikidata_id": 'Q12805', + "official_api_documentation": 'https://duckduckgo.com/api', + "use_official_api": False, + "require_api_key": False, + "results": 'JSONP', +} engine_type = 'online_currency' categories = [] diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 1e24e41da..874e0f42a 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -1,15 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Dailymotion (Videos) - - @website https://www.dailymotion.com - @provide-api yes (http://www.dailymotion.com/developer) - - @using-api yes - @results JSON - @stable yes - @parse url, title, thumbnail, publishedDate, embedded - - @todo set content-parameter with correct data """ from json import loads @@ -17,6 +8,16 @@ from datetime import datetime from urllib.parse import urlencode from searx.utils import match_language, html_to_text +# about +about = { + "website": 'https://www.dailymotion.com', + "wikidata_id": 'Q769222', + "official_api_documentation": 'https://www.dailymotion.com/developer', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['videos'] paging = True diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py index 48c0429a7..946bd3ebe 100644 --- a/searx/engines/deezer.py +++ b/searx/engines/deezer.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Deezer (Music) - - @website 
https://deezer.com - @provide-api yes (http://developers.deezer.com/api/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, embedded """ from json import loads from urllib.parse import urlencode +# about +about = { + "website": 'https://deezer.com', + "wikidata_id": 'Q602243', + "official_api_documentation": 'https://developers.deezer.com/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['music'] paging = True diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index 0378929b2..7840495e1 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -1,21 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Deviantart (Images) - - @website https://www.deviantart.com/ - @provide-api yes (https://www.deviantart.com/developers/) (RSS) - - @using-api no (TODO, rewrite to api) - @results HTML - @stable no (HTML can change) - @parse url, title, img_src - - @todo rewrite to api """ # pylint: disable=missing-function-docstring from urllib.parse import urlencode from lxml import html +# about +about = { + "website": 'https://www.deviantart.com/', + "wikidata_id": 'Q46523', + "official_api_documentation": 'https://www.deviantart.com/developers/', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['images'] paging = True diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 5e6f688a1..2483c0805 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Dictzone - - @website https://dictzone.com/ - @provide-api no - @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content """ from urllib.parse import urljoin from lxml import html from searx.utils import eval_xpath +# about +about = { + "website": 
'https://dictzone.com/', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} engine_type = 'online_dictionnary' categories = ['general'] diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py index b1a90fb2f..109662a49 100644 --- a/searx/engines/digbt.py +++ b/searx/engines/digbt.py @@ -1,19 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ DigBT (Videos, Music, Files) - - @website https://digbt.org - @provide-api no - - @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content, magnetlink """ from urllib.parse import urljoin from lxml import html from searx.utils import extract_text, get_torrent_size +# about +about = { + "website": 'https://digbt.org', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} categories = ['videos', 'music', 'files'] paging = True diff --git a/searx/engines/digg.py b/searx/engines/digg.py index 85f727f0d..defcacd20 100644 --- a/searx/engines/digg.py +++ b/searx/engines/digg.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Digg (News, Social media) - - @website https://digg.com - @provide-api no - - @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content, publishedDate, thumbnail """ # pylint: disable=missing-function-docstring @@ -17,6 +10,16 @@ from datetime import datetime from lxml import html +# about +about = { + "website": 'https://digg.com', + "wikidata_id": 'Q270478', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['news', 'social media'] paging = True diff --git a/searx/engines/doku.py b/searx/engines/doku.py index e1b10d664..ed1eab388 100644 --- a/searx/engines/doku.py +++ 
b/searx/engines/doku.py @@ -1,18 +1,22 @@ -# Doku Wiki -# -# @website https://www.dokuwiki.org/ -# @provide-api yes -# (https://www.dokuwiki.org/devel:xmlrpc) -# -# @using-api no -# @results HTML -# @stable yes -# @parse (general) url, title, content +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Doku Wiki +""" from urllib.parse import urlencode from lxml.html import fromstring from searx.utils import extract_text, eval_xpath +# about +about = { + "website": 'https://www.dokuwiki.org/', + "wikidata_id": 'Q851864', + "official_api_documentation": 'https://www.dokuwiki.org/devel:xmlrpc', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' paging = False diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index c1c984623..fc20de239 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -1,22 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ DuckDuckGo (Web) - - @website https://duckduckgo.com/ - @provide-api yes (https://duckduckgo.com/api), - but not all results from search-site - - @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content - - @todo rewrite to api """ from lxml.html import fromstring from json import loads from searx.utils import extract_text, match_language, eval_xpath +# about +about = { + "website": 'https://duckduckgo.com/', + "wikidata_id": 'Q12805', + "official_api_documentation": 'https://duckduckgo.com/api', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] paging = False diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 1d1c84b4b..0473b0a95 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -1,12 +1,6 @@ +# 
SPDX-License-Identifier: AGPL-3.0-or-later """ -DuckDuckGo (definitions) - -- `Instant Answer API`_ -- `DuckDuckGo query`_ - -.. _Instant Answer API: https://duckduckgo.com/api -.. _DuckDuckGo query: https://api.duckduckgo.com/?q=DuckDuckGo&format=json&pretty=1 - + DuckDuckGo (Instant Answer API) """ import json @@ -22,6 +16,16 @@ from searx.external_urls import get_external_url, get_earth_coordinates_url, are logger = logger.getChild('duckduckgo_definitions') +# about +about = { + "website": 'https://duckduckgo.com/', + "wikidata_id": 'Q12805', + "official_api_documentation": 'https://duckduckgo.com/api', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + URL = 'https://api.duckduckgo.com/'\ + '?{query}&format=json&pretty=0&no_redirect=1&d=1' diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py index 009f81cca..b5c2d4506 100644 --- a/searx/engines/duckduckgo_images.py +++ b/searx/engines/duckduckgo_images.py @@ -1,16 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ DuckDuckGo (Images) - - @website https://duckduckgo.com/ - @provide-api yes (https://duckduckgo.com/api), - but images are not supported - - @using-api no - @results JSON (site requires js to get images) - @stable no (JSON can change) - @parse url, title, img_src - - @todo avoid extra request """ from json import loads @@ -20,6 +10,19 @@ from searx.engines.duckduckgo import get_region_code from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import from searx.poolrequests import get +# about +about = { + "website": 'https://duckduckgo.com/', + "wikidata_id": 'Q12805', + "official_api_documentation": { + 'url': 'https://duckduckgo.com/api', + 'comment': 'but images are not supported', + }, + "use_official_api": False, + "require_api_key": False, + "results": 'JSON (site requires js to get images)', +} + # engine dependent config categories = ['images'] paging = True 
diff --git a/searx/engines/duden.py b/searx/engines/duden.py index 1475fb846..f1c9efd3f 100644 --- a/searx/engines/duden.py +++ b/searx/engines/duden.py @@ -1,11 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Duden - @website https://www.duden.de - @provide-api no - @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content """ import re @@ -13,6 +8,16 @@ from urllib.parse import quote, urljoin from lxml import html from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://www.duden.de', + "wikidata_id": 'Q73624591', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + categories = ['general'] paging = True language_support = False diff --git a/searx/engines/dummy-offline.py b/searx/engines/dummy-offline.py index 13a9ecc01..cf2f75312 100644 --- a/searx/engines/dummy-offline.py +++ b/searx/engines/dummy-offline.py @@ -1,11 +1,19 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Dummy Offline - - @results one result - @stable yes """ +# about +about = { + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + + def search(query, request_params): return [{ 'result': 'this is what you get', diff --git a/searx/engines/dummy.py b/searx/engines/dummy.py index 50b56ef78..1a1b57d8c 100644 --- a/searx/engines/dummy.py +++ b/searx/engines/dummy.py @@ -1,10 +1,18 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Dummy - - @results empty array - @stable yes """ +# about +about = { + "website": None, + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'empty array', +} + # do search-request def request(query, params): diff --git a/searx/engines/ebay.py b/searx/engines/ebay.py index e2e5ded6a..45c633b42 100644 
--- a/searx/engines/ebay.py +++ b/searx/engines/ebay.py @@ -1,17 +1,22 @@ -# Ebay (Videos, Music, Files) -# -# @website https://www.ebay.com -# @provide-api no (nothing found) -# -# @using-api no -# @results HTML (using search portal) -# @stable yes (HTML can change) -# @parse url, title, content, price, shipping, source +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Ebay (Videos, Music, Files) +""" from lxml import html from searx.engines.xpath import extract_text from urllib.parse import quote +# about +about = { + "website": 'https://www.ebay.com', + "wikidata_id": 'Q58024', + "official_api_documentation": 'https://developer.ebay.com/', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + categories = ['shopping'] paging = True diff --git a/searx/engines/elasticsearch.py b/searx/engines/elasticsearch.py index 0e2d35756..da7f98074 100644 --- a/searx/engines/elasticsearch.py +++ b/searx/engines/elasticsearch.py @@ -1,3 +1,8 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Elasticsearch +""" + from json import loads, dumps from requests.auth import HTTPBasicAuth from searx.exceptions import SearxEngineAPIException diff --git a/searx/engines/etools.py b/searx/engines/etools.py index a0762d1c7..77d7e71c6 100644 --- a/searx/engines/etools.py +++ b/searx/engines/etools.py @@ -1,18 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ eTools (Web) - - @website https://www.etools.ch - @provide-api no - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, content """ from lxml import html from urllib.parse import quote from searx.utils import extract_text, eval_xpath +# about +about = { + "website": 'https://www.etools.ch', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + categories = ['general'] paging = False language_support = False diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py index 
3d37db44e..8fff2e384 100644 --- a/searx/engines/fdroid.py +++ b/searx/engines/fdroid.py @@ -1,18 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ F-Droid (a repository of FOSS applications for Android) - - @website https://f-droid.org/ - @provide-api no - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, content """ from urllib.parse import urlencode from lxml import html from searx.utils import extract_text +# about +about = { + "website": 'https://f-droid.org/', + "wikidata_id": 'Q1386210', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['files'] paging = True diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index b23c447b8..b0ddf6224 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -1,21 +1,23 @@ -#!/usr/bin/env python - +# SPDX-License-Identifier: AGPL-3.0-or-later """ Flickr (Images) - @website https://www.flickr.com - @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) - - @using-api yes - @results JSON - @stable yes - @parse url, title, thumbnail, img_src More info on api-key : https://www.flickr.com/services/apps/create/ """ from json import loads from urllib.parse import urlencode +# about +about = { + "website": 'https://www.flickr.com', + "wikidata_id": 'Q103204', + "official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html', + "use_official_api": True, + "require_api_key": True, + "results": 'JSON', +} + categories = ['images'] nb_per_page = 15 diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 4bcf837cb..a07aad51e 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -1,15 +1,6 @@ -#!/usr/bin/env python - +# SPDX-License-Identifier: AGPL-3.0-or-later """ - Flickr (Images) - - @website https://www.flickr.com - @provide-api yes 
(https://secure.flickr.com/services/api/flickr.photos.search.html) - - @using-api no - @results HTML - @stable no - @parse url, title, thumbnail, img_src + Flickr (Images) """ from json import loads @@ -21,6 +12,16 @@ from searx.utils import ecma_unescape, html_to_text logger = logger.getChild('flickr-noapi') +# about +about = { + "website": 'https://www.flickr.com', + "wikidata_id": 'Q103204', + "official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + categories = ['images'] url = 'https://www.flickr.com/' diff --git a/searx/engines/framalibre.py b/searx/engines/framalibre.py index e3d056425..42c08cf95 100644 --- a/searx/engines/framalibre.py +++ b/searx/engines/framalibre.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ FramaLibre (It) - - @website https://framalibre.org/ - @provide-api no - - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, content, thumbnail, img_src """ from html import escape @@ -15,6 +8,16 @@ from urllib.parse import urljoin, urlencode from lxml import html from searx.utils import extract_text +# about +about = { + "website": 'https://framalibre.org/', + "wikidata_id": 'Q30213882', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['it'] paging = True diff --git a/searx/engines/frinkiac.py b/searx/engines/frinkiac.py index 5b174a687..f43bb6e20 100644 --- a/searx/engines/frinkiac.py +++ b/searx/engines/frinkiac.py @@ -1,17 +1,24 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ -Frinkiac (Images) - -@website https://www.frinkiac.com -@provide-api no -@using-api no -@results JSON -@stable no -@parse url, title, img_src + Frinkiac (Images) """ from json import loads from urllib.parse import urlencode +# about +about = { + "website": 'https://frinkiac.com', + 
"wikidata_id": 'Q24882614', + "official_api_documentation": { + 'url': None, + 'comment': 'see https://github.com/MitchellAW/CompuGlobal' + }, + "use_official_api": False, + "require_api_key": False, + "results": 'JSON', +} + categories = ['images'] BASE = 'https://frinkiac.com/' diff --git a/searx/engines/genius.py b/searx/engines/genius.py index 2bfbfddf5..1667d529d 100644 --- a/searx/engines/genius.py +++ b/searx/engines/genius.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ -Genius - - @website https://www.genius.com/ - @provide-api yes (https://docs.genius.com/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, thumbnail, publishedDate + Genius """ from json import loads from urllib.parse import urlencode from datetime import datetime +# about +about = { + "website": 'https://genius.com/', + "wikidata_id": 'Q3419343', + "official_api_documentation": 'https://docs.genius.com/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['music'] paging = True diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py index 16b3e692d..55f15576e 100644 --- a/searx/engines/gentoo.py +++ b/searx/engines/gentoo.py @@ -1,20 +1,22 @@ -# -*- coding: utf-8 -*- - +# SPDX-License-Identifier: AGPL-3.0-or-later """ Gentoo Wiki - - @website https://wiki.gentoo.org - @provide-api yes - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title """ from urllib.parse import urlencode, urljoin from lxml import html from searx.utils import extract_text +# about +about = { + "website": 'https://wiki.gentoo.org/', + "wikidata_id": 'Q1050637', + "official_api_documentation": 'https://wiki.gentoo.org/api.php', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['it'] language_support = True diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 
1d71b18e9..f5f89a736 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -1,14 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """ Gigablast (Web) - - @website https://gigablast.com - @provide-api yes (https://gigablast.com/api.html) - - @using-api yes - @results XML - @stable yes - @parse url, title, content """ # pylint: disable=missing-function-docstring, invalid-name @@ -18,6 +10,16 @@ from urllib.parse import urlencode # from searx import logger from searx.poolrequests import get +# about +about = { + "website": 'https://www.gigablast.com', + "wikidata_id": 'Q3105449', + "official_api_documentation": 'https://gigablast.com/api.html', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['general'] # gigablast's pagination is totally damaged, don't use it diff --git a/searx/engines/github.py b/searx/engines/github.py index 80b50ceda..b68caa350 100644 --- a/searx/engines/github.py +++ b/searx/engines/github.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ - Github (It) - - @website https://github.com/ - @provide-api yes (https://developer.github.com/v3/) - - @using-api yes - @results JSON - @stable yes (using api) - @parse url, title, content + Github (IT) """ from json import loads from urllib.parse import urlencode +# about +about = { + "website": 'https://github.com/', + "wikidata_id": 'Q364', + "official_api_documentation": 'https://developer.github.com/v3/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['it'] diff --git a/searx/engines/google.py b/searx/engines/google.py index 17ab21f6a..4198de640 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -1,19 +1,11 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Google (Web) -:website: https://www.google.com -:provide-api: yes (https://developers.google.com/custom-search/) -:using-api: not the 
offical, since it needs registration to another service -:results: HTML -:stable: no -:parse: url, title, content, number_of_results, answer, suggestion, correction - -For detailed description of the *REST-full* API see: `Query Parameter -Definitions`_. - -.. _Query Parameter Definitions: - https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions + For detailed description of the *REST-full* API see: `Query Parameter + Definitions`_. + .. _Query Parameter Definitions: + https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions """ # pylint: disable=invalid-name, missing-function-docstring @@ -27,6 +19,16 @@ from searx.exceptions import SearxEngineCaptchaException logger = logger.getChild('google engine') +# about +about = { + "website": 'https://www.google.com', + "wikidata_id": 'Q9366', + "official_api_documentation": 'https://developers.google.com/custom-search/', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] paging = True diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 9ef1be753..8c2cb9d2a 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -1,14 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Google (Images) -:website: https://images.google.com (redirected to subdomain www.) -:provide-api: yes (https://developers.google.com/custom-search/) -:using-api: not the offical, since it needs registration to another service -:results: HTML -:stable: no -:template: images.html -:parse: url, title, content, source, thumbnail_src, img_src - For detailed description of the *REST-full* API see: `Query Parameter Definitions`_. @@ -18,10 +10,6 @@ Definitions`_. ``data:` scheme).:: Header set Content-Security-Policy "img-src 'self' data: ;" - -.. 
_Query Parameter Definitions: - https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions - """ from urllib.parse import urlencode, urlparse, unquote @@ -39,6 +27,16 @@ from searx.engines.google import ( logger = logger.getChild('google images') +# about +about = { + "website": 'https://images.google.com/', + "wikidata_id": 'Q521550', + "official_api_documentation": 'https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions', # NOQA + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['images'] diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index f1b7cfa79..63fef6696 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Google (News) - - @website https://news.google.com - @provide-api no - - @using-api no - @results HTML - @stable no - @parse url, title, content, publishedDate """ from urllib.parse import urlencode @@ -15,6 +8,16 @@ from lxml import html from searx.utils import match_language from searx.engines.google import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +# about +about = { + "website": 'https://news.google.com', + "wikidata_id": 'Q12020', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # search-url categories = ['news'] paging = True diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index eedefbf45..61e01ca7b 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Google (Videos) - - @website https://www.google.com - @provide-api yes (https://developers.google.com/custom-search/) - - @using-api no - @results HTML - @stable no - @parse url, title, 
content, thumbnail """ from datetime import date, timedelta @@ -16,6 +9,16 @@ from lxml import html from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex import re +# about +about = { + "website": 'https://www.google.com', + "wikidata_id": 'Q219885', + "official_api_documentation": 'https://developers.google.com/custom-search/', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['videos'] paging = True diff --git a/searx/engines/ina.py b/searx/engines/ina.py index ce241d409..1a47ca51e 100644 --- a/searx/engines/ina.py +++ b/searx/engines/ina.py @@ -1,15 +1,7 @@ -# INA (Videos) -# -# @website https://www.ina.fr/ -# @provide-api no -# -# @using-api no -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, content, publishedDate, thumbnail -# -# @todo set content-parameter with correct data -# @todo embedded (needs some md5 from video page) +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + INA (Videos) +""" from json import loads from html import unescape @@ -18,6 +10,15 @@ from lxml import html from dateutil import parser from searx.utils import extract_text +# about +about = { + "website": 'https://www.ina.fr/', + "wikidata_id": 'Q1665109', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} # engine dependent config categories = ['videos'] diff --git a/searx/engines/invidious.py b/searx/engines/invidious.py index 6ea942699..61a6e5a19 100644 --- a/searx/engines/invidious.py +++ b/searx/engines/invidious.py @@ -1,16 +1,22 @@ -# Invidious (Videos) -# -# @website https://invidio.us/ -# @provide-api yes (https://github.com/omarroth/invidious/wiki/API) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, content, publishedDate, thumbnail, embedded, author, length +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Invidious (Videos) +""" 
from urllib.parse import quote_plus from dateutil import parser import time +import random + +# about +about = { + "website": 'https://instances.invidio.us/', + "wikidata_id": 'Q79343316', + "official_api_documentation": 'https://github.com/omarroth/invidious/wiki/API', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} # engine dependent config categories = ["videos", "music"] @@ -18,19 +24,29 @@ paging = True language_support = True time_range_support = True + # search-url -base_url = "https://invidio.us/" + +base_url = '' +base_url_rand = '' # do search-request def request(query, params): + global base_url_rand time_range_dict = { "day": "today", "week": "week", "month": "month", "year": "year", } - search_url = base_url + "api/v1/search?q={query}" + + if isinstance(base_url, list): + base_url_rand = random.choice(base_url) + else: + base_url_rand = base_url + + search_url = base_url_rand + "api/v1/search?q={query}" params["url"] = search_url.format( query=quote_plus(query) ) + "&page={pageno}".format(pageno=params["pageno"]) @@ -56,12 +72,12 @@ def response(resp): embedded_url = ( '<iframe width="540" height="304" ' + 'data-src="' - + base_url + + base_url_rand + 'embed/{videoid}" ' + 'frameborder="0" allowfullscreen></iframe>' ) - base_invidious_url = base_url + "watch?v=" + base_invidious_url = base_url_rand + "watch?v=" for result in search_results: rtype = result.get("type", None) diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index e2aa436cc..f4a5ff6d2 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + from collections.abc import Iterable from json import loads from urllib.parse import urlencode diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py index 90bd33063..6a44e2fd7 100644 --- a/searx/engines/kickass.py +++ b/searx/engines/kickass.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: 
AGPL-3.0-or-later """ Kickass Torrent (Videos, Music, Files) - - @website https://kickass.so - @provide-api no (nothing found) - - @using-api no - @results HTML (using search portal) - @stable yes (HTML can change) - @parse url, title, content, seed, leech, magnetlink """ from lxml import html @@ -15,6 +8,16 @@ from operator import itemgetter from urllib.parse import quote, urljoin from searx.utils import extract_text, get_torrent_size, convert_str_to_int +# about +about = { + "website": 'https://kickass.so', + "wikidata_id": 'Q17062285', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['videos', 'music', 'files'] paging = True diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py index 50ba74efc..21abff86e 100644 --- a/searx/engines/mediawiki.py +++ b/searx/engines/mediawiki.py @@ -1,21 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ - general mediawiki-engine (Web) - - @website websites built on mediawiki (https://www.mediawiki.org) - @provide-api yes (http://www.mediawiki.org/wiki/API:Search) - - @using-api yes - @results JSON - @stable yes - @parse url, title - - @todo content + General mediawiki-engine (Web) """ from json import loads from string import Formatter from urllib.parse import urlencode, quote +# about +about = { + "website": None, + "wikidata_id": None, + "official_api_documentation": 'http://www.mediawiki.org/wiki/API:Search', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['general'] language_support = True diff --git a/searx/engines/microsoft_academic.py b/searx/engines/microsoft_academic.py index 7426eef7e..14de4ac9a 100644 --- a/searx/engines/microsoft_academic.py +++ b/searx/engines/microsoft_academic.py @@ -1,12 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ -Microsoft Academic (Science) - -@website 
https://academic.microsoft.com -@provide-api yes -@using-api no -@results JSON -@stable no -@parse url, title, content + Microsoft Academic (Science) """ from datetime import datetime @@ -15,6 +9,16 @@ from uuid import uuid4 from urllib.parse import urlencode from searx.utils import html_to_text +# about +about = { + "website": 'https://academic.microsoft.com', + "wikidata_id": 'Q28136779', + "official_api_documentation": 'http://ma-graph.org/', + "use_official_api": False, + "require_api_key": False, + "results": 'JSON', +} + categories = ['images'] paging = True result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}' diff --git a/searx/engines/mixcloud.py b/searx/engines/mixcloud.py index 0606350a9..a6fd1c0a1 100644 --- a/searx/engines/mixcloud.py +++ b/searx/engines/mixcloud.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Mixcloud (Music) - - @website https://http://www.mixcloud.com/ - @provide-api yes (http://www.mixcloud.com/developers/ - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, embedded, publishedDate """ from json import loads from dateutil import parser from urllib.parse import urlencode +# about +about = { + "website": 'https://www.mixcloud.com/', + "wikidata_id": 'Q6883832', + "official_api_documentation": 'http://www.mixcloud.com/developers/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['music'] paging = True diff --git a/searx/engines/not_evil.py b/searx/engines/not_evil.py index e84f153bd..df41c0941 100644 --- a/searx/engines/not_evil.py +++ b/searx/engines/not_evil.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ not Evil (Onions) - - @website http://hss3uro2hsxfogfq.onion - @provide-api yes (http://hss3uro2hsxfogfq.onion/api.htm) - - @using-api no - @results HTML - @stable no - @parse url, title, content """ from urllib.parse import urlencode from lxml import html from 
searx.engines.xpath import extract_text +# about +about = { + "website": 'http://hss3uro2hsxfogfq.onion', + "wikidata_id": None, + "official_api_documentation": 'http://hss3uro2hsxfogfq.onion/api.htm', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['onions'] paging = True diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py index e0a91494f..f8178d637 100644 --- a/searx/engines/nyaa.py +++ b/searx/engines/nyaa.py @@ -1,18 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Nyaa.si (Anime Bittorrent tracker) - - @website https://nyaa.si/ - @provide-api no - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, content, seed, leech, torrentfile """ from lxml import html from urllib.parse import urlencode from searx.utils import extract_text, get_torrent_size, int_or_zero +# about +about = { + "website": 'https://nyaa.si/', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['files', 'images', 'videos', 'music'] paging = True diff --git a/searx/engines/opensemantic.py b/searx/engines/opensemantic.py index 9364bab41..64bc321f1 100644 --- a/searx/engines/opensemantic.py +++ b/searx/engines/opensemantic.py @@ -1,18 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ -Open Semantic Search - - @website https://www.opensemanticsearch.org/ - @provide-api yes (https://www.opensemanticsearch.org/dev) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, publishedDate + Open Semantic Search """ + from dateutil import parser from json import loads from urllib.parse import quote +# about +about = { + "website": 'https://www.opensemanticsearch.org/', + "wikidata_id": None, + "official_api_documentation": 'https://www.opensemanticsearch.org/dev', + "use_official_api": True, + "require_api_key": False, + 
"results": 'JSON', +} + base_url = 'http://localhost:8983/solr/opensemanticsearch/' search_string = 'query?q={query}' diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index 5475c7a6d..f11aa5f8c 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ OpenStreetMap (Map) - - @website https://openstreetmap.org/ - @provide-api yes (http://wiki.openstreetmap.org/wiki/Nominatim) - - @using-api yes - @results JSON - @stable yes - @parse url, title """ import re from json import loads from flask_babel import gettext +# about +about = { + "website": 'https://www.openstreetmap.org/', + "wikidata_id": 'Q936', + "official_api_documentation": 'http://wiki.openstreetmap.org/wiki/Nominatim', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['map'] paging = False diff --git a/searx/engines/pdbe.py b/searx/engines/pdbe.py index 2db92868a..b9bbfaf1b 100644 --- a/searx/engines/pdbe.py +++ b/searx/engines/pdbe.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ PDBe (Protein Data Bank in Europe) - - @website https://www.ebi.ac.uk/pdbe - @provide-api yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html), - unlimited - @using-api yes - @results python dictionary (from json) - @stable yes - @parse url, title, content, img_src """ from json import loads from flask_babel import gettext +# about +about = { + "website": 'https://www.ebi.ac.uk/pdbe', + "wikidata_id": 'Q55823905', + "official_api_documentation": 'https://www.ebi.ac.uk/pdbe/api/doc/search.html', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + categories = ['science'] hide_obsolete = False diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py index e43b2a6b7..549141079 100644 --- a/searx/engines/peertube.py +++ b/searx/engines/peertube.py @@ -1,15 +1,6 @@ +# 
SPDX-License-Identifier: AGPL-3.0-or-later """ peertube (Videos) - - @website https://www.peertube.live - @provide-api yes (https://docs.joinpeertube.org/api-rest-reference.html) - - @using-api yes - @results JSON - @stable yes - @parse url, title, thumbnail, publishedDate, embedded - - @todo implement time range support """ from json import loads @@ -17,6 +8,16 @@ from datetime import datetime from urllib.parse import urlencode from searx.utils import html_to_text +# about +about = { + "website": 'https://joinpeertube.org', + "wikidata_id": 'Q50938515', + "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ["videos"] paging = True diff --git a/searx/engines/photon.py b/searx/engines/photon.py index 7a6fc8321..f12bcd22a 100644 --- a/searx/engines/photon.py +++ b/searx/engines/photon.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Photon (Map) - - @website https://photon.komoot.de - @provide-api yes (https://photon.komoot.de/) - - @using-api yes - @results JSON - @stable yes - @parse url, title """ from json import loads from urllib.parse import urlencode from searx.utils import searx_useragent +# about +about = { + "website": 'https://photon.komoot.de', + "wikidata_id": None, + "official_api_documentation": 'https://photon.komoot.de/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['map'] paging = False diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index 828241ece..98a2dd9f2 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -1,12 +1,7 @@ -# Piratebay (Videos, Music, Files) -# -# @website https://thepiratebay.org -# @provide-api yes (https://apibay.org/) -# -# @using-api yes -# @results JSON -# @stable no (the API is not documented nor versioned) -# @parse url, title, 
seed, leech, magnetlink, filesize, publishedDate +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Piratebay (Videos, Music, Files) +""" from json import loads from datetime import datetime @@ -15,6 +10,16 @@ from operator import itemgetter from urllib.parse import quote from searx.utils import get_torrent_size +# about +about = { + "website": 'https://thepiratebay.org', + "wikidata_id": 'Q22663', + "official_api_documentation": 'https://apibay.org/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ["videos", "music", "files"] diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py index 07c45709e..da02f91ca 100644 --- a/searx/engines/pubmed.py +++ b/searx/engines/pubmed.py @@ -1,14 +1,6 @@ -#!/usr/bin/env python - +# SPDX-License-Identifier: AGPL-3.0-or-later """ PubMed (Scholar publications) - @website https://www.ncbi.nlm.nih.gov/pubmed/ - @provide-api yes (https://www.ncbi.nlm.nih.gov/home/develop/api/) - @using-api yes - @results XML - @stable yes - @parse url, title, publishedDate, content - More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/ """ from flask_babel import gettext @@ -17,6 +9,18 @@ from datetime import datetime from urllib.parse import urlencode from searx.poolrequests import get +# about +about = { + "website": 'https://www.ncbi.nlm.nih.gov/pubmed/', + "wikidata_id": 'Q1540899', + "official_api_documentation": { + 'url': 'https://www.ncbi.nlm.nih.gov/home/develop/api/', + 'comment': 'More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/' + }, + "use_official_api": True, + "require_api_key": False, + "results": 'XML', +} categories = ['science'] diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index b785719d9..87499c8ad 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Qwant (Web, Images, News, Social) - - @website https://qwant.com/ - @provide-api not 
officially (https://api.qwant.com/api/search/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content """ from datetime import datetime @@ -17,6 +10,15 @@ from searx.utils import html_to_text, match_language from searx.exceptions import SearxEngineAPIException, SearxEngineCaptchaException from searx.raise_for_httperror import raise_for_httperror +# about +about = { + "website": 'https://www.qwant.com/', + "wikidata_id": 'Q14657870', + "official_api_documentation": None, + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} # engine dependent config categories = [] diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py index 5a956b8bf..d90005a95 100644 --- a/searx/engines/recoll.py +++ b/searx/engines/recoll.py @@ -1,17 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Recoll (local search engine) - - @using-api yes - @results JSON - @stable yes - @parse url, content, size, abstract, author, mtype, subtype, time, \ - filename, label, type, embedded """ from datetime import date, timedelta from json import loads from urllib.parse import urlencode, quote +# about +about = { + "website": None, + "wikidata_id": 'Q15735774', + "official_api_documentation": 'https://www.lesbonscomptes.com/recoll/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config time_range_support = True diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py index e732875cb..ee734ace2 100644 --- a/searx/engines/reddit.py +++ b/searx/engines/reddit.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Reddit - - @website https://www.reddit.com/ - @provide-api yes (https://www.reddit.com/dev/api) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, thumbnail, publishedDate """ import json from datetime import datetime from urllib.parse import urlencode, urljoin, urlparse +# about +about = { + "website": 'https://www.reddit.com/', + 
"wikidata_id": 'Q1136', + "official_api_documentation": 'https://www.reddit.com/dev/api', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['general', 'images', 'news', 'social media'] page_size = 25 diff --git a/searx/engines/scanr_structures.py b/searx/engines/scanr_structures.py index 72fd2b3c9..51c925247 100644 --- a/searx/engines/scanr_structures.py +++ b/searx/engines/scanr_structures.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ ScanR Structures (Science) - - @website https://scanr.enseignementsup-recherche.gouv.fr - @provide-api yes (https://scanr.enseignementsup-recherche.gouv.fr/api/swagger-ui.html) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, img_src """ from json import loads, dumps from searx.utils import html_to_text +# about +about = { + "website": 'https://scanr.enseignementsup-recherche.gouv.fr', + "wikidata_id": 'Q44105684', + "official_api_documentation": 'https://scanr.enseignementsup-recherche.gouv.fr/opendata', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['science'] paging = True diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index 706285814..8c1330d98 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -1,18 +1,20 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ - Searchcode (It) - - @website https://searchcode.com/ - @provide-api yes (https://searchcode.com/api/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content + Searchcode (IT) """ from json import loads from urllib.parse import urlencode +# about +about = { + "website": 'https://searchcode.com/', + "wikidata_id": None, + "official_api_documentation": 'https://searchcode.com/api/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} # engine dependent config 
categories = ['it'] diff --git a/searx/engines/searx_engine.py b/searx/engines/searx_engine.py index 87e5e05c2..c4f016adc 100644 --- a/searx/engines/searx_engine.py +++ b/searx/engines/searx_engine.py @@ -1,18 +1,20 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Searx (all) - - @website https://github.com/searx/searx - @provide-api yes (https://searx.github.io/searx/dev/search_api.html) - - @using-api yes - @results JSON - @stable yes (using api) - @parse url, title, content """ from json import loads from searx.engines import categories as searx_categories +# about +about = { + "website": 'https://github.com/searx/searx', + "wikidata_id": 'Q17639196', + "official_api_documentation": 'https://searx.github.io/searx/dev/search_api.html', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} categories = searx_categories.keys() diff --git a/searx/engines/sepiasearch.py b/searx/engines/sepiasearch.py index 0b7c1ba6e..3433c897a 100644 --- a/searx/engines/sepiasearch.py +++ b/searx/engines/sepiasearch.py @@ -1,17 +1,23 @@ -# SepiaSearch (Videos) -# -# @website https://sepiasearch.org -# @provide-api https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, content, publishedDate, thumbnail +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + SepiaSearch (Videos) +""" from json import loads from dateutil import parser, relativedelta from urllib.parse import urlencode from datetime import datetime +# about +about = { + "website": 'https://sepiasearch.org', + "wikidata_id": None, + "official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api", # NOQA + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + categories = ['videos'] paging = True language_support = True diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index 
84ff21a88..9e414746f 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Soundcloud (Music) - - @website https://soundcloud.com - @provide-api yes (https://developers.soundcloud.com/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, publishedDate, embedded """ import re @@ -18,6 +11,15 @@ from urllib.parse import quote_plus, urlencode from searx import logger from searx.poolrequests import get as http_get +# about +about = { + "website": 'https://soundcloud.com', + "wikidata_id": 'Q568769', + "official_api_documentation": 'https://developers.soundcloud.com/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} # engine dependent config categories = ['music'] diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py index 74942326e..0ad8bfe32 100644 --- a/searx/engines/spotify.py +++ b/searx/engines/spotify.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Spotify (Music) - - @website https://spotify.com - @provide-api yes (https://developer.spotify.com/web-api/search-item/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, embedded """ from json import loads @@ -15,6 +8,16 @@ from urllib.parse import urlencode import requests import base64 +# about +about = { + "website": 'https://www.spotify.com', + "wikidata_id": 'Q689141', + "official_api_documentation": 'https://developer.spotify.com/web-api/search-item/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['music'] paging = True diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py index f730264e2..91eaa68e9 100644 --- a/searx/engines/stackoverflow.py +++ b/searx/engines/stackoverflow.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ - Stackoverflow (It) - - @website https://stackoverflow.com/ - 
@provide-api not clear (https://api.stackexchange.com/docs/advanced-search) - - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, content + Stackoverflow (IT) """ from urllib.parse import urlencode, urljoin, urlparse @@ -15,6 +8,16 @@ from lxml import html from searx.utils import extract_text from searx.exceptions import SearxEngineCaptchaException +# about +about = { + "website": 'https://stackoverflow.com/', + "wikidata_id": 'Q549037', + "official_api_documentation": 'https://api.stackexchange.com/docs', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['it'] paging = True diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index cd8b132f9..68157971d 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -1,14 +1,7 @@ -# Startpage (Web) -# -# @website https://startpage.com -# @provide-api no (nothing found) -# -# @using-api no -# @results HTML -# @stable no (HTML can change) -# @parse url, title, content -# -# @todo paging +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Startpage (Web) +""" from lxml import html from dateutil import parser @@ -19,6 +12,16 @@ from babel import Locale from babel.localedata import locale_identifiers from searx.utils import extract_text, eval_xpath, match_language +# about +about = { + "website": 'https://startpage.com', + "wikidata_id": 'Q2333295', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] # there is a mechanism to block "bot" search diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py index 9fffba8a6..91d1f01d5 100644 --- a/searx/engines/tokyotoshokan.py +++ b/searx/engines/tokyotoshokan.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Tokyo Toshokan (A BitTorrent Library for Japanese Media) - - @website 
https://www.tokyotosho.info/ - @provide-api no - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, publishedDate, seed, leech, - filesize, magnetlink, content """ import re @@ -16,6 +9,16 @@ from lxml import html from datetime import datetime from searx.utils import extract_text, get_torrent_size, int_or_zero +# about +about = { + "website": 'https://www.tokyotosho.info/', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['files', 'videos', 'music'] paging = True diff --git a/searx/engines/torrentz.py b/searx/engines/torrentz.py index 4d3e6fdd7..94a7a5343 100644 --- a/searx/engines/torrentz.py +++ b/searx/engines/torrentz.py @@ -1,14 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Torrentz2.is (BitTorrent meta-search engine) - - @website https://torrentz2.is/ - @provide-api no - - @using-api no - @results HTML - @stable no (HTML can change, although unlikely, - see https://torrentz.is/torrentz.btsearch) - @parse url, title, publishedDate, seed, leech, filesize, magnetlink """ import re @@ -17,6 +9,16 @@ from lxml import html from datetime import datetime from searx.utils import extract_text, get_torrent_size +# about +about = { + "website": 'https://torrentz2.is/', + "wikidata_id": 'Q1156687', + "official_api_documentation": 'https://torrentz.is/torrentz.btsearch', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['files', 'videos', 'music'] paging = True diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 2706e3617..9c53d70ad 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -1,14 +1,18 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ MyMemory Translated - - @website https://mymemory.translated.net/ - @provide-api yes (https://mymemory.translated.net/doc/spec.php) 
- @using-api yes - @results JSON - @stable yes - @parse url, title, content """ +# about +about = { + "website": 'https://mymemory.translated.net/', + "wikidata_id": None, + "official_api_documentation": 'https://mymemory.translated.net/doc/spec.php', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + engine_type = 'online_dictionnary' categories = ['general'] url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py index 45c6b30da..3bbdf630d 100644 --- a/searx/engines/unsplash.py +++ b/searx/engines/unsplash.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Unsplash - - @website https://unsplash.com - @provide-api yes (https://unsplash.com/developers) - - @using-api no - @results JSON (using search portal's infiniscroll API) - @stable no (JSON format could change any time) - @parse url, title, img_src, thumbnail_src """ from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl from json import loads +# about +about = { + "website": 'https://unsplash.com', + "wikidata_id": 'Q28233552', + "official_api_documentation": 'https://unsplash.com/developers', + "use_official_api": False, + "require_api_key": False, + "results": 'JSON', +} + url = 'https://unsplash.com/' search_url = url + 'napi/search/photos?' 
categories = ['images'] diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index fd3abc858..824579256 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -1,21 +1,22 @@ -# Vimeo (Videos) -# -# @website https://vimeo.com/ -# @provide-api yes (http://developer.vimeo.com/api), -# they have a maximum count of queries/hour -# -# @using-api no (TODO, rewrite to api) -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, publishedDate, thumbnail, embedded -# -# @todo rewrite to api -# @todo set content-parameter with correct data +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Vimeo (Videos) +""" from urllib.parse import urlencode from json import loads from dateutil import parser +# about +about = { + "website": 'https://vimeo.com/', + "wikidata_id": 'Q156376', + "official_api_documentation": 'http://developer.vimeo.com/api', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['videos'] paging = True diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 8d787caac..c8e4cfae6 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -1,14 +1,6 @@ -# -*- coding: utf-8 -*- +# SPDX-License-Identifier: AGPL-3.0-or-later """ Wikidata - - @website https://wikidata.org - @provide-api yes (https://query.wikidata.org/) - - @using-api yes - @results JSON - @stable yes - @parse url, infobox """ @@ -27,6 +19,16 @@ from searx.engines.wikipedia import _fetch_supported_languages, supported_langua logger = logger.getChild('wikidata') +# about +about = { + "website": 'https://wikidata.org/', + "wikidata_id": 'Q2013', + "official_api_documentation": 'https://query.wikidata.org/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # SPARQL SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql' SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain' diff
--git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 54d75108e..eff301145 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Wikipedia (Web) - - @website https://en.wikipedia.org/api/rest_v1/ - @provide-api yes - - @using-api yes - @results JSON - @stable yes - @parse url, infobox """ from urllib.parse import quote @@ -16,6 +9,16 @@ from lxml.html import fromstring from searx.utils import match_language, searx_useragent from searx.raise_for_httperror import raise_for_httperror +# about +about = { + "website": 'https://www.wikipedia.org/', + "wikidata_id": 'Q52', + "official_api_documentation": 'https://en.wikipedia.org/api/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # search-url search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 520eaa209..9c84e2809 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -1,16 +1,21 @@ -# Wolfram Alpha (Science) -# -# @website https://www.wolframalpha.com -# @provide-api yes (https://api.wolframalpha.com/v2/) -# -# @using-api yes -# @results XML -# @stable yes -# @parse url, infobox +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Wolfram|Alpha (Science) +""" from lxml import etree from urllib.parse import urlencode +# about +about = { + "website": 'https://www.wolframalpha.com', + "wikidata_id": 'Q207006', + "official_api_documentation": 'https://products.wolframalpha.com/api/', + "use_official_api": True, + "require_api_key": False, + "results": 'XML', +} + # search-url search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}' site_url = 'https://www.wolframalpha.com/input/?{query}' diff --git a/searx/engines/wolframalpha_noapi.py 
b/searx/engines/wolframalpha_noapi.py index 943d4f3fb..8e427d575 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -1,12 +1,7 @@ -# Wolfram|Alpha (Science) -# -# @website https://www.wolframalpha.com/ -# @provide-api yes (https://api.wolframalpha.com/v2/) -# -# @using-api no -# @results JSON -# @stable no -# @parse url, infobox +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Wolfram|Alpha (Science) +""" from json import loads from time import time @@ -14,6 +9,16 @@ from urllib.parse import urlencode from searx.poolrequests import get as http_get +# about +about = { + "website": 'https://www.wolframalpha.com/', + "wikidata_id": 'Q207006', + "official_api_documentation": 'https://products.wolframalpha.com/api/', + "use_official_api": False, + "require_api_key": False, + "results": 'JSON', +} + # search-url url = 'https://www.wolframalpha.com/' diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py index b8f111a50..96b8d680c 100644 --- a/searx/engines/www1x.py +++ b/searx/engines/www1x.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ 1x (Images) - - @website http://1x.com/ - @provide-api no - - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, thumbnail """ from lxml import html, etree from urllib.parse import urlencode, urljoin from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://1x.com/', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['images'] paging = False diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 1507176ec..612f69abd 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + from lxml import html from urllib.parse import urlencode from searx.utils import 
extract_text, extract_url, eval_xpath, eval_xpath_list diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index 6f7ab759f..afd59cd49 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -1,16 +1,7 @@ -# Yacy (Web, Images, Videos, Music, Files) -# -# @website http://yacy.net -# @provide-api yes -# (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse (general) url, title, content, publishedDate -# @parse (images) url, title, img_src -# -# @todo parse video, audio and file results +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Yacy (Web, Images, Videos, Music, Files) +""" from json import loads from dateutil import parser @@ -20,6 +11,16 @@ from requests.auth import HTTPDigestAuth from searx.utils import html_to_text +# about +about = { + "website": 'https://yacy.net/', + "wikidata_id": 'Q1759675', + "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['general', 'images'] # TODO , 'music', 'videos', 'files' paging = True diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 3420aa6d5..eb07a45fc 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -1,20 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Yahoo (Web) - - @website https://search.yahoo.com/web - @provide-api yes (https://developer.yahoo.com/boss/search/), - $0.80/1000 queries - - @using-api no (because pricing) - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content, suggestion """ from urllib.parse import unquote, urlencode from lxml import html from searx.utils import extract_text, extract_url, match_language, eval_xpath +# about +about = { + "website": 'https://search.yahoo.com/', + "wikidata_id": None, + "official_api_documentation": 'https://developer.yahoo.com/api/', + 
"use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] paging = True diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index 793d1104a..b324ecdf3 100644 --- a/searx/engines/yahoo_news.py +++ b/searx/engines/yahoo_news.py @@ -1,13 +1,7 @@ -# Yahoo (News) -# -# @website https://news.yahoo.com -# @provide-api yes (https://developer.yahoo.com/boss/search/) -# $0.80/1000 queries -# -# @using-api no (because pricing) -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, content, publishedDate +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Yahoo (News) +""" import re from datetime import datetime, timedelta @@ -18,6 +12,16 @@ from searx.engines.yahoo import _fetch_supported_languages, supported_languages_ from dateutil import parser from searx.utils import extract_text, extract_url, match_language +# about +about = { + "website": 'https://news.yahoo.com', + "wikidata_id": 'Q3044717', + "official_api_documentation": 'https://developer.yahoo.com/api/', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['news'] paging = True diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py index b4a6a54cf..57a2f4b79 100644 --- a/searx/engines/yandex.py +++ b/searx/engines/yandex.py @@ -1,12 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Yahoo (Web) - - @website https://yandex.ru/ - @provide-api ? 
- @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content """ from urllib.parse import urlencode, urlparse @@ -16,6 +10,16 @@ from searx.exceptions import SearxEngineCaptchaException logger = logger.getChild('yandex engine') +# about +about = { + "website": 'https://yandex.ru/', + "wikidata_id": 'Q5281', + "official_api_documentation": "?", + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] paging = True diff --git a/searx/engines/yggtorrent.py b/searx/engines/yggtorrent.py index ec84d2c6b..cad2de52b 100644 --- a/searx/engines/yggtorrent.py +++ b/searx/engines/yggtorrent.py @@ -1,12 +1,7 @@ -# Yggtorrent (Videos, Music, Files) -# -# @website https://www2.yggtorrent.si -# @provide-api no (nothing found) -# -# @using-api no -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, seed, leech, publishedDate, filesize +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Yggtorrent (Videos, Music, Files) +""" from lxml import html from operator import itemgetter @@ -15,6 +10,16 @@ from urllib.parse import quote from searx.utils import extract_text, get_torrent_size from searx.poolrequests import get as http_get +# about +about = { + "website": 'https://www2.yggtorrent.si', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['videos', 'music', 'files'] paging = True diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py index 8c12ac4d2..b3dcb4907 100644 --- a/searx/engines/youtube_api.py +++ b/searx/engines/youtube_api.py @@ -1,18 +1,23 @@ -# Youtube (Videos) -# -# @website https://www.youtube.com/ -# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) -# -# @using-api yes -# @results JSON -# @stable yes -# 
@parse url, title, content, publishedDate, thumbnail, embedded +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Youtube (Videos) +""" from json import loads from dateutil import parser from urllib.parse import urlencode from searx.exceptions import SearxEngineAPIException +# about +about = { + "website": 'https://www.youtube.com/', + "wikidata_id": 'Q866', + "official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['videos', 'music'] paging = False diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py index 36fc72e36..4a6df57c4 100644 --- a/searx/engines/youtube_noapi.py +++ b/searx/engines/youtube_noapi.py @@ -1,17 +1,22 @@ -# Youtube (Videos) -# -# @website https://www.youtube.com/ -# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) -# -# @using-api no -# @results HTML -# @stable no -# @parse url, title, content, publishedDate, thumbnail, embedded +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Youtube (Videos) +""" from functools import reduce from json import loads from urllib.parse import quote_plus +# about +about = { + "website": 'https://www.youtube.com/', + "wikidata_id": 'Q866', + "official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['videos', 'music'] paging = True diff --git a/searx/query.py b/searx/query.py index e61e24f2c..38cb03ffe 100644 --- a/searx/query.py +++ b/searx/query.py @@ -77,7 +77,7 @@ class RawTextQuery: pass # this force a language - if query_part[0] == ':': + if query_part[0] == ':' and len(query_part) > 1: lang = query_part[1:].lower().replace('_', '-') # check if any language-code is equal with diff --git 
a/searx/search/__init__.py b/searx/search/__init__.py index 0d45f0b7c..f777e8595 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -28,7 +28,9 @@ from searx.external_bang import get_bang_url from searx.results import ResultContainer from searx import logger from searx.plugins import plugins +from searx.search.models import EngineRef, SearchQuery from searx.search.processors import processors, initialize as initialize_processors +from searx.search.checker import initialize as initialize_checker logger = logger.getChild('search') @@ -45,68 +47,11 @@ else: sys.exit(1) -def initialize(settings_engines=None): +def initialize(settings_engines=None, enable_checker=False): settings_engines = settings_engines or settings['engines'] initialize_processors(settings_engines) - - -class EngineRef: - - __slots__ = 'name', 'category' - - def __init__(self, name: str, category: str): - self.name = name - self.category = category - - def __repr__(self): - return "EngineRef({!r}, {!r})".format(self.name, self.category) - - def __eq__(self, other): - return self.name == other.name and self.category == other.category - - -class SearchQuery: - """container for all the search parameters (query, language, etc...)""" - - __slots__ = 'query', 'engineref_list', 'lang', 'safesearch', 'pageno', 'time_range',\ - 'timeout_limit', 'external_bang' - - def __init__(self, - query: str, - engineref_list: typing.List[EngineRef], - lang: str='all', - safesearch: int=0, - pageno: int=1, - time_range: typing.Optional[str]=None, - timeout_limit: typing.Optional[float]=None, - external_bang: typing.Optional[str]=None): - self.query = query - self.engineref_list = engineref_list - self.lang = lang - self.safesearch = safesearch - self.pageno = pageno - self.time_range = time_range - self.timeout_limit = timeout_limit - self.external_bang = external_bang - - @property - def categories(self): - return list(set(map(lambda engineref: engineref.category, self.engineref_list))) - - def 
__repr__(self): - return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\ - format(self.query, self.engineref_list, self.lang, self.safesearch, - self.pageno, self.time_range, self.timeout_limit, self.external_bang) - - def __eq__(self, other): - return self.query == other.query\ - and self.engineref_list == other.engineref_list\ - and self.lang == other.lang\ - and self.safesearch == other.safesearch\ - and self.pageno == other.pageno\ - and self.time_range == other.time_range\ - and self.timeout_limit == other.timeout_limit\ - and self.external_bang == other.external_bang + if enable_checker: + initialize_checker() class Search: diff --git a/searx/search/checker/__init__.py b/searx/search/checker/__init__.py new file mode 100644 index 000000000..85b9178df --- /dev/null +++ b/searx/search/checker/__init__.py @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +from .impl import Checker +from .background import initialize, get_result diff --git a/searx/search/checker/__main__.py b/searx/search/checker/__main__.py new file mode 100644 index 000000000..75b37e6c5 --- /dev/null +++ b/searx/search/checker/__main__.py @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +import sys +import io +import os +import argparse +import logging + +import searx.search +import searx.search.checker +from searx.search import processors +from searx.engines import engine_shortcuts + + +# configure logging +root = logging.getLogger() +handler = logging.StreamHandler(sys.stdout) +for h in root.handlers: + root.removeHandler(h) +root.addHandler(handler) + +# color only for a valid terminal +if sys.stdout.isatty() and os.environ.get('TERM') not in ['dumb', 'unknown']: + RESET_SEQ = "\033[0m" + COLOR_SEQ = "\033[1;%dm" + BOLD_SEQ = "\033[1m" + BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = map(lambda i: COLOR_SEQ % (30 + i), range(8)) +else: + RESET_SEQ = "" + COLOR_SEQ = "" + BOLD_SEQ = "" + BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, 
WHITE = "", "", "", "", "", "", "", "" + +# equivalent of 'python -u' (unbuffered stdout, stderr) +stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True) +stderr = io.TextIOWrapper(open(sys.stderr.fileno(), 'wb', 0), write_through=True) + + +# iterator of processors +def iter_processor(engine_name_list): + if len(engine_name_list) > 0: + for name in engine_name_list: + name = engine_shortcuts.get(name, name) + processor = processors.get(name) + if processor is not None: + yield name, processor + else: + stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{RED}Engine does not exist{RESET_SEQ}\n') + else: + for name, processor in searx.search.processors.items(): + yield name, processor + + +# actual check & display +def run(engine_name_list, verbose): + searx.search.initialize() + for name, processor in iter_processor(engine_name_list): + stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}Checking\n') + if not sys.stdout.isatty(): + stderr.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}Checking\n') + checker = searx.search.checker.Checker(processor) + checker.run() + if checker.test_results.succesfull: + stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{GREEN}OK{RESET_SEQ}\n') + if verbose: + stdout.write(f' {"found languages":15}: {" ".join(sorted(list(checker.test_results.languages)))}\n') + else: + stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{RESET_SEQ}{RED}Error{RESET_SEQ}') + if not verbose: + errors = [test_name + ': ' + error for test_name, error in checker.test_results] + stdout.write(f'{RED}Error {str(errors)}{RESET_SEQ}\n') + else: + stdout.write('\n') + stdout.write(f' {"found languages":15}: {" ".join(sorted(list(checker.test_results.languages)))}\n') + for test_name, logs in checker.test_results.logs.items(): + for log in logs: + stdout.write(f' {test_name:15}: {RED}{" ".join(map(str, log))}{RESET_SEQ}\n') + + +# call by setup.py +def main(): + parser = argparse.ArgumentParser(description='Check searx engines.') + 
parser.add_argument('engine_name_list', metavar='engine name', type=str, nargs='*', + help='engines name or shortcut list. Empty for all engines.') + parser.add_argument('--verbose', '-v', + action='store_true', dest='verbose', + help='Display details about the test results', + default=False) + args = parser.parse_args() + run(args.engine_name_list, args.verbose) + + +if __name__ == '__main__': + main() diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py new file mode 100644 index 000000000..e41bff5f5 --- /dev/null +++ b/searx/search/checker/background.py @@ -0,0 +1,123 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +import json +import random +import time +import threading +import os +import signal + +from searx import logger, settings, searx_debug +from searx.exceptions import SearxSettingsException +from searx.search.processors import processors +from searx.search.checker import Checker +from searx.shared import schedule, storage + + +CHECKER_RESULT = 'CHECKER_RESULT' +running = threading.Lock() + + +def _get_interval(every, error_msg): + if isinstance(every, int): + every = (every, every) + if not isinstance(every, (tuple, list))\ + or len(every) != 2\ + or not isinstance(every[0], int)\ + or not isinstance(every[1], int): + raise SearxSettingsException(error_msg, None) + return every + + +def _get_every(): + every = settings.get('checker', {}).get('scheduling', {}).get('every', (300, 1800)) + return _get_interval(every, 'checker.scheduling.every is not a int or list') + + +def get_result(): + serialized_result = storage.get_str(CHECKER_RESULT) + if serialized_result is not None: + return json.loads(serialized_result) + + +def _set_result(result, include_timestamp=True): + if include_timestamp: + result['timestamp'] = int(time.time() / 3600) * 3600 + storage.set_str(CHECKER_RESULT, json.dumps(result)) + + +def run(): + if not running.acquire(blocking=False): + return + try: + logger.info('Starting checker') + result = { + 
'status': 'ok', + 'engines': {} + } + for name, processor in processors.items(): + logger.debug('Checking %s engine', name) + checker = Checker(processor) + checker.run() + if checker.test_results.succesfull: + result['engines'][name] = {'success': True} + else: + result['engines'][name] = {'success': False, 'errors': checker.test_results.errors} + + _set_result(result) + logger.info('Check done') + except Exception: + _set_result({'status': 'error'}) + logger.exception('Error while running the checker') + finally: + running.release() + + +def _run_with_delay(): + every = _get_every() + delay = random.randint(0, every[1] - every[0]) + logger.debug('Start checker in %i seconds', delay) + time.sleep(delay) + run() + + +def _start_scheduling(): + every = _get_every() + if schedule(every[0], _run_with_delay): + run() + + +def _signal_handler(signum, frame): + t = threading.Thread(target=run) + t.daemon = True + t.start() + + +def initialize(): + logger.info('Send SIGUSR1 signal to pid %i to start the checker', os.getpid()) + signal.signal(signal.SIGUSR1, _signal_handler) + + # disabled by default + _set_result({'status': 'disabled'}) + + # special case when debug is activate + if searx_debug and settings.get('checker', {}).get('off_when_debug', True): + logger.info('debug mode: checker is disabled') + return + + # check value of checker.scheduling.every now + scheduling = settings.get('checker', {}).get('scheduling', None) + if scheduling is None or not scheduling: + logger.info('Checker scheduler is disabled') + return + + # + _set_result({'status': 'unknown'}, include_timestamp=False) + + start_after = scheduling.get('start_after', (300, 1800)) + start_after = _get_interval(start_after, 'checker.scheduling.start_after is not a int or list') + delay = random.randint(start_after[0], start_after[1]) + logger.info('Start checker in %i seconds', delay) + t = threading.Timer(delay, _start_scheduling) + t.daemon = True + t.start() diff --git a/searx/search/checker/impl.py 
b/searx/search/checker/impl.py new file mode 100644 index 000000000..71a941f73 --- /dev/null +++ b/searx/search/checker/impl.py @@ -0,0 +1,406 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +import typing +import types +import functools +import itertools +from time import time +from urllib.parse import urlparse + +import re +import cld3 +import requests.exceptions + +from searx import poolrequests, logger +from searx.results import ResultContainer +from searx.search.models import SearchQuery, EngineRef +from searx.search.processors import EngineProcessor + + +logger = logger.getChild('searx.search.checker') + +HTML_TAGS = [ + 'embed', 'iframe', 'object', 'param', 'picture', 'source', 'svg', 'math', 'canvas', 'noscript', 'script', + 'del', 'ins', 'area', 'audio', 'img', 'map', 'track', 'video', 'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', + 'code', 'data', 'dfn', 'em', 'i', 'kbd', 'mark', 'q', 'rb', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'small', + 'span', 'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr', 'style', 'blockquote', 'dd', 'div', 'dl', 'dt', + 'figcaption', 'figure', 'hr', 'li', 'ol', 'p', 'pre', 'ul', 'button', 'datalist', 'fieldset', 'form', 'input', + 'label', 'legend', 'meter', 'optgroup', 'option', 'output', 'progress', 'select', 'textarea', 'applet', + 'frame', 'frameset' +] + + +def get_check_no_html(): + rep = ['<' + tag + '[^>]*>' for tag in HTML_TAGS] + rep += ['</' + tag + '>' for tag in HTML_TAGS] + pattern = re.compile('|'.join(rep)) + + def f(text): + return pattern.search(text.lower()) is None + + return f + + +_check_no_html = get_check_no_html() + + +def _is_url(url): + try: + result = urlparse(url) + except ValueError: + return False + if result.scheme not in ('http', 'https'): + return False + return True + + +@functools.lru_cache(maxsize=8192) +def _is_url_image(image_url): + if not isinstance(image_url, str): + return False + + if image_url.startswith('//'): + image_url = 'https:' + image_url + + if 
image_url.startswith('data:'): + return image_url.startswith('data:image/') + + if not _is_url(image_url): + return False + + retry = 2 + + while retry > 0: + a = time() + try: + poolrequests.set_timeout_for_thread(10.0, time()) + r = poolrequests.get(image_url, timeout=10.0, allow_redirects=True, headers={ + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'en-US;q=0.5,en;q=0.3', + 'Accept-Encoding': 'gzip, deflate, br', + 'DNT': '1', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + 'Sec-GPC': '1', + 'Cache-Control': 'max-age=0' + }) + if r.headers.get("content-type", "").startswith('image/'): + return True + return False + except requests.exceptions.Timeout: + logger.error('Timeout for %s: %i', image_url, int(time() - a)) + retry -= 1 + except requests.exceptions.RequestException: + logger.exception('Exception for %s', image_url) + return False + + +def _search_query_to_dict(search_query: SearchQuery) -> typing.Dict[str, typing.Any]: + return { + 'query': search_query.query, + 'lang': search_query.lang, + 'pageno': search_query.pageno, + 'safesearch': search_query.safesearch, + 'time_range': search_query.time_range, + } + + +def _search_query_diff(sq1: SearchQuery, sq2: SearchQuery)\ + -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]: + param1 = _search_query_to_dict(sq1) + param2 = _search_query_to_dict(sq2) + common = {} + diff = {} + for k, value1 in param1.items(): + value2 = param2[k] + if value1 == value2: + common[k] = value1 + else: + diff[k] = (value1, value2) + return (common, diff) + + +class TestResults: + + __slots__ = 'errors', 'logs', 'languages' + + def __init__(self): + self.errors: typing.Dict[str, typing.List[str]] = {} + self.logs: typing.Dict[str, typing.List[typing.Any]] = {} + self.languages: typing.Set[str] = set() + + def add_error(self, test, 
message, *args): + # message to self.errors + errors_for_test = self.errors.setdefault(test, []) + if message not in errors_for_test: + errors_for_test.append(message) + # (message, *args) to self.logs + logs_for_test = self.logs.setdefault(test, []) + if (message, *args) not in logs_for_test: + logs_for_test.append((message, *args)) + + def add_language(self, language): + self.languages.add(language) + + @property + def succesfull(self): + return len(self.errors) == 0 + + def __iter__(self): + for test_name, errors in self.errors.items(): + for error in sorted(errors): + yield (test_name, error) + + +class ResultContainerTests: + + __slots__ = 'test_name', 'search_query', 'result_container', 'languages', 'stop_test', 'test_results' + + def __init__(self, + test_results: TestResults, + test_name: str, + search_query: SearchQuery, + result_container: ResultContainer): + self.test_name = test_name + self.search_query = search_query + self.result_container = result_container + self.languages: typing.Set[str] = set() + self.test_results = test_results + self.stop_test = False + + @property + def result_urls(self): + results = self.result_container.get_ordered_results() + return [result['url'] for result in results] + + def _record_error(self, message: str, *args) -> None: + sq = _search_query_to_dict(self.search_query) + sqstr = ' '.join(['{}={!r}'.format(k, v) for k, v in sq.items()]) + self.test_results.add_error(self.test_name, message, *args, '(' + sqstr + ')') + + def _add_language(self, text: str) -> typing.Optional[str]: + r = cld3.get_language(str(text)) # pylint: disable=E1101 + if r is not None and r.probability >= 0.98 and r.is_reliable: + self.languages.add(r.language) + self.test_results.add_language(r.language) + return None + + def _check_result(self, result): + if not _check_no_html(result.get('title', '')): + self._record_error('HTML in title', repr(result.get('title', ''))) + if not _check_no_html(result.get('content', '')): + self._record_error('HTML 
in content', repr(result.get('content', ''))) + + self._add_language(result.get('title', '')) + self._add_language(result.get('content', '')) + + template = result.get('template', 'default.html') + if template == 'default.html': + return + if template == 'code.html': + return + if template == 'torrent.html': + return + if template == 'map.html': + return + if template == 'images.html': + thumbnail_src = result.get('thumbnail_src') + if thumbnail_src is not None: + if not _is_url_image(thumbnail_src): + self._record_error('thumbnail_src URL is invalid', thumbnail_src) + elif not _is_url_image(result.get('img_src')): + self._record_error('img_src URL is invalid', result.get('img_src')) + if template == 'videos.html' and not _is_url_image(result.get('thumbnail')): + self._record_error('thumbnail URL is invalid', result.get('thumbnail')) + + def _check_results(self, results: list): + for result in results: + self._check_result(result) + + def _check_answers(self, answers): + for answer in answers: + if not _check_no_html(answer): + self._record_error('HTML in answer', answer) + + def _check_infoboxes(self, infoboxes): + for infobox in infoboxes: + if not _check_no_html(infobox.get('content', '')): + self._record_error('HTML in infobox content', infobox.get('content', '')) + self._add_language(infobox.get('content', '')) + for attribute in infobox.get('attributes', {}): + if not _check_no_html(attribute.get('value', '')): + self._record_error('HTML in infobox attribute value', attribute.get('value', '')) + + def check_basic(self): + if len(self.result_container.unresponsive_engines) > 0: + for message in self.result_container.unresponsive_engines: + self._record_error(message[1] + ' ' + (message[2] or '')) + self.stop_test = True + return + + results = self.result_container.get_ordered_results() + if len(results) > 0: + self._check_results(results) + + if len(self.result_container.answers) > 0: + self._check_answers(self.result_container.answers) + + if 
len(self.result_container.infoboxes) > 0: + self._check_infoboxes(self.result_container.infoboxes) + + def has_infobox(self): + """Check the ResultContainer has at least one infobox""" + if len(self.result_container.infoboxes) == 0: + self._record_error('No infobox') + + def has_answer(self): + """Check the ResultContainer has at least one answer""" + if len(self.result_container.answers) == 0: + self._record_error('No answer') + + def has_language(self, lang): + """Check at least one title or content of the results is written in the `lang`. + + Detected using pycld3, may be not accurate""" + if lang not in self.languages: + self._record_error(lang + ' not found') + + def not_empty(self): + """Check the ResultContainer has at least one answer or infobox or result""" + result_types = set() + results = self.result_container.get_ordered_results() + if len(results) > 0: + result_types.add('results') + + if len(self.result_container.answers) > 0: + result_types.add('answers') + + if len(self.result_container.infoboxes) > 0: + result_types.add('infoboxes') + + if len(result_types) == 0: + self._record_error('No result') + + def one_title_contains(self, title: str): + """Check one of the title contains `title` (case insensitive comparaison)""" + title = title.lower() + for result in self.result_container.get_ordered_results(): + if title in result['title'].lower(): + return + self._record_error(('{!r} not found in the title'.format(title))) + + +class CheckerTests: + + __slots__ = 'test_results', 'test_name', 'result_container_tests_list' + + def __init__(self, + test_results: TestResults, + test_name: str, + result_container_tests_list: typing.List[ResultContainerTests]): + self.test_results = test_results + self.test_name = test_name + self.result_container_tests_list = result_container_tests_list + + def unique_results(self): + """Check the results of each ResultContain is unique""" + urls_list = [rct.result_urls for rct in self.result_container_tests_list] + if 
len(urls_list[0]) > 0: + # results on the first page + for i, urls_i in enumerate(urls_list): + for j, urls_j in enumerate(urls_list): + if i < j and urls_i == urls_j: + common, diff = _search_query_diff(self.result_container_tests_list[i].search_query, + self.result_container_tests_list[j].search_query) + common_str = ' '.join(['{}={!r}'.format(k, v) for k, v in common.items()]) + diff1_str = ', ' .join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()]) + diff2_str = ', ' .join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()]) + self.test_results.add_error(self.test_name, + 'results are identitical for {} and {} ({})' + .format(diff1_str, diff2_str, common_str)) + + +class Checker: + + __slots__ = 'processor', 'tests', 'test_results' + + def __init__(self, processor: EngineProcessor): + self.processor = processor + self.tests = self.processor.get_tests() + self.test_results = TestResults() + + @property + def engineref_list(self): + engine_name = self.processor.engine_name + engine_category = self.processor.engine.categories[0] + return [EngineRef(engine_name, engine_category)] + + @staticmethod + def search_query_matrix_iterator(engineref_list, matrix): + p = [] + for name, values in matrix.items(): + if isinstance(values, (tuple, list)): + l = [(name, value) for value in values] + else: + l = [(name, values)] + p.append(l) + + for kwargs in itertools.product(*p): + kwargs = {k: v for k, v in kwargs} + query = kwargs['query'] + params = dict(kwargs) + del params['query'] + yield SearchQuery(query, engineref_list, **params) + + def call_test(self, obj, test_description): + if isinstance(test_description, (tuple, list)): + method, args = test_description[0], test_description[1:] + else: + method = test_description + args = () + if isinstance(method, str) and hasattr(obj, method): + getattr(obj, method)(*args) + elif isinstance(method, types.FunctionType): + method(*args) + else: + self.test_results.add_error(obj.test_name, + 'method {!r} ({}) 
not found for {}' + .format(method, method.__class__.__name__, obj.__class__.__name__)) + + def call_tests(self, obj, test_descriptions): + for test_description in test_descriptions: + self.call_test(obj, test_description) + + def search(self, search_query: SearchQuery) -> ResultContainer: + result_container = ResultContainer() + engineref_category = search_query.engineref_list[0].category + params = self.processor.get_params(search_query, engineref_category) + if params is not None: + self.processor.search(search_query.query, params, result_container, time(), 5) + return result_container + + def get_result_container_tests(self, test_name: str, search_query: SearchQuery) -> ResultContainerTests: + result_container = self.search(search_query) + result_container_check = ResultContainerTests(self.test_results, test_name, search_query, result_container) + result_container_check.check_basic() + return result_container_check + + def run_test(self, test_name): + test_parameters = self.tests[test_name] + search_query_list = list(Checker.search_query_matrix_iterator(self.engineref_list, test_parameters['matrix'])) + rct_list = [self.get_result_container_tests(test_name, search_query) for search_query in search_query_list] + stop_test = False + if 'result_container' in test_parameters: + for rct in rct_list: + stop_test = stop_test or rct.stop_test + if not rct.stop_test: + self.call_tests(rct, test_parameters['result_container']) + if not stop_test: + if 'test' in test_parameters: + checker_tests = CheckerTests(self.test_results, test_name, rct_list) + self.call_tests(checker_tests, test_parameters['test']) + + def run(self): + for test_name in self.tests: + self.run_test(test_name) diff --git a/searx/search/models.py b/searx/search/models.py new file mode 100644 index 000000000..80ceaa223 --- /dev/null +++ b/searx/search/models.py @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +import typing + + +class EngineRef: + + __slots__ = 'name', 'category' + + def 
__init__(self, name: str, category: str): + self.name = name + self.category = category + + def __repr__(self): + return "EngineRef({!r}, {!r})".format(self.name, self.category) + + def __eq__(self, other): + return self.name == other.name and self.category == other.category + + def __hash__(self): + return hash((self.name, self.category)) + + +class SearchQuery: + """container for all the search parameters (query, language, etc...)""" + + __slots__ = 'query', 'engineref_list', 'lang', 'safesearch', 'pageno', 'time_range',\ + 'timeout_limit', 'external_bang' + + def __init__(self, + query: str, + engineref_list: typing.List[EngineRef], + lang: str='all', + safesearch: int=0, + pageno: int=1, + time_range: typing.Optional[str]=None, + timeout_limit: typing.Optional[float]=None, + external_bang: typing.Optional[str]=None): + self.query = query + self.engineref_list = engineref_list + self.lang = lang + self.safesearch = safesearch + self.pageno = pageno + self.time_range = time_range + self.timeout_limit = timeout_limit + self.external_bang = external_bang + + @property + def categories(self): + return list(set(map(lambda engineref: engineref.category, self.engineref_list))) + + def __repr__(self): + return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\ + format(self.query, self.engineref_list, self.lang, self.safesearch, + self.pageno, self.time_range, self.timeout_limit, self.external_bang) + + def __eq__(self, other): + return self.query == other.query\ + and self.engineref_list == other.engineref_list\ + and self.lang == other.lang\ + and self.safesearch == other.safesearch\ + and self.pageno == other.pageno\ + and self.time_range == other.time_range\ + and self.timeout_limit == other.timeout_limit\ + and self.external_bang == other.external_bang + + def __hash__(self): + return hash((self.query, tuple(self.engineref_list), self.lang, self.safesearch, self.pageno, self.time_range, + self.timeout_limit, self.external_bang)) diff --git 
a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index cf3fd7236..eb8d296ec 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -37,3 +37,15 @@ class EngineProcessor: @abstractmethod def search(self, query, params, result_container, start_time, timeout_limit): pass + + def get_tests(self): + tests = getattr(self.engine, 'tests', None) + if tests is None: + tests = getattr(self.engine, 'additional_tests', {}) + tests.update(self.get_default_tests()) + return tests + else: + return tests + + def get_default_tests(self): + return {} diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index b62f8059e..0ceb0adf2 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -179,15 +179,15 @@ class OnlineProcessor(EngineProcessor): requests_exception = True elif (issubclass(e.__class__, SearxEngineCaptchaException)): result_container.add_unresponsive_engine(self.engine_name, 'CAPTCHA required') - logger.exception('engine {0} : CAPTCHA') + logger.exception('engine {0} : CAPTCHA'.format(self.engine_name)) suspended_time = e.suspended_time # pylint: disable=no-member elif (issubclass(e.__class__, SearxEngineTooManyRequestsException)): result_container.add_unresponsive_engine(self.engine_name, 'too many requests') - logger.exception('engine {0} : Too many requests') + logger.exception('engine {0} : Too many requests'.format(self.engine_name)) suspended_time = e.suspended_time # pylint: disable=no-member elif (issubclass(e.__class__, SearxEngineAccessDeniedException)): result_container.add_unresponsive_engine(self.engine_name, 'blocked') - logger.exception('engine {0} : Searx is blocked') + logger.exception('engine {0} : Searx is blocked'.format(self.engine_name)) suspended_time = e.suspended_time # pylint: disable=no-member else: result_container.add_unresponsive_engine(self.engine_name, 'unexpected crash') @@ -211,3 +211,49 @@ class 
OnlineProcessor(EngineProcessor): # reset the suspend variables self.engine.continuous_errors = 0 self.engine.suspend_end_time = 0 + + def get_default_tests(self): + tests = {} + + tests['simple'] = { + 'matrix': {'query': ('life', 'computer')}, + 'result_container': ['not_empty'], + } + + if getattr(self.engine, 'paging', False): + tests['paging'] = { + 'matrix': {'query': 'time', + 'pageno': (1, 2, 3)}, + 'result_container': ['not_empty'], + 'test': ['unique_results'] + } + if 'general' in self.engine.categories: + # avoid documentation about HTML tags (<time> and <input type="time">) + tests['paging']['matrix']['query'] = 'news' + + if getattr(self.engine, 'time_range', False): + tests['time_range'] = { + 'matrix': {'query': 'news', + 'time_range': (None, 'day')}, + 'result_container': ['not_empty'], + 'test': ['unique_results'] + } + + if getattr(self.engine, 'lang', False): + tests['lang_fr'] = { + 'matrix': {'query': 'paris', 'lang': 'fr'}, + 'result_container': ['not_empty', ('has_lang', 'fr')], + } + tests['lang_en'] = { + 'matrix': {'query': 'paris', 'lang': 'en'}, + 'result_container': ['not_empty', ('has_lang', 'en')], + } + + if getattr(self.engine, 'safesearch', False): + tests['safesearch'] = { + 'matrix': {'query': 'porn', + 'safesearch': (0, 2)}, + 'test': ['unique_results'] + } + + return tests diff --git a/searx/search/processors/online_currency.py b/searx/search/processors/online_currency.py index f0e919c03..132c10594 100644 --- a/searx/search/processors/online_currency.py +++ b/searx/search/processors/online_currency.py @@ -55,3 +55,13 @@ class OnlineCurrencyProcessor(OnlineProcessor): params['from_name'] = iso4217_to_name(from_currency, 'en') params['to_name'] = iso4217_to_name(to_currency, 'en') return params + + def get_default_tests(self): + tests = {} + + tests['currency'] = { + 'matrix': {'query': '1337 usd in rmb'}, + 'result_container': ['has_answer'], + } + + return tests diff --git a/searx/search/processors/online_dictionary.py 
b/searx/search/processors/online_dictionary.py index 8e9ef1620..987c710a1 100644 --- a/searx/search/processors/online_dictionary.py +++ b/searx/search/processors/online_dictionary.py @@ -35,3 +35,21 @@ class OnlineDictionaryProcessor(OnlineProcessor): params['query'] = query return params + + def get_default_tests(self): + tests = {} + + if getattr(self.engine, 'paging', False): + tests['translation_paging'] = { + 'matrix': {'query': 'en-es house', + 'pageno': (1, 2, 3)}, + 'result_container': ['not_empty', ('one_title_contains', 'house')], + 'test': ['unique_results'] + } + else: + tests['translation'] = { + 'matrix': {'query': 'en-es house'}, + 'result_container': ['not_empty', ('one_title_contains', 'house')], + } + + return tests diff --git a/searx/settings.yml b/searx/settings.yml index e85b4939a..767bf6d82 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1,6 +1,16 @@ general: debug : False # Debug mode, only for development instance_name : "searx" # displayed name + contact_url: False # mailto:contact@example.com + +brand: + git_url: https://github.com/searx/searx + git_branch: master + issue_url: https://github.com/searx/searx/issues + docs_url: https://searx.github.io/searx + public_instances: https://searx.space + wiki_url: https://github.com/searx/searx/wiki + twitter_url: https://twitter.com/Searx_engine search: safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict @@ -92,6 +102,34 @@ outgoing: # communication with search engines # - "HTTPS rewrite" # - ... 
+checker: + # disable checker when in debug mode + off_when_debug: True + # scheduling: interval or int + # use "scheduling: False" to disable scheduling + scheduling: + start_after: [300, 1800] # delay to start the first run of the checker + every: [86400, 90000] # how often the checker runs + # additional tests: only for the YAML anchors (see the engines section) + additional_tests: + rosebud: &test_rosebud + matrix: + query: rosebud + lang: en + result_container: + - not_empty + - ['one_title_contains', 'citizen kane'] + test: + - unique_results + # tests: only for the YAML anchors (see the engines section) + tests: + infobox: &tests_infobox + infobox: + matrix: + query: ["linux", "new york", "bbc"] + result_container: + - has_infobox + engines: - name: apk mirror engine: apkmirror @@ -119,6 +157,13 @@ engines: timeout : 7.0 disabled : True shortcut : ai + about: + website: https://archive.is/ + wikidata_id: Q13515725 + official_api_documentation: http://mementoweb.org/depot/native/archiveis/ + use_official_api: false + require_api_key: false + results: HTML - name : arxiv engine : arxiv @@ -163,6 +208,13 @@ engines: timeout : 4.0 disabled : True shortcut : bb + about: + website: https://bitbucket.org/ + wikidata_id: Q2493781 + official_api_documentation: https://developer.atlassian.com/bitbucket + use_official_api: false + require_api_key: false + results: HTML - name : btdigg engine : btdigg @@ -178,6 +230,13 @@ engines: categories : videos disabled : True shortcut : c3tv + about: + website: https://media.ccc.de/ + wikidata_id: Q80729951 + official_api_documentation: https://github.com/voc/voctoweb + use_official_api: false + require_api_key: false + results: HTML - name : crossref engine : json_engine @@ -188,6 +247,13 @@ engines: content_query : fullCitation categories : science shortcut : cr + about: + website: https://www.crossref.org/ + wikidata_id: Q5188229 + official_api_documentation: https://github.com/CrossRef/rest-api-doc + use_official_api: false + 
require_api_key: false + results: JSON - name : currency engine : currency_convert @@ -208,6 +274,7 @@ engines: shortcut : ddd weight : 2 disabled : True + tests: *tests_infobox # cloudflare protected # - name : digbt @@ -232,6 +299,13 @@ engines: categories : general shortcut : ew disabled : True + about: + website: https://www.erowid.org/ + wikidata_id: Q1430691 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML # - name : elasticsearch # shortcut : es @@ -252,6 +326,7 @@ engines: shortcut : wd timeout : 3.0 weight : 2 + tests: *tests_infobox - name : duckduckgo engine : duckduckgo @@ -268,6 +343,8 @@ engines: engine : etools shortcut : eto disabled : True + additional_tests: + rosebud: *test_rosebud - name : etymonline engine : xpath @@ -279,6 +356,13 @@ engines: first_page_num : 1 shortcut : et disabled : True + about: + website: https://www.etymonline.com/ + wikidata_id: Q1188617 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML # - name : ebay # engine : ebay @@ -318,6 +402,9 @@ engines: search_type : title timeout : 5.0 disabled : True + about: + website: https://directory.fsf.org/ + wikidata_id: Q2470288 - name : frinkiac engine : frinkiac @@ -333,6 +420,8 @@ engines: shortcut : gb timeout : 3.0 disabled: True + additional_tests: + rosebud: *test_rosebud - name : gentoo engine : gentoo @@ -350,6 +439,13 @@ engines: shortcut : gl timeout : 10.0 disabled : True + about: + website: https://about.gitlab.com/ + wikidata_id: Q16639197 + official_api_documentation: https://docs.gitlab.com/ee/api/ + use_official_api: false + require_api_key: false + results: JSON - name : github engine : github @@ -367,6 +463,13 @@ engines: categories : it shortcut : cb disabled : True + about: + website: https://codeberg.org/ + wikidata_id: + official_api_documentation: https://try.gitea.io/api/swagger + use_official_api: false + require_api_key: false + results: JSON - name : google 
engine : google @@ -397,6 +500,13 @@ engines: first_page_num : 0 categories : science shortcut : gos + about: + website: https://scholar.google.com/ + wikidata_id: Q494817 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : google play apps engine : xpath @@ -409,6 +519,13 @@ engines: categories : files shortcut : gpa disabled : True + about: + website: https://play.google.com/ + wikidata_id: Q79576 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : google play movies engine : xpath @@ -421,6 +538,13 @@ engines: categories : videos shortcut : gpm disabled : True + about: + website: https://play.google.com/ + wikidata_id: Q79576 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : google play music engine : xpath @@ -433,6 +557,13 @@ engines: categories : music shortcut : gps disabled : True + about: + website: https://play.google.com/ + wikidata_id: Q79576 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : geektimes engine : xpath @@ -445,6 +576,13 @@ engines: timeout : 4.0 disabled : True shortcut : gt + about: + website: https://geektimes.ru/ + wikidata_id: Q50572423 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : habrahabr engine : xpath @@ -457,6 +595,13 @@ engines: timeout : 4.0 disabled : True shortcut : habr + about: + website: https://habr.com/ + wikidata_id: Q4494434 + official_api_documentation: https://habr.com/en/docs/help/api/ + use_official_api: false + require_api_key: false + results: HTML - name : hoogle engine : json_engine @@ -469,6 +614,13 @@ engines: page_size : 20 categories : it shortcut : ho + about: + website: https://www.haskell.org/ + wikidata_id: Q34010 + official_api_documentation: https://hackage.haskell.org/api + use_official_api: false + require_api_key: false + 
results: JSON - name : ina engine : ina @@ -478,7 +630,9 @@ engines: - name : invidious engine : invidious - base_url : 'https://invidio.us/' + base_url : + - https://invidious.tube/ + - https://invidious.snopyta.org/ shortcut: iv timeout : 5.0 disabled : True @@ -499,6 +653,13 @@ engines: timeout : 7.0 disabled : True shortcut : lg + about: + website: http://libgen.rs/ + wikidata_id: Q22017206 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : lobste.rs engine : xpath @@ -511,6 +672,13 @@ engines: shortcut : lo timeout : 3.0 disabled: True + about: + website: https://lobste.rs/ + wikidata_id: Q60762874 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : metager engine : xpath @@ -522,6 +690,13 @@ engines: categories : general shortcut : mg disabled : True + about: + website: https://metager.org/ + wikidata_id: Q1924645 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : microsoft academic engine : microsoft_academic @@ -545,6 +720,13 @@ engines: disabled: True timeout: 5.0 shortcut : npm + about: + website: https://npms.io/ + wikidata_id: Q7067518 + official_api_documentation: https://api-docs.npms.io/ + use_official_api: false + require_api_key: false + results: JSON # Requires Tor - name : not evil @@ -573,6 +755,13 @@ engines: categories : science shortcut : oad timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON - name : openairepublications engine : json_engine @@ -585,6 +774,13 @@ engines: categories : science shortcut : oap timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON # - name : 
opensemanticsearch # engine : opensemantic @@ -606,6 +802,13 @@ engines: timeout : 4.0 disabled : True shortcut : or + about: + website: https://openrepos.net/ + wikidata_id: + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : pdbe engine : pdbe @@ -636,6 +839,8 @@ engines: shortcut : qw categories : general disabled : True + additional_tests: + rosebud: *test_rosebud - name : qwant images engine : qwant @@ -722,6 +927,13 @@ engines: content_xpath : .//div[@class="search-result-abstract"] shortcut : se categories : science + about: + website: https://www.semanticscholar.org/ + wikidata_id: Q22908627 + official_api_documentation: https://api.semanticscholar.org/ + use_official_api: false + require_api_key: false + results: HTML # Spotify needs API credentials # - name : spotify @@ -735,6 +947,8 @@ engines: shortcut : sp timeout : 6.0 disabled : True + additional_tests: + rosebud: *test_rosebud - name : tokyotoshokan engine : tokyotoshokan @@ -838,6 +1052,9 @@ engines: number_of_results : 5 search_type : text disabled : True + about: + website: https://www.wikibooks.org/ + wikidata_id: Q367 - name : wikinews engine : mediawiki @@ -847,6 +1064,9 @@ engines: number_of_results : 5 search_type : text disabled : True + about: + website: https://www.wikinews.org/ + wikidata_id: Q964 - name : wikiquote engine : mediawiki @@ -856,6 +1076,11 @@ engines: number_of_results : 5 search_type : text disabled : True + additional_tests: + rosebud: *test_rosebud + about: + website: https://www.wikiquote.org/ + wikidata_id: Q369 - name : wikisource engine : mediawiki @@ -865,6 +1090,9 @@ engines: number_of_results : 5 search_type : text disabled : True + about: + website: https://www.wikisource.org/ + wikidata_id: Q263 - name : wiktionary engine : mediawiki @@ -874,6 +1102,9 @@ engines: number_of_results : 5 search_type : text disabled : True + about: + website: https://www.wiktionary.org/ + wikidata_id: Q151 - name : wikiversity 
engine : mediawiki @@ -883,6 +1114,9 @@ engines: number_of_results : 5 search_type : text disabled : True + about: + website: https://www.wikiversity.org/ + wikidata_id: Q370 - name : wikivoyage engine : mediawiki @@ -892,6 +1126,9 @@ engines: number_of_results : 5 search_type : text disabled : True + about: + website: https://www.wikivoyage.org/ + wikidata_id: Q373 - name : wolframalpha shortcut : wa @@ -939,6 +1176,13 @@ engines: first_page_num : 0 page_size : 10 disabled : True + about: + website: https://www.seznam.cz/ + wikidata_id: Q3490485 + official_api_documentation: https://api.sklik.cz/ + use_official_api: false + require_api_key: false + results: HTML - name : mojeek shortcut: mjk @@ -953,6 +1197,13 @@ engines: first_page_num : 0 page_size : 10 disabled : True + about: + website: https://www.mojeek.com/ + wikidata_id: Q60747299 + official_api_documentation: https://www.mojeek.com/services/api.html/ + use_official_api: false + require_api_key: false + results: HTML - name : naver shortcut: nvr @@ -967,6 +1218,13 @@ engines: first_page_num : 1 page_size : 10 disabled : True + about: + website: https://www.naver.com/ + wikidata_id: Q485639 + official_api_documentation: https://developers.naver.com/docs/nmt/examples/ + use_official_api: false + require_api_key: false + results: HTML - name : rubygems shortcut: rbg @@ -981,6 +1239,13 @@ engines: first_page_num : 1 categories: it disabled : True + about: + website: https://rubygems.org/ + wikidata_id: Q1853420 + official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ + use_official_api: false + require_api_key: false + results: HTML - name : peertube engine: peertube diff --git a/searx/shared/__init__.py b/searx/shared/__init__.py new file mode 100644 index 000000000..cbe24d239 --- /dev/null +++ b/searx/shared/__init__.py @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +import logging + +logger = logging.getLogger('searx.shared') + +try: + import uwsgi +except: + # no uwsgi 
+ from .shared_simple import SimpleSharedDict as SharedDict, schedule + logger.info('Use shared_simple implementation') +else: + try: + uwsgi.cache_update('dummy', b'dummy') + if uwsgi.cache_get('dummy') != b'dummy': + raise Exception() + except: + # uwsgi.ini configuration problem: disable all scheduling + logger.error('uwsgi.ini configuration error, add this line to your uwsgi.ini\n' + 'cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1') + from .shared_simple import SimpleSharedDict as SharedDict + + def schedule(delay, func, *args): + return False + else: + # uwsgi + from .shared_uwsgi import UwsgiCacheSharedDict as SharedDict, schedule + logger.info('Use shared_uwsgi implementation') + +storage = SharedDict() diff --git a/searx/shared/shared_abstract.py b/searx/shared/shared_abstract.py new file mode 100644 index 000000000..b1c72aabe --- /dev/null +++ b/searx/shared/shared_abstract.py @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +from abc import ABC, abstractmethod + + +class SharedDict(ABC): + + @abstractmethod + def get_int(self, key): + pass + + @abstractmethod + def set_int(self, key, value): + pass + + @abstractmethod + def get_str(self, key): + pass + + @abstractmethod + def set_str(self, key, value): + pass diff --git a/searx/shared/shared_simple.py b/searx/shared/shared_simple.py new file mode 100644 index 000000000..48d8cb822 --- /dev/null +++ b/searx/shared/shared_simple.py @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +import threading + +from . 
import shared_abstract + + +class SimpleSharedDict(shared_abstract.SharedDict): + + __slots__ = 'd', + + def __init__(self): + self.d = {} + + def get_int(self, key): + return self.d.get(key, None) + + def set_int(self, key, value): + self.d[key] = value + + def get_str(self, key): + return self.d.get(key, None) + + def set_str(self, key, value): + self.d[key] = value + + +def schedule(delay, func, *args): + def call_later(): + t = threading.Timer(delay, wrapper) + t.daemon = True + t.start() + + def wrapper(): + call_later() + func(*args) + + call_later() + return True diff --git a/searx/shared/shared_uwsgi.py b/searx/shared/shared_uwsgi.py new file mode 100644 index 000000000..a6dba9f59 --- /dev/null +++ b/searx/shared/shared_uwsgi.py @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +import time +import uwsgi # pylint: disable=E0401 +from . import shared_abstract + + +_last_signal = 10 + + +class UwsgiCacheSharedDict(shared_abstract.SharedDict): + + def get_int(self, key): + value = uwsgi.cache_get(key) + if value is None: + return value + else: + return int.from_bytes(value, 'big') + + def set_int(self, key, value): + b = value.to_bytes(4, 'big') + uwsgi.cache_update(key, b) + + def get_str(self, key): + value = uwsgi.cache_get(key) + if value is None: + return value + else: + return value.decode('utf-8') + + def set_str(self, key, value): + b = value.encode('utf-8') + uwsgi.cache_update(key, b) + + +def schedule(delay, func, *args): + """ + Can be implemented using a spooler. + https://uwsgi-docs.readthedocs.io/en/latest/PythonDecorators.html + + To make the uwsgi configuration simple, use the alternative implementation. 
+ """ + global _last_signal + + def sighandler(signum): + now = int(time.time()) + key = 'scheduler_call_time_signal_' + str(signum) + uwsgi.lock() + try: + updating = uwsgi.cache_get(key) + if updating is not None: + updating = int.from_bytes(updating, 'big') + if now - updating < delay: + return + uwsgi.cache_update(key, now.to_bytes(4, 'big')) + finally: + uwsgi.unlock() + func(*args) + + signal_num = _last_signal + _last_signal += 1 + uwsgi.register_signal(signal_num, 'worker', sighandler) + uwsgi.add_timer(signal_num, delay) + return True diff --git a/searx/templates/oscar/results.html b/searx/templates/oscar/results.html index 7f6071374..eada8cd26 100644 --- a/searx/templates/oscar/results.html +++ b/searx/templates/oscar/results.html @@ -57,7 +57,7 @@ </div> {%- endif %} - <div class="panel panel-default"> + <div class="panel panel-default hidden-xs"> <div class="panel-heading">{{- "" -}} <h4 class="panel-title">{{ _('Links') }}</h4>{{- "" -}} </div> diff --git a/searx/webapp.py b/searx/webapp.py index 49750d210..985eced18 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -71,7 +71,8 @@ from searx.webadapter import get_search_query_from_webapp, get_selected_categori from searx.utils import html_to_text, gen_useragent, dict_subset, match_language from searx.version import VERSION_STRING from searx.languages import language_codes as languages -from searx.search import SearchWithPlugins, initialize +from searx.search import SearchWithPlugins, initialize as search_initialize +from searx.search.checker import get_result as checker_get_result from searx.query import RawTextQuery from searx.autocomplete import searx_bang, backends as autocomplete_backends from searx.plugins import plugins @@ -81,7 +82,6 @@ from searx.answerers import answerers from searx.poolrequests import get_global_proxies from searx.metrology.error_recorder import errors_per_engines - # serve pages with HTTP/1.1 from werkzeug.serving import WSGIRequestHandler 
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0')) @@ -136,7 +136,7 @@ werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__mai # initialize the engines except on the first run of the werkzeug server. if not werkzeug_reloader\ or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_MAIN") == "true"): - initialize() + search_initialize(enable_checker=True) babel = Babel(app) @@ -977,6 +977,12 @@ def stats_errors(): return jsonify(result) +@app.route('/stats/checker', methods=['GET']) +def stats_checker(): + result = checker_get_result() + return jsonify(result) + + @app.route('/robots.txt', methods=['GET']) def robots(): return Response("""User-agent: * @@ -1071,6 +1077,7 @@ def config(): 'default_theme': settings['ui']['default_theme'], 'version': VERSION_STRING, 'brand': { + 'CONTACT_URL': brand.CONTACT_URL, 'GIT_URL': brand.GIT_URL, 'DOCS_URL': brand.DOCS_URL }, @@ -7,10 +7,8 @@ from setuptools import find_packages import os import sys -# required to load VERSION_STRING constant -sys.path.insert(0, './searx') -from version import VERSION_STRING -import brand +from searx.version import VERSION_STRING +from searx import brand with open('README.rst', encoding='utf-8') as f: long_description = f.read() @@ -51,7 +49,8 @@ setup( }, entry_points={ 'console_scripts': [ - 'searx-run = searx.webapp:run' + 'searx-run = searx.webapp:run', + 'searx-checker = searx.search.checker.__main__:main' ] }, package_data={ diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index eff4d643c..0176f735e 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -59,6 +59,15 @@ class TestQuery(SearxTestCase): self.assertEqual(len(query.languages), 0) self.assertFalse(query.specific) + def test_empty_colon_in_query(self): + query_text = 'the : query' + query = RawTextQuery(query_text, []) + + self.assertEqual(query.getFullQuery(), query_text) + 
self.assertEqual(len(query.query_parts), 0) + self.assertEqual(len(query.languages), 0) + self.assertFalse(query.specific) + def test_timeout_below100(self): query_text = '<3 the query' query = RawTextQuery(query_text, []) diff --git a/utils/brand.env b/utils/brand.env index 2e763159d..2136d278f 100644 --- a/utils/brand.env +++ b/utils/brand.env @@ -1,7 +1,9 @@ +export SEARX_URL='' export GIT_URL='https://github.com/searx/searx' export GIT_BRANCH='master' export ISSUE_URL='https://github.com/searx/searx/issues' -export SEARX_URL='https://searx.me' export DOCS_URL='https://searx.github.io/searx' export PUBLIC_INSTANCES='https://searx.space' export CONTACT_URL='' +export WIKI_URL='https://github.com/searx/searx/wiki' +export TWITTER_URL='https://twitter.com/Searx_engine' diff --git a/utils/build_env.py b/utils/build_env.py new file mode 100644 index 000000000..ffb2689e9 --- /dev/null +++ b/utils/build_env.py @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""build environment used by shell scripts +""" + +# set path +import sys +import os +from os.path import realpath, dirname, join, sep, abspath + +repo_root = realpath(dirname(realpath(__file__)) + sep + '..') +sys.path.insert(0, repo_root) +os.environ['SEARX_SETTINGS_PATH'] = abspath(dirname(__file__) + '/settings.yml') + +# Under the assumption that a brand is always a fork assure that the settings +# file from reposetorie's working tree is used to generate the build_env, not +# from /etc/searx/settings.yml. 
+os.environ['SEARX_SETTINGS_PATH'] = abspath(dirname(__file__) + sep + 'settings.yml') + +from searx import brand + +name_val = [ + ('SEARX_URL' , brand.SEARX_URL), + ('GIT_URL' , brand.GIT_URL), + ('GIT_BRANCH' , brand.GIT_BRANCH), + ('ISSUE_URL' , brand.ISSUE_URL), + ('DOCS_URL' , brand.DOCS_URL), + ('PUBLIC_INSTANCES' , brand.PUBLIC_INSTANCES), + ('CONTACT_URL' , brand.CONTACT_URL), + ('WIKI_URL' , brand.WIKI_URL), + ('TWITTER_URL' , brand.TWITTER_URL), +] + +brand_env = 'utils' + sep + 'brand.env' + +print('build %s' % brand_env) +with open(repo_root + sep + brand_env, 'w', encoding='utf-8') as f: + for name, val in name_val: + print("export %s='%s'" % (name, val), file=f) diff --git a/utils/fetch_engine_descriptions.py b/utils/fetch_engine_descriptions.py new file mode 100644 index 000000000..9ca001d45 --- /dev/null +++ b/utils/fetch_engine_descriptions.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python + +import sys +import json +from urllib.parse import quote, urlparse +from os.path import realpath, dirname +import cld3 +from lxml.html import fromstring + +# set path +sys.path.append(realpath(dirname(realpath(__file__)) + '/../')) + +from searx.engines.wikidata import send_wikidata_query +from searx.utils import extract_text +import searx +import searx.search +import searx.poolrequests + +SPARQL_WIKIPEDIA_ARTICLE = """ +SELECT DISTINCT ?item ?name +WHERE { + VALUES ?item { %IDS% } + ?article schema:about ?item ; + schema:inLanguage ?lang ; + schema:name ?name ; + schema:isPartOf [ wikibase:wikiGroup "wikipedia" ] . + FILTER(?lang in (%LANGUAGES_SPARQL%)) . + FILTER (!CONTAINS(?name, ':')) . +} +""" + +SPARQL_DESCRIPTION = """ +SELECT DISTINCT ?item ?itemDescription +WHERE { + VALUES ?item { %IDS% } + ?item schema:description ?itemDescription . 
+ FILTER (lang(?itemDescription) in (%LANGUAGES_SPARQL%)) +} +ORDER BY ?itemLang +""" + +LANGUAGES = searx.settings['locales'].keys() +LANGUAGES_SPARQL = ', '.join(set(map(lambda l: repr(l.split('_')[0]), LANGUAGES))) +IDS = None + +descriptions = {} +wd_to_engine_name = {} + + +def normalize_description(description): + for c in [chr(c) for c in range(0, 31)]: + description = description.replace(c, ' ') + description = ' '.join(description.strip().split()) + return description + + +def update_description(engine_name, lang, description, source, replace=True): + if replace or lang not in descriptions[engine_name]: + descriptions[engine_name][lang] = [normalize_description(description), source] + + +def get_wikipedia_summary(language, pageid): + search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' + url = search_url.format(title=quote(pageid), language=language) + try: + response = searx.poolrequests.get(url) + response.raise_for_status() + api_result = json.loads(response.text) + return api_result.get('extract') + except: + return None + + +def detect_language(text): + r = cld3.get_language(str(text)) # pylint: disable=E1101 + if r is not None and r.probability >= 0.98 and r.is_reliable: + return r.language + return None + + +def get_website_description(url, lang1, lang2=None): + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'DNT': '1', + 'Upgrade-Insecure-Requests': '1', + 'Sec-GPC': '1', + 'Cache-Control': 'max-age=0', + } + if lang1 is not None: + lang_list = [lang1] + if lang2 is not None: + lang_list.append(lang2) + headers['Accept-Language'] = f'{",".join(lang_list)};q=0.8' + try: + response = searx.poolrequests.get(url, headers=headers, timeout=10) + response.raise_for_status() + except Exception: + return (None, None) + + try: + html = fromstring(response.text) + except 
ValueError: + html = fromstring(response.content) + + description = extract_text(html.xpath('/html/head/meta[@name="description"]/@content')) + if not description: + description = extract_text(html.xpath('/html/head/meta[@property="og:description"]/@content')) + if not description: + description = extract_text(html.xpath('/html/head/title')) + lang = extract_text(html.xpath('/html/@lang')) + if lang is None and len(lang1) > 0: + lang = lang1 + lang = detect_language(description) or lang or 'en' + lang = lang.split('_')[0] + lang = lang.split('-')[0] + return (lang, description) + + +def initialize(): + global descriptions, wd_to_engine_name, IDS + searx.search.initialize() + for engine_name, engine in searx.engines.engines.items(): + descriptions[engine_name] = {} + wikidata_id = getattr(engine, "about", {}).get('wikidata_id') + if wikidata_id is not None: + wd_to_engine_name.setdefault(wikidata_id, set()).add(engine_name) + + IDS = ' '.join(list(map(lambda wd_id: 'wd:' + wd_id, wd_to_engine_name.keys()))) + + +def fetch_wikidata_descriptions(): + global IDS + result = send_wikidata_query(SPARQL_DESCRIPTION + .replace('%IDS%', IDS) + .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)) + if result is not None: + for binding in result['results']['bindings']: + wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '') + lang = binding['itemDescription']['xml:lang'] + description = binding['itemDescription']['value'] + if ' ' in description: # skip unique word description (like "website") + for engine_name in wd_to_engine_name[wikidata_id]: + update_description(engine_name, lang, description, 'wikidata') + + +def fetch_wikipedia_descriptions(): + global IDS + result = send_wikidata_query(SPARQL_WIKIPEDIA_ARTICLE + .replace('%IDS%', IDS) + .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)) + if result is not None: + for binding in result['results']['bindings']: + wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', 
'') + lang = binding['name']['xml:lang'] + pageid = binding['name']['value'] + description = get_wikipedia_summary(lang, pageid) + if description is not None and ' ' in description: + for engine_name in wd_to_engine_name[wikidata_id]: + update_description(engine_name, lang, description, 'wikipedia') + + +def normalize_url(url): + url = url.replace('{language}', 'en') + url = urlparse(url)._replace(path='/', params='', query='', fragment='').geturl() + url = url.replace('https://api.', 'https://') + return url + + +def fetch_website_description(engine_name, website): + default_lang, default_description = get_website_description(website, None, None) + if default_lang is None or default_description is None: + return + if default_lang not in descriptions[engine_name]: + descriptions[engine_name][default_lang] = [normalize_description(default_description), website] + for request_lang in ('en-US', 'es-US', 'fr-FR', 'zh', 'ja', 'ru', 'ar', 'ko'): + if request_lang.split('-')[0] not in descriptions[engine_name]: + lang, desc = get_website_description(website, request_lang, request_lang.split('-')[0]) + if desc is not None and desc != default_description: + update_description(engine_name, lang, desc, website, replace=False) + else: + break + + +def fetch_website_descriptions(): + for engine_name, engine in searx.engines.engines.items(): + website = getattr(engine, "about", {}).get('website') + if website is None: + website = normalize_url(getattr(engine, "search_url")) + if website is None: + website = normalize_url(getattr(engine, "base_url")) + if website is not None: + fetch_website_description(engine_name, website) + + +def main(): + initialize() + fetch_wikidata_descriptions() + fetch_wikipedia_descriptions() + fetch_website_descriptions() + + sys.stdout.write(json.dumps(descriptions, indent=1, separators=(',', ':'), ensure_ascii=False)) + + +if __name__ == "__main__": + main() diff --git a/utils/makefile.python b/utils/makefile.python index 5d0837e00..345434384 100644 
--- a/utils/makefile.python +++ b/utils/makefile.python @@ -243,8 +243,10 @@ pyenv-python: pyenvinstall # [2] https://github.com/pypa/pip/pull/1519 # https://github.com/pypa/twine -PHONY += upload-pypi +PHONY += upload-pypi upload-pypi-test upload-pypi: pyclean pyenvinstall pybuild @$(PY_ENV_BIN)/twine upload $(PYDIST)/* +upload-pypi-test: pyclean pyenvinstall pybuild + @$(PY_ENV_BIN)/twine upload -r testpypi $(PYDIST)/* .PHONY: $(PHONY) diff --git a/utils/searx.sh b/utils/searx.sh index b7d3b8e1c..f85935fa2 100755 --- a/utils/searx.sh +++ b/utils/searx.sh @@ -46,6 +46,7 @@ SEARX_PACKAGES_debian="\ python3-dev python3-babel python3-venv uwsgi uwsgi-plugin-python3 git build-essential libxslt-dev zlib1g-dev libffi-dev libssl-dev +libprotobuf-dev protobuf-compiler shellcheck" BUILD_PACKAGES_debian="\ @@ -58,6 +59,7 @@ SEARX_PACKAGES_arch="\ python python-pip python-lxml python-babel uwsgi uwsgi-plugin-python git base-devel libxml2 +protobuf shellcheck" BUILD_PACKAGES_arch="\ @@ -69,7 +71,7 @@ SEARX_PACKAGES_fedora="\ python python-pip python-lxml python-babel uwsgi uwsgi-plugin-python3 git @development-tools libxml2 -ShellCheck" +ShellCheck protobuf-compiler protobuf-devel" BUILD_PACKAGES_fedora="\ firefox graphviz graphviz-gd ImageMagick librsvg2-tools @@ -82,7 +84,7 @@ SEARX_PACKAGES_centos="\ python36 python36-pip python36-lxml python-babel uwsgi uwsgi-plugin-python3 git @development-tools libxml2 -ShellCheck" +ShellCheck protobuf-compiler protobuf-devel" BUILD_PACKAGES_centos="\ firefox graphviz graphviz-gd ImageMagick librsvg2-tools @@ -331,6 +333,7 @@ git pull pip install -U pip pip install -U setuptools pip install -U wheel +pip install -U pyyaml pip install -U -e . EOF install_settings @@ -501,6 +504,7 @@ EOF pip install -U pip pip install -U setuptools pip install -U wheel +pip install -U pyyaml pip install -U -e . cd ${SEARX_SRC} pip install -e . 
diff --git a/utils/templates/etc/uwsgi/apps-archlinux/searx.ini b/utils/templates/etc/uwsgi/apps-archlinux/searx.ini index 9dd2e6f2f..71cece3c4 100644 --- a/utils/templates/etc/uwsgi/apps-archlinux/searx.ini +++ b/utils/templates/etc/uwsgi/apps-archlinux/searx.ini @@ -82,4 +82,7 @@ http = ${SEARX_INTERNAL_HTTP} # mkdir -p /run/uwsgi/app/searx # chown -R ${SERVICE_USER}:${SERVICE_GROUP} /run/uwsgi/app/searx # -# socket = /run/uwsgi/app/searx/socket
\ No newline at end of file +# socket = /run/uwsgi/app/searx/socket + +# Cache +cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1 diff --git a/utils/templates/etc/uwsgi/apps-available/searx.ini b/utils/templates/etc/uwsgi/apps-available/searx.ini index 4d69da0cf..45214ef13 100644 --- a/utils/templates/etc/uwsgi/apps-available/searx.ini +++ b/utils/templates/etc/uwsgi/apps-available/searx.ini @@ -82,3 +82,6 @@ http = ${SEARX_INTERNAL_HTTP} # chown -R ${SERVICE_USER}:${SERVICE_GROUP} /run/uwsgi/app/searx # # socket = /run/uwsgi/app/searx/socket + +# Cache +cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1 |