diff options
| author | Alexandre Flament <alex@al-f.net> | 2021-01-13 11:31:25 +0100 |
|---|---|---|
| committer | Alexandre Flament <alex@al-f.net> | 2021-01-14 20:57:17 +0100 |
| commit | a4dcfa025c690dc4c824b2261242748a331a97e8 (patch) | |
| tree | 2085c05e8ba319411a4b729c589e660d973a5c7c /searx | |
| parent | 5a511f0d620038b8e94c581bcfd3c987082b9414 (diff) | |
[enh] engines: add about variable
move meta information from comment to the about variable
so that the preferences and the documentation can show this information
Diffstat (limited to 'searx')
89 files changed, 1215 insertions, 725 deletions
diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py index 18478876a..9cc7c1b79 100644 --- a/searx/engines/1337x.py +++ b/searx/engines/1337x.py @@ -1,7 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + 1337x +""" + from urllib.parse import quote, urljoin from lxml import html from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://1337x.to/', + "wikidata_id": 'Q28134166', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} url = 'https://1337x.to/' search_url = url + 'search/{search_term}/{pageno}/' diff --git a/searx/engines/acgsou.py b/searx/engines/acgsou.py index 637443edc..ea9793f10 100644 --- a/searx/engines/acgsou.py +++ b/searx/engines/acgsou.py @@ -1,18 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Acgsou (Japanese Animation/Music/Comics Bittorrent tracker) - - @website https://www.acgsou.com/ - @provide-api no - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, content, seed, leech, torrentfile """ from urllib.parse import urlencode from lxml import html from searx.utils import extract_text, get_torrent_size, eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://www.acgsou.com/', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['files', 'images', 'videos', 'music'] paging = True diff --git a/searx/engines/ahmia.py b/searx/engines/ahmia.py index 7a2ae0075..6c502bb40 100644 --- a/searx/engines/ahmia.py +++ b/searx/engines/ahmia.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Ahmia (Onions) - - @website http://msydqstlz2kzerdg.onion - @provides-api no - - @using-api no - @results HTML - @stable no - @parse url, title, content """ from urllib.parse import 
urlencode, urlparse, parse_qs from lxml.html import fromstring from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval_xpath +# about +about = { + "website": 'http://msydqstlz2kzerdg.onion', + "wikidata_id": 'Q18693938', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine config categories = ['onions'] paging = True diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py index 3a948dcb4..a4c66e891 100644 --- a/searx/engines/apkmirror.py +++ b/searx/engines/apkmirror.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ APK Mirror - - @website https://www.apkmirror.com - - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, thumbnail_src """ from urllib.parse import urlencode from lxml import html from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://www.apkmirror.com', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} # engine dependent config categories = ['it'] diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index 04117c07d..d29d65ba3 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -1,20 +1,24 @@ -# -*- coding: utf-8 -*- - +# SPDX-License-Identifier: AGPL-3.0-or-later """ Arch Linux Wiki - @website https://wiki.archlinux.org - @provide-api no (Mediawiki provides API, but Arch Wiki blocks access to it - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title + API: Mediawiki provides API, but Arch Wiki blocks access to it """ from urllib.parse import urlencode, urljoin from lxml import html from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://wiki.archlinux.org/', + "wikidata_id": 'Q101445877', + 
"official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['it'] language_support = True diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py index 1190de363..09ea07ea5 100644 --- a/searx/engines/arxiv.py +++ b/searx/engines/arxiv.py @@ -1,20 +1,21 @@ -#!/usr/bin/env python - +# SPDX-License-Identifier: AGPL-3.0-or-later """ ArXiV (Scientific preprints) - @website https://arxiv.org - @provide-api yes (export.arxiv.org/api/query) - @using-api yes - @results XML-RSS - @stable yes - @parse url, title, publishedDate, content - More info on api: https://arxiv.org/help/api/user-manual """ from lxml import html from datetime import datetime from searx.utils import eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://arxiv.org', + "wikidata_id": 'Q118398', + "official_api_documentation": 'https://arxiv.org/help/api', + "use_official_api": True, + "require_api_key": False, + "results": 'XML-RSS', +} categories = ['science'] paging = True diff --git a/searx/engines/base.py b/searx/engines/base.py index 3648d7ed0..463274681 100755 --- a/searx/engines/base.py +++ b/searx/engines/base.py @@ -1,16 +1,6 @@ -#!/usr/bin/env python - +# SPDX-License-Identifier: AGPL-3.0-or-later """ BASE (Scholar publications) - - @website https://base-search.net - @provide-api yes with authorization (https://api.base-search.net/) - - @using-api yes - @results XML - @stable ? 
- @parse url, title, publishedDate, content - More info on api: http://base-search.net/about/download/base_interface.pdf """ from urllib.parse import urlencode @@ -19,6 +9,15 @@ from datetime import datetime import re from searx.utils import searx_useragent +# about +about = { + "website": 'https://base-search.net', + "wikidata_id": 'Q448335', + "official_api_documentation": 'https://api.base-search.net/', + "use_official_api": True, + "require_api_key": False, + "results": 'XML', +} categories = ['science'] diff --git a/searx/engines/bing.py b/searx/engines/bing.py index f0882fcc9..edf6baef9 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -1,16 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Bing (Web) - - @website https://www.bing.com - @provide-api yes (http://datamarket.azure.com/dataset/bing/search), - max. 5000 query/month - - @using-api no (because of query limit) - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content - - @todo publishedDate """ import re @@ -21,6 +11,16 @@ from searx.utils import eval_xpath, extract_text, match_language logger = logger.getChild('bing engine') +# about +about = { + "website": 'https://www.bing.com', + "wikidata_id": 'Q182496', + "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] paging = True diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 2bcf82b84..b4ca57f4b 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -1,15 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Bing (Images) - - @website https://www.bing.com/images - @provide-api yes (http://datamarket.azure.com/dataset/bing/search), - max. 
5000 query/month - - @using-api no (because of query limit) - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, img_src - """ from urllib.parse import urlencode @@ -20,6 +11,16 @@ from searx.utils import match_language from searx.engines.bing import language_aliases from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +# about +about = { + "website": 'https://www.bing.com/images', + "wikidata_id": 'Q182496', + "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['images'] paging = True diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index b95def48b..2e4b78278 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -1,14 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Bing (News) - - @website https://www.bing.com/news - @provide-api yes (http://datamarket.azure.com/dataset/bing/search), - max. 
5000 query/month - - @using-api no (because of query limit) - @results RSS (using search portal) - @stable yes (except perhaps for the images) - @parse url, title, content, publishedDate, thumbnail """ from datetime import datetime @@ -20,6 +12,16 @@ from searx.utils import match_language, eval_xpath_getindex from searx.engines.bing import language_aliases from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +# about +about = { + "website": 'https://www.bing.com/news', + "wikidata_id": 'Q2878637', + "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-news-search-api', + "use_official_api": False, + "require_api_key": False, + "results": 'RSS', +} + # engine dependent config categories = ['news'] paging = True diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index 143c71a3e..b4584bb37 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Bing (Videos) - - @website https://www.bing.com/videos - @provide-api yes (http://datamarket.azure.com/dataset/bing/search) - - @using-api no - @results HTML - @stable no - @parse url, title, content, thumbnail """ from json import loads @@ -18,6 +11,16 @@ from searx.utils import match_language from searx.engines.bing import language_aliases from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +# about +about = { + "website": 'https://www.bing.com/videos', + "wikidata_id": 'Q4914152', + "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-video-search-api', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + categories = ['videos'] paging = True safesearch = True diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index 72bda8d20..863396f6e 100644 --- a/searx/engines/btdigg.py +++ 
b/searx/engines/btdigg.py @@ -1,19 +1,25 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ BTDigg (Videos, Music, Files) - - @website https://btdig.com - @provide-api yes (on demand) - - @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content, seed, leech, magnetlink """ from lxml import html from urllib.parse import quote, urljoin from searx.utils import extract_text, get_torrent_size +# about +about = { + "website": 'https://btdig.com', + "wikidata_id": 'Q4836698', + "official_api_documentation": { + 'url': 'https://btdig.com/contacts', + 'comment': 'on demand' + }, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['videos', 'music', 'files'] paging = True diff --git a/searx/engines/command.py b/searx/engines/command.py index 6321e0004..33270d245 100644 --- a/searx/engines/command.py +++ b/searx/engines/command.py @@ -1,18 +1,7 @@ -''' -searx is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -searx is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with searx. If not, see < http://www.gnu.org/licenses/ >. 
-''' - +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Command (offline) +""" import re from os.path import expanduser, isabs, realpath, commonprefix diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index 7098dd3c7..d4c3b5f81 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -1,5 +1,19 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + currency convert (DuckDuckGo) +""" + import json +# about +about = { + "website": 'https://duckduckgo.com/', + "wikidata_id": 'Q12805', + "official_api_documentation": 'https://duckduckgo.com/api', + "use_official_api": False, + "require_api_key": False, + "results": 'JSONP', +} engine_type = 'online_currency' categories = [] diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 1e24e41da..874e0f42a 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -1,15 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Dailymotion (Videos) - - @website https://www.dailymotion.com - @provide-api yes (http://www.dailymotion.com/developer) - - @using-api yes - @results JSON - @stable yes - @parse url, title, thumbnail, publishedDate, embedded - - @todo set content-parameter with correct data """ from json import loads @@ -17,6 +8,16 @@ from datetime import datetime from urllib.parse import urlencode from searx.utils import match_language, html_to_text +# about +about = { + "website": 'https://www.dailymotion.com', + "wikidata_id": 'Q769222', + "official_api_documentation": 'https://www.dailymotion.com/developer', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['videos'] paging = True diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py index 48c0429a7..946bd3ebe 100644 --- a/searx/engines/deezer.py +++ b/searx/engines/deezer.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Deezer (Music) - - @website 
https://deezer.com - @provide-api yes (http://developers.deezer.com/api/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, embedded """ from json import loads from urllib.parse import urlencode +# about +about = { + "website": 'https://deezer.com', + "wikidata_id": 'Q602243', + "official_api_documentation": 'https://developers.deezer.com/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['music'] paging = True diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index 0378929b2..7840495e1 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -1,21 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Deviantart (Images) - - @website https://www.deviantart.com/ - @provide-api yes (https://www.deviantart.com/developers/) (RSS) - - @using-api no (TODO, rewrite to api) - @results HTML - @stable no (HTML can change) - @parse url, title, img_src - - @todo rewrite to api """ # pylint: disable=missing-function-docstring from urllib.parse import urlencode from lxml import html +# about +about = { + "website": 'https://www.deviantart.com/', + "wikidata_id": 'Q46523', + "official_api_documentation": 'https://www.deviantart.com/developers/', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['images'] paging = True diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 5e6f688a1..2483c0805 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Dictzone - - @website https://dictzone.com/ - @provide-api no - @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content """ from urllib.parse import urljoin from lxml import html from searx.utils import eval_xpath +# about +about = { + "website": 
'https://dictzone.com/', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} engine_type = 'online_dictionnary' categories = ['general'] diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py index b1a90fb2f..109662a49 100644 --- a/searx/engines/digbt.py +++ b/searx/engines/digbt.py @@ -1,19 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ DigBT (Videos, Music, Files) - - @website https://digbt.org - @provide-api no - - @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content, magnetlink """ from urllib.parse import urljoin from lxml import html from searx.utils import extract_text, get_torrent_size +# about +about = { + "website": 'https://digbt.org', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} categories = ['videos', 'music', 'files'] paging = True diff --git a/searx/engines/digg.py b/searx/engines/digg.py index 85f727f0d..defcacd20 100644 --- a/searx/engines/digg.py +++ b/searx/engines/digg.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Digg (News, Social media) - - @website https://digg.com - @provide-api no - - @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content, publishedDate, thumbnail """ # pylint: disable=missing-function-docstring @@ -17,6 +10,16 @@ from datetime import datetime from lxml import html +# about +about = { + "website": 'https://digg.com', + "wikidata_id": 'Q270478', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['news', 'social media'] paging = True diff --git a/searx/engines/doku.py b/searx/engines/doku.py index e1b10d664..ed1eab388 100644 --- a/searx/engines/doku.py +++ 
b/searx/engines/doku.py @@ -1,18 +1,22 @@ -# Doku Wiki -# -# @website https://www.dokuwiki.org/ -# @provide-api yes -# (https://www.dokuwiki.org/devel:xmlrpc) -# -# @using-api no -# @results HTML -# @stable yes -# @parse (general) url, title, content +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Doku Wiki +""" from urllib.parse import urlencode from lxml.html import fromstring from searx.utils import extract_text, eval_xpath +# about +about = { + "website": 'https://www.dokuwiki.org/', + "wikidata_id": 'Q851864', + "official_api_documentation": 'https://www.dokuwiki.org/devel:xmlrpc', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' paging = False diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index c1c984623..fc20de239 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -1,22 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ DuckDuckGo (Web) - - @website https://duckduckgo.com/ - @provide-api yes (https://duckduckgo.com/api), - but not all results from search-site - - @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content - - @todo rewrite to api """ from lxml.html import fromstring from json import loads from searx.utils import extract_text, match_language, eval_xpath +# about +about = { + "website": 'https://duckduckgo.com/', + "wikidata_id": 'Q12805', + "official_api_documentation": 'https://duckduckgo.com/api', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] paging = False diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 1d1c84b4b..0473b0a95 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -1,12 +1,6 @@ +# 
SPDX-License-Identifier: AGPL-3.0-or-later """ -DuckDuckGo (definitions) - -- `Instant Answer API`_ -- `DuckDuckGo query`_ - -.. _Instant Answer API: https://duckduckgo.com/api -.. _DuckDuckGo query: https://api.duckduckgo.com/?q=DuckDuckGo&format=json&pretty=1 - + DuckDuckGo (Instant Answer API) """ import json @@ -22,6 +16,16 @@ from searx.external_urls import get_external_url, get_earth_coordinates_url, are logger = logger.getChild('duckduckgo_definitions') +# about +about = { + "website": 'https://duckduckgo.com/', + "wikidata_id": 'Q12805', + "official_api_documentation": 'https://duckduckgo.com/api', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + URL = 'https://api.duckduckgo.com/'\ + '?{query}&format=json&pretty=0&no_redirect=1&d=1' diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py index 009f81cca..b5c2d4506 100644 --- a/searx/engines/duckduckgo_images.py +++ b/searx/engines/duckduckgo_images.py @@ -1,16 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ DuckDuckGo (Images) - - @website https://duckduckgo.com/ - @provide-api yes (https://duckduckgo.com/api), - but images are not supported - - @using-api no - @results JSON (site requires js to get images) - @stable no (JSON can change) - @parse url, title, img_src - - @todo avoid extra request """ from json import loads @@ -20,6 +10,19 @@ from searx.engines.duckduckgo import get_region_code from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import from searx.poolrequests import get +# about +about = { + "website": 'https://duckduckgo.com/', + "wikidata_id": 'Q12805', + "official_api_documentation": { + 'url': 'https://duckduckgo.com/api', + 'comment': 'but images are not supported', + }, + "use_official_api": False, + "require_api_key": False, + "results": 'JSON (site requires js to get images)', +} + # engine dependent config categories = ['images'] paging = True 
diff --git a/searx/engines/duden.py b/searx/engines/duden.py index 1475fb846..f1c9efd3f 100644 --- a/searx/engines/duden.py +++ b/searx/engines/duden.py @@ -1,11 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Duden - @website https://www.duden.de - @provide-api no - @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content """ import re @@ -13,6 +8,16 @@ from urllib.parse import quote, urljoin from lxml import html from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://www.duden.de', + "wikidata_id": 'Q73624591', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + categories = ['general'] paging = True language_support = False diff --git a/searx/engines/dummy-offline.py b/searx/engines/dummy-offline.py index 13a9ecc01..cf2f75312 100644 --- a/searx/engines/dummy-offline.py +++ b/searx/engines/dummy-offline.py @@ -1,11 +1,19 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Dummy Offline - - @results one result - @stable yes """ +# about +about = { + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + + def search(query, request_params): return [{ 'result': 'this is what you get', diff --git a/searx/engines/dummy.py b/searx/engines/dummy.py index 50b56ef78..1a1b57d8c 100644 --- a/searx/engines/dummy.py +++ b/searx/engines/dummy.py @@ -1,10 +1,18 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Dummy - - @results empty array - @stable yes """ +# about +about = { + "website": None, + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'empty array', +} + # do search-request def request(query, params): diff --git a/searx/engines/ebay.py b/searx/engines/ebay.py index e2e5ded6a..45c633b42 100644 
--- a/searx/engines/ebay.py +++ b/searx/engines/ebay.py @@ -1,17 +1,22 @@ -# Ebay (Videos, Music, Files) -# -# @website https://www.ebay.com -# @provide-api no (nothing found) -# -# @using-api no -# @results HTML (using search portal) -# @stable yes (HTML can change) -# @parse url, title, content, price, shipping, source +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Ebay (Videos, Music, Files) +""" from lxml import html from searx.engines.xpath import extract_text from urllib.parse import quote +# about +about = { + "website": 'https://www.ebay.com', + "wikidata_id": 'Q58024', + "official_api_documentation": 'https://developer.ebay.com/', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + categories = ['shopping'] paging = True diff --git a/searx/engines/elasticsearch.py b/searx/engines/elasticsearch.py index 0e2d35756..da7f98074 100644 --- a/searx/engines/elasticsearch.py +++ b/searx/engines/elasticsearch.py @@ -1,3 +1,8 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Elasticsearch +""" + from json import loads, dumps from requests.auth import HTTPBasicAuth from searx.exceptions import SearxEngineAPIException diff --git a/searx/engines/etools.py b/searx/engines/etools.py index a0762d1c7..77d7e71c6 100644 --- a/searx/engines/etools.py +++ b/searx/engines/etools.py @@ -1,18 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ eTools (Web) - - @website https://www.etools.ch - @provide-api no - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, content """ from lxml import html from urllib.parse import quote from searx.utils import extract_text, eval_xpath +# about +about = { + "website": 'https://www.etools.ch', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + categories = ['general'] paging = False language_support = False diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py index 
3d37db44e..8fff2e384 100644 --- a/searx/engines/fdroid.py +++ b/searx/engines/fdroid.py @@ -1,18 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ F-Droid (a repository of FOSS applications for Android) - - @website https://f-droid.org/ - @provide-api no - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, content """ from urllib.parse import urlencode from lxml import html from searx.utils import extract_text +# about +about = { + "website": 'https://f-droid.org/', + "wikidata_id": 'Q1386210', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['files'] paging = True diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index b23c447b8..b0ddf6224 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -1,21 +1,23 @@ -#!/usr/bin/env python - +# SPDX-License-Identifier: AGPL-3.0-or-later """ Flickr (Images) - @website https://www.flickr.com - @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) - - @using-api yes - @results JSON - @stable yes - @parse url, title, thumbnail, img_src More info on api-key : https://www.flickr.com/services/apps/create/ """ from json import loads from urllib.parse import urlencode +# about +about = { + "website": 'https://www.flickr.com', + "wikidata_id": 'Q103204', + "official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html', + "use_official_api": True, + "require_api_key": True, + "results": 'JSON', +} + categories = ['images'] nb_per_page = 15 diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 4bcf837cb..a07aad51e 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -1,15 +1,6 @@ -#!/usr/bin/env python - +# SPDX-License-Identifier: AGPL-3.0-or-later """ - Flickr (Images) - - @website https://www.flickr.com - @provide-api yes 
(https://secure.flickr.com/services/api/flickr.photos.search.html) - - @using-api no - @results HTML - @stable no - @parse url, title, thumbnail, img_src + Flickr (Images) """ from json import loads @@ -21,6 +12,16 @@ from searx.utils import ecma_unescape, html_to_text logger = logger.getChild('flickr-noapi') +# about +about = { + "website": 'https://www.flickr.com', + "wikidata_id": 'Q103204', + "official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + categories = ['images'] url = 'https://www.flickr.com/' diff --git a/searx/engines/framalibre.py b/searx/engines/framalibre.py index e3d056425..42c08cf95 100644 --- a/searx/engines/framalibre.py +++ b/searx/engines/framalibre.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ FramaLibre (It) - - @website https://framalibre.org/ - @provide-api no - - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, content, thumbnail, img_src """ from html import escape @@ -15,6 +8,16 @@ from urllib.parse import urljoin, urlencode from lxml import html from searx.utils import extract_text +# about +about = { + "website": 'https://framalibre.org/', + "wikidata_id": 'Q30213882', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['it'] paging = True diff --git a/searx/engines/frinkiac.py b/searx/engines/frinkiac.py index 5b174a687..f43bb6e20 100644 --- a/searx/engines/frinkiac.py +++ b/searx/engines/frinkiac.py @@ -1,17 +1,24 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ -Frinkiac (Images) - -@website https://www.frinkiac.com -@provide-api no -@using-api no -@results JSON -@stable no -@parse url, title, img_src + Frinkiac (Images) """ from json import loads from urllib.parse import urlencode +# about +about = { + "website": 'https://frinkiac.com', + 
"wikidata_id": 'Q24882614', + "official_api_documentation": { + 'url': None, + 'comment': 'see https://github.com/MitchellAW/CompuGlobal' + }, + "use_official_api": False, + "require_api_key": False, + "results": 'JSON', +} + categories = ['images'] BASE = 'https://frinkiac.com/' diff --git a/searx/engines/genius.py b/searx/engines/genius.py index 2bfbfddf5..1667d529d 100644 --- a/searx/engines/genius.py +++ b/searx/engines/genius.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ -Genius - - @website https://www.genius.com/ - @provide-api yes (https://docs.genius.com/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, thumbnail, publishedDate + Genius """ from json import loads from urllib.parse import urlencode from datetime import datetime +# about +about = { + "website": 'https://genius.com/', + "wikidata_id": 'Q3419343', + "official_api_documentation": 'https://docs.genius.com/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['music'] paging = True diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py index 16b3e692d..55f15576e 100644 --- a/searx/engines/gentoo.py +++ b/searx/engines/gentoo.py @@ -1,20 +1,22 @@ -# -*- coding: utf-8 -*- - +# SPDX-License-Identifier: AGPL-3.0-or-later """ Gentoo Wiki - - @website https://wiki.gentoo.org - @provide-api yes - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title """ from urllib.parse import urlencode, urljoin from lxml import html from searx.utils import extract_text +# about +about = { + "website": 'https://wiki.gentoo.org/', + "wikidata_id": 'Q1050637', + "official_api_documentation": 'https://wiki.gentoo.org/api.php', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['it'] language_support = True diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 
1d71b18e9..f5f89a736 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -1,14 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """ Gigablast (Web) - - @website https://gigablast.com - @provide-api yes (https://gigablast.com/api.html) - - @using-api yes - @results XML - @stable yes - @parse url, title, content """ # pylint: disable=missing-function-docstring, invalid-name @@ -18,6 +10,16 @@ from urllib.parse import urlencode # from searx import logger from searx.poolrequests import get +# about +about = { + "website": 'https://www.gigablast.com', + "wikidata_id": 'Q3105449', + "official_api_documentation": 'https://gigablast.com/api.html', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['general'] # gigablast's pagination is totally damaged, don't use it diff --git a/searx/engines/github.py b/searx/engines/github.py index 80b50ceda..b68caa350 100644 --- a/searx/engines/github.py +++ b/searx/engines/github.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ - Github (It) - - @website https://github.com/ - @provide-api yes (https://developer.github.com/v3/) - - @using-api yes - @results JSON - @stable yes (using api) - @parse url, title, content + Github (IT) """ from json import loads from urllib.parse import urlencode +# about +about = { + "website": 'https://github.com/', + "wikidata_id": 'Q364', + "official_api_documentation": 'https://developer.github.com/v3/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['it'] diff --git a/searx/engines/google.py b/searx/engines/google.py index 17ab21f6a..4198de640 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -1,19 +1,11 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Google (Web) -:website: https://www.google.com -:provide-api: yes (https://developers.google.com/custom-search/) -:using-api: not the 
offical, since it needs registration to another service -:results: HTML -:stable: no -:parse: url, title, content, number_of_results, answer, suggestion, correction - -For detailed description of the *REST-full* API see: `Query Parameter -Definitions`_. - -.. _Query Parameter Definitions: - https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions + For detailed description of the *REST-full* API see: `Query Parameter + Definitions`_. + .. _Query Parameter Definitions: + https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions """ # pylint: disable=invalid-name, missing-function-docstring @@ -27,6 +19,16 @@ from searx.exceptions import SearxEngineCaptchaException logger = logger.getChild('google engine') +# about +about = { + "website": 'https://www.google.com', + "wikidata_id": 'Q9366', + "official_api_documentation": 'https://developers.google.com/custom-search/', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] paging = True diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 9ef1be753..8c2cb9d2a 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -1,14 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Google (Images) -:website: https://images.google.com (redirected to subdomain www.) -:provide-api: yes (https://developers.google.com/custom-search/) -:using-api: not the offical, since it needs registration to another service -:results: HTML -:stable: no -:template: images.html -:parse: url, title, content, source, thumbnail_src, img_src - For detailed description of the *REST-full* API see: `Query Parameter Definitions`_. @@ -18,10 +10,6 @@ Definitions`_. ``data:` scheme).:: Header set Content-Security-Policy "img-src 'self' data: ;" - -.. 
_Query Parameter Definitions: - https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions - """ from urllib.parse import urlencode, urlparse, unquote @@ -39,6 +27,16 @@ from searx.engines.google import ( logger = logger.getChild('google images') +# about +about = { + "website": 'https://images.google.com/', + "wikidata_id": 'Q521550', + "official_api_documentation": 'https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions', # NOQA + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['images'] diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index f1b7cfa79..63fef6696 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Google (News) - - @website https://news.google.com - @provide-api no - - @using-api no - @results HTML - @stable no - @parse url, title, content, publishedDate """ from urllib.parse import urlencode @@ -15,6 +8,16 @@ from lxml import html from searx.utils import match_language from searx.engines.google import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +# about +about = { + "website": 'https://news.google.com', + "wikidata_id": 'Q12020', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # search-url categories = ['news'] paging = True diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index eedefbf45..61e01ca7b 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Google (Videos) - - @website https://www.google.com - @provide-api yes (https://developers.google.com/custom-search/) - - @using-api no - @results HTML - @stable no - @parse url, title, 
content, thumbnail """ from datetime import date, timedelta @@ -16,6 +9,16 @@ from lxml import html from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex import re +# about +about = { + "website": 'https://www.google.com', + "wikidata_id": 'Q219885', + "official_api_documentation": 'https://developers.google.com/custom-search/', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['videos'] paging = True diff --git a/searx/engines/ina.py b/searx/engines/ina.py index ce241d409..1a47ca51e 100644 --- a/searx/engines/ina.py +++ b/searx/engines/ina.py @@ -1,15 +1,7 @@ -# INA (Videos) -# -# @website https://www.ina.fr/ -# @provide-api no -# -# @using-api no -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, content, publishedDate, thumbnail -# -# @todo set content-parameter with correct data -# @todo embedded (needs some md5 from video page) +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + INA (Videos) +""" from json import loads from html import unescape @@ -18,6 +10,15 @@ from lxml import html from dateutil import parser from searx.utils import extract_text +# about +about = { + "website": 'https://www.ina.fr/', + "wikidata_id": 'Q1665109', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} # engine dependent config categories = ['videos'] diff --git a/searx/engines/invidious.py b/searx/engines/invidious.py index 6ea942699..1d6d69f64 100644 --- a/searx/engines/invidious.py +++ b/searx/engines/invidious.py @@ -1,17 +1,22 @@ -# Invidious (Videos) -# -# @website https://invidio.us/ -# @provide-api yes (https://github.com/omarroth/invidious/wiki/API) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, content, publishedDate, thumbnail, embedded, author, length +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Invidious (Videos) +""" 
from urllib.parse import quote_plus from dateutil import parser import time +# about +about = { + "website": 'https://instances.invidio.us/', + "wikidata_id": 'Q79343316', + "official_api_documentation": 'https://github.com/omarroth/invidious/wiki/API', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ["videos", "music"] paging = True diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index e2aa436cc..f4a5ff6d2 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + from collections.abc import Iterable from json import loads from urllib.parse import urlencode diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py index 90bd33063..6a44e2fd7 100644 --- a/searx/engines/kickass.py +++ b/searx/engines/kickass.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Kickass Torrent (Videos, Music, Files) - - @website https://kickass.so - @provide-api no (nothing found) - - @using-api no - @results HTML (using search portal) - @stable yes (HTML can change) - @parse url, title, content, seed, leech, magnetlink """ from lxml import html @@ -15,6 +8,16 @@ from operator import itemgetter from urllib.parse import quote, urljoin from searx.utils import extract_text, get_torrent_size, convert_str_to_int +# about +about = { + "website": 'https://kickass.so', + "wikidata_id": 'Q17062285', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['videos', 'music', 'files'] paging = True diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py index 50ba74efc..21abff86e 100644 --- a/searx/engines/mediawiki.py +++ b/searx/engines/mediawiki.py @@ -1,21 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ - general mediawiki-engine (Web) - - @website websites built on 
mediawiki (https://www.mediawiki.org) - @provide-api yes (http://www.mediawiki.org/wiki/API:Search) - - @using-api yes - @results JSON - @stable yes - @parse url, title - - @todo content + General mediawiki-engine (Web) """ from json import loads from string import Formatter from urllib.parse import urlencode, quote +# about +about = { + "website": None, + "wikidata_id": None, + "official_api_documentation": 'http://www.mediawiki.org/wiki/API:Search', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['general'] language_support = True diff --git a/searx/engines/microsoft_academic.py b/searx/engines/microsoft_academic.py index 7426eef7e..14de4ac9a 100644 --- a/searx/engines/microsoft_academic.py +++ b/searx/engines/microsoft_academic.py @@ -1,12 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ -Microsoft Academic (Science) - -@website https://academic.microsoft.com -@provide-api yes -@using-api no -@results JSON -@stable no -@parse url, title, content + Microsoft Academic (Science) """ from datetime import datetime @@ -15,6 +9,16 @@ from uuid import uuid4 from urllib.parse import urlencode from searx.utils import html_to_text +# about +about = { + "website": 'https://academic.microsoft.com', + "wikidata_id": 'Q28136779', + "official_api_documentation": 'http://ma-graph.org/', + "use_official_api": False, + "require_api_key": False, + "results": 'JSON', +} + categories = ['images'] paging = True result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}' diff --git a/searx/engines/mixcloud.py b/searx/engines/mixcloud.py index 0606350a9..a6fd1c0a1 100644 --- a/searx/engines/mixcloud.py +++ b/searx/engines/mixcloud.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Mixcloud (Music) - - @website https://http://www.mixcloud.com/ - @provide-api yes (http://www.mixcloud.com/developers/ - - @using-api yes - @results JSON - @stable yes - @parse url, 
title, content, embedded, publishedDate """ from json import loads from dateutil import parser from urllib.parse import urlencode +# about +about = { + "website": 'https://www.mixcloud.com/', + "wikidata_id": 'Q6883832', + "official_api_documentation": 'http://www.mixcloud.com/developers/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['music'] paging = True diff --git a/searx/engines/not_evil.py b/searx/engines/not_evil.py index e84f153bd..df41c0941 100644 --- a/searx/engines/not_evil.py +++ b/searx/engines/not_evil.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ not Evil (Onions) - - @website http://hss3uro2hsxfogfq.onion - @provide-api yes (http://hss3uro2hsxfogfq.onion/api.htm) - - @using-api no - @results HTML - @stable no - @parse url, title, content """ from urllib.parse import urlencode from lxml import html from searx.engines.xpath import extract_text +# about +about = { + "website": 'http://hss3uro2hsxfogfq.onion', + "wikidata_id": None, + "official_api_documentation": 'http://hss3uro2hsxfogfq.onion/api.htm', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['onions'] paging = True diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py index e0a91494f..f8178d637 100644 --- a/searx/engines/nyaa.py +++ b/searx/engines/nyaa.py @@ -1,18 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Nyaa.si (Anime Bittorrent tracker) - - @website https://nyaa.si/ - @provide-api no - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, content, seed, leech, torrentfile """ from lxml import html from urllib.parse import urlencode from searx.utils import extract_text, get_torrent_size, int_or_zero +# about +about = { + "website": 'https://nyaa.si/', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, 
+ "results": 'HTML', +} + # engine dependent config categories = ['files', 'images', 'videos', 'music'] paging = True diff --git a/searx/engines/opensemantic.py b/searx/engines/opensemantic.py index 9364bab41..64bc321f1 100644 --- a/searx/engines/opensemantic.py +++ b/searx/engines/opensemantic.py @@ -1,18 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ -Open Semantic Search - - @website https://www.opensemanticsearch.org/ - @provide-api yes (https://www.opensemanticsearch.org/dev) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, publishedDate + Open Semantic Search """ + from dateutil import parser from json import loads from urllib.parse import quote +# about +about = { + "website": 'https://www.opensemanticsearch.org/', + "wikidata_id": None, + "official_api_documentation": 'https://www.opensemanticsearch.org/dev', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + base_url = 'http://localhost:8983/solr/opensemanticsearch/' search_string = 'query?q={query}' diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index 5475c7a6d..f11aa5f8c 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ OpenStreetMap (Map) - - @website https://openstreetmap.org/ - @provide-api yes (http://wiki.openstreetmap.org/wiki/Nominatim) - - @using-api yes - @results JSON - @stable yes - @parse url, title """ import re from json import loads from flask_babel import gettext +# about +about = { + "website": 'https://www.openstreetmap.org/', + "wikidata_id": 'Q936', + "official_api_documentation": 'http://wiki.openstreetmap.org/wiki/Nominatim', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['map'] paging = False diff --git a/searx/engines/pdbe.py b/searx/engines/pdbe.py index 2db92868a..b9bbfaf1b 100644 --- 
a/searx/engines/pdbe.py +++ b/searx/engines/pdbe.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ PDBe (Protein Data Bank in Europe) - - @website https://www.ebi.ac.uk/pdbe - @provide-api yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html), - unlimited - @using-api yes - @results python dictionary (from json) - @stable yes - @parse url, title, content, img_src """ from json import loads from flask_babel import gettext +# about +about = { + "website": 'https://www.ebi.ac.uk/pdbe', + "wikidata_id": 'Q55823905', + "official_api_documentation": 'https://www.ebi.ac.uk/pdbe/api/doc/search.html', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + categories = ['science'] hide_obsolete = False diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py index e43b2a6b7..549141079 100644 --- a/searx/engines/peertube.py +++ b/searx/engines/peertube.py @@ -1,15 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ peertube (Videos) - - @website https://www.peertube.live - @provide-api yes (https://docs.joinpeertube.org/api-rest-reference.html) - - @using-api yes - @results JSON - @stable yes - @parse url, title, thumbnail, publishedDate, embedded - - @todo implement time range support """ from json import loads @@ -17,6 +8,16 @@ from datetime import datetime from urllib.parse import urlencode from searx.utils import html_to_text +# about +about = { + "website": 'https://joinpeertube.org', + "wikidata_id": 'Q50938515', + "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ["videos"] paging = True diff --git a/searx/engines/photon.py b/searx/engines/photon.py index 7a6fc8321..f12bcd22a 100644 --- a/searx/engines/photon.py +++ b/searx/engines/photon.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Photon (Map) - - @website 
https://photon.komoot.de - @provide-api yes (https://photon.komoot.de/) - - @using-api yes - @results JSON - @stable yes - @parse url, title """ from json import loads from urllib.parse import urlencode from searx.utils import searx_useragent +# about +about = { + "website": 'https://photon.komoot.de', + "wikidata_id": None, + "official_api_documentation": 'https://photon.komoot.de/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['map'] paging = False diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index 828241ece..98a2dd9f2 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -1,12 +1,7 @@ -# Piratebay (Videos, Music, Files) -# -# @website https://thepiratebay.org -# @provide-api yes (https://apibay.org/) -# -# @using-api yes -# @results JSON -# @stable no (the API is not documented nor versioned) -# @parse url, title, seed, leech, magnetlink, filesize, publishedDate +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Piratebay (Videos, Music, Files) +""" from json import loads from datetime import datetime @@ -15,6 +10,16 @@ from operator import itemgetter from urllib.parse import quote from searx.utils import get_torrent_size +# about +about = { + "website": 'https://thepiratebay.org', + "wikidata_id": 'Q22663', + "official_api_documentation": 'https://apibay.org/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ["videos", "music", "files"] diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py index 07c45709e..da02f91ca 100644 --- a/searx/engines/pubmed.py +++ b/searx/engines/pubmed.py @@ -1,14 +1,6 @@ -#!/usr/bin/env python - +# SPDX-License-Identifier: AGPL-3.0-or-later """ PubMed (Scholar publications) - @website https://www.ncbi.nlm.nih.gov/pubmed/ - @provide-api yes (https://www.ncbi.nlm.nih.gov/home/develop/api/) - @using-api yes - @results XML - @stable 
yes - @parse url, title, publishedDate, content - More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/ """ from flask_babel import gettext @@ -17,6 +9,18 @@ from datetime import datetime from urllib.parse import urlencode from searx.poolrequests import get +# about +about = { + "website": 'https://www.ncbi.nlm.nih.gov/pubmed/', + "wikidata_id": 'Q1540899', + "official_api_documentation": { + 'url': 'https://www.ncbi.nlm.nih.gov/home/develop/api/', + 'comment': 'More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/' + }, + "use_official_api": True, + "require_api_key": False, + "results": 'XML', +} categories = ['science'] diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index b785719d9..87499c8ad 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Qwant (Web, Images, News, Social) - - @website https://qwant.com/ - @provide-api not officially (https://api.qwant.com/api/search/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content """ from datetime import datetime @@ -17,6 +10,15 @@ from searx.utils import html_to_text, match_language from searx.exceptions import SearxEngineAPIException, SearxEngineCaptchaException from searx.raise_for_httperror import raise_for_httperror +# about +about = { + "website": 'https://www.qwant.com/', + "wikidata_id": 'Q14657870', + "official_api_documentation": None, + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} # engine dependent config categories = [] diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py index 5a956b8bf..d90005a95 100644 --- a/searx/engines/recoll.py +++ b/searx/engines/recoll.py @@ -1,17 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Recoll (local search engine) - - @using-api yes - @results JSON - @stable yes - @parse url, content, size, abstract, author, mtype, subtype, time, \ - filename, label, type, embedded """ from datetime 
import date, timedelta from json import loads from urllib.parse import urlencode, quote +# about +about = { + "website": None, + "wikidata_id": 'Q15735774', + "official_api_documentation": 'https://www.lesbonscomptes.com/recoll/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config time_range_support = True diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py index e732875cb..ee734ace2 100644 --- a/searx/engines/reddit.py +++ b/searx/engines/reddit.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Reddit - - @website https://www.reddit.com/ - @provide-api yes (https://www.reddit.com/dev/api) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, thumbnail, publishedDate """ import json from datetime import datetime from urllib.parse import urlencode, urljoin, urlparse +# about +about = { + "website": 'https://www.reddit.com/', + "wikidata_id": 'Q1136', + "official_api_documentation": 'https://www.reddit.com/dev/api', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['general', 'images', 'news', 'social media'] page_size = 25 diff --git a/searx/engines/scanr_structures.py b/searx/engines/scanr_structures.py index 72fd2b3c9..51c925247 100644 --- a/searx/engines/scanr_structures.py +++ b/searx/engines/scanr_structures.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ ScanR Structures (Science) - - @website https://scanr.enseignementsup-recherche.gouv.fr - @provide-api yes (https://scanr.enseignementsup-recherche.gouv.fr/api/swagger-ui.html) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, img_src """ from json import loads, dumps from searx.utils import html_to_text +# about +about = { + "website": 'https://scanr.enseignementsup-recherche.gouv.fr', + "wikidata_id": 'Q44105684', + "official_api_documentation": 
'https://scanr.enseignementsup-recherche.gouv.fr/opendata', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['science'] paging = True diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index 706285814..8c1330d98 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -1,18 +1,20 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ - Searchcode (It) - - @website https://searchcode.com/ - @provide-api yes (https://searchcode.com/api/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content + Searchcode (IT) """ from json import loads from urllib.parse import urlencode +# about +about = { + "website": 'https://searchcode.com/', + "wikidata_id": None, + "official_api_documentation": 'https://searchcode.com/api/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} # engine dependent config categories = ['it'] diff --git a/searx/engines/searx_engine.py b/searx/engines/searx_engine.py index 87e5e05c2..c4f016adc 100644 --- a/searx/engines/searx_engine.py +++ b/searx/engines/searx_engine.py @@ -1,18 +1,20 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Searx (all) - - @website https://github.com/searx/searx - @provide-api yes (https://searx.github.io/searx/dev/search_api.html) - - @using-api yes - @results JSON - @stable yes (using api) - @parse url, title, content """ from json import loads from searx.engines import categories as searx_categories +# about +about = { + "website": 'https://github.com/searx/searx', + "wikidata_id": 'Q17639196', + "official_api_documentation": 'https://searx.github.io/searx/dev/search_api.html', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} categories = searx_categories.keys() diff --git a/searx/engines/sepiasearch.py b/searx/engines/sepiasearch.py index 0b7c1ba6e..3433c897a 100644 --- a/searx/engines/sepiasearch.py +++ 
b/searx/engines/sepiasearch.py @@ -1,17 +1,23 @@ -# SepiaSearch (Videos) -# -# @website https://sepiasearch.org -# @provide-api https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, content, publishedDate, thumbnail +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + SepiaSearch (Videos) +""" from json import loads from dateutil import parser, relativedelta from urllib.parse import urlencode from datetime import datetime +# about +about = { + "website": 'https://sepiasearch.org', + "wikidata_id": None, + "official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api", # NOQA + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + categories = ['videos'] paging = True language_support = True diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index 84ff21a88..9e414746f 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Soundcloud (Music) - - @website https://soundcloud.com - @provide-api yes (https://developers.soundcloud.com/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, publishedDate, embedded """ import re @@ -18,6 +11,15 @@ from urllib.parse import quote_plus, urlencode from searx import logger from searx.poolrequests import get as http_get +# about +about = { + "website": 'https://soundcloud.com', + "wikidata_id": 'Q568769', + "official_api_documentation": 'https://developers.soundcloud.com/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} # engine dependent config categories = ['music'] diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py index 74942326e..0ad8bfe32 100644 --- a/searx/engines/spotify.py +++ b/searx/engines/spotify.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: 
AGPL-3.0-or-later """ Spotify (Music) - - @website https://spotify.com - @provide-api yes (https://developer.spotify.com/web-api/search-item/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content, embedded """ from json import loads @@ -15,6 +8,16 @@ from urllib.parse import urlencode import requests import base64 +# about +about = { + "website": 'https://www.spotify.com', + "wikidata_id": 'Q689141', + "official_api_documentation": 'https://developer.spotify.com/web-api/search-item/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['music'] paging = True diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py index f730264e2..91eaa68e9 100644 --- a/searx/engines/stackoverflow.py +++ b/searx/engines/stackoverflow.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ - Stackoverflow (It) - - @website https://stackoverflow.com/ - @provide-api not clear (https://api.stackexchange.com/docs/advanced-search) - - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, content + Stackoverflow (IT) """ from urllib.parse import urlencode, urljoin, urlparse @@ -15,6 +8,16 @@ from lxml import html from searx.utils import extract_text from searx.exceptions import SearxEngineCaptchaException +# about +about = { + "website": 'https://stackoverflow.com/', + "wikidata_id": 'Q549037', + "official_api_documentation": 'https://api.stackexchange.com/docs', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['it'] paging = True diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index cd8b132f9..68157971d 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -1,14 +1,7 @@ -# Startpage (Web) -# -# @website https://startpage.com -# @provide-api no (nothing found) -# -# @using-api no -# @results HTML -# @stable no (HTML can 
change) -# @parse url, title, content -# -# @todo paging +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Startpage (Web) +""" from lxml import html from dateutil import parser @@ -19,6 +12,16 @@ from babel import Locale from babel.localedata import locale_identifiers from searx.utils import extract_text, eval_xpath, match_language +# about +about = { + "website": 'https://startpage.com', + "wikidata_id": 'Q2333295', + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] # there is a mechanism to block "bot" search diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py index 9fffba8a6..91d1f01d5 100644 --- a/searx/engines/tokyotoshokan.py +++ b/searx/engines/tokyotoshokan.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Tokyo Toshokan (A BitTorrent Library for Japanese Media) - - @website https://www.tokyotosho.info/ - @provide-api no - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, publishedDate, seed, leech, - filesize, magnetlink, content """ import re @@ -16,6 +9,16 @@ from lxml import html from datetime import datetime from searx.utils import extract_text, get_torrent_size, int_or_zero +# about +about = { + "website": 'https://www.tokyotosho.info/', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['files', 'videos', 'music'] paging = True diff --git a/searx/engines/torrentz.py b/searx/engines/torrentz.py index 4d3e6fdd7..94a7a5343 100644 --- a/searx/engines/torrentz.py +++ b/searx/engines/torrentz.py @@ -1,14 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Torrentz2.is (BitTorrent meta-search engine) - - @website https://torrentz2.is/ - @provide-api no - - @using-api no - @results HTML - @stable no (HTML can change, 
although unlikely, - see https://torrentz.is/torrentz.btsearch) - @parse url, title, publishedDate, seed, leech, filesize, magnetlink """ import re @@ -17,6 +9,16 @@ from lxml import html from datetime import datetime from searx.utils import extract_text, get_torrent_size +# about +about = { + "website": 'https://torrentz2.is/', + "wikidata_id": 'Q1156687', + "official_api_documentation": 'https://torrentz.is/torrentz.btsearch', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['files', 'videos', 'music'] paging = True diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 2706e3617..9c53d70ad 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -1,14 +1,18 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ MyMemory Translated - - @website https://mymemory.translated.net/ - @provide-api yes (https://mymemory.translated.net/doc/spec.php) - @using-api yes - @results JSON - @stable yes - @parse url, title, content """ +# about +about = { + "website": 'https://mymemory.translated.net/', + "wikidata_id": None, + "official_api_documentation": 'https://mymemory.translated.net/doc/spec.php', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + engine_type = 'online_dictionnary' categories = ['general'] url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py index 45c6b30da..3bbdf630d 100644 --- a/searx/engines/unsplash.py +++ b/searx/engines/unsplash.py @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Unsplash - - @website https://unsplash.com - @provide-api yes (https://unsplash.com/developers) - - @using-api no - @results JSON (using search portal's infiniscroll API) - @stable no (JSON format could change any time) - @parse url, title, img_src, thumbnail_src """ from urllib.parse import urlencode, 
urlparse, urlunparse, parse_qsl from json import loads +# about +about = { + "website": 'https://unsplash.com', + "wikidata_id": 'Q28233552', + "official_api_documentation": 'https://unsplash.com/developers', + "use_official_api": False, + "require_api_key": False, + "results": 'JSON', +} + url = 'https://unsplash.com/' search_url = url + 'napi/search/photos?' categories = ['images'] diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index fd3abc858..824579256 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -1,21 +1,22 @@ -# Vimeo (Videos) -# -# @website https://vimeo.com/ -# @provide-api yes (http://developer.vimeo.com/api), -# they have a maximum count of queries/hour -# -# @using-api no (TODO, rewrite to api) -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, publishedDate, thumbnail, embedded -# -# @todo rewrite to api -# @todo set content-parameter with correct data +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Vimeo (Videos) +""" from urllib.parse import urlencode from json import loads from dateutil import parser +# about +about = { + "website": 'https://vimeo.com/', + "wikidata_id": 'Q156376', + "official_api_documentation": 'http://developer.vimeo.com/api', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['videos'] paging = True diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 8d787caac..c8e4cfae6 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -1,14 +1,6 @@ -# -*- coding: utf-8 -*- +# SPDX-License-Identifier: AGPL-3.0-or-later """ Wikidata - - @website https://wikidata.org - @provide-api yes (https://query.wikidata.org/) - - @using-api yes - @results JSON - @stable yes - @parse url, infobox """ @@ -27,6 +19,16 @@ from searx.engines.wikipedia import _fetch_supported_languages, supported_langua logger = logger.getChild('wikidata') +# about +about = { +
"website": 'https://wikidata.org/', + "wikidata_id": 'Q2013', + "official_api_documentation": 'https://query.wikidata.org/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # SPARQL SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql' SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain' diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 54d75108e..eff301145 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -1,13 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Wikipedia (Web) - - @website https://en.wikipedia.org/api/rest_v1/ - @provide-api yes - - @using-api yes - @results JSON - @stable yes - @parse url, infobox """ from urllib.parse import quote @@ -16,6 +9,16 @@ from lxml.html import fromstring from searx.utils import match_language, searx_useragent from searx.raise_for_httperror import raise_for_httperror +# about +about = { + "website": 'https://www.wikipedia.org/', + "wikidata_id": 'Q52', + "official_api_documentation": 'https://en.wikipedia.org/api/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # search-url search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 520eaa209..9c84e2809 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -1,16 +1,21 @@ -# Wolfram Alpha (Science) -# -# @website https://www.wolframalpha.com -# @provide-api yes (https://api.wolframalpha.com/v2/) -# -# @using-api yes -# @results XML -# @stable yes -# @parse url, infobox +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Wolfram|Alpha (Science) +""" from lxml import etree from urllib.parse import urlencode +# about +about = { + "website": 'https://www.wolframalpha.com', + "wikidata_id": 
'Q207006', + "official_api_documentation": 'https://products.wolframalpha.com/api/', + "use_official_api": True, + "require_api_key": False, + "results": 'XML', +} + # search-url search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}' site_url = 'https://www.wolframalpha.com/input/?{query}' diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 943d4f3fb..8e427d575 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -1,12 +1,7 @@ -# Wolfram|Alpha (Science) -# -# @website https://www.wolframalpha.com/ -# @provide-api yes (https://api.wolframalpha.com/v2/) -# -# @using-api no -# @results JSON -# @stable no -# @parse url, infobox +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Wolfram|Alpha (Science) +""" from json import loads from time import time @@ -14,6 +9,16 @@ from urllib.parse import urlencode from searx.poolrequests import get as http_get +# about +about = { + "website": 'https://www.wolframalpha.com/', + "wikidata_id": 'Q207006', + "official_api_documentation": 'https://products.wolframalpha.com/api/', + "use_official_api": False, + "require_api_key": False, + "results": 'JSON', +} + # search-url url = 'https://www.wolframalpha.com/' diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py index b8f111a50..96b8d680c 100644 --- a/searx/engines/www1x.py +++ b/searx/engines/www1x.py @@ -1,19 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ 1x (Images) - - @website http://1x.com/ - @provide-api no - - @using-api no - @results HTML - @stable no (HTML can change) - @parse url, title, thumbnail """ from lxml import html, etree from urllib.parse import urlencode, urljoin from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex +# about +about = { + "website": 'https://1x.com/', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine 
dependent config categories = ['images'] paging = False diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 1507176ec..612f69abd 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + from lxml import html from urllib.parse import urlencode from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index 6f7ab759f..afd59cd49 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -1,16 +1,7 @@ -# Yacy (Web, Images, Videos, Music, Files) -# -# @website http://yacy.net -# @provide-api yes -# (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse (general) url, title, content, publishedDate -# @parse (images) url, title, img_src -# -# @todo parse video, audio and file results +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Yacy (Web, Images, Videos, Music, Files) +""" from json import loads from dateutil import parser @@ -20,6 +11,16 @@ from requests.auth import HTTPDigestAuth from searx.utils import html_to_text +# about +about = { + "website": 'https://yacy.net/', + "wikidata_id": 'Q1759675', + "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['general', 'images'] # TODO , 'music', 'videos', 'files' paging = True diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 3420aa6d5..eb07a45fc 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -1,20 +1,22 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Yahoo (Web) - - @website https://search.yahoo.com/web - @provide-api yes (https://developer.yahoo.com/boss/search/), - $0.80/1000 queries - - @using-api no (because pricing) - @results HTML (using search portal) - @stable no (HTML can change) - 
@parse url, title, content, suggestion """ from urllib.parse import unquote, urlencode from lxml import html from searx.utils import extract_text, extract_url, match_language, eval_xpath +# about +about = { + "website": 'https://search.yahoo.com/', + "wikidata_id": None, + "official_api_documentation": 'https://developer.yahoo.com/api/', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] paging = True diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index 793d1104a..b324ecdf3 100644 --- a/searx/engines/yahoo_news.py +++ b/searx/engines/yahoo_news.py @@ -1,13 +1,7 @@ -# Yahoo (News) -# -# @website https://news.yahoo.com -# @provide-api yes (https://developer.yahoo.com/boss/search/) -# $0.80/1000 queries -# -# @using-api no (because pricing) -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, content, publishedDate +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Yahoo (News) +""" import re from datetime import datetime, timedelta @@ -18,6 +12,16 @@ from searx.engines.yahoo import _fetch_supported_languages, supported_languages_ from dateutil import parser from searx.utils import extract_text, extract_url, match_language +# about +about = { + "website": 'https://news.yahoo.com', + "wikidata_id": 'Q3044717', + "official_api_documentation": 'https://developer.yahoo.com/api/', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['news'] paging = True diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py index b4a6a54cf..57a2f4b79 100644 --- a/searx/engines/yandex.py +++ b/searx/engines/yandex.py @@ -1,12 +1,6 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Yahoo (Web) - - @website https://yandex.ru/ - @provide-api ? 
- @using-api no - @results HTML (using search portal) - @stable no (HTML can change) - @parse url, title, content """ from urllib.parse import urlencode, urlparse @@ -16,6 +10,16 @@ from searx.exceptions import SearxEngineCaptchaException logger = logger.getChild('yandex engine') +# about +about = { + "website": 'https://yandex.ru/', + "wikidata_id": 'Q5281', + "official_api_documentation": "?", + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['general'] paging = True diff --git a/searx/engines/yggtorrent.py b/searx/engines/yggtorrent.py index ec84d2c6b..cad2de52b 100644 --- a/searx/engines/yggtorrent.py +++ b/searx/engines/yggtorrent.py @@ -1,12 +1,7 @@ -# Yggtorrent (Videos, Music, Files) -# -# @website https://www2.yggtorrent.si -# @provide-api no (nothing found) -# -# @using-api no -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, seed, leech, publishedDate, filesize +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Yggtorrent (Videos, Music, Files) +""" from lxml import html from operator import itemgetter @@ -15,6 +10,16 @@ from urllib.parse import quote from searx.utils import extract_text, get_torrent_size from searx.poolrequests import get as http_get +# about +about = { + "website": 'https://www2.yggtorrent.si', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['videos', 'music', 'files'] paging = True diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py index 8c12ac4d2..b3dcb4907 100644 --- a/searx/engines/youtube_api.py +++ b/searx/engines/youtube_api.py @@ -1,18 +1,23 @@ -# Youtube (Videos) -# -# @website https://www.youtube.com/ -# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) -# -# @using-api yes -# @results JSON -# @stable yes -# 
@parse url, title, content, publishedDate, thumbnail, embedded +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Youtube (Videos) +""" from json import loads from dateutil import parser from urllib.parse import urlencode from searx.exceptions import SearxEngineAPIException +# about +about = { + "website": 'https://www.youtube.com/', + "wikidata_id": 'Q866', + "official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + # engine dependent config categories = ['videos', 'music'] paging = False diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py index 36fc72e36..4a6df57c4 100644 --- a/searx/engines/youtube_noapi.py +++ b/searx/engines/youtube_noapi.py @@ -1,17 +1,22 @@ -# Youtube (Videos) -# -# @website https://www.youtube.com/ -# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) -# -# @using-api no -# @results HTML -# @stable no -# @parse url, title, content, publishedDate, thumbnail, embedded +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Youtube (Videos) +""" from functools import reduce from json import loads from urllib.parse import quote_plus +# about +about = { + "website": 'https://www.youtube.com/', + "wikidata_id": 'Q866', + "official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true', + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + # engine dependent config categories = ['videos', 'music'] paging = True diff --git a/searx/settings.yml b/searx/settings.yml index 55c9849c1..e3259220b 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -157,6 +157,13 @@ engines: timeout : 7.0 disabled : True shortcut : ai + about: + website: https://archive.is/ + wikidata_id: Q13515725 + official_api_documentation: http://mementoweb.org/depot/native/archiveis/ + use_official_api: false + 
require_api_key: false + results: HTML - name : arxiv engine : arxiv @@ -201,6 +208,13 @@ engines: timeout : 4.0 disabled : True shortcut : bb + about: + website: https://bitbucket.org/ + wikidata_id: Q2493781 + official_api_documentation: https://developer.atlassian.com/bitbucket + use_official_api: false + require_api_key: false + results: HTML - name : btdigg engine : btdigg @@ -216,6 +230,13 @@ engines: categories : videos disabled : True shortcut : c3tv + about: + website: https://media.ccc.de/ + wikidata_id: Q80729951 + official_api_documentation: https://github.com/voc/voctoweb + use_official_api: false + require_api_key: false + results: HTML - name : crossref engine : json_engine @@ -226,6 +247,13 @@ engines: content_query : fullCitation categories : science shortcut : cr + about: + website: https://www.crossref.org/ + wikidata_id: Q5188229 + official_api_documentation: https://github.com/CrossRef/rest-api-doc + use_official_api: false + require_api_key: false + results: JSON - name : currency engine : currency_convert @@ -271,6 +299,13 @@ engines: categories : general shortcut : ew disabled : True + about: + website: https://www.erowid.org/ + wikidata_id: Q1430691 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML # - name : elasticsearch # shortcut : es @@ -321,6 +356,13 @@ engines: first_page_num : 1 shortcut : et disabled : True + about: + website: https://www.etymonline.com/ + wikidata_id: Q1188617 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML # - name : ebay # engine : ebay @@ -360,6 +402,9 @@ engines: search_type : title timeout : 5.0 disabled : True + about: + website: https://directory.fsf.org/ + wikidata_id: Q2470288 - name : frinkiac engine : frinkiac @@ -394,6 +439,13 @@ engines: shortcut : gl timeout : 10.0 disabled : True + about: + website: https://about.gitlab.com/ + wikidata_id: Q16639197 + official_api_documentation: 
https://docs.gitlab.com/ee/api/ + use_official_api: false + require_api_key: false + results: JSON - name : github engine : github @@ -411,6 +463,13 @@ engines: categories : it shortcut : cb disabled : True + about: + website: https://codeberg.org/ + wikidata_id: + official_api_documentation: https://try.gitea.io/api/swagger + use_official_api: false + require_api_key: false + results: JSON - name : google engine : google @@ -441,6 +500,13 @@ engines: first_page_num : 0 categories : science shortcut : gos + about: + website: https://scholar.google.com/ + wikidata_id: Q494817 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : google play apps engine : xpath @@ -453,6 +519,13 @@ engines: categories : files shortcut : gpa disabled : True + about: + website: https://play.google.com/ + wikidata_id: Q79576 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : google play movies engine : xpath @@ -465,6 +538,13 @@ engines: categories : videos shortcut : gpm disabled : True + about: + website: https://play.google.com/ + wikidata_id: Q79576 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : google play music engine : xpath @@ -477,6 +557,13 @@ engines: categories : music shortcut : gps disabled : True + about: + website: https://play.google.com/ + wikidata_id: Q79576 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : geektimes engine : xpath @@ -489,6 +576,13 @@ engines: timeout : 4.0 disabled : True shortcut : gt + about: + website: https://geektimes.ru/ + wikidata_id: Q50572423 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : habrahabr engine : xpath @@ -501,6 +595,13 @@ engines: timeout : 4.0 disabled : True shortcut : habr + about: + website: https://habr.com/ + wikidata_id: Q4494434 + 
official_api_documentation: https://habr.com/en/docs/help/api/ + use_official_api: false + require_api_key: false + results: HTML - name : hoogle engine : json_engine @@ -513,6 +614,13 @@ engines: page_size : 20 categories : it shortcut : ho + about: + website: https://www.haskell.org/ + wikidata_id: Q34010 + official_api_documentation: https://hackage.haskell.org/api + use_official_api: false + require_api_key: false + results: JSON - name : ina engine : ina @@ -543,6 +651,13 @@ engines: timeout : 7.0 disabled : True shortcut : lg + about: + website: http://libgen.rs/ + wikidata_id: Q22017206 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : lobste.rs engine : xpath @@ -555,6 +670,13 @@ engines: shortcut : lo timeout : 3.0 disabled: True + about: + website: https://lobste.rs/ + wikidata_id: Q60762874 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : metager engine : xpath @@ -566,6 +688,13 @@ engines: categories : general shortcut : mg disabled : True + about: + website: https://metager.org/ + wikidata_id: Q1924645 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : microsoft academic engine : microsoft_academic @@ -589,6 +718,13 @@ engines: disabled: True timeout: 5.0 shortcut : npm + about: + website: https://npms.io/ + wikidata_id: Q7067518 + official_api_documentation: https://api-docs.npms.io/ + use_official_api: false + require_api_key: false + results: JSON # Requires Tor - name : not evil @@ -617,6 +753,13 @@ engines: categories : science shortcut : oad timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON - name : openairepublications engine : json_engine @@ -629,6 +772,13 @@ engines: categories : science shortcut : oap timeout: 5.0 + about: + 
website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON # - name : opensemanticsearch # engine : opensemantic @@ -650,6 +800,13 @@ engines: timeout : 4.0 disabled : True shortcut : or + about: + website: https://openrepos.net/ + wikidata_id: + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML - name : pdbe engine : pdbe @@ -768,6 +925,13 @@ engines: content_xpath : .//div[@class="search-result-abstract"] shortcut : se categories : science + about: + website: https://www.semanticscholar.org/ + wikidata_id: Q22908627 + official_api_documentation: https://api.semanticscholar.org/ + use_official_api: false + require_api_key: false + results: HTML # Spotify needs API credentials # - name : spotify @@ -876,6 +1040,9 @@ engines: number_of_results : 5 search_type : text disabled : True + about: + website: https://www.wikibooks.org/ + wikidata_id: Q367 - name : wikinews engine : mediawiki @@ -885,6 +1052,9 @@ engines: number_of_results : 5 search_type : text disabled : True + about: + website: https://www.wikinews.org/ + wikidata_id: Q964 - name : wikiquote engine : mediawiki @@ -896,6 +1066,9 @@ engines: disabled : True additional_tests: rosebud: *test_rosebud + about: + website: https://www.wikiquote.org/ + wikidata_id: Q369 - name : wikisource engine : mediawiki @@ -905,6 +1078,9 @@ engines: number_of_results : 5 search_type : text disabled : True + about: + website: https://www.wikisource.org/ + wikidata_id: Q263 - name : wiktionary engine : mediawiki @@ -914,6 +1090,9 @@ engines: number_of_results : 5 search_type : text disabled : True + about: + website: https://www.wiktionary.org/ + wikidata_id: Q151 - name : wikiversity engine : mediawiki @@ -923,6 +1102,9 @@ engines: number_of_results : 5 search_type : text disabled : True + about: + website: https://www.wikiversity.org/ + wikidata_id: Q370 - name 
: wikivoyage engine : mediawiki @@ -932,6 +1114,9 @@ engines: number_of_results : 5 search_type : text disabled : True + about: + website: https://www.wikivoyage.org/ + wikidata_id: Q373 - name : wolframalpha shortcut : wa @@ -979,6 +1164,13 @@ engines: first_page_num : 0 page_size : 10 disabled : True + about: + website: https://www.seznam.cz/ + wikidata_id: Q3490485 + official_api_documentation: https://api.sklik.cz/ + use_official_api: false + require_api_key: false + results: HTML - name : mojeek shortcut: mjk @@ -993,6 +1185,13 @@ engines: first_page_num : 0 page_size : 10 disabled : True + about: + website: https://www.mojeek.com/ + wikidata_id: Q60747299 + official_api_documentation: https://www.mojeek.com/services/api.html/ + use_official_api: false + require_api_key: false + results: HTML - name : naver shortcut: nvr @@ -1007,6 +1206,13 @@ engines: first_page_num : 1 page_size : 10 disabled : True + about: + website: https://www.naver.com/ + wikidata_id: Q485639 + official_api_documentation: https://developers.naver.com/docs/nmt/examples/ + use_official_api: false + require_api_key: false + results: HTML - name : rubygems shortcut: rbg @@ -1021,6 +1227,13 @@ engines: first_page_num : 1 categories: it disabled : True + about: + website: https://rubygems.org/ + wikidata_id: Q1853420 + official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ + use_official_api: false + require_api_key: false + results: HTML - name : peertube engine: peertube |