diff options
| -rw-r--r-- | Makefile | 15 | ||||
| -rw-r--r-- | docs/dev/engine_overview.rst | 2 | ||||
| -rw-r--r-- | requirements-dev.txt | 4 | ||||
| -rw-r--r-- | searx/engines/__init__.py | 33 | ||||
| -rw-r--r-- | searx/engines/apkmirror.py | 42 | ||||
| -rw-r--r-- | searx/engines/solidtorrents.py | 53 | ||||
| -rw-r--r-- | searx/poolrequests.py | 16 | ||||
| -rw-r--r-- | searx/search/processors/online.py | 2 | ||||
| -rw-r--r-- | searx/settings.yml | 7 | ||||
| -rw-r--r-- | searx/templates/oscar/preferences.html | 2 | ||||
| -rw-r--r-- | searx/templates/simple/preferences.html | 2 |
11 files changed, 113 insertions, 65 deletions
@@ -94,15 +94,14 @@ project: buildenv useragents.update engines.languages engines.languages: pyenvinstall $(Q)echo "fetch languages .." - $(Q)$(PY_ENV_ACT); python utils/fetch_languages.py - $(Q)echo "update searx/data/engines_languages.json" - $(Q)mv engines_languages.json searx/data/engines_languages.json - $(Q)echo "update searx/languages.py" - $(Q)mv languages.py searx/languages.py + $(Q)$(PY_ENV_ACT); python ./searx_extra/update/update_languages.py + $(Q)echo "updated searx/data/engines_languages.json" + $(Q)echo "updated searx/languages.py" useragents.update: pyenvinstall - $(Q)echo "Update searx/data/useragents.json with the most recent versions of Firefox." - $(Q)$(PY_ENV_ACT); python utils/fetch_firefox_version.py + $(Q)echo "fetch useragents .." + $(Q)$(PY_ENV_ACT); python ./searx_extra/update/update_firefox_version.py + $(Q)echo "updated searx/data/useragents.json with the most recent versions of Firefox." buildenv: pyenv $(Q)$(PY_ENV_ACT); SEARX_DEBUG=1 python utils/build_env.py @@ -192,8 +191,10 @@ PYLINT_FILES=\ searx/engines/google_videos.py \ searx/engines/google_images.py \ searx/engines/mediathekviewweb.py \ + searx/engines/solidtorrents.py \ searx/engines/google_scholar.py \ searx/engines/yahoo_news.py \ + searx/engines/apkmirror.py \ searx_extra/update/update_external_bangs.py test.pylint: pyenvinstall diff --git a/docs/dev/engine_overview.rst b/docs/dev/engine_overview.rst index b51181974..42c205d9d 100644 --- a/docs/dev/engine_overview.rst +++ b/docs/dev/engine_overview.rst @@ -58,6 +58,8 @@ argument type information name string name of search-engine engine string name of searx-engine (filename without ``.py``) +enable_http bool enable HTTP + (by default only HTTPS is enabled). shortcut string shortcut of search-engine timeout string specific timeout for search-engine display_error_messages boolean display error messages on the web UI diff --git a/requirements-dev.txt b/requirements-dev.txt index c3cd35432..44093f216 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,10 +8,10 @@ transifex-client==0.14.2 selenium==3.141.0 twine==3.3.0 Pallets-Sphinx-Themes==1.2.3 -Sphinx==3.5.1 +Sphinx==3.5.2 sphinx-issues==1.2.0 sphinx-jinja==1.1.1 -sphinx-tabs==2.0.1 +sphinx-tabs==2.1.0 sphinxcontrib-programoutput==0.16 sphinx-autobuild==2020.9.1 linuxdoc==20210110 diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 7270724b6..2238ea1b9 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -50,6 +50,7 @@ engine_default_args = {'paging': False, 'timeout': settings['outgoing']['request_timeout'], 'shortcut': '-', 'disabled': False, + 'enable_http': False, 'suspend_end_time': 0, 'continuous_errors': 0, 'time_range_support': False, @@ -305,35 +306,3 @@ def initialize_engines(engine_list): if init_fn: logger.debug('%s engine: Starting background initialization', engine_name) threading.Thread(target=engine_init, args=(engine_name, init_fn)).start() - - _set_https_support_for_engine(engine) - - -def _set_https_support_for_engine(engine): - # check HTTPS support if it is not disabled - if engine.engine_type != 'offline' and not hasattr(engine, 'https_support'): - params = engine.request('http_test', { - 'method': 'GET', - 'headers': {}, - 'data': {}, - 'url': '', - 'cookies': {}, - 'verify': True, - 'auth': None, - 'pageno': 1, - 'time_range': None, - 'language': '', - 'safesearch': False, - 'is_test': True, - 'category': 'files', - 'raise_for_status': True, - 'engine_data': {}, - }) - - if 'url' not in params: - return - - parsed_url = urlparse(params['url']) - https_support = parsed_url.scheme == 'https' - - setattr(engine, 'https_support', https_support) diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py index a9ddd711a..05a635883 100644 --- a/searx/engines/apkmirror.py +++ b/searx/engines/apkmirror.py @@ -1,13 +1,21 @@ # SPDX-License-Identifier: AGPL-3.0-or-later +"""APKMirror """ - APK Mirror -""" + +# pylint: disable=invalid-name, missing-function-docstring from urllib.parse import urlencode from lxml import html -from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex -# about +from searx import logger +from searx.utils import ( + eval_xpath_list, + eval_xpath_getindex, + extract_text, +) + +logger = logger.getChild('APKMirror engine') + about = { "website": 'https://www.apkmirror.com', "wikidata_id": None, @@ -18,11 +26,8 @@ about = { } # engine dependent config -categories = ['it'] +categories = ['files'] paging = True - -# I am not 100% certain about this, as apkmirror appears to be a wordpress site, -# which might support time_range searching. If you want to implement it, go ahead. time_range_support = False # search-url @@ -30,37 +35,34 @@ base_url = 'https://www.apkmirror.com' search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{query}' -# do search-request def request(query, params): - - params['url'] = search_url.format(pageno=params['pageno'], - query=urlencode({'s': query})) + params['url'] = search_url.format( + pageno = params['pageno'], + query = urlencode({'s': query}), + ) + logger.debug("query_url --> %s", params['url']) return params -# get response from search-request def response(resp): results = [] dom = html.fromstring(resp.text) # parse results - for result in eval_xpath_list(dom, './/div[@id="content"]/div[@class="listWidget"]//div[@class="appRow"]'): + for result in eval_xpath_list(dom, "//div[@id='content']//div[@class='listWidget']/div/div[@class='appRow']"): link = eval_xpath_getindex(result, './/h5/a', 0) + url = base_url + link.attrib.get('href') + '#downloads' title = extract_text(link) - thumbnail_src = base_url\ - + eval_xpath_getindex(result, './/img', 0).attrib.get('src').replace('&w=32&h=32', '&w=64&h=64') - + img_src = base_url + eval_xpath_getindex(result, './/img/@src', 0) res = { 'url': url, 'title': title, - 'thumbnail_src': thumbnail_src + 'img_src': img_src } - # append result results.append(res) - # return results return results diff --git a/searx/engines/solidtorrents.py b/searx/engines/solidtorrents.py new file mode 100644 index 000000000..050149187 --- /dev/null +++ b/searx/engines/solidtorrents.py @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Solid Torrents + +""" + +# pylint: disable=missing-function-docstring + +from json import loads +from urllib.parse import urlencode +from searx import logger + +logger = logger.getChild('solidtor engine') + +about = { + "website": 'https://www.solidtorrents.net/', + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + +categories = ['files'] +paging = True + +base_url = 'https://www.solidtorrents.net/' +search_url = base_url + 'api/v1/search?{query}' + + +def request(query, params): + skip = (params['pageno'] - 1) * 20 + query = urlencode({'q': query, 'skip': skip}) + params['url'] = search_url.format(query=query) + logger.debug("query_url --> %s", params['url']) + return params + + +def response(resp): + results = [] + search_results = loads(resp.text) + + for result in search_results["results"]: + results.append({ + 'infohash': result["infohash"], + 'seed': result["swarm"]["seeders"], + 'leech': result["swarm"]["leechers"], + 'title': result["title"], + 'link': "https://solidtorrents.net/view/" + result["_id"], + 'filesize': result["size"], + 'magnetlink': result["magnet"], + 'template': "torrent.html", + }) + return results diff --git a/searx/poolrequests.py b/searx/poolrequests.py index 8b8681437..ab327251b 100644 --- a/searx/poolrequests.py +++ b/searx/poolrequests.py @@ -91,9 +91,10 @@ class SessionSinglePool(requests.Session): self.adapters.clear() https_adapter = threadLocal.__dict__.setdefault('https_adapter', next(https_adapters)) - http_adapter = threadLocal.__dict__.setdefault('http_adapter', next(http_adapters)) self.mount('https://', https_adapter) - self.mount('http://', http_adapter) + if get_enable_http_protocol(): + http_adapter = threadLocal.__dict__.setdefault('http_adapter', next(http_adapters)) + self.mount('http://', http_adapter) def close(self): """Call super, but clear adapters since there are managed globaly""" @@ -106,6 +107,17 @@ def set_timeout_for_thread(timeout, start_time=None): threadLocal.start_time = start_time +def set_enable_http_protocol(enable_http): + threadLocal.enable_http = enable_http + + +def get_enable_http_protocol(): + try: + return threadLocal.enable_http + except AttributeError: + return False + + def reset_time_for_thread(): threadLocal.total_time = 0 diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index df0ab8c21..1fc6444ad 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -131,6 +131,8 @@ class OnlineProcessor(EngineProcessor): poolrequests.set_timeout_for_thread(timeout_limit, start_time=start_time) # reset the HTTP total time poolrequests.reset_time_for_thread() + # enable HTTP only if explicitly enabled + poolrequests.set_enable_http_protocol(self.engine.enable_http) # suppose everything will be alright requests_exception = False diff --git a/searx/settings.yml b/searx/settings.yml index e45afb59b..85ba4b2fe 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -656,6 +656,7 @@ engines: - name : library genesis engine : xpath + enable_http: True search_url : http://libgen.rs/search.php?req={query} url_xpath : //a[contains(@href,"bookfi.net/md5")]/@href title_xpath : //a[contains(@href,"book/")]/text()[1] @@ -956,6 +957,12 @@ engines: timeout : 6.0 disabled : True + - name : solidtorrents + engine : solidtorrents + shortcut : solid + timeout : 3.0 + disabled : True + - name : torrentz engine : torrentz shortcut : tor diff --git a/searx/templates/oscar/preferences.html b/searx/templates/oscar/preferences.html index fc20b8ca5..6253b9858 100644 --- a/searx/templates/oscar/preferences.html +++ b/searx/templates/oscar/preferences.html @@ -230,7 +230,7 @@ <td class="onoff-checkbox"> {{ checkbox_toggle('engine_' + search_engine.name|replace(' ', '_') + '__' + categ|replace(' ', '_'), (search_engine.name, categ) in disabled_engines) }} </td> - <th scope="row">{% if not search_engine.https_support %}{{ icon('exclamation-sign', 'No HTTPS') }}{% endif %} {{ search_engine.name }}</td></th> + <th scope="row">{% if search_engine.enable_http %}{{ icon('exclamation-sign', 'No HTTPS') }}{% endif %} {{ search_engine.name }}</td></th> <td class="name">{{ shortcuts[search_engine.name] }} <td>{{ support_toggle(stats[search_engine.name].supports_selected_language) }}</td> <td>{{ support_toggle(search_engine.safesearch==True) }}</td> diff --git a/searx/templates/simple/preferences.html b/searx/templates/simple/preferences.html index f091a97cf..dff7ffba6 100644 --- a/searx/templates/simple/preferences.html +++ b/searx/templates/simple/preferences.html @@ -121,7 +121,7 @@ {% set engine_id = 'engine_' + search_engine.name|replace(' ', '_') + '__' + categ|replace(' ', '_') %} <tr> <td class="engine_checkbox">{{ checkbox_onoff(engine_id, (search_engine.name, categ) in disabled_engines) }}</td> - <th class="name">{% if not search_engine.https_support %}{{ icon('warning', 'No HTTPS') }}{% endif %} {{ search_engine.name }}</th> + <th class="name">{% if search_engine.enable_http %}{{ icon('warning', 'No HTTPS') }}{% endif %} {{ search_engine.name }}</th> <td class="shortcut">{{ shortcuts[search_engine.name] }}</td> <td>{{ checkbox(engine_id + '_supported_languages', current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages, true, true) }}</td> <td>{{ checkbox(engine_id + '_safesearch', search_engine.safesearch==True, true, true) }}</td> |