diff options
| author | Alexandre Flament <alex@al-f.net> | 2021-12-27 19:11:01 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-12-27 19:11:01 +0100 |
| commit | c6922ae7c5e53f695d5f5f8704b10b4e2815cda4 (patch) | |
| tree | 9c0456ad1a1d9d375311ccd8c9bd3eafd0779114 | |
| parent | 54bce130f9074c3d63009237b014c727a1443cc5 (diff) | |
| parent | d84226bf63757b1d4245ab26e9c081daf42389aa (diff) | |
Merge pull request #619 from dalf/apply-black
Apply black
187 files changed, 2975 insertions, 2926 deletions
@@ -54,8 +54,8 @@ search.checker.%: install $(Q)./manage pyenv.cmd searx-checker -v "$(subst _, ,$(patsubst search.checker.%,%,$@))" PHONY += test ci.test test.shell -ci.test: test.yamllint test.pep8 test.pylint test.unit test.robot -test: test.yamllint test.pep8 test.pylint test.unit test.robot test.shell +ci.test: test.yamllint test.black test.pylint test.unit test.robot +test: test.yamllint test.black test.pylint test.unit test.robot test.shell test.shell: $(Q)shellcheck -x -s dash \ dockerfiles/docker-entrypoint.sh @@ -88,7 +88,8 @@ MANAGE += node.env node.clean MANAGE += py.build py.clean MANAGE += pyenv pyenv.install pyenv.uninstall MANAGE += pypi.upload pypi.upload.test -MANAGE += test.yamllint test.pylint test.pep8 test.unit test.coverage test.robot test.clean +MANAGE += format.python +MANAGE += test.yamllint test.pylint test.black test.unit test.coverage test.robot test.clean MANAGE += themes.all themes.oscar themes.simple themes.simple.test pygments.less MANAGE += static.build.commit static.build.drop static.build.restore MANAGE += nvm.install nvm.clean nvm.status nvm.nodejs @@ -24,6 +24,8 @@ PY_SETUP_EXTRAS='[test]' GECKODRIVER_VERSION="v0.30.0" export NODE_MINIMUM_VERSION="16.13.0" # SPHINXOPTS= +BLACK_OPTIONS=("--target-version" "py37" "--line-length" "120" "--skip-string-normalization") +BLACK_TARGETS=("--exclude" "searx/static,searx/languages.py" "searx" "searxng_extra" "tests") pylint.FILES() { @@ -31,8 +33,7 @@ pylint.FILES() { # # # lint: pylint # - # These py files are linted by test.pylint(), all other files are linted by - # test.pep8() + # These py files are linted by test.pylint() grep -l -r --include \*.py '^#[[:blank:]]*lint:[[:blank:]]*pylint' searx searxng_extra tests } @@ -89,10 +90,12 @@ pyenv.: OK : test if virtualenv is OK pypi.upload: Upload python packages to PyPi (to test use pypi.upload.test) +format.: + python : format Python code source using black test.: yamllint : lint YAML files (YAMLLINT_FILES) pylint : lint PYLINT_FILES, searx/engines, searx & tests - pep8 : pycodestyle (pep8) for all files except PYLINT_FILES + black : check black code format unit : run unit tests coverage : run unit tests with coverage robot : run robot test @@ -617,6 +620,12 @@ pypi.upload.test() { pyenv.cmd twine upload -r testpypi "${PYDIST}"/* } +format.python() { + build_msg TEST "[format.python] black \$BLACK_TARGETS" + pyenv.cmd black "${BLACK_OPTIONS[@]}" "${BLACK_TARGETS[@]}" + dump_return $? +} + test.yamllint() { build_msg TEST "[yamllint] \$YAMLLINT_FILES" pyenv.cmd yamllint --format parsable "${YAMLLINT_FILES[@]}" @@ -646,15 +655,9 @@ test.pylint() { dump_return $? } -test.pep8() { - build_msg TEST 'pycodestyle (formerly pep8)' - local _exclude="" - printf -v _exclude '%s, ' "${PYLINT_FILES[@]}" - pyenv.cmd pycodestyle \ - --exclude="searx/static, searx/languages.py, $_exclude " \ - --max-line-length=120 \ - --ignore "E117,E252,E402,E722,E741,W503,W504,W605" \ - searx tests +test.black() { + build_msg TEST "[black] \$BLACK_TARGETS" + pyenv.cmd black --check --diff "${BLACK_OPTIONS[@]}" "${BLACK_TARGETS[@]}" dump_return $? } diff --git a/requirements-dev.txt b/requirements-dev.txt index c80afc460..0fef51f24 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,7 @@ mock==4.0.3 nose2[coverage_plugin]==0.10.0 cov-core==1.15.0 +black==21.12b0 pycodestyle==2.8.0 pylint==2.12.2 splinter==0.17.0 diff --git a/searx/__init__.py b/searx/__init__.py index b1626ae9f..d2d389ea9 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -29,6 +29,7 @@ if settings is not None: _unset = object() + def get_setting(name, default=_unset): """Returns the value to which ``name`` point. If there is no such name in the settings and the ``default`` is unset, a :py:obj:`KeyError` is raised. @@ -80,14 +81,9 @@ def logging_config_debug(): 'levelname': {'color': 8}, 'name': {'color': 8}, 'programname': {'color': 'cyan'}, - 'username': {'color': 'yellow'} + 'username': {'color': 'yellow'}, } - coloredlogs.install( - level=log_level, - level_styles=level_styles, - field_styles=field_styles, - fmt=LOG_FORMAT_DEBUG - ) + coloredlogs.install(level=log_level, level_styles=level_styles, field_styles=field_styles, fmt=LOG_FORMAT_DEBUG) else: logging.basicConfig(level=logging.getLevelName(log_level), format=LOG_FORMAT_DEBUG) diff --git a/searx/answerers/random/answerer.py b/searx/answerers/random/answerer.py index d5223e517..e6c383330 100644 --- a/searx/answerers/random/answerer.py +++ b/searx/answerers/random/answerer.py @@ -8,13 +8,12 @@ from flask_babel import gettext # specifies which search query keywords triggers this answerer keywords = ('random',) -random_int_max = 2**31 +random_int_max = 2 ** 31 random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase def random_characters(): - return [random.choice(random_string_letters) - for _ in range(random.randint(8, 32))] + return [random.choice(random_string_letters) for _ in range(random.randint(8, 32))] def random_string(): @@ -39,11 +38,13 @@ def random_uuid(): return str(uuid.uuid4()) -random_types = {'string': random_string, - 'int': random_int, - 'float': random_float, - 'sha256': random_sha256, - 'uuid': random_uuid} +random_types = { + 'string': random_string, + 'int': random_int, + 'float': random_float, + 'sha256': random_sha256, + 'uuid': random_uuid, +} # required answerer function @@ -62,6 +63,8 @@ def answer(query): # required answerer function # returns information about the answerer def self_info(): - return {'name': gettext('Random value generator'), - 'description': gettext('Generate different random values'), - 'examples': ['random {}'.format(x) for x in random_types]} + return { + 'name': gettext('Random value generator'), + 'description': gettext('Generate different random values'), + 'examples': ['random {}'.format(x) for x in random_types], + } diff --git a/searx/answerers/statistics/answerer.py b/searx/answerers/statistics/answerer.py index abd4be7f5..60f0d304f 100644 --- a/searx/answerers/statistics/answerer.py +++ b/searx/answerers/statistics/answerer.py @@ -4,11 +4,7 @@ from operator import mul from flask_babel import gettext -keywords = ('min', - 'max', - 'avg', - 'sum', - 'prod') +keywords = ('min', 'max', 'avg', 'sum', 'prod') # required answerer function @@ -47,6 +43,8 @@ def answer(query): # required answerer function # returns information about the answerer def self_info(): - return {'name': gettext('Statistics functions'), - 'description': gettext('Compute {functions} of the arguments').format(functions='/'.join(keywords)), - 'examples': ['avg 123 548 2.04 24.2']} + return { + 'name': gettext('Statistics functions'), + 'description': gettext('Compute {functions} of the arguments').format(functions='/'.join(keywords)), + 'examples': ['avg 123 548 2.04 24.2'], + } diff --git a/searx/autocomplete.py b/searx/autocomplete.py index a55377cd9..b8d272c32 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -120,14 +120,15 @@ def wikipedia(query, lang): return [] -backends = {'dbpedia': dbpedia, - 'duckduckgo': duckduckgo, - 'google': google, - 'startpage': startpage, - 'swisscows': swisscows, - 'qwant': qwant, - 'wikipedia': wikipedia - } +backends = { + 'dbpedia': dbpedia, + 'duckduckgo': duckduckgo, + 'google': google, + 'startpage': startpage, + 'swisscows': swisscows, + 'qwant': qwant, + 'wikipedia': wikipedia, +} def search_autocomplete(backend_name, query, lang): diff --git a/searx/data/__init__.py b/searx/data/__init__.py index 5937ea557..87bfb5477 100644 --- a/searx/data/__init__.py +++ b/searx/data/__init__.py @@ -23,10 +23,12 @@ from pathlib import Path data_dir = Path(__file__).parent + def _load(filename): with open(data_dir / filename, encoding='utf-8') as f: return json.load(f) + def ahmia_blacklist_loader(): """Load data from `ahmia_blacklist.txt` and return a list of MD5 values of onion names. The MD5 values are fetched by:: @@ -39,6 +41,7 @@ def ahmia_blacklist_loader(): with open(str(data_dir / 'ahmia_blacklist.txt'), encoding='utf-8') as f: return f.read().split() + ENGINES_LANGUAGES = _load('engines_languages.json') CURRENCIES = _load('currencies.json') USER_AGENTS = _load('useragents.json') diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py index e6a243596..730a4c445 100644 --- a/searx/engines/1337x.py +++ b/searx/engines/1337x.py @@ -43,11 +43,15 @@ def response(resp): filesize, filesize_multiplier = filesize_info.split() filesize = get_torrent_size(filesize, filesize_multiplier) - results.append({'url': href, - 'title': title, - 'seed': seed, - 'leech': leech, - 'filesize': filesize, - 'template': 'torrent.html'}) + results.append( + { + 'url': href, + 'title': title, + 'seed': seed, + 'leech': leech, + 'filesize': filesize, + 'template': 'torrent.html', + } + ) return results diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 44ea9a4bd..fa9749e9d 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -57,6 +57,7 @@ engine_shortcuts = {} """ + def load_engine(engine_data): """Load engine from ``engine_data``. @@ -166,20 +167,19 @@ def set_language_attributes(engine): # settings.yml if engine.language not in engine.supported_languages: raise ValueError( - "settings.yml - engine: '%s' / language: '%s' not supported" % ( - engine.name, engine.language )) + "settings.yml - engine: '%s' / language: '%s' not supported" % (engine.name, engine.language) + ) if isinstance(engine.supported_languages, dict): - engine.supported_languages = { - engine.language : engine.supported_languages[engine.language] - } + engine.supported_languages = {engine.language: engine.supported_languages[engine.language]} else: engine.supported_languages = [engine.language] # find custom aliases for non standard language codes for engine_lang in engine.supported_languages: iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None) - if (iso_lang + if ( + iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and iso_lang not in engine.supported_languages @@ -197,14 +197,12 @@ def set_language_attributes(engine): } engine.fetch_supported_languages = ( # pylint: disable=protected-access - lambda: engine._fetch_supported_languages( - get(engine.supported_languages_url, headers=headers)) + lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)) ) def update_attributes_for_tor(engine): - if (settings['outgoing'].get('using_tor_proxy') - and hasattr(engine, 'onion_url') ): + if settings['outgoing'].get('using_tor_proxy') and hasattr(engine, 'onion_url'): engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0) @@ -217,9 +215,7 @@ def is_missing_required_attributes(engine): missing = False for engine_attr in dir(engine): if not engine_attr.startswith('_') and getattr(engine, engine_attr) is None: - logger.error( - 'Missing engine config attribute: "{0}.{1}"' - .format(engine.name, engine_attr)) + logger.error('Missing engine config attribute: "{0}.{1}"'.format(engine.name, engine_attr)) missing = True return missing @@ -230,8 +226,7 @@ def is_engine_active(engine): return False # exclude onion engines if not using tor - if ('onions' in engine.categories - and not settings['outgoing'].get('using_tor_proxy') ): + if 'onions' in engine.categories and not settings['outgoing'].get('using_tor_proxy'): return False return True @@ -253,8 +248,7 @@ def register_engine(engine): def load_engines(engine_list): - """usage: ``engine_list = settings['engines']`` - """ + """usage: ``engine_list = settings['engines']``""" engines.clear() engine_shortcuts.clear() categories.clear() diff --git a/searx/engines/ahmia.py b/searx/engines/ahmia.py index b9a0086bd..33e0cc393 100644 --- a/searx/engines/ahmia.py +++ b/searx/engines/ahmia.py @@ -25,9 +25,7 @@ page_size = 10 # search url search_url = 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion/search/?{query}' time_range_support = True -time_range_dict = {'day': 1, - 'week': 7, - 'month': 30} +time_range_dict = {'day': 1, 'week': 7, 'month': 30} # xpaths results_xpath = '//li[@class="result"]' @@ -54,7 +52,7 @@ def response(resp): # trim results so there's not way too many at once first_result_index = page_size * (resp.search_params.get('pageno', 1) - 1) all_results = eval_xpath_list(dom, results_xpath) - trimmed_results = all_results[first_result_index:first_result_index + page_size] + trimmed_results = all_results[first_result_index : first_result_index + page_size] # get results for result in trimmed_results: @@ -65,10 +63,7 @@ def response(resp): title = extract_text(eval_xpath(result, title_xpath)) content = extract_text(eval_xpath(result, content_xpath)) - results.append({'url': cleaned_url, - 'title': title, - 'content': content, - 'is_onion': True}) + results.append({'url': cleaned_url, 'title': title, 'content': content, 'is_onion': True}) # get spelling corrections for correction in eval_xpath_list(dom, correction_xpath): diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py index 746a8cd9c..da84bc79e 100644 --- a/searx/engines/apkmirror.py +++ b/searx/engines/apkmirror.py @@ -35,8 +35,8 @@ search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{q def request(query, params): params['url'] = search_url.format( - pageno = params['pageno'], - query = urlencode({'s': query}), + pageno=params['pageno'], + query=urlencode({'s': query}), ) logger.debug("query_url --> %s", params['url']) return params @@ -55,11 +55,7 @@ def response(resp): url = base_url + link.attrib.get('href') + '#downloads' title = extract_text(link) img_src = base_url + eval_xpath_getindex(result, './/img/@src', 0) - res = { - 'url': url, - 'title': title, - 'img_src': img_src - } + res = {'url': url, 'title': title, 'img_src': img_src} results.append(res) diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index aeac145d1..1cfb3983f 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -39,6 +39,7 @@ def locale_to_lang_code(locale): # wikis for some languages were moved off from the main site, we need to make # requests to correct URLs to be able to get results in those languages lang_urls = { + # fmt: off 'all': { 'base': 'https://wiki.archlinux.org', 'search': '/index.php?title=Special:Search&offset={offset}&{query}' @@ -63,6 +64,7 @@ lang_urls = { 'base': 'http://archtr.org/wiki', 'search': '/index.php?title=Özel:Ara&offset={offset}&{query}' } + # fmt: on } @@ -95,7 +97,7 @@ main_langs = { 'sl': 'Slovenský', 'th': 'ไทย', 'uk': 'Українська', - 'zh': '简体中文' + 'zh': '简体中文', } supported_languages = dict(lang_urls, **main_langs) @@ -139,7 +141,6 @@ def response(resp): href = urljoin(base_url, link.attrib.get('href')) title = extract_text(link) - results.append({'url': href, - 'title': title}) + results.append({'url': href, 'title': title}) return results diff --git a/searx/engines/artic.py b/searx/engines/artic.py index 104ab8839..c0ae0a5e7 100644 --- a/searx/engines/artic.py +++ b/searx/engines/artic.py @@ -27,19 +27,23 @@ nb_per_page = 20 search_api = 'https://api.artic.edu/api/v1/artworks/search?' image_api = 'https://www.artic.edu/iiif/2/' + def request(query, params): - args = urlencode({ - 'q' : query, - 'page' : params['pageno'], - 'fields' : 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', - 'limit' : nb_per_page, - }) + args = urlencode( + { + 'q': query, + 'page': params['pageno'], + 'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', + 'limit': nb_per_page, + } + ) params['url'] = search_api + args logger.debug("query_url --> %s", params['url']) return params + def response(resp): results = [] @@ -50,14 +54,16 @@ def response(resp): if not result['image_id']: continue - results.append({ - 'url': 'https://artic.edu/artworks/%(id)s' % result, - 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, - 'content': result['medium_display'], - 'author': ', '.join(result['artist_titles']), - 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, - 'img_format': result['dimensions'], - 'template': 'images.html' - }) + results.append( + { + 'url': 'https://artic.edu/artworks/%(id)s' % result, + 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, + 'content': result['medium_display'], + 'author': ', '.join(result['artist_titles']), + 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, + 'img_format': result['dimensions'], + 'template': 'images.html', + } + ) return results diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py index 09ea07ea5..a1a58172d 100644 --- a/searx/engines/arxiv.py +++ b/searx/engines/arxiv.py @@ -20,8 +20,9 @@ about = { categories = ['science'] paging = True -base_url = 'https://export.arxiv.org/api/query?search_query=all:'\ - + '{query}&start={offset}&max_results={number_of_results}' +base_url = ( + 'https://export.arxiv.org/api/query?search_query=all:' + '{query}&start={offset}&max_results={number_of_results}' +) # engine dependent config number_of_results = 10 @@ -31,9 +32,7 @@ def request(query, params): # basic search offset = (params['pageno'] - 1) * number_of_results - string_args = dict(query=query, - offset=offset, - number_of_results=number_of_results) + string_args = dict(query=query, offset=offset, number_of_results=number_of_results) params['url'] = base_url.format(**string_args) @@ -65,10 +64,7 @@ def response(resp): publishedDate = datetime.strptime(eval_xpath_getindex(entry, './/published', 0).text, '%Y-%m-%dT%H:%M:%SZ') - res_dict = {'url': url, - 'title': title, - 'publishedDate': publishedDate, - 'content': content} + res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content} results.append(res_dict) diff --git a/searx/engines/bandcamp.py b/searx/engines/bandcamp.py index 62745243f..ba951a393 100644 --- a/searx/engines/bandcamp.py +++ b/searx/engines/bandcamp.py @@ -44,9 +44,7 @@ def request(query, params): pageno : 1 # number of the requested page ''' - search_path = search_string.format( - query=urlencode({'q': query}), - page=params['pageno']) + search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno']) params['url'] = base_url + search_path diff --git a/searx/engines/base.py b/searx/engines/base.py index 463274681..5a2d66619 100755 --- a/searx/engines/base.py +++ b/searx/engines/base.py @@ -21,8 +21,10 @@ about = { categories = ['science'] -base_url = 'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi'\ - + '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}' +base_url = ( + 'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi' + + '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}' +) # engine dependent config paging = True @@ -47,7 +49,7 @@ shorcut_dict = { 'source:': 'dcsource:', 'subject:': 'dcsubject:', 'title:': 'dctitle:', - 'type:': 'dcdctype:' + 'type:': 'dcdctype:', } @@ -59,9 +61,7 @@ def request(query, params): # basic search offset = (params['pageno'] - 1) * number_of_results - string_args = dict(query=urlencode({'query': query}), - offset=offset, - hits=number_of_results) + string_args = dict(query=urlencode({'query': query}), offset=offset, hits=number_of_results) params['url'] = base_url.format(**string_args) @@ -93,7 +93,7 @@ def response(resp): if len(item.text) > 300: content += "..." -# dates returned by the BASE API are not several formats + # dates returned by the BASE API are not several formats publishedDate = None for date_format in ['%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%d', '%Y-%m', '%Y']: try: @@ -103,14 +103,9 @@ def response(resp): pass if publishedDate is not None: - res_dict = {'url': url, - 'title': title, - 'publishedDate': publishedDate, - 'content': content} + res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content} else: - res_dict = {'url': url, - 'title': title, - 'content': content} + res_dict = {'url': url, 'title': title, 'content': content} results.append(res_dict) diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 3917e54c1..59fc22be4 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -36,9 +36,11 @@ inital_query = 'search?{query}&search=&form=QBLH' # following queries: https://www.bing.com/search?q=foo&search=&first=11&FORM=PERE page_query = 'search?{query}&search=&first={offset}&FORM=PERE' + def _get_offset_from_pageno(pageno): return (pageno - 1) * 10 + 1 + def request(query, params): offset = _get_offset_from_pageno(params.get('pageno', 1)) @@ -53,30 +55,23 @@ def request(query, params): if params['language'] == 'all': lang = 'EN' else: - lang = match_language( - params['language'], supported_languages, language_aliases - ) + lang = match_language(params['language'], supported_languages, language_aliases) - query = 'language:{} {}'.format( - lang.split('-')[0].upper(), query - ) + query = 'language:{} {}'.format(lang.split('-')[0].upper(), query) - search_path = search_string.format( - query = urlencode({'q': query}), - offset = offset) + search_path = search_string.format(query=urlencode({'q': query}), offset=offset) if offset > 1: - referer = base_url + inital_query.format(query = urlencode({'q': query})) + referer = base_url + inital_query.format(query=urlencode({'q': query})) params['headers']['Referer'] = referer - logger.debug("headers.Referer --> %s", referer ) + logger.debug("headers.Referer --> %s", referer) params['url'] = base_url + search_path params['headers']['Accept-Language'] = "en-US,en;q=0.5" - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' return params + def response(resp): results = [] @@ -87,7 +82,7 @@ def response(resp): for result in eval_xpath(dom, '//div[@class="sa_cc"]'): # IMO //div[@class="sa_cc"] does no longer match - logger.debug('found //div[@class="sa_cc"] --> %s', result) + logger.debug('found //div[@class="sa_cc"] --> %s', result) link = eval_xpath(result, './/h3/a')[0] url = link.attrib.get('href') @@ -95,11 +90,7 @@ def response(resp): content = extract_text(eval_xpath(result, './/p')) # append result - results.append({ - 'url': url, - 'title': title, - 'content': content - }) + results.append({'url': url, 'title': title, 'content': content}) # parse results again if nothing is found yet for result in eval_xpath(dom, '//li[@class="b_algo"]'): @@ -110,18 +101,14 @@ def response(resp): content = extract_text(eval_xpath(result, './/p')) # append result - results.append({ - 'url': url, - 'title': title, - 'content': content - }) + results.append({'url': url, 'title': title, 'content': content}) try: result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()')) if "-" in result_len_container: # Remove the part "from-to" for paginated request ... - result_len_container = result_len_container[result_len_container.find("-") * 2 + 2:] + result_len_container = result_len_container[result_len_container.find("-") * 2 + 2 :] result_len_container = re.sub('[^0-9]', '', result_len_container) diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 4bee9bc7d..73b61b896 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -6,10 +6,13 @@ from urllib.parse import urlencode from lxml import html from json import loads -from searx.utils import match_language +from searx.utils import match_language from searx.engines.bing import language_aliases -from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.bing import ( # pylint: disable=unused-import + _fetch_supported_languages, + supported_languages_url, +) # about about = { @@ -31,39 +34,33 @@ number_of_results = 28 # search-url base_url = 'https://www.bing.com/' -search_string = 'images/search'\ - '?{query}'\ - '&count={count}'\ - '&first={first}'\ +search_string = ( + # fmt: off + 'images/search' + '?{query}' + '&count={count}' + '&first={first}' '&tsc=ImageHoverTitle' + # fmt: on +) time_range_string = '&qft=+filterui:age-lt{interval}' -time_range_dict = {'day': '1440', - 'week': '10080', - 'month': '43200', - 'year': '525600'} +time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'} # safesearch definitions -safesearch_types = {2: 'STRICT', - 1: 'DEMOTE', - 0: 'OFF'} +safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} # do search-request def request(query, params): offset = ((params['pageno'] - 1) * number_of_results) + 1 - search_path = search_string.format( - query=urlencode({'q': query}), - count=number_of_results, - first=offset) + search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset) language = match_language(params['language'], supported_languages, language_aliases).lower() - params['cookies']['SRCHHPGUSR'] = \ - 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') - params['cookies']['_EDGE_S'] = 'mkt=' + language +\ - '&ui=' + language + '&F=1' + params['cookies']['_EDGE_S'] = 'mkt=' + language + '&ui=' + language + '&F=1' params['url'] = base_url + search_path if params['time_range'] in time_range_dict: @@ -92,14 +89,18 @@ def response(resp): # strip 'Unicode private use area' highlighting, they render to Tux # the Linux penguin and a standing diamond on my machine... title = m.get('t', '').replace('\ue000', '').replace('\ue001', '') - results.append({'template': 'images.html', - 'url': m['purl'], - 'thumbnail_src': m['turl'], - 'img_src': m['murl'], - 'content': '', - 'title': title, - 'source': source, - 'img_format': img_format}) + results.append( + { + 'template': 'images.html', + 'url': m['purl'], + 'thumbnail_src': m['turl'], + 'img_src': m['murl'], + 'content': '', + 'title': title, + 'source': source, + 'img_format': img_format, + } + ) except: continue diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index c2515385c..22856541b 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -13,10 +13,7 @@ from datetime import datetime from dateutil import parser from lxml import etree from lxml.etree import XPath -from searx.utils import ( - match_language, - eval_xpath_getindex -) +from searx.utils import match_language, eval_xpath_getindex from searx.engines.bing import ( # pylint: disable=unused-import language_aliases, _fetch_supported_languages, @@ -42,11 +39,8 @@ time_range_support = True base_url = 'https://www.bing.com/' search_string = 'news/search?{query}&first={offset}&format=RSS' search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS' -time_range_dict = { - 'day': '7', - 'week': '8', - 'month': '9' -} +time_range_dict = {'day': '7', 'week': '8', 'month': '9'} + def url_cleanup(url_string): """remove click""" @@ -57,6 +51,7 @@ def url_cleanup(url_string): url_string = query.get('url', None) return url_string + def image_url_cleanup(url_string): """replace the http://*bing.com/th?id=... by https://www.bing.com/th?id=...""" @@ -66,27 +61,33 @@ def image_url_cleanup(url_string): url_string = "https://www.bing.com/th?id=" + quote(query.get('id')) return url_string + def _get_url(query, language, offset, time_range): if time_range in time_range_dict: search_path = search_string_with_time.format( + # fmt: off query = urlencode({ 'q': query, 'setmkt': language }), offset = offset, interval = time_range_dict[time_range] + # fmt: on ) else: # e.g. setmkt=de-de&setlang=de search_path = search_string.format( + # fmt: off query = urlencode({ 'q': query, 'setmkt': language }), offset = offset + # fmt: on ) return base_url + search_path + def request(query, params): if params['time_range'] and params['time_range'] not in time_range_dict: @@ -101,6 +102,7 @@ def request(query, params): return params + def response(resp): results = [] @@ -123,26 +125,16 @@ def response(resp): publishedDate = datetime.now() # thumbnail - thumbnail = eval_xpath_getindex( - item, XPath('./News:Image/text()', namespaces=namespaces), 0, default=None) + thumbnail = eval_xpath_getindex(item, XPath('./News:Image/text()', namespaces=namespaces), 0, default=None) if thumbnail is not None: thumbnail = image_url_cleanup(thumbnail) # append result if thumbnail is not None: - results.append({ - 'url': url, - 'title': title, - 'publishedDate': publishedDate, - 'content': content, - 'img_src': thumbnail - }) + results.append( + {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content, 'img_src': thumbnail} + ) else: - results.append({ - 'url': url, - 'title': title, - 'publishedDate': publishedDate, - 'content': content - }) + results.append({'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}) return results diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index 2e1f13de2..7f8820546 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -6,12 +6,15 @@ from json import loads from lxml import html from urllib.parse import urlencode -from searx.utils import match_language +from searx.utils import match_language from searx.engines.bing import language_aliases -from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import -# about +from searx.engines.bing import ( # pylint: disable=unused-import + _fetch_supported_languages, + supported_languages_url, +) + about = { "website": 'https://www.bing.com/videos', "wikidata_id": 'Q4914152', @@ -28,36 +31,31 @@ time_range_support = True number_of_results = 28 base_url = 'https://www.bing.com/' -search_string = 'videos/search'\ - '?{query}'\ - '&count={count}'\ - '&first={first}'\ - '&scope=video'\ +search_string = ( + # fmt: off + 'videos/search' + '?{query}' + '&count={count}' + '&first={first}' + '&scope=video' '&FORM=QBLH' + # fmt: on +) time_range_string = '&qft=+filterui:videoage-lt{interval}' -time_range_dict = {'day': '1440', - 'week': '10080', - 'month': '43200', - 'year': '525600'} +time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'} # safesearch definitions -safesearch_types = {2: 'STRICT', - 1: 'DEMOTE', - 0: 'OFF'} +safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} # do search-request def request(query, params): offset = ((params['pageno'] - 1) * number_of_results) + 1 - search_path = search_string.format( - query=urlencode({'q': query}), - count=number_of_results, - first=offset) + search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset) # safesearch cookie - params['cookies']['SRCHHPGUSR'] = \ - 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') # language cookie language = match_language(params['language'], supported_languages, language_aliases).lower() @@ -89,11 +87,15 @@ def response(resp): info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip() content = '{0} - {1}'.format(metadata['du'], info) thumbnail = '{0}th?id={1}'.format(base_url, metadata['thid']) - results.append({'url': metadata['murl'], - 'thumbnail': thumbnail, - 'title': metadata.get('vt', ''), - 'content': content, - 'template': 'videos.html'}) + results.append( + { + 'url': metadata['murl'], + 'thumbnail': thumbnail, + 'title': metadata.get('vt', ''), + 'content': content, + 'template': 'videos.html', + } + ) except: continue diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index cda9e9355..c5dd92105 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -11,10 +11,7 @@ from searx.utils import extract_text, get_torrent_size about = { "website": 'https://btdig.com', "wikidata_id": 'Q4836698', - "official_api_documentation": { - 'url': 'https://btdig.com/contacts', - 'comment': 'on demand' - }, + "official_api_documentation": {'url': 'https://btdig.com/contacts', 'comment': 'on demand'}, "use_official_api": False, "require_api_key": False, "results": 'HTML', @@ -31,8 +28,7 @@ search_url = url + '/search?q={search_term}&p={pageno}' # do search-request def request(query, params): - params['url'] = search_url.format(search_term=quote(query), - pageno=params['pageno'] - 1) + params['url'] = search_url.format(search_term=quote(query), pageno=params['pageno'] - 1) return params @@ -77,13 +73,17 @@ def response(resp): magnetlink = result.xpath('.//div[@class="torrent_magnet"]//a')[0].attrib['href'] # append result - results.append({'url': href, - 'title': title, - 'content': content, - 'filesize': filesize, - 'files': files, - 'magnetlink': magnetlink, - 'template': 'torrent.html'}) + results.append( + { + 'url': href, + 'title': title, + 'content': content, + 'filesize': filesize, + 'files': files, + 'magnetlink': magnetlink, + 'template': 'torrent.html', + } + ) # return results sorted by seeder return results diff --git a/searx/engines/ccengine.py b/searx/engines/ccengine.py index 6f3a5adb7..93ac30c86 100644 --- a/searx/engines/ccengine.py +++ b/searx/engines/ccengine.py @@ -29,10 +29,7 @@ search_string = '&page={page}&page_size={nb_per_page}&format=json&{query}' def request(query, params): - search_path = search_string.format( - query=urlencode({'q': query}), - nb_per_page=nb_per_page, - page=params['pageno']) + search_path = search_string.format(query=urlencode({'q': query}), nb_per_page=nb_per_page, page=params['pageno']) params['url'] = base_url + search_path @@ -45,9 +42,13 @@ def response(resp): json_data = loads(resp.text) for result in json_data['results']: - results.append({'url': result['foreign_landing_url'], - 'title': result['title'], - 'img_src': result['url'], - 'template': 'images.html'}) + results.append( + { + 'url': result['foreign_landing_url'], + 'title': result['title'], + 'img_src': result['url'], + 'template': 'images.html', + } + ) return results diff --git a/searx/engines/command.py b/searx/engines/command.py index aca379c67..abd29e2a5 100644 --- a/searx/engines/command.py +++ b/searx/engines/command.py @@ -138,7 +138,7 @@ def __check_query_params(params): def check_parsing_options(engine_settings): - """ Checks if delimiter based parsing or regex parsing is configured correctly """ + """Checks if delimiter based parsing or regex parsing is configured correctly""" if 'delimiter' not in engine_settings and 'parse_regex' not in engine_settings: raise ValueError('failed to init settings for parsing lines: missing delimiter or parse_regex') @@ -151,7 +151,7 @@ def check_parsing_options(engine_settings): def __parse_single_result(raw_result): - """ Parses command line output based on configuration """ + """Parses command line output based on configuration""" result = {} @@ -167,6 +167,6 @@ def __parse_single_result(raw_result): found = regex.search(raw_result) if not found: return {} - result[result_key] = raw_result[found.start():found.end()] + result[result_key] = raw_result[found.start() : found.end()] return result diff --git a/searx/engines/core.py b/searx/engines/core.py index e83c8bbe9..1fcb68f1f 100644 --- a/searx/engines/core.py +++ b/searx/engines/core.py @@ -28,22 +28,24 @@ api_key = 'unset' base_url = 'https://core.ac.uk:443/api-v2/search/' search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}' + def request(query, params): if api_key == 'unset': raise SearxEngineAPIException('missing CORE API key') search_path = search_string.format( - query = urlencode({'q': query}), - nb_per_page = nb_per_page, - page = params['pageno'], - apikey = api_key, + query=urlencode({'q': query}), + nb_per_page=nb_per_page, + page=params['pageno'], + apikey=api_key, ) params['url'] = base_url + search_path logger.debug("query_url --> %s", params['url']) return params + def response(resp): results = [] json_data = loads(resp.text) @@ -52,7 +54,7 @@ def response(resp): source = result['_source'] time = source['publishedDate'] or source['depositedDate'] - if time : + if time: date = datetime.fromtimestamp(time / 1000) else: date = None @@ -66,12 +68,14 @@ def response(resp): metadata.append(source['doi']) metadata = ' / '.join(metadata) - results.append({ - 'url': source['urls'][0].replace('http://', 'https://', 1), - 'title': source['title'], - 'content': source['description'], - 'publishedDate': date, - 'metadata' : metadata, - }) + results.append( + { + 'url': source['urls'][0].replace('http://', 'https://', 1), + 'title': source['title'], + 'content': source['description'], + 'publishedDate': date, + 'metadata': metadata, + } + ) return results diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index d4c3b5f81..969688126 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -30,7 +30,7 @@ def request(query, params): def response(resp): """remove first and last lines to get only json""" - json_resp = resp.text[resp.text.find('\n') + 1:resp.text.rfind('\n') - 2] + json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2] results = [] try: conversion_rate = float(json.loads(json_resp)['conversion']['converted-amount']) @@ -47,7 +47,8 @@ def response(resp): ) url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'.format( - resp.search_params['from'].upper(), resp.search_params['to']) + resp.search_params['from'].upper(), resp.search_params['to'] + ) results.append({'answer': answer, 'url': url}) diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 92d368c11..5607691a4 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -25,8 +25,10 @@ paging = True # search-url # see http://www.dailymotion.com/doc/api/obj-video.html search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,description,duration,url,thumbnail_360_url,id&sort=relevance&limit=5&page={pageno}&{query}' # noqa -embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\ - 'data-src="https://www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>' +embedded_url = ( + '<iframe frameborder="0" width="540" height="304" ' + + 'data-src="https://www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>' +) supported_languages_url = 'https://api.dailymotion.com/languages' @@ -39,8 +41,8 @@ def request(query, params): locale = match_language(params['language'], supported_languages) params['url'] = search_url.format( - query=urlencode({'search': query, 'localization': locale}), - pageno=params['pageno']) + query=urlencode({'search': query, 'localization': locale}), pageno=params['pageno'] + ) return params @@ -67,13 +69,17 @@ def response(resp): # http to https thumbnail = thumbnail.replace("http://", "https://") - results.append({'template': 'videos.html', - 'url': url, - 'title': title, - 'content': content, - 'publishedDate': publishedDate, - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'template': 'videos.html', + 'url': url, + 'title': title, + 'content': content, + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py index 946bd3ebe..220ac599d 100644 --- a/searx/engines/deezer.py +++ b/searx/engines/deezer.py @@ -24,9 +24,11 @@ paging = True url = 'https://api.deezer.com/' search_url = url + 'search?{query}&index={offset}' -embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\ - 'data-src="https://www.deezer.com/plugins/player?type=tracks&id={audioid}" ' +\ - 'width="540" height="80"></iframe>' +embedded_url = ( + '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' + + 'data-src="https://www.deezer.com/plugins/player?type=tracks&id={audioid}" ' + + 'width="540" height="80"></iframe>' +) # do search-request @@ -53,18 +55,12 @@ def response(resp): if url.startswith('http://'): url = 'https' + url[4:] - content = '{} - {} - {}'.format( - result['artist']['name'], - result['album']['title'], - result['title']) + content = '{} - {} - {}'.format(result['artist']['name'], result['album']['title'], result['title']) embedded = embedded_url.format(audioid=result['id']) # append result - results.append({'url': url, - 'title': title, - 'embedded': embedded, - 'content': content}) + results.append({'url': url, 'title': title, 'embedded': embedded, 'content': content}) # return results return results diff --git a/searx/engines/demo_offline.py b/searx/engines/demo_offline.py index a4a632180..aeb74f443 100644 --- a/searx/engines/demo_offline.py +++ b/searx/engines/demo_offline.py @@ -31,6 +31,7 @@ about = { # if there is a need for globals, use a leading underline _my_offline_engine = None + def init(engine_settings=None): """Initialization of the (offline) engine. The origin of this demo engine is a simple json string which is loaded in this example while the engine is @@ -44,11 +45,10 @@ def init(engine_settings=None): ', {"value":"first item"}' ', {"value":"second item"}' ', {"value":"third item"}' - ']' - - % engine_settings.get('name') + ']' % engine_settings.get('name') ) + def search(query, request_params): """Query (offline) engine and return results. Assemble the list of results from your local engine. In this demo engine we ignore the 'query' term, usual @@ -62,11 +62,11 @@ def search(query, request_params): for row in result_list: entry = { - 'query' : query, - 'language' : request_params['language'], - 'value' : row.get("value"), + 'query': query, + 'language': request_params['language'], + 'value': row.get("value"), # choose a result template or comment out to use the *default* - 'template' : 'key-value.html', + 'template': 'key-value.html', } ret_val.append(entry) diff --git a/searx/engines/demo_online.py b/searx/engines/demo_online.py index a0f736e42..e53b3c15e 100644 --- a/searx/engines/demo_online.py +++ b/searx/engines/demo_online.py @@ -43,6 +43,7 @@ about = { # if there is a need for globals, use a leading underline _my_online_engine = None + def init(engine_settings): """Initialization of the (online) engine. If no initialization is needed, drop this init function. @@ -51,20 +52,24 @@ def init(engine_settings): global _my_online_engine # pylint: disable=global-statement _my_online_engine = engine_settings.get('name') + def request(query, params): """Build up the ``params`` for the online request. In this example we build a URL to fetch images from `artic.edu <https://artic.edu>`__ """ - args = urlencode({ - 'q' : query, - 'page' : params['pageno'], - 'fields' : 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', - 'limit' : page_size, - }) + args = urlencode( + { + 'q': query, + 'page': params['pageno'], + 'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', + 'limit': page_size, + } + ) params['url'] = search_api + args return params + def response(resp): """Parse out the result items from the response. In this example we parse the response from `api.artic.edu <https://artic.edu>`__ and filter out all @@ -79,14 +84,16 @@ def response(resp): if not result['image_id']: continue - results.append({ - 'url': 'https://artic.edu/artworks/%(id)s' % result, - 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, - 'content': result['medium_display'], - 'author': ', '.join(result['artist_titles']), - 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, - 'img_format': result['dimensions'], - 'template': 'images.html' - }) + results.append( + { + 'url': 'https://artic.edu/artworks/%(id)s' % result, + 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, + 'content': result['medium_display'], + 'author': ', '.join(result['artist_titles']), + 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, + 'img_format': result['dimensions'], + 'template': 'images.html', + } + ) return results diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index b13d54dd5..e44ac28e5 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -32,13 +32,14 @@ time_range_dict = { # search-url base_url = 'https://www.deviantart.com' + def request(query, params): # https://www.deviantart.com/search/deviations?page=5&q=foo - query = { - 'page' : params['pageno'], - 'q' : query, + query = { + 'page': params['pageno'], + 'q': query, } if params['time_range'] in time_range_dict: query['order'] = time_range_dict[params['time_range']] @@ -47,6 +48,7 @@ def request(query, params): return params + def response(resp): results = [] @@ -67,11 +69,13 @@ def response(resp): continue img_tag = img_tag[0] - results.append({ - 'template': 'images.html', - 'url': a_tag.attrib.get('href'), - 'img_src': img_tag.attrib.get('src'), - 'title': img_tag.attrib.get('alt'), - }) + results.append( + { + 'template': 'images.html', + 'url': a_tag.attrib.get('href'), + 'img_src': img_tag.attrib.get('src'), + 'title': img_tag.attrib.get('alt'), + } + ) return results diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 4a92a22c3..126e75374 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -27,9 +27,7 @@ https_support = True def request(query, params): - params['url'] = url.format(from_lang=params['from_lang'][2], - to_lang=params['to_lang'][2], - query=params['query']) + params['url'] = url.format(from_lang=params['from_lang'][2], to_lang=params['to_lang'][2], query=params['query']) return params @@ -51,10 +49,12 @@ def response(resp): if t.strip(): to_results.append(to_result.text_content()) - results.append({ - 'url': urljoin(str(resp.url), '?%d' % k), - 'title': from_result.text_content(), - 'content': '; '.join(to_results) - }) + results.append( + { + 'url': urljoin(str(resp.url), '?%d' % k), + 'title': from_result.text_content(), + 'content': '; '.join(to_results), + } + ) return results diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py index 109662a49..2914e9228 100644 --- a/searx/engines/digbt.py +++ b/searx/engines/digbt.py @@ -48,13 +48,17 @@ def response(resp): filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER]) magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0] - results.append({'url': url, - 'title': title, - 'content': content, - 'filesize': filesize, - 'magnetlink': magnetlink, - 'seed': 'N/A', - 'leech': 'N/A', - 'template': 'torrent.html'}) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'filesize': filesize, + 'magnetlink': magnetlink, + 'seed': 'N/A', + 'leech': 'N/A', + 'template': 'torrent.html', + } + ) return results diff --git a/searx/engines/docker_hub.py b/searx/engines/docker_hub.py index e69f677b3..1e492b196 100644 --- a/searx/engines/docker_hub.py +++ b/searx/engines/docker_hub.py @@ -9,13 +9,13 @@ from urllib.parse import urlencode from dateutil import parser about = { - "website": 'https://hub.docker.com', - "wikidata_id": 'Q100769064', - "official_api_documentation": 'https://docs.docker.com/registry/spec/api/', - "use_official_api": True, - "require_api_key": False, - "results": 'JSON', - } + "website": 'https://hub.docker.com', + "wikidata_id": 'Q100769064', + "official_api_documentation": 'https://docs.docker.com/registry/spec/api/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} categories = ['it'] # optional paging = True @@ -23,6 +23,7 @@ paging = True base_url = "https://hub.docker.com/" search_url = base_url + "api/content/v1/products/search?{query}&type=image&page_size=25" + def request(query, params): params['url'] = search_url.format(query=urlencode(dict(q=query, page=params["pageno"]))) @@ -30,6 +31,7 @@ def request(query, params): return params + def response(resp): '''post-response callback resp: requests response object @@ -53,12 +55,8 @@ def response(resp): result["url"] = base_url + "r/" + item.get('slug', "") result["title"] = item.get("name") result["content"] = item.get("short_description") - result["publishedDate"] = parser.parse( - item.get("updated_at") or item.get("created_at") - ) - result["thumbnail"] = ( - item["logo_url"].get("large") or item["logo_url"].get("small") - ) + result["publishedDate"] = parser.parse(item.get("updated_at") or item.get("created_at")) + result["thumbnail"] = item["logo_url"].get("large") or item["logo_url"].get("small") results.append(result) return results diff --git a/searx/engines/doku.py b/searx/engines/doku.py index cf38b3b9a..08f56bbe7 100644 --- a/searx/engines/doku.py +++ b/searx/engines/doku.py @@ -25,17 +25,20 @@ number_of_results = 5 # search-url # Doku is OpenSearch compatible base_url = 'http://localhost:8090' -search_url = '/?do=search'\ - '&{query}' -# TODO '&startRecord={offset}'\ -# TODO '&maximumRecords={limit}'\ +search_url = ( + # fmt: off + '/?do=search' + '&{query}' + # fmt: on +) +# TODO '&startRecord={offset}' +# TODO '&maximumRecords={limit}' # do search-request def request(query, params): - params['url'] = base_url +\ - search_url.format(query=urlencode({'id': query})) + params['url'] = base_url + search_url.format(query=urlencode({'id': query})) return params @@ -60,9 +63,7 @@ def response(resp): title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title')) # append result - results.append({'title': title, - 'content': "", - 'url': base_url + res_url}) + results.append({'title': title, 'content': "", 'url': base_url + res_url}) # Search results for r in eval_xpath(doc, '//dl[@class="search_results"]/*'): @@ -74,9 +75,7 @@ def response(resp): content = extract_text(eval_xpath(r, '.')) # append result - results.append({'title': title, - 'content': content, - 'url': base_url + res_url}) + results.append({'title': title, 'content': content, 'url': base_url + res_url}) except: continue diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index d283af81d..0d2a524df 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -39,15 +39,10 @@ language_aliases = { 'ko': 'kr-KR', 'sl-SI': 'sl-SL', 'zh-TW': 'tzh-TW', - 'zh-HK': 'tzh-HK' + 'zh-HK': 'tzh-HK', } -time_range_dict = { - 'day': 'd', - 'week': 'w', - 'month': 'm', - 'year': 'y' -} +time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} # search-url url = 'https://lite.duckduckgo.com/lite' @@ -118,6 +113,7 @@ def request(query, params): logger.debug("param cookies: %s", params['cookies']) return params + # get response from search-request def response(resp): @@ -163,21 +159,24 @@ def response(resp): if td_content is None: continue - results.append({ - 'title': a_tag.text_content(), - 'content': extract_text(td_content), - 'url': a_tag.get('href'), - }) + results.append( + { + 'title': a_tag.text_content(), + 'content': extract_text(td_content), + 'url': a_tag.get('href'), + } + ) return results + # get supported languages from their site def _fetch_supported_languages(resp): # response is a js file with regions as an embedded object response_page = resp.text - response_page = response_page[response_page.find('regions:{') + 8:] - response_page = response_page[:response_page.find('}') + 1] + response_page = response_page[response_page.find('regions:{') + 8 :] + response_page = response_page[: response_page.find('}') + 1] regions_json = loads(response_page) supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys()) diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 3ef043964..ad3c92169 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -10,7 +10,10 @@ from lxml import html from searx.data import WIKIDATA_UNITS from searx.engines.duckduckgo import language_aliases -from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.duckduckgo import ( # pylint: disable=unused-import + _fetch_supported_languages, + supported_languages_url, +) from searx.utils import extract_text, html_to_text, match_language, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom @@ -24,19 +27,15 @@ about = { "results": 'JSON', } -URL = 'https://api.duckduckgo.com/'\ - + '?{query}&format=json&pretty=0&no_redirect=1&d=1' +URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1' -WIKIDATA_PREFIX = [ - 'http://www.wikidata.org/entity/', - 'https://www.wikidata.org/entity/' -] +WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/'] replace_http_by_https = get_string_replaces_function({'http:': 'https:'}) def is_broken_text(text): - """ duckduckgo may return something like "<a href="xxxx">http://somewhere Related website<a/>" + """duckduckgo may return something like "<a href="xxxx">http://somewhere Related website<a/>" The href URL is broken, the "Related website" may contains some HTML. @@ -61,11 +60,7 @@ def result_to_text(text, htmlResult): def request(query, params): params['url'] = URL.format(query=urlencode({'q': query})) - language = match_language( - params['language'], - supported_languages, - language_aliases - ) + language = match_language(params['language'], supported_languages, language_aliases) language = language.split('-')[0] params['headers']['Accept-Language'] = language return params @@ -127,23 +122,14 @@ def response(resp): firstURL = ddg_result.get('FirstURL') text = ddg_result.get('Text') if not is_broken_text(text): - suggestion = result_to_text( - text, - ddg_result.get('Result') - ) + suggestion = result_to_text(text, ddg_result.get('Result')) if suggestion != heading and suggestion is not None: results.append({'suggestion': suggestion}) elif 'Topics' in ddg_result: suggestions = [] - relatedTopics.append({ - 'name': ddg_result.get('Name', ''), - 'suggestions': suggestions - }) + relatedTopics.append({'name': ddg_result.get('Name', ''), 'suggestions': suggestions}) for topic_result in ddg_result.get('Topics', []): - suggestion = result_to_text( - topic_result.get('Text'), - topic_result.get('Result') - ) + suggestion = result_to_text(topic_result.get('Text'), topic_result.get('Result')) if suggestion != heading and suggestion is not None: suggestions.append(suggestion) @@ -152,25 +138,15 @@ def response(resp): if abstractURL != '': # add as result ? problem always in english infobox_id = abstractURL - urls.append({ - 'title': search_res.get('AbstractSource'), - 'url': abstractURL, - 'official': True - }) - results.append({ - 'url': abstractURL, - 'title': heading - }) + urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL, 'official': True}) + results.append({'url': abstractURL, 'title': heading}) # definition definitionURL = search_res.get('DefinitionURL', '') if definitionURL != '': # add as result ? as answer ? problem always in english infobox_id = definitionURL - urls.append({ - 'title': search_res.get('DefinitionSource'), - 'url': definitionURL - }) + urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL}) # to merge with wikidata's infobox if infobox_id: @@ -198,10 +174,7 @@ def response(resp): # * netflix_id external_url = get_external_url(data_type, data_value) if external_url is not None: - urls.append({ - 'title': data_label, - 'url': external_url - }) + urls.append({'title': data_label, 'url': external_url}) elif data_type in ['instance', 'wiki_maps_trigger', 'google_play_artist_id']: # ignore instance: Wikidata value from "Instance Of" (Qxxxx) # ignore wiki_maps_trigger: reference to a javascript @@ -211,11 +184,7 @@ def response(resp): # There is already an URL for the website pass elif data_type == 'area': - attributes.append({ - 'label': data_label, - 'value': area_to_str(data_value), - 'entity': 'P2046' - }) + attributes.append({'label': data_label, 'value': area_to_str(data_value), 'entity': 'P2046'}) osm_zoom = area_to_osm_zoom(data_value.get('amount')) elif data_type == 'coordinates': if data_value.get('globe') == 'http://www.wikidata.org/entity/Q2': @@ -224,16 +193,9 @@ def response(resp): coordinates = info else: # coordinate NOT on Earth - attributes.append({ - 'label': data_label, - 'value': data_value, - 'entity': 'P625' - }) + attributes.append({'label': data_label, 'value': data_value, 'entity': 'P625'}) elif data_type == 'string': - attributes.append({ - 'label': data_label, - 'value': data_value - }) + attributes.append({'label': data_label, 'value': data_value}) if coordinates: data_label = coordinates.get('label') @@ -241,31 +203,24 @@ def response(resp): latitude = data_value.get('latitude') longitude = data_value.get('longitude') url = get_earth_coordinates_url(latitude, longitude, osm_zoom) - urls.append({ - 'title': 'OpenStreetMap', - 'url': url, - 'entity': 'P625' - }) + urls.append({'title': 'OpenStreetMap', 'url': url, 'entity': 'P625'}) if len(heading) > 0: # TODO get infobox.meta.value where .label='article_title' # pylint: disable=fixme - if image is None and len(attributes) == 0 and len(urls) == 1 and\ - len(relatedTopics) == 0 and len(content) == 0: - results.append({ - 'url': urls[0]['url'], - 'title': heading, - 'content': content - }) + if image is None and len(attributes) == 0 and len(urls) == 1 and len(relatedTopics) == 0 and len(content) == 0: + results.append({'url': urls[0]['url'], 'title': heading, 'content': content}) else: - results.append({ - 'infobox': heading, - 'id': infobox_id, - 'content': content, - 'img_src': image, - 'attributes': attributes, - 'urls': urls, - 'relatedTopics': relatedTopics - }) + results.append( + { + 'infobox': heading, + 'id': infobox_id, + 'content': content, + 'img_src': image, + 'attributes': attributes, + 'urls': urls, + 'relatedTopics': relatedTopics, + } + ) return results @@ -273,7 +228,7 @@ def response(resp): def unit_to_str(unit): for prefix in WIKIDATA_PREFIX: if unit.startswith(prefix): - wikidata_entity = unit[len(prefix):] + wikidata_entity = unit[len(prefix) :] return WIKIDATA_UNITS.get(wikidata_entity, unit) return unit diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py index 0daaf41e9..2f75e16f1 100644 --- a/searx/engines/duckduckgo_images.py +++ b/searx/engines/duckduckgo_images.py @@ -7,7 +7,10 @@ from json import loads from urllib.parse import urlencode from searx.exceptions import SearxEngineAPIException from searx.engines.duckduckgo import get_region_code -from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.duckduckgo import ( # pylint: disable=unused-import + _fetch_supported_languages, + supported_languages_url, +) from searx.network import get # about @@ -41,8 +44,8 @@ def get_vqd(query, headers): content = res.text if content.find('vqd=\'') == -1: raise SearxEngineAPIException('Request failed') - vqd = content[content.find('vqd=\'') + 5:] - vqd = vqd[:vqd.find('\'')] + vqd = content[content.find('vqd=\'') + 5 :] + vqd = vqd[: vqd.find('\'')] return vqd @@ -61,10 +64,10 @@ def request(query, params): region_code = get_region_code(params['language'], lang_list=supported_languages) if region_code: params['url'] = images_url.format( - query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd) + query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd + ) else: - params['url'] = images_url.format( - query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd) + params['url'] = images_url.format(query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd) return params @@ -84,11 +87,15 @@ def response(resp): image = result['image'] # append result - results.append({'template': 'images.html', - 'title': title, - 'content': '', - 'thumbnail_src': thumbnail, - 'img_src': image, - 'url': url}) + results.append( + { + 'template': 'images.html', + 'title': title, + 'content': '', + 'thumbnail_src': thumbnail, + 'img_src': image, + 'url': url, + } + ) return results diff --git a/searx/engines/duden.py b/searx/engines/duden.py index bc4211c67..600b61f3c 100644 --- a/searx/engines/duden.py +++ b/searx/engines/duden.py @@ -38,7 +38,7 @@ def request(query, params): pageno : 1 # number of the requested page ''' - offset = (params['pageno'] - 1) + offset = params['pageno'] - 1 if offset == 0: search_url_fmt = base_url + 'suchen/dudenonline/{query}' params['url'] = search_url_fmt.format(query=quote(query)) @@ -58,9 +58,9 @@ def response(resp): dom = html.fromstring(resp.text) - number_of_results_element =\ - eval_xpath_getindex(dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', - 0, default=None) + number_of_results_element = eval_xpath_getindex( + dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', 0, default=None + ) if number_of_results_element is not None: number_of_results_string = re.sub('[^0-9]', '', number_of_results_element) results.append({'number_of_results': int(number_of_results_string)}) @@ -71,8 +71,6 @@ def response(resp): title = eval_xpath(result, 'string(.//h2/a)').strip() content = extract_text(eval_xpath(result, './/p')) # append result - results.append({'url': url, - 'title': title, - 'content': content}) + results.append({'url': url, 'title': title, 'content': content}) return results diff --git a/searx/engines/dummy-offline.py b/searx/engines/dummy-offline.py index cf2f75312..632eeb2b3 100644 --- a/searx/engines/dummy-offline.py +++ b/searx/engines/dummy-offline.py @@ -15,6 +15,8 @@ about = { def search(query, request_params): - return [{ - 'result': 'this is what you get', - }] + return [ + { + 'result': 'this is what you get', + } + ] diff --git a/searx/engines/ebay.py b/searx/engines/ebay.py index 45c633b42..b7aefcb44 100644 --- a/searx/engines/ebay.py +++ b/searx/engines/ebay.py @@ -58,16 +58,17 @@ def response(resp): if title == "": continue - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'price': price, - 'shipping': shipping, - 'source_country': source_country, - 'thumbnail': thumbnail, - 'template': 'products.html', - - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'price': price, + 'shipping': shipping, + 'source_country': source_country, + 'thumbnail': thumbnail, + 'template': 'products.html', + } + ) return results diff --git a/searx/engines/elasticsearch.py b/searx/engines/elasticsearch.py index db84a5c13..f6e207b4d 100644 --- a/searx/engines/elasticsearch.py +++ b/searx/engines/elasticsearch.py @@ -119,9 +119,7 @@ def response(resp): r['template'] = 'key-value.html' if show_metadata: - r['metadata'] = {'index': result['_index'], - 'id': result['_id'], - 'score': result['_score']} + r['metadata'] = {'index': result['_index'], 'id': result['_id'], 'score': result['_score']} results.append(r) @@ -133,12 +131,10 @@ _available_query_types = { # https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html 'match': _match_query, 'simple_query_string': _simple_query_string_query, - # Term-level queries # https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html 'term': _term_query, 'terms': _terms_query, - # Query JSON defined by the instance administrator. 'custom': _custom_query, } diff --git a/searx/engines/etools.py b/searx/engines/etools.py index bf4f4ea1f..347463291 100644 --- a/searx/engines/etools.py +++ b/searx/engines/etools.py @@ -22,10 +22,14 @@ paging = False safesearch = True base_url = 'https://www.etools.ch' -search_path = '/searchAdvancedSubmit.do'\ - '?query={search_term}'\ - '&pageResults=20'\ +search_path = ( + # fmt: off + '/searchAdvancedSubmit.do' + '?query={search_term}' + '&pageResults=20' '&safeSearch={safesearch}' + # fmt: on +) def request(query, params): @@ -49,8 +53,6 @@ def response(resp): title = extract_text(eval_xpath(result, './a//text()')) content = extract_text(eval_xpath(result, './/div[@class="text"]//text()')) - results.append({'url': url, - 'title': title, - 'content': content}) + results.append({'url': url, 'title': title, 'content': content}) return results diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py index 8fff2e384..c381b25d4 100644 --- a/searx/engines/fdroid.py +++ b/searx/engines/fdroid.py @@ -42,13 +42,13 @@ def response(resp): for app in dom.xpath('//a[@class="package-header"]'): app_url = app.xpath('./@href')[0] app_title = extract_text(app.xpath('./div/h4[@class="package-name"]/text()')) - app_content = extract_text(app.xpath('./div/div/span[@class="package-summary"]')).strip() \ - + ' - ' + extract_text(app.xpath('./div/div/span[@class="package-license"]')).strip() + app_content = ( + extract_text(app.xpath('./div/div/span[@class="package-summary"]')).strip() + + ' - ' + + extract_text(app.xpath('./div/div/span[@class="package-license"]')).strip() + ) app_img_src = app.xpath('./img[@class="package-icon"]/@src')[0] - results.append({'url': app_url, - 'title': app_title, - 'content': app_content, - 'img_src': app_img_src}) + results.append({'url': app_url, 'title': app_title, 'content': app_content, 'img_src': app_img_src}) return results diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index b0ddf6224..b7cd76808 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -25,10 +25,12 @@ paging = True api_key = None -url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\ - '&api_key={api_key}&{text}&sort=relevance' +\ - '&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z' +\ - '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' +url = ( + 'https://api.flickr.com/services/rest/?method=flickr.photos.search' + + '&api_key={api_key}&{text}&sort=relevance' + + '&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z' + + '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' +) photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' paging = True @@ -39,10 +41,9 @@ def build_flickr_url(user_id, photo_id): def request(query, params): - params['url'] = url.format(text=urlencode({'text': query}), - api_key=api_key, - nb_per_page=nb_per_page, - page=params['pageno']) + params['url'] = url.format( + text=urlencode({'text': query}), api_key=api_key, nb_per_page=nb_per_page, page=params['pageno'] + ) return params @@ -69,7 +70,7 @@ def response(resp): else: continue -# For a bigger thumbnail, keep only the url_z, not the url_n + # For a bigger thumbnail, keep only the url_z, not the url_n if 'url_n' in photo: thumbnail_src = photo['url_n'] elif 'url_z' in photo: @@ -80,13 +81,17 @@ def response(resp): url = build_flickr_url(photo['owner'], photo['id']) # append result - results.append({'url': url, - 'title': photo['title'], - 'img_src': img_src, - 'thumbnail_src': thumbnail_src, - 'content': photo['description']['_content'], - 'author': photo['ownername'], - 'template': 'images.html'}) + results.append( + { + 'url': url, + 'title': photo['title'], + 'img_src': img_src, + 'thumbnail_src': thumbnail_src, + 'content': photo['description']['_content'], + 'author': photo['ownername'], + 'template': 'images.html', + } + ) # return results return results diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 1d670ee50..4ff59fc52 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -30,10 +30,12 @@ image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's') paging = True time_range_support = True -time_range_dict = {'day': 60 * 60 * 24, - 'week': 60 * 60 * 24 * 7, - 'month': 60 * 60 * 24 * 7 * 4, - 'year': 60 * 60 * 24 * 7 * 52} +time_range_dict = { + 'day': 60 * 60 * 24, + 'week': 60 * 60 * 24 * 7, + 'month': 60 * 60 * 24 * 7 * 4, + 'year': 60 * 60 * 24 * 7 * 52, +} def build_flickr_url(user_id, photo_id): @@ -47,8 +49,9 @@ def _get_time_range_url(time_range): def request(query, params): - params['url'] = (search_url.format(query=urlencode({'text': query}), page=params['pageno']) - + _get_time_range_url(params['time_range'])) + params['url'] = search_url.format(query=urlencode({'text': query}), page=params['pageno']) + _get_time_range_url( + params['time_range'] + ) return params @@ -83,10 +86,9 @@ def response(resp): for image_size in image_sizes: if image_size in photo['sizes']: img_src = photo['sizes'][image_size]['url'] - img_format = 'jpg ' \ - + str(photo['sizes'][image_size]['width']) \ - + 'x' \ - + str(photo['sizes'][image_size]['height']) + img_format = ( + 'jpg ' + str(photo['sizes'][image_size]['width']) + 'x' + str(photo['sizes'][image_size]['height']) + ) break if not img_src: @@ -113,7 +115,7 @@ def response(resp): 'thumbnail_src': thumbnail_src, 'source': source, 'img_format': img_format, - 'template': 'images.html' + 'template': 'images.html', } result['author'] = author.encode(errors='ignore').decode() result['source'] = source.encode(errors='ignore').decode() diff --git a/searx/engines/framalibre.py b/searx/engines/framalibre.py index 42c08cf95..b2c9d9077 100644 --- a/searx/engines/framalibre.py +++ b/searx/engines/framalibre.py @@ -35,9 +35,8 @@ content_xpath = './/div[@class="content"]//p' # do search-request def request(query, params): - offset = (params['pageno'] - 1) - params['url'] = search_url.format(query=urlencode({'keys': query}), - offset=offset) + offset = params['pageno'] - 1 + params['url'] = search_url.format(query=urlencode({'keys': query}), offset=offset) return params @@ -63,10 +62,7 @@ def response(resp): content = escape(extract_text(result.xpath(content_xpath))) # append result - results.append({'url': href, - 'title': title, - 'img_src': thumbnail, - 'content': content}) + results.append({'url': href, 'title': title, 'img_src': thumbnail, 'content': content}) # return results return results diff --git a/searx/engines/freesound.py b/searx/engines/freesound.py index d2564946c..121a6a5b0 100644 --- a/searx/engines/freesound.py +++ b/searx/engines/freesound.py @@ -26,8 +26,7 @@ paging = True # search url url = "https://freesound.org/apiv2/" search_url = ( - url - + "search/text/?query={query}&page={page}&fields=name,url,download,created,description,type&token={api_key}" + url + "search/text/?query={query}&page={page}&fields=name,url,download,created,description,type&token={api_key}" ) embedded_url = '<audio controls><source src="{uri}" type="audio/{ftype}"></audio>' diff --git a/searx/engines/frinkiac.py b/searx/engines/frinkiac.py index f43bb6e20..95a1366de 100644 --- a/searx/engines/frinkiac.py +++ b/searx/engines/frinkiac.py @@ -10,10 +10,7 @@ from urllib.parse import urlencode about = { "website": 'https://frinkiac.com', "wikidata_id": 'Q24882614', - "official_api_documentation": { - 'url': None, - 'comment': 'see https://github.com/MitchellAW/CompuGlobal' - }, + "official_api_documentation": {'url': None, 'comment': 'see https://github.com/MitchellAW/CompuGlobal'}, "use_official_api": False, "require_api_key": False, "results": 'JSON', @@ -40,12 +37,15 @@ def response(resp): episode = result['Episode'] timestamp = result['Timestamp'] - results.append({'template': 'images.html', - 'url': RESULT_URL.format(base=BASE, - query=urlencode({'p': 'caption', 'e': episode, 't': timestamp})), - 'title': episode, - 'content': '', - 'thumbnail_src': THUMB_URL.format(base=BASE, episode=episode, timestamp=timestamp), - 'img_src': IMAGE_URL.format(base=BASE, episode=episode, timestamp=timestamp)}) + results.append( + { + 'template': 'images.html', + 'url': RESULT_URL.format(base=BASE, query=urlencode({'p': 'caption', 'e': episode, 't': timestamp})), + 'title': episode, + 'content': '', + 'thumbnail_src': THUMB_URL.format(base=BASE, episode=episode, timestamp=timestamp), + 'img_src': IMAGE_URL.format(base=BASE, episode=episode, timestamp=timestamp), + } + ) return results diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py index 325e132a6..5b9edafe0 100644 --- a/searx/engines/gentoo.py +++ b/searx/engines/gentoo.py @@ -37,15 +37,12 @@ def locale_to_lang_code(locale): # wikis for some languages were moved off from the main site, we need to make # requests to correct URLs to be able to get results in those languages lang_urls = { - 'en': { - 'base': 'https://wiki.gentoo.org', - 'search': '/index.php?title=Special:Search&offset={offset}&{query}' - }, + 'en': {'base': 'https://wiki.gentoo.org', 'search': '/index.php?title=Special:Search&offset={offset}&{query}'}, 'others': { 'base': 'https://wiki.gentoo.org', 'search': '/index.php?title=Special:Search&offset={offset}&{query}\ - &profile=translation&languagefilter={language}' - } + &profile=translation&languagefilter={language}', + }, } @@ -78,7 +75,7 @@ main_langs = { 'sl': 'Slovenský', 'th': 'ไทย', 'uk': 'Українська', - 'zh': '简体中文' + 'zh': '简体中文', } supported_languages = dict(lang_urls, **main_langs) @@ -101,8 +98,7 @@ def request(query, params): urls = get_lang_urls(language) search_url = urls['base'] + urls['search'] - params['url'] = search_url.format(query=query, offset=offset, - language=language) + params['url'] = search_url.format(query=query, offset=offset, language=language) return params @@ -123,7 +119,6 @@ def response(resp): href = urljoin(base_url, link.attrib.get('href')) title = extract_text(link) - results.append({'url': href, - 'title': title}) + results.append({'url': href, 'title': title}) return results diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 0f685abc5..c657dca30 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -55,12 +55,12 @@ def fetch_extra_param(query_args, headers): extra_param_path = search_path + urlencode(query_args) text = get(base_url + extra_param_path, headers=headers).text - re_var= None + re_var = None for line in text.splitlines(): if re_var is None and extra_param_path in line: var = line.split("=")[0].split()[1] # e.g. var --> 'uxrl' re_var = re.compile(var + "\\s*=\\s*" + var + "\\s*\\+\\s*'" + "(.*)" + "'(.*)") - extra_param = line.split("'")[1][len(extra_param_path):] + extra_param = line.split("'")[1][len(extra_param_path) :] continue if re_var is not None and re_var.search(line): extra_param += re_var.search(line).group(1) @@ -69,12 +69,7 @@ def fetch_extra_param(query_args, headers): # do search-request def request(query, params): # pylint: disable=unused-argument - query_args = dict( - c = 'main' - , q = query - , dr = 1 - , showgoodimages = 0 - ) + query_args = dict(c='main', q=query, dr=1, showgoodimages=0) if params['language'] and params['language'] != 'all': query_args['qlangcountry'] = params['language'] @@ -93,6 +88,7 @@ def request(query, params): # pylint: disable=unused-argument return params + # get response from search-request def response(resp): results = [] @@ -125,10 +121,6 @@ def response(resp): if len(subtitle) > 3 and subtitle != title: title += " - " + subtitle - results.append(dict( - url = url - , title = title - , content = content - )) + results.append(dict(url=url, title=title, content=content)) return results diff --git a/searx/engines/github.py b/searx/engines/github.py index b68caa350..1d12d296a 100644 --- a/searx/engines/github.py +++ b/searx/engines/github.py @@ -55,9 +55,7 @@ def response(resp): content = '' # append result - results.append({'url': url, - 'title': title, - 'content': content}) + results.append({'url': url, 'title': title, 'content': content}) # return results return results diff --git a/searx/engines/google.py b/searx/engines/google.py index 578dec60c..685697d29 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -50,72 +50,63 @@ supported_languages_url = 'https://www.google.com/preferences?#languages' # based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests google_domains = { - 'BG': 'google.bg', # Bulgaria - 'CZ': 'google.cz', # Czech Republic - 'DE': 'google.de', # Germany - 'DK': 'google.dk', # Denmark - 'AT': 'google.at', # Austria - 'CH': 'google.ch', # Switzerland - 'GR': 'google.gr', # Greece + 'BG': 'google.bg', # Bulgaria + 'CZ': 'google.cz', # Czech Republic + 'DE': 'google.de', # Germany + 'DK': 'google.dk', # Denmark + 'AT': 'google.at', # Austria + 'CH': 'google.ch', # Switzerland + 'GR': 'google.gr', # Greece 'AU': 'google.com.au', # Australia - 'CA': 'google.ca', # Canada - 'GB': 'google.co.uk', # United Kingdom - 'ID': 'google.co.id', # Indonesia - 'IE': 'google.ie', # Ireland - 'IN': 'google.co.in', # India + 'CA': 'google.ca', # Canada + 'GB': 'google.co.uk', # United Kingdom + 'ID': 'google.co.id', # Indonesia + 'IE': 'google.ie', # Ireland + 'IN': 'google.co.in', # India 'MY': 'google.com.my', # Malaysia - 'NZ': 'google.co.nz', # New Zealand + 'NZ': 'google.co.nz', # New Zealand 'PH': 'google.com.ph', # Philippines 'SG': 'google.com.sg', # Singapore - 'US': 'google.com', # United States (google.us) redirects to .com - 'ZA': 'google.co.za', # South Africa + 'US': 'google.com', # United States (google.us) redirects to .com + 'ZA': 'google.co.za', # South Africa 'AR': 'google.com.ar', # Argentina - 'CL': 'google.cl', # Chile - 'ES': 'google.es', # Spain + 'CL': 'google.cl', # Chile + 'ES': 'google.es', # Spain 'MX': 'google.com.mx', # Mexico - 'EE': 'google.ee', # Estonia - 'FI': 'google.fi', # Finland - 'BE': 'google.be', # Belgium - 'FR': 'google.fr', # France - 'IL': 'google.co.il', # Israel - 'HR': 'google.hr', # Croatia - 'HU': 'google.hu', # Hungary - 'IT': 'google.it', # Italy - 'JP': 'google.co.jp', # Japan - 'KR': 'google.co.kr', # South Korea - 'LT': 'google.lt', # Lithuania - 'LV': 'google.lv', # Latvia - 'NO': 'google.no', # Norway - 'NL': 'google.nl', # Netherlands - 'PL': 'google.pl', # Poland + 'EE': 'google.ee', # Estonia + 'FI': 'google.fi', # Finland + 'BE': 'google.be', # Belgium + 'FR': 'google.fr', # France + 'IL': 'google.co.il', # Israel + 'HR': 'google.hr', # Croatia + 'HU': 'google.hu', # Hungary + 'IT': 'google.it', # Italy + 'JP': 'google.co.jp', # Japan + 'KR': 'google.co.kr', # South Korea + 'LT': 'google.lt', # Lithuania + 'LV': 'google.lv', # Latvia + 'NO': 'google.no', # Norway + 'NL': 'google.nl', # Netherlands + 'PL': 'google.pl', # Poland 'BR': 'google.com.br', # Brazil - 'PT': 'google.pt', # Portugal - 'RO': 'google.ro', # Romania - 'RU': 'google.ru', # Russia - 'SK': 'google.sk', # Slovakia - 'SI': 'google.si', # Slovenia - 'SE': 'google.se', # Sweden - 'TH': 'google.co.th', # Thailand + 'PT': 'google.pt', # Portugal + 'RO': 'google.ro', # Romania + 'RU': 'google.ru', # Russia + 'SK': 'google.sk', # Slovakia + 'SI': 'google.si', # Slovenia + 'SE': 'google.se', # Sweden + 'TH': 'google.co.th', # Thailand 'TR': 'google.com.tr', # Turkey 'UA': 'google.com.ua', # Ukraine 'CN': 'google.com.hk', # There is no google.cn, we use .com.hk for zh-CN 'HK': 'google.com.hk', # Hong Kong - 'TW': 'google.com.tw' # Taiwan + 'TW': 'google.com.tw', # Taiwan } -time_range_dict = { - 'day': 'd', - 'week': 'w', - 'month': 'm', - 'year': 'y' -} +time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} # Filter results. 0: None, 1: Moderate, 2: Strict -filter_mapping = { - 0: 'off', - 1: 'medium', - 2: 'high' -} +filter_mapping = {0: 'off', 1: 'medium', 2: 'high'} # specific xpath variables # ------------------------ @@ -140,6 +131,7 @@ content_xpath = './/div[@class="IsZvec"]' # from the links not the links itself. suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a' + def get_lang_info(params, lang_list, custom_aliases, supported_any_language): """Composing various language properties for the google engines. @@ -184,11 +176,11 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): request's headers) """ ret_val = { - 'language' : None, - 'country' : None, - 'subdomain' : None, - 'params' : {}, - 'headers' : {}, + 'language': None, + 'country': None, + 'subdomain': None, + 'params': {}, + 'headers': {}, } # language ... @@ -213,7 +205,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): # subdomain ... - ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com') + ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com') # params & headers @@ -250,15 +242,18 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language) # Accept-Language: fr-CH, fr;q=0.8, en;q=0.6, *;q=0.5 - ret_val['headers']['Accept-Language'] = ','.join([ - lang_country, - language + ';q=0.8,', - 'en;q=0.6', - '*;q=0.5', - ]) + ret_val['headers']['Accept-Language'] = ','.join( + [ + lang_country, + language + ';q=0.8,', + 'en;q=0.6', + '*;q=0.5', + ] + ) return ret_val + def detect_google_sorry(resp): if resp.url.host == 'sorry.google.com' or resp.url.path.startswith('/sorry'): raise SearxEngineCaptchaException() @@ -269,9 +264,7 @@ def request(query, params): offset = (params['pageno'] - 1) * 10 - lang_info = get_lang_info( - params, supported_languages, language_aliases, True - ) + lang_info = get_lang_info(params, supported_languages, language_aliases, True) additional_parameters = {} if use_mobile_ui: @@ -281,15 +274,23 @@ def request(query, params): } # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium - query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({ - 'q': query, - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - 'start': offset, - 'filter': '0', - **additional_parameters, - }) + query_url = ( + 'https://' + + lang_info['subdomain'] + + '/search' + + "?" + + urlencode( + { + 'q': query, + **lang_info['params'], + 'ie': "utf8", + 'oe': "utf8", + 'start': offset, + 'filter': '0', + **additional_parameters, + } + ) + ) if params['time_range'] in time_range_dict: query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]}) @@ -301,9 +302,7 @@ def request(query, params): if use_mobile_ui: params['headers']['Accept'] = '*/*' else: - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' return params @@ -325,7 +324,7 @@ def response(resp): else: logger.debug("did not find 'answer'") - # results --> number_of_results + # results --> number_of_results if not use_mobile_ui: try: _txt = eval_xpath_getindex(dom, '//div[@id="result-stats"]//text()', 0) @@ -355,11 +354,7 @@ def response(resp): if url is None: continue content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True) - results.append({ - 'url': url, - 'title': title, - 'content': content - }) + results.append({'url': url, 'title': title, 'content': content}) except Exception as e: # pylint: disable=broad-except logger.error(e, exc_info=True) # from lxml import etree diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 61d291e3f..203df404a 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -30,10 +30,8 @@ from searx.engines.google import ( ) # pylint: disable=unused-import -from searx.engines.google import ( - supported_languages_url - , _fetch_supported_languages -) +from searx.engines.google import supported_languages_url, _fetch_supported_languages + # pylint: enable=unused-import # about @@ -53,21 +51,16 @@ use_locale_domain = True time_range_support = True safesearch = True -filter_mapping = { - 0: 'images', - 1: 'active', - 2: 'active' -} +filter_mapping = {0: 'images', 1: 'active', 2: 'active'} def scrap_out_thumbs(dom): - """Scrap out thumbnail data from <script> tags. - """ + """Scrap out thumbnail data from <script> tags.""" ret_val = {} for script in eval_xpath(dom, '//script[contains(., "_setImgSrc(")]'): _script = script.text # _setImgSrc('0','data:image\/jpeg;base64,\/9j\/4AAQSkZJR ....'); - _thumb_no, _img_data = _script[len("_setImgSrc("):-2].split(",", 1) + _thumb_no, _img_data = _script[len("_setImgSrc(") : -2].split(",", 1) _thumb_no = _thumb_no.replace("'", "") _img_data = _img_data.replace("'", "") _img_data = _img_data.replace(r"\/", r"/") @@ -76,8 +69,7 @@ def scrap_out_thumbs(dom): def scrap_img_by_id(script, data_id): - """Get full image URL by data-id in parent element - """ + """Get full image URL by data-id in parent element""" img_url = '' _script = script.split('\n') for i, line in enumerate(_script): @@ -91,20 +83,25 @@ def scrap_img_by_id(script, data_id): def request(query, params): """Google-Video search request""" - lang_info = get_lang_info( - params, supported_languages, language_aliases, False + lang_info = get_lang_info(params, supported_languages, language_aliases, False) + logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) + + query_url = ( + 'https://' + + lang_info['subdomain'] + + '/search' + + "?" + + urlencode( + { + 'q': query, + 'tbm': "isch", + **lang_info['params'], + 'ie': "utf8", + 'oe': "utf8", + 'num': 30, + } + ) ) - logger.debug( - "HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) - - query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({ - 'q': query, - 'tbm': "isch", - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - 'num': 30, - }) if params['time_range'] in time_range_dict: query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]}) @@ -113,9 +110,7 @@ def request(query, params): params['url'] = query_url params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' return params @@ -128,8 +123,7 @@ def response(resp): # convert the text to dom dom = html.fromstring(resp.text) img_bas64_map = scrap_out_thumbs(dom) - img_src_script = eval_xpath_getindex( - dom, '//script[contains(., "AF_initDataCallback({key: ")]', 1).text + img_src_script = eval_xpath_getindex(dom, '//script[contains(., "AF_initDataCallback({key: ")]', 1).text # parse results # @@ -189,15 +183,17 @@ def response(resp): if not src_url: src_url = thumbnail_src - results.append({ - 'url': url, - 'title': img_alt, - 'content': pub_descr, - 'source': pub_source, - 'img_src': src_url, - # 'img_format': img_format, - 'thumbnail_src': thumbnail_src, - 'template': 'images.html' - }) + results.append( + { + 'url': url, + 'title': img_alt, + 'content': pub_descr, + 'source': pub_source, + 'img_src': src_url, + # 'img_format': img_format, + 'thumbnail_src': thumbnail_src, + 'template': 'images.html', + } + ) return results diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 87ac9a19d..162e4348e 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -32,6 +32,7 @@ from searx.engines.google import ( supported_languages_url, _fetch_supported_languages, ) + # pylint: enable=unused-import from searx.engines.google import ( @@ -71,14 +72,12 @@ time_range_support = True # safesearch : results are identitical for safesearch=0 and safesearch=2 safesearch = False + def request(query, params): """Google-News search request""" - lang_info = get_lang_info( - params, supported_languages, language_aliases, False - ) - logger.debug( - "HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) + lang_info = get_lang_info(params, supported_languages, language_aliases, False) + logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) # google news has only one domain lang_info['subdomain'] = 'news.google.com' @@ -94,19 +93,26 @@ def request(query, params): if params['time_range']: query += ' ' + time_range_dict[params['time_range']] - query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({ - 'q': query, - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - 'gl': lang_info['country'], - }) + ('&ceid=%s' % ceid) # ceid includes a ':' character which must not be urlencoded + query_url = ( + 'https://' + + lang_info['subdomain'] + + '/search' + + "?" + + urlencode( + { + 'q': query, + **lang_info['params'], + 'ie': "utf8", + 'oe': "utf8", + 'gl': lang_info['country'], + } + ) + + ('&ceid=%s' % ceid) + ) # ceid includes a ':' character which must not be urlencoded params['url'] = query_url params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' params['headers']['Cookie'] = "CONSENT=YES+cb.%s-14-p0.en+F+941;" % datetime.now().strftime("%Y%m%d") return params @@ -141,7 +147,7 @@ def response(resp): # jslog="95014; 5:W251bGwsbnVsbCxudW...giXQ==; track:click" jslog = jslog.split(";")[1].split(':')[1].strip() try: - padding = (4 -(len(jslog) % 4)) * "=" + padding = (4 - (len(jslog) % 4)) * "=" jslog = b64decode(jslog + padding) except binascii.Error: # URL cant be read, skip this result @@ -178,12 +184,14 @@ def response(resp): img_src = extract_text(result.xpath('preceding-sibling::a/figure/img/@src')) - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'img_src': img_src, - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'img_src': img_src, + } + ) # return results return results diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py index e6726463d..e0700957c 100644 --- a/searx/engines/google_scholar.py +++ b/searx/engines/google_scholar.py @@ -32,6 +32,7 @@ from searx.engines.google import ( supported_languages_url, _fetch_supported_languages, ) + # pylint: enable=unused-import # about @@ -52,6 +53,7 @@ use_locale_domain = True time_range_support = True safesearch = False + def time_range_url(params): """Returns a URL query component for a google-Scholar time range based on ``params['time_range']``. Google-Scholar does only support ranges in years. @@ -64,7 +66,7 @@ def time_range_url(params): # as_ylo=2016&as_yhi=2019 ret_val = '' if params['time_range'] in time_range_dict: - ret_val= urlencode({'as_ylo': datetime.now().year -1 }) + ret_val = urlencode({'as_ylo': datetime.now().year - 1}) return '&' + ret_val @@ -72,34 +74,38 @@ def request(query, params): """Google-Scholar search request""" offset = (params['pageno'] - 1) * 10 - lang_info = get_lang_info( - params, supported_languages, language_aliases, False - ) - logger.debug( - "HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) + lang_info = get_lang_info(params, supported_languages, language_aliases, False) + logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) # subdomain is: scholar.google.xy lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.") - query_url = 'https://'+ lang_info['subdomain'] + '/scholar' + "?" + urlencode({ - 'q': query, - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - 'start' : offset, - }) + query_url = ( + 'https://' + + lang_info['subdomain'] + + '/scholar' + + "?" + + urlencode( + { + 'q': query, + **lang_info['params'], + 'ie': "utf8", + 'oe': "utf8", + 'start': offset, + } + ) + ) query_url += time_range_url(params) params['url'] = query_url params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - #params['google_subdomain'] = subdomain + # params['google_subdomain'] = subdomain return params + def response(resp): """Get response from google's search request""" results = [] @@ -132,11 +138,13 @@ def response(resp): if pub_type: title = title + " " + pub_type - results.append({ - 'url': url, - 'title': title, - 'content': content, - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + } + ) # parse suggestion for suggestion in eval_xpath(dom, '//div[contains(@class, "gs_qsuggest_wrap")]//li//a'): diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index 77b0ab260..049f9138c 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -38,10 +38,8 @@ from searx.engines.google import ( ) # pylint: disable=unused-import -from searx.engines.google import ( - supported_languages_url - , _fetch_supported_languages -) +from searx.engines.google import supported_languages_url, _fetch_supported_languages + # pylint: enable=unused-import # about @@ -65,6 +63,7 @@ safesearch = True RE_CACHE = {} + def _re(regexpr): """returns compiled regular expression""" RE_CACHE[regexpr] = RE_CACHE.get(regexpr, re.compile(regexpr)) @@ -77,18 +76,17 @@ def scrap_out_thumbs_src(dom): for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'): _script = script.text # "dimg_35":"https://i.ytimg.c....", - _dimurl = _re("s='([^']*)").findall( _script) - for k,v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)' ).findall(_script): - v = v.replace(r'\u003d','=') - v = v.replace(r'\u0026','&') + _dimurl = _re("s='([^']*)").findall(_script) + for k, v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)').findall(_script): + v = v.replace(r'\u003d', '=') + v = v.replace(r'\u0026', '&') ret_val[k] = v logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys()) return ret_val def scrap_out_thumbs(dom): - """Scrap out thumbnail data from <script> tags. - """ + """Scrap out thumbnail data from <script> tags.""" ret_val = {} thumb_name = 'dimg_' @@ -96,7 +94,7 @@ def scrap_out_thumbs(dom): _script = script.text # var s='data:image/jpeg;base64, ...' - _imgdata = _re("s='([^']*)").findall( _script) + _imgdata = _re("s='([^']*)").findall(_script) if not _imgdata: continue @@ -112,19 +110,24 @@ def scrap_out_thumbs(dom): def request(query, params): """Google-Video search request""" - lang_info = get_lang_info( - params, supported_languages, language_aliases, False + lang_info = get_lang_info(params, supported_languages, language_aliases, False) + logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) + + query_url = ( + 'https://' + + lang_info['subdomain'] + + '/search' + + "?" + + urlencode( + { + 'q': query, + 'tbm': "vid", + **lang_info['params'], + 'ie': "utf8", + 'oe': "utf8", + } + ) ) - logger.debug( - "HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) - - query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({ - 'q': query, - 'tbm': "vid", - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - }) if params['time_range'] in time_range_dict: query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]}) @@ -133,9 +136,7 @@ def request(query, params): params['url'] = query_url params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' return params @@ -171,21 +172,22 @@ def response(resp): title = extract_text(eval_xpath_getindex(result, title_xpath, 0)) url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0) - length = extract_text(eval_xpath( - result, './/div[contains(@class, "P7xzyf")]/span/span')) + length = extract_text(eval_xpath(result, './/div[contains(@class, "P7xzyf")]/span/span')) c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0) content = extract_text(c_node) pub_info = extract_text(eval_xpath(result, './/div[@class="Zg1NU"]')) - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'length': length, - 'author': pub_info, - 'thumbnail': img_src, - 'template': 'videos.html', - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'length': length, + 'author': pub_info, + 'thumbnail': img_src, + 'template': 'videos.html', + } + ) # parse suggestion for suggestion in eval_xpath_list(dom, suggestion_xpath): diff --git a/searx/engines/imdb.py b/searx/engines/imdb.py index a7474fd5b..bb6258cf4 100644 --- a/searx/engines/imdb.py +++ b/searx/engines/imdb.py @@ -27,7 +27,9 @@ about = { "results": 'HTML', } -categories = ['general', ] +categories = [ + 'general', +] paging = False # suggestion_url = "https://sg.media-imdb.com/suggestion/{letter}/{query}.json" @@ -35,13 +37,7 @@ suggestion_url = "https://v2.sg.media-imdb.com/suggestion/{letter}/{query}.json" href_base = 'https://imdb.com/{category}/{entry_id}' -search_categories = { - "nm": "name", - "tt": "title", - "kw": "keyword", - "co": "company", - "ep": "episode" -} +search_categories = {"nm": "name", "tt": "title", "kw": "keyword", "co": "company", "ep": "episode"} def request(query, params): @@ -63,9 +59,7 @@ def response(resp): entry_id = entry['id'] categ = search_categories.get(entry_id[:2]) if categ is None: - logger.error( - 'skip unknown category tag %s in %s', entry_id[:2], entry_id - ) + logger.error('skip unknown category tag %s in %s', entry_id[:2], entry_id) continue title = entry['l'] @@ -95,11 +89,13 @@ def response(resp): if not image_url_name.endswith('_V1_'): magic = '_V1_' + magic image_url = image_url_name + magic + '.' + image_url_prefix - results.append({ - "title": title, - "url": href_base.format(category=categ, entry_id=entry_id), - "content": content, - "img_src" : image_url, - }) + results.append( + { + "title": title, + "url": href_base.format(category=categ, entry_id=entry_id), + "content": content, + "img_src": image_url, + } + ) return results diff --git a/searx/engines/ina.py b/searx/engines/ina.py index 81172ef8c..1e21bcef8 100644 --- a/searx/engines/ina.py +++ b/searx/engines/ina.py @@ -41,9 +41,7 @@ content_xpath = './/p[@class="media-body__summary"]' # do search-request def request(query, params): - params['url'] = search_url.format(ps=page_size, - start=params['pageno'] * page_size, - query=urlencode({'q': query})) + params['url'] = search_url.format(ps=page_size, start=params['pageno'] * page_size, query=urlencode({'q': query})) return params @@ -75,12 +73,16 @@ def response(resp): content = extract_text(result.xpath(content_xpath)) # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'thumbnail': thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/invidious.py b/searx/engines/invidious.py index 5d0b00edb..914615d6f 100644 --- a/searx/engines/invidious.py +++ b/searx/engines/invidious.py @@ -46,14 +46,10 @@ def request(query, params): base_url_rand = base_url search_url = base_url_rand + "api/v1/search?q={query}" - params["url"] = search_url.format( - query=quote_plus(query) - ) + "&page={pageno}".format(pageno=params["pageno"]) + params["url"] = search_url.format(query=quote_plus(query)) + "&page={pageno}".format(pageno=params["pageno"]) if params["time_range"] in time_range_dict: - params["url"] += "&date={timerange}".format( - timerange=time_range_dict[params["time_range"]] - ) + params["url"] += "&date={timerange}".format(timerange=time_range_dict[params["time_range"]]) if params["language"] != "all": lang = params["language"].split("-") @@ -88,17 +84,13 @@ def response(resp): url = base_invidious_url + videoid embedded = embedded_url.format(videoid=videoid) thumbs = result.get("videoThumbnails", []) - thumb = next( - (th for th in thumbs if th["quality"] == "sddefault"), None - ) + thumb = next((th for th in thumbs if th["quality"] == "sddefault"), None) if thumb: thumbnail = thumb.get("url", "") else: thumbnail = "" - publishedDate = parser.parse( - time.ctime(result.get("published", 0)) - ) + publishedDate = parser.parse(time.ctime(result.get("published", 0))) length = time.gmtime(result.get("lengthSeconds")) if length.tm_hour: length = time.strftime("%H:%M:%S", length) diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index 8a04d34b2..f53bc0bf4 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -119,22 +119,22 @@ def response(resp): content = query(result, content_query)[0] except: content = "" - results.append({ - 'url': to_string(url), - 'title': title_filter(to_string(title)), - 'content': content_filter(to_string(content)), - }) + results.append( + { + 'url': to_string(url), + 'title': title_filter(to_string(title)), + 'content': content_filter(to_string(content)), + } + ) else: - for url, title, content in zip( - query(json, url_query), - query(json, title_query), - query(json, content_query) - ): - results.append({ - 'url': to_string(url), - 'title': title_filter(to_string(title)), - 'content': content_filter(to_string(content)), - }) + for url, title, content in zip(query(json, url_query), query(json, title_query), query(json, content_query)): + results.append( + { + 'url': to_string(url), + 'title': title_filter(to_string(title)), + 'content': content_filter(to_string(content)), + } + ) if not suggestion_query: return results diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py index ad451dbb1..26364674c 100644 --- a/searx/engines/kickass.py +++ b/searx/engines/kickass.py @@ -34,8 +34,7 @@ content_xpath = './/span[@class="font11px lightgrey block"]' # do search-request def request(query, params): - params['url'] = search_url.format(search_term=quote(query), - pageno=params['pageno']) + params['url'] = search_url.format(search_term=quote(query), pageno=params['pageno']) return params @@ -79,16 +78,20 @@ def response(resp): torrentfileurl = quote(torrentfile, safe="%/:=&?~#+!$,;'@()*") # append result - results.append({'url': href, - 'title': title, - 'content': content, - 'seed': seed, - 'leech': leech, - 'filesize': filesize, - 'files': files, - 'magnetlink': magnetlink, - 'torrentfile': torrentfileurl, - 'template': 'torrent.html'}) + results.append( + { + 'url': href, + 'title': title, + 'content': content, + 'seed': seed, + 'leech': leech, + 'filesize': filesize, + 'files': files, + 'magnetlink': magnetlink, + 'torrentfile': torrentfileurl, + 'template': 'torrent.html', + } + ) # return results sorted by seeder return sorted(results, key=itemgetter('seed'), reverse=True) diff --git a/searx/engines/loc.py b/searx/engines/loc.py index 5c09ceff2..0b2f3a689 100644 --- a/searx/engines/loc.py +++ b/searx/engines/loc.py @@ -34,9 +34,7 @@ IMG_SRC_FIXES = { def request(query, params): - search_path = search_string.format( - query=urlencode({'q': query}), - page=params['pageno']) + search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno']) params['url'] = base_url + search_path @@ -56,13 +54,15 @@ def response(resp): break else: img_src = result['image']['thumb'] - results.append({ - 'url': result['links']['item'], - 'title': result['title'], - 'img_src': img_src, - 'thumbnail_src': result['image']['thumb'], - 'author': result['creator'], - 'template': 'images.html' - }) + results.append( + { + 'url': result['links']['item'], + 'title': result['title'], + 'img_src': img_src, + 'thumbnail_src': result['image']['thumb'], + 'author': result['creator'], + 'template': 'images.html', + } + ) return results diff --git a/searx/engines/mediathekviewweb.py b/searx/engines/mediathekviewweb.py index d4cb853d4..991dcbc7b 100644 --- a/searx/engines/mediathekviewweb.py +++ b/searx/engines/mediathekviewweb.py @@ -22,29 +22,33 @@ paging = True time_range_support = False safesearch = False + def request(query, params): params['url'] = 'https://mediathekviewweb.de/api/query' params['method'] = 'POST' params['headers']['Content-type'] = 'text/plain' - params['data'] = dumps({ - 'queries' : [ - { - 'fields' : [ - 'title', - 'topic', - ], - 'query' : query - }, - ], - 'sortBy' : 'timestamp', - 'sortOrder' : 'desc', - 'future' : True, - 'offset' : (params['pageno'] - 1 )* 10, - 'size' : 10 - }) + params['data'] = dumps( + { + 'queries': [ + { + 'fields': [ + 'title', + 'topic', + ], + 'query': query, + }, + ], + 'sortBy': 'timestamp', + 'sortOrder': 'desc', + 'future': True, + 'offset': (params['pageno'] - 1) * 10, + 'size': 10, + } + ) return params + def response(resp): resp = loads(resp.text) @@ -58,11 +62,13 @@ def response(resp): item['hms'] = str(datetime.timedelta(seconds=item['duration'])) - results.append({ - 'url' : item['url_video_hd'], - 'title' : "%(channel)s: %(title)s (%(hms)s)" % item, - 'length' : item['hms'], - 'content' : "%(description)s" % item, - }) + results.append( + { + 'url': item['url_video_hd'], + 'title': "%(channel)s: %(title)s (%(hms)s)" % item, + 'length': item['hms'], + 'content': "%(description)s" % item, + } + ) return results diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py index da4321250..9002e9ba7 100644 --- a/searx/engines/mediawiki.py +++ b/searx/engines/mediawiki.py @@ -25,23 +25,24 @@ search_type = 'nearmatch' # possible values: title, text, nearmatch # search-url base_url = 'https://{language}.wikipedia.org/' -search_postfix = 'w/api.php?action=query'\ - '&list=search'\ - '&{query}'\ - '&format=json'\ - '&sroffset={offset}'\ - '&srlimit={limit}'\ +search_postfix = ( + 'w/api.php?action=query' + '&list=search' + '&{query}' + '&format=json' + '&sroffset={offset}' + '&srlimit={limit}' '&srwhat={searchtype}' +) # do search-request def request(query, params): offset = (params['pageno'] - 1) * number_of_results - string_args = dict(query=urlencode({'srsearch': query}), - offset=offset, - limit=number_of_results, - searchtype=search_type) + string_args = dict( + query=urlencode({'srsearch': query}), offset=offset, limit=number_of_results, searchtype=search_type + ) format_strings = list(Formatter().parse(base_url)) @@ -78,13 +79,14 @@ def response(resp): for result in search_results['query']['search']: if result.get('snippet', '').startswith('#REDIRECT'): continue - url = base_url.format(language=resp.search_params['language']) +\ - 'wiki/' + quote(result['title'].replace(' ', '_').encode()) + url = ( + base_url.format(language=resp.search_params['language']) + + 'wiki/' + + quote(result['title'].replace(' ', '_').encode()) + ) # append result - results.append({'url': url, - 'title': result['title'], - 'content': ''}) + results.append({'url': url, 'title': result['title'], 'content': ''}) # return results return results diff --git a/searx/engines/microsoft_academic.py b/searx/engines/microsoft_academic.py index c99611049..a869daf2f 100644 --- a/searx/engines/microsoft_academic.py +++ b/searx/engines/microsoft_academic.py @@ -26,17 +26,19 @@ def request(query, params): params['url'] = search_url params['method'] = 'POST' params['headers']['content-type'] = 'application/json; charset=utf-8' - params['data'] = dumps({ - 'query': query, - 'queryExpression': '', - 'filters': [], - 'orderBy': 0, - 'skip': (params['pageno'] - 1) * 10, - 'sortAscending': True, - 'take': 10, - 'includeCitationContexts': False, - 'profileId': '', - }) + params['data'] = dumps( + { + 'query': query, + 'queryExpression': '', + 'filters': [], + 'orderBy': 0, + 'skip': (params['pageno'] - 1) * 10, + 'sortAscending': True, + 'take': 10, + 'includeCitationContexts': False, + 'profileId': '', + } + ) return params @@ -54,11 +56,13 @@ def response(resp): title = result['paper']['dn'] content = _get_content(result['paper']) url = _paper_url.format(id=result['paper']['id']) - results.append({ - 'url': url, - 'title': html_to_text(title), - 'content': html_to_text(content), - }) + results.append( + { + 'url': url, + 'title': html_to_text(title), + 'content': html_to_text(content), + } + ) return results diff --git a/searx/engines/mixcloud.py b/searx/engines/mixcloud.py index a6fd1c0a1..f5e0f55fc 100644 --- a/searx/engines/mixcloud.py +++ b/searx/engines/mixcloud.py @@ -25,16 +25,17 @@ paging = True url = 'https://api.mixcloud.com/' search_url = url + 'search/?{query}&type=cloudcast&limit=10&offset={offset}' -embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\ - 'data-src="https://www.mixcloud.com/widget/iframe/?feed={url}" width="300" height="300"></iframe>' +embedded_url = ( + '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' + + 'data-src="https://www.mixcloud.com/widget/iframe/?feed={url}" width="300" height="300"></iframe>' +) # do search-request def request(query, params): offset = (params['pageno'] - 1) * 10 - params['url'] = search_url.format(query=urlencode({'q': query}), - offset=offset) + params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset) return params @@ -54,11 +55,9 @@ def response(resp): publishedDate = parser.parse(result['created_time']) # append result - results.append({'url': url, - 'title': title, - 'embedded': embedded, - 'publishedDate': publishedDate, - 'content': content}) + results.append( + {'url': url, 'title': title, 'embedded': embedded, 'publishedDate': publishedDate, 'content': content} + ) # return results return results diff --git a/searx/engines/mongodb.py b/searx/engines/mongodb.py index 2ebb90539..c833ca9e0 100644 --- a/searx/engines/mongodb.py +++ b/searx/engines/mongodb.py @@ -26,38 +26,35 @@ result_template = 'key-value.html' _client = None + def init(_): connect() + def connect(): global _client # pylint: disable=global-statement - kwargs = { 'port': port } + kwargs = {'port': port} if username: kwargs['username'] = username if password: kwargs['password'] = password _client = MongoClient(host, **kwargs)[database][collection] + def search(query, params): results = [] if exact_match_only: - q = { '$eq': query } + q = {'$eq': query} else: - _re = re.compile('.*{0}.*'.format(re.escape(query)), re.I | re.M ) - q = { '$regex': _re } + _re = re.compile('.*{0}.*'.format(re.escape(query)), re.I | re.M) + q = {'$regex': _re} - query = _client.find( - {key: q} - ).skip( - ( params['pageno'] -1 ) * results_per_page - ).limit( - results_per_page - ) + query = _client.find({key: q}).skip((params['pageno'] - 1) * results_per_page).limit(results_per_page) - results.append({ 'number_of_results': query.count() }) + results.append({'number_of_results': query.count()}) for r in query: del r['_id'] - r = { str(k):str(v) for k,v in r.items() } + r = {str(k): str(v) for k, v in r.items()} r['template'] = result_template results.append(r) diff --git a/searx/engines/mysql_server.py b/searx/engines/mysql_server.py index be89eb86e..d949ee0bc 100644 --- a/searx/engines/mysql_server.py +++ b/searx/engines/mysql_server.py @@ -20,6 +20,7 @@ paging = True result_template = 'key-value.html' _connection = None + def init(engine_settings): global _connection # pylint: disable=global-statement @@ -30,13 +31,14 @@ def init(engine_settings): raise ValueError('only SELECT query is supported') _connection = mysql.connector.connect( - database = database, - user = username, - password = password, - host = host, + database=database, + user=username, + password=password, + host=host, auth_plugin=auth_plugin, ) + def search(query, params): query_params = {'query': query} query_to_run = query_str + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit) @@ -46,6 +48,7 @@ def search(query, params): return _fetch_results(cur) + def _fetch_results(cur): results = [] for res in cur: diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py index 4fe383efa..bdd3ea6dc 100644 --- a/searx/engines/nyaa.py +++ b/searx/engines/nyaa.py @@ -98,14 +98,18 @@ def response(resp): content = 'Category: "{category}". Downloaded {downloads} times.' content = content.format(category=category, downloads=downloads) - results.append({'url': href, - 'title': title, - 'content': content, - 'seed': seed, - 'leech': leech, - 'filesize': filesize, - 'torrentfile': torrent_link, - 'magnetlink': magnet_link, - 'template': 'torrent.html'}) + results.append( + { + 'url': href, + 'title': title, + 'content': content, + 'seed': seed, + 'leech': leech, + 'filesize': filesize, + 'torrentfile': torrent_link, + 'magnetlink': magnet_link, + 'template': 'torrent.html', + } + ) return results diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index c6211a004..946869834 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -151,10 +151,12 @@ def response(resp): user_language = resp.search_params['language'] if resp.search_params['route']: - results.append({ - 'answer': gettext('Get directions'), - 'url': route_url.format(*resp.search_params['route'].groups()), - }) + results.append( + { + 'answer': gettext('Get directions'), + 'url': route_url.format(*resp.search_params['route'].groups()), + } + ) fetch_wikidata(nominatim_json, user_language) @@ -170,26 +172,26 @@ def response(resp): links, link_keys = get_links(result, user_language) data = get_data(result, user_language, link_keys) - results.append({ - 'template': 'map.html', - 'title': title, - 'address': address, - 'address_label': get_key_label('addr', user_language), - 'url': url, - 'osm': osm, - 'geojson': geojson, - 'img_src': img_src, - 'links': links, - 'data': data, - 'type': get_tag_label( - result.get('category'), result.get('type', ''), user_language - ), - 'type_icon': result.get('icon'), - 'content': '', - 'longitude': result['lon'], - 'latitude': result['lat'], - 'boundingbox': result['boundingbox'], - }) + results.append( + { + 'template': 'map.html', + 'title': title, + 'address': address, + 'address_label': get_key_label('addr', user_language), + 'url': url, + 'osm': osm, + 'geojson': geojson, + 'img_src': img_src, + 'links': links, + 'data': data, + 'type': get_tag_label(result.get('category'), result.get('type', ''), user_language), + 'type_icon': result.get('icon'), + 'content': '', + 'longitude': result['lon'], + 'latitude': result['lat'], + 'boundingbox': result['boundingbox'], + } + ) return results @@ -270,9 +272,9 @@ def get_title_address(result): # https://github.com/osm-search/Nominatim/issues/1662 address_name = address_raw.get('address29') else: - address_name = address_raw.get(result['category']) + address_name = address_raw.get(result['category']) elif result['type'] in address_raw: - address_name = address_raw.get(result['type']) + address_name = address_raw.get(result['type']) # add rest of adressdata, if something is already found if address_name: @@ -297,8 +299,7 @@ def get_title_address(result): def get_url_osm_geojson(result): - """Get url, osm and geojson - """ + """Get url, osm and geojson""" osm_type = result.get('osm_type', result.get('type')) if 'osm_id' not in result: # see https://github.com/osm-search/Nominatim/issues/1521 @@ -349,11 +350,13 @@ def get_links(result, user_language): url, url_label = mapping_function(raw_value) if url.startswith('https://wikidata.org'): url_label = result.get('wikidata', {}).get('itemLabel') or url_label - links.append({ - 'label': get_key_label(k, user_language), - 'url': url, - 'url_label': url_label, - }) + links.append( + { + 'label': get_key_label(k, user_language), + 'url': url, + 'url_label': url_label, + } + ) link_keys.add(k) return links, link_keys @@ -373,11 +376,13 @@ def get_data(result, user_language, ignore_keys): continue k_label = get_key_label(k, user_language) if k_label: - data.append({ - 'label': k_label, - 'key': k, - 'value': v, - }) + data.append( + { + 'label': k_label, + 'key': k, + 'value': v, + } + ) data.sort(key=lambda entry: (get_key_rank(entry['key']), entry['label'])) return data diff --git a/searx/engines/pdbe.py b/searx/engines/pdbe.py index b9bbfaf1b..34c8d3227 100644 --- a/searx/engines/pdbe.py +++ b/searx/engines/pdbe.py @@ -34,10 +34,7 @@ def request(query, params): params['url'] = pdbe_solr_url params['method'] = 'POST' - params['data'] = { - 'q': query, - 'wt': "json" # request response in parsable format - } + params['data'] = {'q': query, 'wt': "json"} # request response in parsable format return params @@ -53,12 +50,21 @@ def construct_body(result): if result['journal']: content = content.format( title=result['citation_title'], - authors=result['entry_author_list'][0], journal=result['journal'], volume=result['journal_volume'], - page=result['journal_page'], year=result['citation_year']) + authors=result['entry_author_list'][0], + journal=result['journal'], + volume=result['journal_volume'], + page=result['journal_page'], + year=result['citation_year'], + ) else: content = content.format( title=result['citation_title'], - authors=result['entry_author_list'][0], journal='', volume='', page='', year=result['release_year']) + authors=result['entry_author_list'][0], + journal='', + volume='', + page='', + year=result['release_year'], + ) img_src = pdbe_preview_url.format(pdb_id=result['pdb_id']) except (KeyError): content = None @@ -96,20 +102,21 @@ def response(resp): # since we can't construct a proper body from the response, we'll make up our own msg_superseded = gettext("This entry has been superseded by") content = '{msg_superseded}: {url} ({pdb_id})'.format( - msg_superseded=msg_superseded, - url=superseded_url, - pdb_id=result['superseded_by']) + msg_superseded=msg_superseded, url=superseded_url, pdb_id=result['superseded_by'] + ) # obsoleted entries don't have preview images img_src = None else: title, content, img_src = construct_body(result) - results.append({ - 'url': pdbe_entry_url.format(pdb_id=result['pdb_id']), - 'title': title, - 'content': content, - 'img_src': img_src - }) + results.append( + { + 'url': pdbe_entry_url.format(pdb_id=result['pdb_id']), + 'title': title, + 'content': content, + 'img_src': img_src, + } + ) return results diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py index f9cd50be1..1ace14027 100644 --- a/searx/engines/peertube.py +++ b/searx/engines/peertube.py @@ -36,9 +36,7 @@ def request(query, params): language = params["language"].split("-")[0] if "all" != language and language in supported_languages: query_dict["languageOneOf"] = language - params["url"] = search_url.format( - query=urlencode(query_dict), pageno=pageno - ) + params["url"] = search_url.format(query=urlencode(query_dict), pageno=pageno) return params diff --git a/searx/engines/photon.py b/searx/engines/photon.py index f85dcad86..16ea88194 100644 --- a/searx/engines/photon.py +++ b/searx/engines/photon.py @@ -33,9 +33,7 @@ supported_languages = ['de', 'en', 'fr', 'it'] # do search-request def request(query, params): - params['url'] = base_url +\ - search_string.format(query=urlencode({'q': query}), - limit=number_of_results) + params['url'] = base_url + search_string.format(query=urlencode({'q': query}), limit=number_of_results) if params['language'] != 'all': language = params['language'].split('_')[0] @@ -75,59 +73,71 @@ def response(resp): # continue if invalide osm-type continue - url = result_base_url.format(osm_type=osm_type, - osm_id=properties.get('osm_id')) + url = result_base_url.format(osm_type=osm_type, osm_id=properties.get('osm_id')) - osm = {'type': osm_type, - 'id': properties.get('osm_id')} + osm = {'type': osm_type, 'id': properties.get('osm_id')} geojson = r.get('geometry') if properties.get('extent'): - boundingbox = [properties.get('extent')[3], - properties.get('extent')[1], - properties.get('extent')[0], - properties.get('extent')[2]] + boundingbox = [ + properties.get('extent')[3], + properties.get('extent')[1], + properties.get('extent')[0], + properties.get('extent')[2], + ] else: # TODO: better boundingbox calculation - boundingbox = [geojson['coordinates'][1], - geojson['coordinates'][1], - geojson['coordinates'][0], - geojson['coordinates'][0]] + boundingbox = [ + geojson['coordinates'][1], + geojson['coordinates'][1], + geojson['coordinates'][0], + geojson['coordinates'][0], + ] # address calculation address = {} # get name - if properties.get('osm_key') == 'amenity' or\ - properties.get('osm_key') == 'shop' or\ - properties.get('osm_key') == 'tourism' or\ - properties.get('osm_key') == 'leisure': + if ( + properties.get('osm_key') == 'amenity' + or properties.get('osm_key') == 'shop' + or properties.get('osm_key') == 'tourism' + or properties.get('osm_key') == 'leisure' + ): address = {'name': properties.get('name')} # add rest of adressdata, if something is already found if address.get('name'): - address.update({'house_number': properties.get('housenumber'), - 'road': properties.get('street'), - 'locality': properties.get('city', - properties.get('town', # noqa - properties.get('village'))), # noqa - 'postcode': properties.get('postcode'), - 'country': properties.get('country')}) + address.update( + { + 'house_number': properties.get('housenumber'), + 'road': properties.get('street'), + 'locality': properties.get( + 'city', properties.get('town', properties.get('village')) # noqa + ), # noqa + 'postcode': properties.get('postcode'), + 'country': properties.get('country'), + } + ) else: address = None # append result - results.append({'template': 'map.html', - 'title': title, - 'content': '', - 'longitude': geojson['coordinates'][0], - 'latitude': geojson['coordinates'][1], - 'boundingbox': boundingbox, - 'geojson': geojson, - 'address': address, - 'osm': osm, - 'url': url}) + results.append( + { + 'template': 'map.html', + 'title': title, + 'content': '', + 'longitude': geojson['coordinates'][0], + 'latitude': geojson['coordinates'][1], + 'boundingbox': boundingbox, + 'geojson': geojson, + 'address': address, + 'osm': osm, + 'url': url, + } + ) # return results return results diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index d4b94ecfa..4b0984be5 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -40,17 +40,14 @@ trackers = [ ] # piratebay specific type-definitions -search_types = {"files": "0", - "music": "100", - "videos": "200"} +search_types = {"files": "0", "music": "100", "videos": "200"} # do search-request def request(query, params): search_type = search_types.get(params["category"], "0") - params["url"] = search_url.format(search_term=quote(query), - search_type=search_type) + params["url"] = search_url.format(search_term=quote(query), search_type=search_type) return params @@ -68,8 +65,9 @@ def response(resp): # parse results for result in search_res: link = url + "description.php?id=" + result["id"] - magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"]\ - + "&tr=" + "&tr=".join(trackers) + magnetlink = ( + "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"] + "&tr=" + "&tr=".join(trackers) + ) params = { "url": link, @@ -77,7 +75,7 @@ def response(resp): "seed": result["seeders"], "leech": result["leechers"], "magnetlink": magnetlink, - "template": "torrent.html" + "template": "torrent.html", } # extract and convert creation date diff --git a/searx/engines/postgresql.py b/searx/engines/postgresql.py index 1eddcd519..d8bbabe27 100644 --- a/searx/engines/postgresql.py +++ b/searx/engines/postgresql.py @@ -20,6 +20,7 @@ paging = True result_template = 'key-value.html' _connection = None + def init(engine_settings): global _connection # pylint: disable=global-statement @@ -30,25 +31,24 @@ def init(engine_settings): raise ValueError('only SELECT query is supported') _connection = psycopg2.connect( - database = database, - user = username, - password = password, - host = host, - port = port, + database=database, + user=username, + password=password, + host=host, + port=port, ) + def search(query, params): query_params = {'query': query} - query_to_run = ( - query_str - + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit) - ) + query_to_run = query_str + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit) with _connection: with _connection.cursor() as cur: cur.execute(query_to_run, query_params) return _fetch_results(cur) + def _fetch_results(cur): results = [] titles = [] diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py index 5d88d398e..27444ae24 100644 --- a/searx/engines/pubmed.py +++ b/searx/engines/pubmed.py @@ -15,7 +15,7 @@ about = { "wikidata_id": 'Q1540899', "official_api_documentation": { 'url': 'https://www.ncbi.nlm.nih.gov/home/develop/api/', - 'comment': 'More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/' + 'comment': 'More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/', }, "use_official_api": True, "require_api_key": False, @@ -24,8 +24,9 @@ about = { categories = ['science'] -base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'\ - + '?db=pubmed&{query}&retstart={offset}&retmax={hits}' +base_url = ( + 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' + '?db=pubmed&{query}&retstart={offset}&retmax={hits}' +) # engine dependent config number_of_results = 10 @@ -36,9 +37,7 @@ def request(query, params): # basic search offset = (params['pageno'] - 1) * number_of_results - string_args = dict(query=urlencode({'term': query}), - offset=offset, - hits=number_of_results) + string_args = dict(query=urlencode({'term': query}), offset=offset, hits=number_of_results) params['url'] = base_url.format(**string_args) @@ -49,8 +48,9 @@ def response(resp): results = [] # First retrieve notice of each result - pubmed_retrieve_api_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'\ - + 'db=pubmed&retmode=xml&id={pmids_string}' + pubmed_retrieve_api_url = ( + 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' + 'db=pubmed&retmode=xml&id={pmids_string}' + ) pmids_results = etree.XML(resp.content) pmids = pmids_results.xpath('//eSearchResult/IdList/Id') @@ -88,14 +88,17 @@ def response(resp): content = content[0:300] + "..." # TODO: center snippet on query term - res_dict = {'url': url, - 'title': title, - 'content': content} + res_dict = {'url': url, 'title': title, 'content': content} try: - publishedDate = datetime.strptime(entry.xpath('.//DateCreated/Year')[0].text - + '-' + entry.xpath('.//DateCreated/Month')[0].text - + '-' + entry.xpath('.//DateCreated/Day')[0].text, '%Y-%m-%d') + publishedDate = datetime.strptime( + entry.xpath('.//DateCreated/Year')[0].text + + '-' + + entry.xpath('.//DateCreated/Month')[0].text + + '-' + + entry.xpath('.//DateCreated/Day')[0].text, + '%Y-%m-%d', + ) res_dict['publishedDate'] = publishedDate except: pass diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 0312e518c..a1799491a 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -61,6 +61,7 @@ category_to_keyword = { # search-url url = 'https://api.qwant.com/v3/search/{keyword}?{query}&count={count}&offset={offset}' + def request(query, params): """Qwant search request""" keyword = category_to_keyword[categories[0]] @@ -77,10 +78,10 @@ def request(query, params): offset = min(offset, 40) params['url'] = url.format( - keyword = keyword, - query = urlencode({'q': query}), - offset = offset, - count = count, + keyword=keyword, + query=urlencode({'q': query}), + offset=offset, + count=count, ) # add language tag @@ -111,7 +112,14 @@ def response(resp): # check for an API error if search_results.get('status') != 'success': - msg = ",".join(data.get('message', ['unknown', ])) + msg = ",".join( + data.get( + 'message', + [ + 'unknown', + ], + ) + ) raise SearxEngineAPIException('API error::' + msg) # raise for other errors @@ -128,7 +136,7 @@ def response(resp): # result['items']. mainline = data.get('result', {}).get('items', []) mainline = [ - {'type' : keyword, 'items' : mainline }, + {'type': keyword, 'items': mainline}, ] # return empty array if there are no results @@ -153,11 +161,13 @@ def response(resp): if mainline_type == 'web': content = item['desc'] - results.append({ - 'title': title, - 'url': res_url, - 'content': content, - }) + results.append( + { + 'title': title, + 'url': res_url, + 'content': content, + } + ) elif mainline_type == 'news': @@ -168,23 +178,27 @@ def response(resp): img_src = None if news_media: img_src = news_media[0].get('pict', {}).get('url', None) - results.append({ - 'title': title, - 'url': res_url, - 'publishedDate': pub_date, - 'img_src': img_src, - }) + results.append( + { + 'title': title, + 'url': res_url, + 'publishedDate': pub_date, + 'img_src': img_src, + } + ) elif mainline_type == 'images': thumbnail = item['thumbnail'] img_src = item['media'] - results.append({ - 'title': title, - 'url': res_url, - 'template': 'images.html', - 'thumbnail_src': thumbnail, - 'img_src': img_src, - }) + results.append( + { + 'title': title, + 'url': res_url, + 'template': 'images.html', + 'thumbnail_src': thumbnail, + 'img_src': img_src, + } + ) elif mainline_type == 'videos': # some videos do not have a description: while qwant-video @@ -208,19 +222,18 @@ def response(resp): thumbnail = item['thumbnail'] # from some locations (DE and others?) the s2 link do # response a 'Please wait ..' but does not deliver the thumbnail - thumbnail = thumbnail.replace( - 'https://s2.qwant.com', - 'https://s1.qwant.com', 1 + thumbnail = thumbnail.replace('https://s2.qwant.com', 'https://s1.qwant.com', 1) + results.append( + { + 'title': title, + 'url': res_url, + 'content': content, + 'publishedDate': pub_date, + 'thumbnail': thumbnail, + 'template': 'videos.html', + 'length': length, + } ) - results.append({ - 'title': title, - 'url': res_url, - 'content': content, - 'publishedDate': pub_date, - 'thumbnail': thumbnail, - 'template': 'videos.html', - 'length': length, - }) return results @@ -229,8 +242,8 @@ def response(resp): def _fetch_supported_languages(resp): # list of regions is embedded in page as a js object response_text = resp.text - response_text = response_text[response_text.find('INITIAL_PROPS'):] - response_text = response_text[response_text.find('{'):response_text.find('</script>')] + response_text = response_text[response_text.find('INITIAL_PROPS') :] + response_text = response_text[response_text.find('{') : response_text.find('</script>')] regions_json = loads(response_text) diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py index 42f2858d7..ebcd83b8d 100644 --- a/searx/engines/recoll.py +++ b/searx/engines/recoll.py @@ -28,18 +28,12 @@ mount_prefix = None dl_prefix = None # embedded -embedded_url = '<{ttype} controls height="166px" ' +\ - 'src="{url}" type="{mtype}"></{ttype}>' +embedded_url = '<{ttype} controls height="166px" ' + 'src="{url}" type="{mtype}"></{ttype}>' # helper functions def get_time_range(time_range): - sw = { - 'day': 1, - 'week': 7, - 'month': 30, - 'year': 365 - } + sw = {'day': 1, 'week': 7, 'month': 30, 'year': 365} offset = sw.get(time_range, 0) if not offset: @@ -52,11 +46,9 @@ def get_time_range(time_range): def request(query, params): search_after = get_time_range(params['time_range']) search_url = base_url + 'json?{query}&highlight=0' - params['url'] = search_url.format(query=urlencode({ - 'query': query, - 'page': params['pageno'], - 'after': search_after, - 'dir': search_dir})) + params['url'] = search_url.format( + query=urlencode({'query': query, 'page': params['pageno'], 'after': search_after, 'dir': search_dir}) + ) return params @@ -76,10 +68,7 @@ def response(resp): content = '{}'.format(result['snippet']) # append result - item = {'url': url, - 'title': title, - 'content': content, - 'template': 'files.html'} + item = {'url': url, 'title': title, 'content': content, 'template': 'files.html'} if result['size']: item['size'] = int(result['size']) @@ -96,9 +85,8 @@ def response(resp): if mtype in ['audio', 'video']: item['embedded'] = embedded_url.format( - ttype=mtype, - url=quote(url.encode('utf8'), '/:'), - mtype=result['mtype']) + ttype=mtype, url=quote(url.encode('utf8'), '/:'), mtype=result['mtype'] + ) if mtype in ['image'] and subtype in ['bmp', 'gif', 'jpeg', 'png']: item['img_src'] = url diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py index ca6cb28a8..36d92339d 100644 --- a/searx/engines/reddit.py +++ b/searx/engines/reddit.py @@ -52,10 +52,7 @@ def response(resp): data = post['data'] # extract post information - params = { - 'url': urljoin(base_url, data['permalink']), - 'title': data['title'] - } + params = {'url': urljoin(base_url, data['permalink']), 'title': data['title']} # if thumbnail field contains a valid URL, we need to change template thumbnail = data['thumbnail'] diff --git a/searx/engines/redis_server.py b/searx/engines/redis_server.py index f9726033d..03786f81d 100644 --- a/searx/engines/redis_server.py +++ b/searx/engines/redis_server.py @@ -20,16 +20,19 @@ result_template = 'key-value.html' exact_match_only = True _redis_client = None + + def init(_engine_settings): global _redis_client # pylint: disable=global-statement _redis_client = redis.StrictRedis( - host = host, - port = port, - db = db, - password = password or None, - decode_responses = True, + host=host, + port=port, + db=db, + password=password or None, + decode_responses=True, ) + def search(query, _params): if not exact_match_only: return search_keys(query) @@ -42,21 +45,20 @@ def search(query, _params): if ' ' in query: qset, rest = query.split(' ', 1) ret = [] - for res in _redis_client.hscan_iter( - qset, match='*{}*'.format(rest) - ): - ret.append({ - res[0]: res[1], - 'template': result_template, - }) + for res in _redis_client.hscan_iter(qset, match='*{}*'.format(rest)): + ret.append( + { + res[0]: res[1], + 'template': result_template, + } + ) return ret return [] + def search_keys(query): ret = [] - for key in _redis_client.scan_iter( - match='*{}*'.format(query) - ): + for key in _redis_client.scan_iter(match='*{}*'.format(query)): key_type = _redis_client.type(key) res = None diff --git a/searx/engines/rumble.py b/searx/engines/rumble.py index 407142467..beca2570c 100644 --- a/searx/engines/rumble.py +++ b/searx/engines/rumble.py @@ -68,14 +68,16 @@ def response(resp): else: content = f"{views} views - {rumbles} rumbles" - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'author': author, - 'length': length, - 'template': 'videos.html', - 'publishedDate': fixed_date, - 'thumbnail': thumbnail, - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'author': author, + 'length': length, + 'template': 'videos.html', + 'publishedDate': fixed_date, + 'thumbnail': thumbnail, + } + ) return results diff --git a/searx/engines/scanr_structures.py b/searx/engines/scanr_structures.py index 51c925247..ad27079dd 100644 --- a/searx/engines/scanr_structures.py +++ b/searx/engines/scanr_structures.py @@ -32,12 +32,16 @@ def request(query, params): params['url'] = search_url params['method'] = 'POST' params['headers']['Content-type'] = "application/json" - params['data'] = dumps({"query": query, - "searchField": "ALL", - "sortDirection": "ASC", - "sortOrder": "RELEVANCY", - "page": params['pageno'], - "pageSize": page_size}) + params['data'] = dumps( + { + "query": query, + "searchField": "ALL", + "sortDirection": "ASC", + "sortOrder": "RELEVANCY", + "page": params['pageno'], + "pageSize": page_size, + } + ) return params @@ -69,11 +73,15 @@ def response(resp): content = result['highlights'][0]['value'] # append result - results.append({'url': url + 'structure/' + result['id'], - 'title': result['label'], - # 'thumbnail': thumbnail, - 'img_src': thumbnail, - 'content': html_to_text(content)}) + results.append( + { + 'url': url + 'structure/' + result['id'], + 'title': result['label'], + # 'thumbnail': thumbnail, + 'img_src': thumbnail, + 'content': html_to_text(content), + } + ) # return results return results diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index 8c1330d98..a4b0308f9 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -25,10 +25,7 @@ url = 'https://searchcode.com/' search_url = url + 'api/codesearch_I/?{query}&p={pageno}' # special code-endings which are not recognised by the file ending -code_endings = {'cs': 'c#', - 'h': 'c', - 'hpp': 'cpp', - 'cxx': 'cpp'} +code_endings = {'cs': 'c#', 'h': 'c', 'hpp': 'cpp', 'cxx': 'cpp'} # do search-request @@ -55,17 +52,21 @@ def response(resp): lines[int(line)] = code code_language = code_endings.get( - result['filename'].split('.')[-1].lower(), - result['filename'].split('.')[-1].lower()) + result['filename'].split('.')[-1].lower(), result['filename'].split('.')[-1].lower() + ) # append result - results.append({'url': href, - 'title': title, - 'content': '', - 'repository': repo, - 'codelines': sorted(lines.items()), - 'code_language': code_language, - 'template': 'code.html'}) + results.append( + { + 'url': href, + 'title': title, + 'content': '', + 'repository': repo, + 'codelines': sorted(lines.items()), + 'code_language': code_language, + 'template': 'code.html', + } + ) # return results return results diff --git a/searx/engines/searx_engine.py b/searx/engines/searx_engine.py index 98ef0fb79..3e9035d6f 100644 --- a/searx/engines/searx_engine.py +++ b/searx/engines/searx_engine.py @@ -37,7 +37,7 @@ def request(query, params): 'language': params['language'], 'time_range': params['time_range'], 'category': params['category'], - 'format': 'json' + 'format': 'json', } return params diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py index 297d0cf71..5d9d1a8e9 100644 --- a/searx/engines/semantic_scholar.py +++ b/searx/engines/semantic_scholar.py @@ -13,19 +13,21 @@ def request(query, params): params['url'] = search_url params['method'] = 'POST' params['headers']['content-type'] = 'application/json' - params['data'] = dumps({ - "queryString": query, - "page": params['pageno'], - "pageSize": 10, - "sort": "relevance", - "useFallbackRankerService": False, - "useFallbackSearchCluster": False, - "getQuerySuggestions": False, - "authors": [], - "coAuthors": [], - "venues": [], - "performTitleMatch": True, - }) + params['data'] = dumps( + { + "queryString": query, + "page": params['pageno'], + "pageSize": 10, + "sort": "relevance", + "useFallbackRankerService": False, + "useFallbackSearchCluster": False, + "getQuerySuggestions": False, + "authors": [], + "coAuthors": [], + "venues": [], + "performTitleMatch": True, + } + ) return params @@ -33,10 +35,12 @@ def response(resp): res = loads(resp.text) results = [] for result in res['results']: - results.append({ - 'url': result['primaryPaperLink']['url'], - 'title': result['title']['text'], - 'content': result['paperAbstractTruncated'] - }) + results.append( + { + 'url': result['primaryPaperLink']['url'], + 'title': result['title']['text'], + 'content': result['paperAbstractTruncated'], + } + ) return results diff --git a/searx/engines/sepiasearch.py b/searx/engines/sepiasearch.py index 8ccde404f..00b1b3672 100644 --- a/searx/engines/sepiasearch.py +++ b/searx/engines/sepiasearch.py @@ -23,23 +23,21 @@ paging = True time_range_support = True safesearch = True supported_languages = [ + # fmt: off 'en', 'fr', 'ja', 'eu', 'ca', 'cs', 'eo', 'el', 'de', 'it', 'nl', 'es', 'oc', 'gd', 'zh', 'pt', 'sv', 'pl', 'fi', 'ru' + # fmt: on ] base_url = 'https://sepiasearch.org/api/v1/search/videos' -safesearch_table = { - 0: 'both', - 1: 'false', - 2: 'false' -} +safesearch_table = {0: 'both', 1: 'false', 2: 'false'} time_range_table = { 'day': relativedelta.relativedelta(), 'week': relativedelta.relativedelta(weeks=-1), 'month': relativedelta.relativedelta(months=-1), - 'year': relativedelta.relativedelta(years=-1) + 'year': relativedelta.relativedelta(years=-1), } @@ -53,13 +51,19 @@ def minute_to_hm(minute): def request(query, params): - params['url'] = base_url + '?' + urlencode({ - 'search': query, - 'start': (params['pageno'] - 1) * 10, - 'count': 10, - 'sort': '-match', - 'nsfw': safesearch_table[params['safesearch']] - }) + params['url'] = ( + base_url + + '?' + + urlencode( + { + 'search': query, + 'start': (params['pageno'] - 1) * 10, + 'count': 10, + 'sort': '-match', + 'nsfw': safesearch_table[params['safesearch']], + } + ) + ) language = params['language'].split('-')[0] if language in supported_languages: @@ -89,14 +93,18 @@ def response(resp): length = minute_to_hm(result.get('duration')) url = result['url'] - results.append({'url': url, - 'title': title, - 'content': content, - 'author': author, - 'length': length, - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'author': author, + 'length': length, + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) return results diff --git a/searx/engines/seznam.py b/searx/engines/seznam.py index 85cb25b7f..2e95b4769 100644 --- a/searx/engines/seznam.py +++ b/searx/engines/seznam.py @@ -58,10 +58,12 @@ def response(resp): if result_data is None: continue title_element = eval_xpath_getindex(result_element, './/h3/a', 0) - results.append({ - 'url': title_element.get('href'), - 'title': extract_text(title_element), - 'content': extract_text(eval_xpath(result_data, './/div[@class="_3eded7"]')), - }) + results.append( + { + 'url': title_element.get('href'), + 'title': extract_text(title_element), + 'content': extract_text(eval_xpath(result_data, './/div[@class="_3eded7"]')), + } + ) return results diff --git a/searx/engines/sjp.py b/searx/engines/sjp.py index 884fddd2d..ad498b847 100644 --- a/searx/engines/sjp.py +++ b/searx/engines/sjp.py @@ -28,9 +28,11 @@ URL = 'https://sjp.pwn.pl' SEARCH_URL = URL + '/szukaj/{query}.html' word_xpath = '//div[@class="query"]' -dict_xpath = ['//div[@class="wyniki sjp-so-wyniki sjp-so-anchor"]', - '//div[@class="wyniki sjp-wyniki sjp-anchor"]', - '//div[@class="wyniki sjp-doroszewski-wyniki sjp-doroszewski-anchor"]'] +dict_xpath = [ + '//div[@class="wyniki sjp-so-wyniki sjp-so-anchor"]', + '//div[@class="wyniki sjp-wyniki sjp-anchor"]', + '//div[@class="wyniki sjp-doroszewski-wyniki sjp-doroszewski-anchor"]', +] def request(query, params): @@ -85,9 +87,11 @@ def response(resp): infobox += "</ol>" infobox += "</ul></div>" - results.append({ - 'infobox': word, - 'content': infobox, - }) + results.append( + { + 'infobox': word, + 'content': infobox, + } + ) return results diff --git a/searx/engines/solidtorrents.py b/searx/engines/solidtorrents.py index 7fbef9190..614b38277 100644 --- a/searx/engines/solidtorrents.py +++ b/searx/engines/solidtorrents.py @@ -36,14 +36,16 @@ def response(resp): search_results = loads(resp.text) for result in search_results["results"]: - results.append({ - 'infohash': result["infohash"], - 'seed': result["swarm"]["seeders"], - 'leech': result["swarm"]["leechers"], - 'title': result["title"], - 'url': "https://solidtorrents.net/view/" + result["_id"], - 'filesize': result["size"], - 'magnetlink': result["magnet"], - 'template': "torrent.html", - }) + results.append( + { + 'infohash': result["infohash"], + 'seed': result["swarm"]["seeders"], + 'leech': result["swarm"]["leechers"], + 'title': result["title"], + 'url': "https://solidtorrents.net/view/" + result["_id"], + 'filesize': result["size"], + 'magnetlink': result["magnet"], + 'template': "torrent.html", + } + ) return results diff --git a/searx/engines/solr.py b/searx/engines/solr.py index e26f19442..3e7846f8e 100644 --- a/searx/engines/solr.py +++ b/searx/engines/solr.py @@ -14,10 +14,10 @@ from searx.exceptions import SearxEngineAPIException base_url = 'http://localhost:8983' collection = '' rows = 10 -sort = '' # sorting: asc or desc -field_list = 'name' # list of field names to display on the UI -default_fields = '' # default field to query -query_fields = '' # query fields +sort = '' # sorting: asc or desc +field_list = 'name' # list of field names to display on the UI +default_fields = '' # default field to query +query_fields = '' # query fields _search_url = '' paging = True diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index d5bfc0f6f..004164e37 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -27,17 +27,21 @@ paging = True # search-url # missing attribute: user_id, app_version, app_locale url = 'https://api-v2.soundcloud.com/' -search_url = url + 'search?{query}'\ - '&variant_ids='\ - '&facet=model'\ - '&limit=20'\ - '&offset={offset}'\ - '&linked_partitioning=1'\ - '&client_id={client_id}' # noqa - -embedded_url = '<iframe width="100%" height="166" ' +\ - 'scrolling="no" frameborder="no" ' +\ - 'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>' +search_url = ( + url + 'search?{query}' + '&variant_ids=' + '&facet=model' + '&limit=20' + '&offset={offset}' + '&linked_partitioning=1' + '&client_id={client_id}' +) # noqa + +embedded_url = ( + '<iframe width="100%" height="166" ' + + 'scrolling="no" frameborder="no" ' + + 'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>' +) cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U) guest_client_id = '' @@ -75,9 +79,7 @@ def init(engine_settings=None): def request(query, params): offset = (params['pageno'] - 1) * 20 - params['url'] = search_url.format(query=urlencode({'q': query}), - offset=offset, - client_id=guest_client_id) + params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset, client_id=guest_client_id) return params @@ -98,11 +100,15 @@ def response(resp): embedded = embedded_url.format(uri=uri) # append result - results.append({'url': result['permalink_url'], - 'title': title, - 'publishedDate': publishedDate, - 'embedded': embedded, - 'content': content}) + results.append( + { + 'url': result['permalink_url'], + 'title': title, + 'publishedDate': publishedDate, + 'embedded': embedded, + 'content': content, + } + ) # return results return results diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py index 6816fe672..15517e3eb 100644 --- a/searx/engines/spotify.py +++ b/searx/engines/spotify.py @@ -42,9 +42,10 @@ def request(query, params): r = http_post( 'https://accounts.spotify.com/api/token', data={'grant_type': 'client_credentials'}, - headers={'Authorization': 'Basic ' + base64.b64encode( - "{}:{}".format(api_client_id, api_client_secret).encode() - ).decode()} + headers={ + 'Authorization': 'Basic ' + + base64.b64encode("{}:{}".format(api_client_id, api_client_secret).encode()).decode() + }, ) j = loads(r.text) params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))} @@ -63,18 +64,12 @@ def response(resp): if result['type'] == 'track': title = result['name'] url = result['external_urls']['spotify'] - content = '{} - {} - {}'.format( - result['artists'][0]['name'], - result['album']['name'], - result['name']) + content = '{} - {} - {}'.format(result['artists'][0]['name'], result['album']['name'], result['name']) embedded = embedded_url.format(audioid=result['id']) # append result - results.append({'url': url, - 'title': title, - 'embedded': embedded, - 'content': content}) + results.append({'url': url, 'title': title, 'embedded': embedded, 'content': content}) # return results return results diff --git a/searx/engines/springer.py b/searx/engines/springer.py index 246e59b44..512d71e5e 100644 --- a/searx/engines/springer.py +++ b/searx/engines/springer.py @@ -26,15 +26,11 @@ api_key = 'unset' base_url = 'https://api.springernature.com/metadata/json?' + def request(query, params): if api_key == 'unset': raise SearxEngineAPIException('missing Springer-Nature API key') - args = urlencode({ - 'q' : query, - 's' : nb_per_page * (params['pageno'] - 1), - 'p' : nb_per_page, - 'api_key' : api_key - }) + args = urlencode({'q': query, 's': nb_per_page * (params['pageno'] - 1), 'p': nb_per_page, 'api_key': api_key}) params['url'] = base_url + args logger.debug("query_url --> %s", params['url']) return params @@ -50,21 +46,27 @@ def response(resp): content += "..." published = datetime.strptime(record['publicationDate'], '%Y-%m-%d') - metadata = [record[x] for x in [ - 'publicationName', - 'identifier', - 'contentType', - ] if record.get(x) is not None] + metadata = [ + record[x] + for x in [ + 'publicationName', + 'identifier', + 'contentType', + ] + if record.get(x) is not None + ] metadata = ' / '.join(metadata) if record.get('startingPage') and record.get('endingPage') is not None: metadata += " (%(startingPage)s-%(endingPage)s)" % record - results.append({ - 'title': record['title'], - 'url': record['url'][0]['value'].replace('http://', 'https://', 1), - 'content' : content, - 'publishedDate' : published, - 'metadata' : metadata - }) + results.append( + { + 'title': record['title'], + 'url': record['url'][0]['value'].replace('http://', 'https://', 1), + 'content': content, + 'publishedDate': published, + 'metadata': metadata, + } + ) return results diff --git a/searx/engines/sqlite.py b/searx/engines/sqlite.py index 43a85efbb..6de12f5fe 100644 --- a/searx/engines/sqlite.py +++ b/searx/engines/sqlite.py @@ -47,9 +47,9 @@ def search(query, params): query_params = { 'query': query, - 'wildcard': r'%' + query.replace(' ', r'%') + r'%', + 'wildcard': r'%' + query.replace(' ', r'%') + r'%', 'limit': limit, - 'offset': (params['pageno'] - 1) * limit + 'offset': (params['pageno'] - 1) * limit, } query_to_run = query_str + ' LIMIT :limit OFFSET :offset' @@ -59,7 +59,7 @@ def search(query, params): col_names = [cn[0] for cn in cur.description] for row in cur.fetchall(): - item = dict( zip(col_names, map(str, row)) ) + item = dict(zip(col_names, map(str, row))) item['template'] = result_template logger.debug("append result --> %s", item) results.append(item) diff --git a/searx/engines/stackexchange.py b/searx/engines/stackexchange.py index 34cba687c..99615b1a7 100644 --- a/searx/engines/stackexchange.py +++ b/searx/engines/stackexchange.py @@ -23,26 +23,30 @@ paging = True pagesize = 10 api_site = 'stackoverflow' -api_sort= 'activity' +api_sort = 'activity' api_order = 'desc' # https://api.stackexchange.com/docs/advanced-search search_api = 'https://api.stackexchange.com/2.3/search/advanced?' + def request(query, params): - args = urlencode({ - 'q' : query, - 'page' : params['pageno'], - 'pagesize' : pagesize, - 'site' : api_site, - 'sort' : api_sort, - 'order': 'desc', - }) + args = urlencode( + { + 'q': query, + 'page': params['pageno'], + 'pagesize': pagesize, + 'site': api_site, + 'sort': api_sort, + 'order': 'desc', + } + ) params['url'] = search_api + args return params + def response(resp): results = [] @@ -56,10 +60,12 @@ def response(resp): content += ' // is answered' content += " // score: %s" % result['score'] - results.append({ - 'url': "https://%s.com/q/%s" % (api_site, result['question_id']), - 'title': html.unescape(result['title']), - 'content': html.unescape(content), - }) + results.append( + { + 'url': "https://%s.com/q/%s" % (api_site, result['question_id']), + 'title': html.unescape(result['title']), + 'content': html.unescape(content), + } + ) return results diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index e71310be6..65d90debe 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -101,7 +101,7 @@ def response(resp): # check if search result starts with something like: "2 Sep 2014 ... " if re.match(r"^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content): date_pos = content.find('...') + 4 - date_string = content[0:date_pos - 5] + date_string = content[0 : date_pos - 5] # fix content string content = content[date_pos:] @@ -113,7 +113,7 @@ def response(resp): # check if search result starts with something like: "5 days ago ... " elif re.match(r"^[0-9]+ days? ago \.\.\. ", content): date_pos = content.find('...') + 4 - date_string = content[0:date_pos - 5] + date_string = content[0 : date_pos - 5] # calculate datetime published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group())) @@ -123,15 +123,10 @@ def response(resp): if published_date: # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'publishedDate': published_date}) + results.append({'url': url, 'title': title, 'content': content, 'publishedDate': published_date}) else: # append result - results.append({'url': url, - 'title': title, - 'content': content}) + results.append({'url': url, 'title': title, 'content': content}) # return results return results @@ -152,7 +147,7 @@ def _fetch_supported_languages(resp): 'malayam': 'ml', 'norsk': 'nb', 'sinhalese': 'si', - 'sudanese': 'su' + 'sudanese': 'su', } # get the English name of every language known by babel diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py index 0d62453a9..b01de38c1 100644 --- a/searx/engines/tokyotoshokan.py +++ b/searx/engines/tokyotoshokan.py @@ -56,11 +56,7 @@ def response(resp): name_row = rows[i] links = name_row.xpath('./td[@class="desc-top"]/a') - params = { - 'template': 'torrent.html', - 'url': links[-1].attrib.get('href'), - 'title': extract_text(links[-1]) - } + params = {'template': 'torrent.html', 'url': links[-1].attrib.get('href'), 'title': extract_text(links[-1])} # I have not yet seen any torrents without magnet links, but # it's better to be prepared to stumble upon one some day if len(links) == 2: diff --git a/searx/engines/torznab.py b/searx/engines/torznab.py index 960d1ee90..a48017c13 100644 --- a/searx/engines/torznab.py +++ b/searx/engines/torznab.py @@ -35,10 +35,12 @@ api_key = '' # https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories torznab_categories = [] -def init(engine_settings=None): # pylint: disable=unused-argument + +def init(engine_settings=None): # pylint: disable=unused-argument if len(base_url) < 1: raise ValueError('missing torznab base_url') + def request(query, params): search_url = base_url + '?t=search&q={search_query}' @@ -48,13 +50,12 @@ def request(query, params): search_url += '&cat={torznab_categories}' params['url'] = search_url.format( - search_query = quote(query), - api_key = api_key, - torznab_categories = ",".join([str(x) for x in torznab_categories]) + search_query=quote(query), api_key=api_key, torznab_categories=",".join([str(x) for x in torznab_categories]) ) return params + def response(resp): results = [] @@ -103,8 +104,7 @@ def response(resp): result["publishedDate"] = None try: - result["publishedDate"] = datetime.strptime( - get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z') + result["publishedDate"] = datetime.strptime(get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z') except (ValueError, TypeError) as e: logger.debug("ignore exception (publishedDate): %s", e) @@ -134,9 +134,7 @@ def get_property(item, property_name): def get_torznab_attr(item, attr_name): element = item.find( './/torznab:attr[@name="{attr_name}"]'.format(attr_name=attr_name), - { - 'torznab': 'http://torznab.com/schemas/2015/feed' - } + {'torznab': 'http://torznab.com/schemas/2015/feed'}, ) if element is not None: diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 8d67ca0bb..62ade49e2 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -28,24 +28,25 @@ def request(query, params): key_form = '&key=' + api_key else: key_form = '' - params['url'] = url.format(from_lang=params['from_lang'][1], - to_lang=params['to_lang'][1], - query=params['query'], - key=key_form) + params['url'] = url.format( + from_lang=params['from_lang'][1], to_lang=params['to_lang'][1], query=params['query'], key=key_form + ) return params def response(resp): results = [] - results.append({ - 'url': web_url.format( - from_lang=resp.search_params['from_lang'][2], - to_lang=resp.search_params['to_lang'][2], - query=resp.search_params['query']), - 'title': '[{0}-{1}] {2}'.format( - resp.search_params['from_lang'][1], - resp.search_params['to_lang'][1], - resp.search_params['query']), - 'content': resp.json()['responseData']['translatedText'] - }) + results.append( + { + 'url': web_url.format( + from_lang=resp.search_params['from_lang'][2], + to_lang=resp.search_params['to_lang'][2], + query=resp.search_params['query'], + ), + 'title': '[{0}-{1}] {2}'.format( + resp.search_params['from_lang'][1], resp.search_params['to_lang'][1], resp.search_params['query'] + ), + 'content': resp.json()['responseData']['translatedText'], + } + ) return results diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py index 1445b4cec..1967fefd2 100644 --- a/searx/engines/unsplash.py +++ b/searx/engines/unsplash.py @@ -26,23 +26,13 @@ paging = True def clean_url(url): parsed = urlparse(url) - query = [(k, v) for (k, v) - in parse_qsl(parsed.query) if k not in ['ixid', 's']] + query = [(k, v) for (k, v) in parse_qsl(parsed.query) if k not in ['ixid', 's']] - return urlunparse(( - parsed.scheme, - parsed.netloc, - parsed.path, - parsed.params, - urlencode(query), - parsed.fragment - )) + return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, urlencode(query), parsed.fragment)) def request(query, params): - params['url'] = search_url + urlencode({ - 'query': query, 'page': params['pageno'], 'per_page': page_size - }) + params['url'] = search_url + urlencode({'query': query, 'page': params['pageno'], 'per_page': page_size}) logger.debug("query_url --> %s", params['url']) return params @@ -53,13 +43,15 @@ def response(resp): if 'results' in json_data: for result in json_data['results']: - results.append({ - 'template': 'images.html', - 'url': clean_url(result['links']['html']), - 'thumbnail_src': clean_url(result['urls']['thumb']), - 'img_src': clean_url(result['urls']['raw']), - 'title': result.get('alt_description') or 'unknown', - 'content': result.get('description') or '' - }) + results.append( + { + 'template': 'images.html', + 'url': clean_url(result['links']['html']), + 'thumbnail_src': clean_url(result['urls']['thumb']), + 'img_src': clean_url(result['urls']['raw']), + 'title': result.get('alt_description') or 'unknown', + 'content': result.get('description') or '', + } + ) return results diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index 824579256..52d201eac 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -25,15 +25,16 @@ paging = True base_url = 'https://vimeo.com/' search_url = base_url + '/search/page:{pageno}?{query}' -embedded_url = '<iframe data-src="https://player.vimeo.com/video/{videoid}" ' +\ - 'width="540" height="304" frameborder="0" ' +\ - 'webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>' +embedded_url = ( + '<iframe data-src="https://player.vimeo.com/video/{videoid}" ' + + 'width="540" height="304" frameborder="0" ' + + 'webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>' +) # do search-request def request(query, params): - params['url'] = search_url.format(pageno=params['pageno'], - query=urlencode({'q': query})) + params['url'] = search_url.format(pageno=params['pageno'], query=urlencode({'q': query})) return params @@ -56,13 +57,17 @@ def response(resp): embedded = embedded_url.format(videoid=videoid) # append result - results.append({'url': url, - 'title': title, - 'content': '', - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': '', + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index f0dfc7595..e5d3f55c0 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -14,7 +14,10 @@ from searx.data import WIKIDATA_UNITS from searx.network import post, get from searx.utils import match_language, searx_useragent, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom -from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.wikipedia import ( # pylint: disable=unused-import + _fetch_supported_languages, + supported_languages_url, +) # about about = { @@ -92,24 +95,27 @@ WHERE { # https://www.w3.org/TR/sparql11-query/#rSTRING_LITERAL1 # https://lists.w3.org/Archives/Public/public-rdf-dawg/2011OctDec/0175.html -sparql_string_escape = get_string_replaces_function({'\t': '\\\t', - '\n': '\\\n', - '\r': '\\\r', - '\b': '\\\b', - '\f': '\\\f', - '\"': '\\\"', - '\'': '\\\'', - '\\': '\\\\'}) +sparql_string_escape = get_string_replaces_function( + # fmt: off + { + '\t': '\\\t', + '\n': '\\\n', + '\r': '\\\r', + '\b': '\\\b', + '\f': '\\\f', + '\"': '\\\"', + '\'': '\\\'', + '\\': '\\\\' + } + # fmt: on +) replace_http_by_https = get_string_replaces_function({'http:': 'https:'}) def get_headers(): # user agent: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits - return { - 'Accept': 'application/sparql-results+json', - 'User-Agent': searx_useragent() - } + return {'Accept': 'application/sparql-results+json', 'User-Agent': searx_useragent()} def get_label_for_entity(entity_id, language): @@ -205,9 +211,9 @@ def get_results(attribute_result, attributes, language): results.append({'title': infobox_title, 'url': url}) # update the infobox_id with the wikipedia URL # first the local wikipedia URL, and as fallback the english wikipedia URL - if attribute_type == WDArticle\ - and ((attribute.language == 'en' and infobox_id_lang is None) - or attribute.language != 'en'): + if attribute_type == WDArticle and ( + (attribute.language == 'en' and infobox_id_lang is None) or attribute.language != 'en' + ): infobox_id_lang = attribute.language infobox_id = url elif attribute_type == WDImageAttribute: @@ -226,13 +232,11 @@ def get_results(attribute_result, attributes, language): osm_zoom = area_to_osm_zoom(area) if area else 19 url = attribute.get_geo_url(attribute_result, osm_zoom=osm_zoom) if url: - infobox_urls.append({'title': attribute.get_label(language), - 'url': url, - 'entity': attribute.name}) + infobox_urls.append({'title': attribute.get_label(language), 'url': url, 'entity': attribute.name}) else: - infobox_attributes.append({'label': attribute.get_label(language), - 'value': value, - 'entity': attribute.name}) + infobox_attributes.append( + {'label': attribute.get_label(language), 'value': value, 'entity': attribute.name} + ) if infobox_id: infobox_id = replace_http_by_https(infobox_id) @@ -240,22 +244,19 @@ def get_results(attribute_result, attributes, language): # add the wikidata URL at the end infobox_urls.append({'title': 'Wikidata', 'url': attribute_result['item']}) - if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and\ - len(infobox_content) == 0: - results.append({ - 'url': infobox_urls[0]['url'], - 'title': infobox_title, - 'content': infobox_content - }) + if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and len(infobox_content) == 0: + results.append({'url': infobox_urls[0]['url'], 'title': infobox_title, 'content': infobox_content}) else: - results.append({ - 'infobox': infobox_title, - 'id': infobox_id, - 'content': infobox_content, - 'img_src': img_src, - 'urls': infobox_urls, - 'attributes': infobox_attributes - }) + results.append( + { + 'infobox': infobox_title, + 'id': infobox_id, + 'content': infobox_content, + 'img_src': img_src, + 'urls': infobox_urls, + 'attributes': infobox_attributes, + } + ) return results @@ -265,13 +266,14 @@ def get_query(query, language): where = list(filter(lambda s: len(s) > 0, [a.get_where() for a in attributes])) wikibase_label = list(filter(lambda s: len(s) > 0, [a.get_wikibase_label() for a in attributes])) group_by = list(filter(lambda s: len(s) > 0, [a.get_group_by() for a in attributes])) - query = QUERY_TEMPLATE\ - .replace('%QUERY%', sparql_string_escape(query))\ - .replace('%SELECT%', ' '.join(select))\ - .replace('%WHERE%', '\n '.join(where))\ - .replace('%WIKIBASE_LABELS%', '\n '.join(wikibase_label))\ - .replace('%GROUP_BY%', ' '.join(group_by))\ + query = ( + QUERY_TEMPLATE.replace('%QUERY%', sparql_string_escape(query)) + .replace('%SELECT%', ' '.join(select)) + .replace('%WHERE%', '\n '.join(where)) + .replace('%WIKIBASE_LABELS%', '\n '.join(wikibase_label)) + .replace('%GROUP_BY%', ' '.join(group_by)) .replace('%LANGUAGE%', language) + ) return query, attributes @@ -297,90 +299,98 @@ def get_attributes(language): attributes.append(WDDateAttribute(name)) # Dates - for p in ['P571', # inception date - 'P576', # dissolution date - 'P580', # start date - 'P582', # end date - 'P569', # date of birth - 'P570', # date of death - 'P619', # date of spacecraft launch - 'P620']: # date of spacecraft landing + for p in [ + 'P571', # inception date + 'P576', # dissolution date + 'P580', # start date + 'P582', # end date + 'P569', # date of birth + 'P570', # date of death + 'P619', # date of spacecraft launch + 'P620', + ]: # date of spacecraft landing add_date(p) - for p in ['P27', # country of citizenship - 'P495', # country of origin - 'P17', # country - 'P159']: # headquarters location + for p in [ + 'P27', # country of citizenship + 'P495', # country of origin + 'P17', # country + 'P159', + ]: # headquarters location add_label(p) # Places - for p in ['P36', # capital - 'P35', # head of state - 'P6', # head of government - 'P122', # basic form of government - 'P37']: # official language + for p in [ + 'P36', # capital + 'P35', # head of state + 'P6', # head of government + 'P122', # basic form of government + 'P37', + ]: # official language add_label(p) - add_value('P1082') # population + add_value('P1082') # population add_amount('P2046') # area - add_amount('P281') # postal code - add_label('P38') # currency + add_amount('P281') # postal code + add_label('P38') # currency add_amount('P2048') # heigth (building) # Media - for p in ['P400', # platform (videogames, computing) - 'P50', # author - 'P170', # creator - 'P57', # director - 'P175', # performer - 'P178', # developer - 'P162', # producer - 'P176', # manufacturer - 'P58', # screenwriter - 'P272', # production company - 'P264', # record label - 'P123', # publisher - 'P449', # original network - 'P750', # distributed by - 'P86']: # composer + for p in [ + 'P400', # platform (videogames, computing) + 'P50', # author + 'P170', # creator + 'P57', # director + 'P175', # performer + 'P178', # developer + 'P162', # producer + 'P176', # manufacturer + 'P58', # screenwriter + 'P272', # production company + 'P264', # record label + 'P123', # publisher + 'P449', # original network + 'P750', # distributed by + 'P86', + ]: # composer add_label(p) - add_date('P577') # publication date - add_label('P136') # genre (music, film, artistic...) - add_label('P364') # original language - add_value('P212') # ISBN-13 - add_value('P957') # ISBN-10 - add_label('P275') # copyright license - add_label('P277') # programming language - add_value('P348') # version - add_label('P840') # narrative location + add_date('P577') # publication date + add_label('P136') # genre (music, film, artistic...) + add_label('P364') # original language + add_value('P212') # ISBN-13 + add_value('P957') # ISBN-10 + add_label('P275') # copyright license + add_label('P277') # programming language + add_value('P348') # version + add_label('P840') # narrative location # Languages - add_value('P1098') # number of speakers - add_label('P282') # writing system - add_label('P1018') # language regulatory body - add_value('P218') # language code (ISO 639-1) + add_value('P1098') # number of speakers + add_label('P282') # writing system + add_label('P1018') # language regulatory body + add_value('P218') # language code (ISO 639-1) # Other - add_label('P169') # ceo - add_label('P112') # founded by - add_label('P1454') # legal form (company, organization) - add_label('P137') # operator (service, facility, ...) - add_label('P1029') # crew members (tripulation) - add_label('P225') # taxon name - add_value('P274') # chemical formula - add_label('P1346') # winner (sports, contests, ...) - add_value('P1120') # number of deaths - add_value('P498') # currency code (ISO 4217) + add_label('P169') # ceo + add_label('P112') # founded by + add_label('P1454') # legal form (company, organization) + add_label('P137') # operator (service, facility, ...) + add_label('P1029') # crew members (tripulation) + add_label('P225') # taxon name + add_value('P274') # chemical formula + add_label('P1346') # winner (sports, contests, ...) + add_value('P1120') # number of deaths + add_value('P498') # currency code (ISO 4217) # URL - add_url('P856', official=True) # official website + add_url('P856', official=True) # official website attributes.append(WDArticle(language)) # wikipedia (user language) if not language.startswith('en'): attributes.append(WDArticle('en')) # wikipedia (english) - add_url('P1324') # source code repository - add_url('P1581') # blog + add_url('P1324') # source code repository + add_url('P1581') # blog add_url('P434', url_id='musicbrainz_artist') add_url('P435', url_id='musicbrainz_work') add_url('P436', url_id='musicbrainz_release_group') @@ -396,11 +406,11 @@ def get_attributes(language): attributes.append(WDGeoAttribute('P625')) # Image - add_image('P15', priority=1, url_id='wikimedia_image') # route map - add_image('P242', priority=2, url_id='wikimedia_image') # locator map - add_image('P154', priority=3, url_id='wikimedia_image') # logo - add_image('P18', priority=4, url_id='wikimedia_image') # image - add_image('P41', priority=5, url_id='wikimedia_image') # flag + add_image('P15', priority=1, url_id='wikimedia_image') # route map + add_image('P242', priority=2, url_id='wikimedia_image') # locator map + add_image('P154', priority=3, url_id='wikimedia_image') # logo + add_image('P18', priority=4, url_id='wikimedia_image') # image + add_image('P41', priority=5, url_id='wikimedia_image') # flag add_image('P2716', priority=6, url_id='wikimedia_image') # collage add_image('P2910', priority=7, url_id='wikimedia_image') # icon @@ -409,7 +419,7 @@ def get_attributes(language): class WDAttribute: - __slots__ = 'name', + __slots__ = ('name',) def __init__(self, name): self.name = name @@ -437,14 +447,15 @@ class WDAttribute: class WDAmountAttribute(WDAttribute): - def get_select(self): return '?{name} ?{name}Unit'.replace('{name}', self.name) def get_where(self): return """ OPTIONAL { ?item p:{name} ?{name}Node . ?{name}Node rdf:type wikibase:BestRank ; ps:{name} ?{name} . - OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace('{name}', self.name) + OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace( + '{name}', self.name + ) def get_group_by(self): return self.get_select() @@ -478,7 +489,9 @@ class WDArticle(WDAttribute): return """OPTIONAL { ?article{language} schema:about ?item ; schema:inLanguage "{language}" ; schema:isPartOf <https://{language}.wikipedia.org/> ; - schema:name ?articleName{language} . }""".replace('{language}', self.language) + schema:name ?articleName{language} . }""".replace( + '{language}', self.language + ) def get_group_by(self): return self.get_select() @@ -489,7 +502,6 @@ class WDArticle(WDAttribute): class WDLabelAttribute(WDAttribute): - def get_select(self): return '(group_concat(distinct ?{name}Label;separator=", ") as ?{name}Labels)'.replace('{name}', self.name) @@ -520,14 +532,13 @@ class WDURLAttribute(WDAttribute): value = value.split(',')[0] url_id = self.url_id if value.startswith(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE): - value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):] + value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE) :] url_id = 'wikimedia_image' return get_external_url(url_id, value) return value class WDGeoAttribute(WDAttribute): - def get_label(self, language): return "OpenStreetMap" @@ -537,7 +548,9 @@ class WDGeoAttribute(WDAttribute): def get_where(self): return """OPTIONAL { ?item p:{name}/psv:{name} [ wikibase:geoLatitude ?{name}Lat ; - wikibase:geoLongitude ?{name}Long ] }""".replace('{name}', self.name) + wikibase:geoLongitude ?{name}Long ] }""".replace( + '{name}', self.name + ) def get_group_by(self): return self.get_select() @@ -559,7 +572,7 @@ class WDGeoAttribute(WDAttribute): class WDImageAttribute(WDURLAttribute): - __slots__ = 'priority', + __slots__ = ('priority',) def __init__(self, name, url_id=None, priority=100): super().__init__(name, url_id) @@ -567,7 +580,6 @@ class WDImageAttribute(WDURLAttribute): class WDDateAttribute(WDAttribute): - def get_select(self): return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name) @@ -581,7 +593,9 @@ class WDDateAttribute(WDAttribute): wikibase:timePrecision ?{name}timePrecision ; wikibase:timeTimezone ?{name}timeZone ; wikibase:timeCalendarModel ?{name}timeCalendar ] . } - hint:Prior hint:rangeSafe true;""".replace('{name}', self.name) + hint:Prior hint:rangeSafe true;""".replace( + '{name}', self.name + ) def get_group_by(self): return self.get_select() @@ -613,11 +627,12 @@ class WDDateAttribute(WDAttribute): def format_13(self, value, locale): timestamp = isoparse(value) # precision: minute - return get_datetime_format(format, locale=locale) \ - .replace("'", "") \ - .replace('{0}', format_time(timestamp, 'full', tzinfo=None, - locale=locale)) \ + return ( + get_datetime_format(format, locale=locale) + .replace("'", "") + .replace('{0}', format_time(timestamp, 'full', tzinfo=None, locale=locale)) .replace('{1}', format_date(timestamp, 'short', locale=locale)) + ) def format_14(self, value, locale): # precision: second. @@ -638,7 +653,7 @@ class WDDateAttribute(WDAttribute): '11': ('format_11', 0), # day '12': ('format_13', 0), # hour (not supported by babel, display minute) '13': ('format_13', 0), # minute - '14': ('format_14', 0) # second + '14': ('format_14', 0), # second } def get_str(self, result, language): diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 5e34db9a7..cc806a8de 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -39,8 +39,7 @@ def request(query, params): query = query.title() language = url_lang(params['language']) - params['url'] = search_url.format(title=quote(query), - language=language) + params['url'] = search_url.format(title=quote(query), language=language) if params['language'].lower() in language_variants.get(language, []): params['headers']['Accept-Language'] = params['language'].lower() @@ -63,8 +62,10 @@ def response(resp): except: pass else: - if api_result['type'] == 'https://mediawiki.org/wiki/HyperSwitch/errors/bad_request' \ - and api_result['detail'] == 'title-invalid-characters': + if ( + api_result['type'] == 'https://mediawiki.org/wiki/HyperSwitch/errors/bad_request' + and api_result['detail'] == 'title-invalid-characters' + ): return [] raise_for_httperror(resp) @@ -81,11 +82,15 @@ def response(resp): results.append({'url': wikipedia_link, 'title': title}) - results.append({'infobox': title, - 'id': wikipedia_link, - 'content': api_result.get('extract', ''), - 'img_src': api_result.get('thumbnail', {}).get('source'), - 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]}) + results.append( + { + 'infobox': title, + 'id': wikipedia_link, + 'content': api_result.get('extract', ''), + 'img_src': api_result.get('thumbnail', {}).get('source'), + 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}], + } + ) return results diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 9c84e2809..1c882c582 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -36,8 +36,7 @@ img_alt_xpath = './@alt' # pods to display as image in infobox # this pods do return a plaintext, but they look better and are more useful as images -image_pods = {'VisualRepresentation', - 'Illustration'} +image_pods = {'VisualRepresentation', 'Illustration'} # do search-request @@ -50,15 +49,17 @@ def request(query, params): # replace private user area characters to make text legible def replace_pua_chars(text): - pua_chars = {'\uf522': '\u2192', # rigth arrow - '\uf7b1': '\u2115', # set of natural numbers - '\uf7b4': '\u211a', # set of rational numbers - '\uf7b5': '\u211d', # set of real numbers - '\uf7bd': '\u2124', # set of integer numbers - '\uf74c': 'd', # differential - '\uf74d': '\u212f', # euler's number - '\uf74e': 'i', # imaginary number - '\uf7d9': '='} # equals sign + pua_chars = { + '\uf522': '\u2192', # rigth arrow + '\uf7b1': '\u2115', # set of natural numbers + '\uf7b4': '\u211a', # set of rational numbers + '\uf7b5': '\u211d', # set of real numbers + '\uf7bd': '\u2124', # set of integer numbers + '\uf74c': 'd', # differential + '\uf74d': '\u212f', # euler's number + '\uf74e': 'i', # imaginary number + '\uf7d9': '=', + } # equals sign for k, v in pua_chars.items(): text = text.replace(k, v) @@ -112,9 +113,12 @@ def response(resp): result_chunks.append({'label': pod_title, 'value': content}) elif image: - result_chunks.append({'label': pod_title, - 'image': {'src': image[0].xpath(img_src_xpath)[0], - 'alt': image[0].xpath(img_alt_xpath)[0]}}) + result_chunks.append( + { + 'label': pod_title, + 'image': {'src': image[0].xpath(img_src_xpath)[0], 'alt': image[0].xpath(img_alt_xpath)[0]}, + } + ) if not result_chunks: return [] @@ -122,13 +126,15 @@ def response(resp): title = "Wolfram|Alpha (%s)" % infobox_title # append infobox - results.append({'infobox': infobox_title, - 'attributes': result_chunks, - 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]}) + results.append( + { + 'infobox': infobox_title, + 'attributes': result_chunks, + 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}], + } + ) # append link to site - results.append({'url': resp.request.headers['Referer'], - 'title': title, - 'content': result_content}) + results.append({'url': resp.request.headers['Referer'], 'title': title, 'content': result_content}) return results diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 1f2cfa4e6..bad25602a 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -22,30 +22,29 @@ about = { # search-url url = 'https://www.wolframalpha.com/' -search_url = url + 'input/json.jsp'\ - '?async=false'\ - '&banners=raw'\ - '&debuggingdata=false'\ - '&format=image,plaintext,imagemap,minput,moutput'\ - '&formattimeout=2'\ - '&{query}'\ - '&output=JSON'\ - '&parsetimeout=2'\ - '&proxycode={token}'\ - '&scantimeout=0.5'\ - '&sponsorcategories=true'\ +search_url = ( + url + 'input/json.jsp' + '?async=false' + '&banners=raw' + '&debuggingdata=false' + '&format=image,plaintext,imagemap,minput,moutput' + '&formattimeout=2' + '&{query}' + '&output=JSON' + '&parsetimeout=2' + '&proxycode={token}' + '&scantimeout=0.5' + '&sponsorcategories=true' '&statemethod=deploybutton' +) referer_url = url + 'input/?{query}' -token = {'value': '', - 'last_updated': None} +token = {'value': '', 'last_updated': None} # pods to display as image in infobox # this pods do return a plaintext, but they look better and are more useful as images -image_pods = {'VisualRepresentation', - 'Illustration', - 'Symbol'} +image_pods = {'VisualRepresentation', 'Illustration', 'Symbol'} # seems, wolframalpha resets its token in every hour @@ -115,12 +114,20 @@ def response(resp): if not result_chunks: return [] - results.append({'infobox': infobox_title, - 'attributes': result_chunks, - 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]}) - - results.append({'url': resp.request.headers['Referer'], - 'title': 'Wolfram|Alpha (' + infobox_title + ')', - 'content': result_content}) + results.append( + { + 'infobox': infobox_title, + 'attributes': result_chunks, + 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}], + } + ) + + results.append( + { + 'url': resp.request.headers['Referer'], + 'title': 'Wolfram|Alpha (' + infobox_title + ')', + 'content': result_content, + } + ) return results diff --git a/searx/engines/wordnik.py b/searx/engines/wordnik.py index 0c3785cfb..21eaeccc3 100644 --- a/searx/engines/wordnik.py +++ b/searx/engines/wordnik.py @@ -48,7 +48,7 @@ def response(resp): def_abbr = extract_text(def_item.xpath('.//abbr')).strip() def_text = extract_text(def_item).strip() if def_abbr: - def_text = def_text[len(def_abbr):].strip() + def_text = def_text[len(def_abbr) :].strip() src_defs.append((def_abbr, def_text)) definitions.append((src_text, src_defs)) @@ -66,9 +66,11 @@ def response(resp): infobox += f"<li><i>{def_abbr}</i> {def_text}</li>" infobox += "</ul>" - results.append({ - 'infobox': word, - 'content': infobox, - }) + results.append( + { + 'infobox': word, + 'content': infobox, + } + ) return results diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py index 96b8d680c..f6b82944d 100644 --- a/searx/engines/www1x.py +++ b/searx/engines/www1x.py @@ -46,12 +46,16 @@ def response(resp): thumbnail_src = urljoin(gallery_url, eval_xpath_getindex(link, './/img', 0).attrib['src']) # append result - results.append({'url': url, - 'title': title, - 'img_src': thumbnail_src, - 'content': '', - 'thumbnail_src': thumbnail_src, - 'template': 'images.html'}) + results.append( + { + 'url': url, + 'title': title, + 'img_src': thumbnail_src, + 'content': '', + 'thumbnail_src': thumbnail_src, + 'template': 'images.html', + } + ) # return results return results diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 08677b708..2737bf94a 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -56,7 +56,7 @@ Replacements are: """ -lang_all='en' +lang_all = 'en' '''Replacement ``{lang}`` in :py:obj:`search_url` if language ``all`` is selected. ''' @@ -110,9 +110,9 @@ requested by the user, the URL paramter is an empty string. The time_range_map = { 'day': 24, - 'week': 24*7, - 'month': 24*30, - 'year': 24*365, + 'week': 24 * 7, + 'month': 24 * 30, + 'year': 24 * 365, } '''Maps time range value from user to ``{time_range_val}`` in :py:obj:`time_range_url`. @@ -129,11 +129,7 @@ time_range_map = { safe_search_support = False '''Engine supports safe-search.''' -safe_search_map = { - 0: '&filter=none', - 1: '&filter=moderate', - 2: '&filter=strict' -} +safe_search_map = {0: '&filter=none', 1: '&filter=moderate', 2: '&filter=strict'} '''Maps safe-search value to ``{safe_search}`` in :py:obj:`search_url`. .. code:: yaml @@ -146,10 +142,9 @@ safe_search_map = { ''' -def request(query, params): - '''Build request parameters (see :ref:`engine request`). - ''' +def request(query, params): + '''Build request parameters (see :ref:`engine request`).''' lang = lang_all if params['language'] != 'all': lang = params['language'][:2] @@ -167,8 +162,8 @@ def request(query, params): 'query': urlencode({'q': query})[2:], 'lang': lang, 'pageno': (params['pageno'] - 1) * page_size + first_page_num, - 'time_range' : time_range, - 'safe_search' : safe_search, + 'time_range': time_range, + 'safe_search': safe_search, } params['url'] = search_url.format(**fargs) @@ -176,10 +171,9 @@ def request(query, params): return params -def response(resp): - '''Scrap *results* from the response (see :ref:`engine results`). - ''' +def response(resp): + '''Scrap *results* from the response (see :ref:`engine results`).''' results = [] dom = html.fromstring(resp.text) is_onion = 'onions' in categories @@ -200,10 +194,7 @@ def response(resp): # add alternative cached url if available if cached_xpath: - tmp_result['cached_url'] = ( - cached_url - + extract_text(eval_xpath_list(result, cached_xpath, min_len=1)) - ) + tmp_result['cached_url'] = cached_url + extract_text(eval_xpath_list(result, cached_xpath, min_len=1)) if is_onion: tmp_result['is_onion'] = True @@ -213,31 +204,27 @@ def response(resp): else: if cached_xpath: for url, title, content, cached in zip( - (extract_url(x, search_url) for - x in eval_xpath_list(dom, url_xpath)), + (extract_url(x, search_url) for x in eval_xpath_list(dom, url_xpath)), map(extract_text, eval_xpath_list(dom, title_xpath)), map(extract_text, eval_xpath_list(dom, content_xpath)), - map(extract_text, eval_xpath_list(dom, cached_xpath)) + map(extract_text, eval_xpath_list(dom, cached_xpath)), ): - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'cached_url': cached_url + cached, 'is_onion': is_onion - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'cached_url': cached_url + cached, + 'is_onion': is_onion, + } + ) else: for url, title, content in zip( - (extract_url(x, search_url) for - x in eval_xpath_list(dom, url_xpath)), + (extract_url(x, search_url) for x in eval_xpath_list(dom, url_xpath)), map(extract_text, eval_xpath_list(dom, title_xpath)), - map(extract_text, eval_xpath_list(dom, content_xpath)) + map(extract_text, eval_xpath_list(dom, content_xpath)), ): - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'is_onion': is_onion - }) + results.append({'url': url, 'title': title, 'content': content, 'is_onion': is_onion}) if suggestion_xpath: for suggestion in eval_xpath(dom, suggestion_xpath): diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index fbd99c47b..12e7305db 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -30,18 +30,16 @@ http_digest_auth_pass = "" # search-url base_url = 'http://localhost:8090' -search_url = '/yacysearch.json?{query}'\ - '&startRecord={offset}'\ - '&maximumRecords={limit}'\ - '&contentdom={search_type}'\ - '&resource=global' +search_url = ( + '/yacysearch.json?{query}' + '&startRecord={offset}' + '&maximumRecords={limit}' + '&contentdom={search_type}' + '&resource=global' +) # yacy specific type-definitions -search_types = {'general': 'text', - 'images': 'image', - 'files': 'app', - 'music': 'audio', - 'videos': 'video'} +search_types = {'general': 'text', 'images': 'image', 'files': 'app', 'music': 'audio', 'videos': 'video'} # do search-request @@ -49,11 +47,9 @@ def request(query, params): offset = (params['pageno'] - 1) * number_of_results search_type = search_types.get(params.get('category'), '0') - params['url'] = base_url +\ - search_url.format(query=urlencode({'query': query}), - offset=offset, - limit=number_of_results, - search_type=search_type) + params['url'] = base_url + search_url.format( + query=urlencode({'query': query}), offset=offset, limit=number_of_results, search_type=search_type + ) if http_digest_auth_user and http_digest_auth_pass: params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass) @@ -93,21 +89,29 @@ def response(resp): continue # append result - results.append({'url': result_url, - 'title': result['title'], - 'content': '', - 'img_src': result['image'], - 'template': 'images.html'}) + results.append( + { + 'url': result_url, + 'title': result['title'], + 'content': '', + 'img_src': result['image'], + 'template': 'images.html', + } + ) # parse general results else: publishedDate = parser.parse(result['pubDate']) # append result - results.append({'url': result['link'], - 'title': result['title'], - 'content': html_to_text(result['description']), - 'publishedDate': publishedDate}) + results.append( + { + 'url': result['link'], + 'title': result['title'], + 'content': html_to_text(result['description']), + 'publishedDate': publishedDate, + } + ) # TODO parse video, audio and file results diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index bd6e6721c..08bde6665 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -50,59 +50,59 @@ language_aliases = { } lang2domain = { - 'zh_chs' : 'hk.search.yahoo.com', - 'zh_cht' : 'tw.search.yahoo.com', - 'en' : 'search.yahoo.com', - - 'bg' : 'search.yahoo.com', - 'cs' : 'search.yahoo.com', - 'da' : 'search.yahoo.com', - 'el' : 'search.yahoo.com', - 'et' : 'search.yahoo.com', - 'he' : 'search.yahoo.com', - 'hr' : 'search.yahoo.com', - 'ja' : 'search.yahoo.com', - 'ko' : 'search.yahoo.com', - 'sk' : 'search.yahoo.com', - 'sl' : 'search.yahoo.com', - + 'zh_chs': 'hk.search.yahoo.com', + 'zh_cht': 'tw.search.yahoo.com', + 'en': 'search.yahoo.com', + 'bg': 'search.yahoo.com', + 'cs': 'search.yahoo.com', + 'da': 'search.yahoo.com', + 'el': 'search.yahoo.com', + 'et': 'search.yahoo.com', + 'he': 'search.yahoo.com', + 'hr': 'search.yahoo.com', + 'ja': 'search.yahoo.com', + 'ko': 'search.yahoo.com', + 'sk': 'search.yahoo.com', + 'sl': 'search.yahoo.com', } """Map language to domain""" + def _get_language(params): lang = language_aliases.get(params['language']) if lang is None: - lang = match_language( - params['language'], supported_languages, language_aliases - ) + lang = match_language(params['language'], supported_languages, language_aliases) lang = lang.split('-')[0] - logger.debug("params['language']: %s --> %s" , params['language'], lang) + logger.debug("params['language']: %s --> %s", params['language'], lang) return lang + def request(query, params): """build request""" offset = (params['pageno'] - 1) * 7 + 1 - lang = _get_language(params) - age, btf = time_range_dict.get( - params['time_range'], ('', '')) - - args = urlencode({ - 'p' : query, - 'ei' : 'UTF-8', - 'fl' : 1, - 'vl' : 'lang_' + lang, - 'btf' : btf, - 'fr2' : 'time', - 'age' : age, - 'b' : offset, - 'xargs' :0 - }) + lang = _get_language(params) + age, btf = time_range_dict.get(params['time_range'], ('', '')) + + args = urlencode( + { + 'p': query, + 'ei': 'UTF-8', + 'fl': 1, + 'vl': 'lang_' + lang, + 'btf': btf, + 'fr2': 'time', + 'age': age, + 'b': offset, + 'xargs': 0, + } + ) domain = lang2domain.get(lang, '%s.search.yahoo.com' % lang) params['url'] = 'https://%s/search?%s' % (domain, args) return params + def parse_url(url_string): """remove yahoo-specific tracking-url""" @@ -121,6 +121,7 @@ def parse_url(url_string): end = min(endpositions) return unquote(url_string[start:end]) + def response(resp): """parse response""" @@ -140,18 +141,12 @@ def response(resp): offset = len(extract_text(title.xpath('span'))) title = extract_text(title)[offset:] - content = eval_xpath_getindex( - result, './/div[contains(@class, "compText")]', 0, default='' - ) + content = eval_xpath_getindex(result, './/div[contains(@class, "compText")]', 0, default='') if content: content = extract_text(content) # append result - results.append({ - 'url': url, - 'title': title, - 'content': content - }) + results.append({'url': url, 'title': title, 'content': content}) for suggestion in eval_xpath_list(dom, '//div[contains(@class, "AlsoTry")]//table//a'): # append suggestion @@ -167,6 +162,6 @@ def _fetch_supported_languages(resp): offset = len('lang_') for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'): - supported_languages.append( val[offset:] ) + supported_languages.append(val[offset:]) return supported_languages diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index ec07cd408..00f208b17 100644 --- a/searx/engines/yahoo_news.py +++ b/searx/engines/yahoo_news.py @@ -40,35 +40,35 @@ categories = ['news'] # search-url search_url = ( + # fmt: off 'https://news.search.yahoo.com/search' '?{query}&b={offset}' - ) + # fmt: on +) AGO_RE = re.compile(r'([0-9]+)\s*(year|month|week|day|minute|hour)') AGO_TIMEDELTA = { - 'minute': timedelta(minutes=1), - 'hour': timedelta(hours=1), - 'day': timedelta(days=1), - 'week': timedelta(days=7), - 'month': timedelta(days=30), - 'year': timedelta(days=365), + 'minute': timedelta(minutes=1), + 'hour': timedelta(hours=1), + 'day': timedelta(days=1), + 'week': timedelta(days=7), + 'month': timedelta(days=30), + 'year': timedelta(days=365), } + def request(query, params): offset = (params['pageno'] - 1) * 10 + 1 - params['url'] = search_url.format( - offset = offset, - query = urlencode({'p': query}) - ) + params['url'] = search_url.format(offset=offset, query=urlencode({'p': query})) logger.debug("query_url --> %s", params['url']) return params + def response(resp): results = [] dom = html.fromstring(resp.text) - # parse results for result in eval_xpath_list(dom, '//ol[contains(@class,"searchCenterMiddle")]//li'): @@ -80,12 +80,7 @@ def response(resp): content = extract_text(result.xpath('.//p')) img_src = eval_xpath_getindex(result, './/img/@data-src', 0, None) - item = { - 'url': url, - 'title': title, - 'content': content, - 'img_src' : img_src - } + item = {'url': url, 'title': title, 'content': content, 'img_src': img_src} pub_date = extract_text(result.xpath('.//span[contains(@class,"s-time")]')) ago = AGO_RE.search(pub_date) diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py index ed27db07b..52db45960 100644 --- a/searx/engines/youtube_api.py +++ b/searx/engines/youtube_api.py @@ -27,17 +27,18 @@ api_key = None base_url = 'https://www.googleapis.com/youtube/v3/search' search_url = base_url + '?part=snippet&{query}&maxResults=20&key={api_key}' -embedded_url = '<iframe width="540" height="304" ' +\ - 'data-src="https://www.youtube-nocookie.com/embed/{videoid}" ' +\ - 'frameborder="0" allowfullscreen></iframe>' +embedded_url = ( + '<iframe width="540" height="304" ' + + 'data-src="https://www.youtube-nocookie.com/embed/{videoid}" ' + + 'frameborder="0" allowfullscreen></iframe>' +) base_youtube_url = 'https://www.youtube.com/watch?v=' # do search-request def request(query, params): - params['url'] = search_url.format(query=urlencode({'q': query}), - api_key=api_key) + params['url'] = search_url.format(query=urlencode({'q': query}), api_key=api_key) # add language tag if specified if params['language'] != 'all': @@ -79,13 +80,17 @@ def response(resp): embedded = embedded_url.format(videoid=videoid) # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py index 68b75bc72..239830cc7 100644 --- a/searx/engines/youtube_noapi.py +++ b/searx/engines/youtube_noapi.py @@ -30,14 +30,13 @@ search_url = base_url + '?search_query={query}&page={page}' time_range_url = '&sp=EgII{time_range}%253D%253D' # the key seems to be constant next_page_url = 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' -time_range_dict = {'day': 'Ag', - 'week': 'Aw', - 'month': 'BA', - 'year': 'BQ'} +time_range_dict = {'day': 'Ag', 'week': 'Aw', 'month': 'BA', 'year': 'BQ'} -embedded_url = '<iframe width="540" height="304" ' +\ - 'data-src="https://www.youtube-nocookie.com/embed/{videoid}" ' +\ - 'frameborder="0" allowfullscreen></iframe>' +embedded_url = ( + '<iframe width="540" height="304" ' + + 'data-src="https://www.youtube-nocookie.com/embed/{videoid}" ' + + 'frameborder="0" allowfullscreen></iframe>' +) base_youtube_url = 'https://www.youtube.com/watch?v=' @@ -51,10 +50,12 @@ def request(query, params): else: params['url'] = next_page_url params['method'] = 'POST' - params['data'] = dumps({ - 'context': {"client": {"clientName": "WEB", "clientVersion": "2.20210310.12.01"}}, - 'continuation': params['engine_data']['next_page_token'], - }) + params['data'] = dumps( + { + 'context': {"client": {"clientName": "WEB", "clientVersion": "2.20210310.12.01"}}, + 'continuation': params['engine_data']['next_page_token'], + } + ) params['headers']['Content-Type'] = 'application/json' params['headers']['Cookie'] = "CONSENT=YES+cb.%s-17-p0.en+F+941;" % datetime.now().strftime("%Y%m%d") @@ -71,34 +72,42 @@ def response(resp): def parse_next_page_response(response_text): results = [] result_json = loads(response_text) - for section in (result_json['onResponseReceivedCommands'][0] - .get('appendContinuationItemsAction')['continuationItems'][0] - .get('itemSectionRenderer')['contents']): + for section in ( + result_json['onResponseReceivedCommands'][0] + .get('appendContinuationItemsAction')['continuationItems'][0] + .get('itemSectionRenderer')['contents'] + ): if 'videoRenderer' not in section: continue section = section['videoRenderer'] content = "-" if 'descriptionSnippet' in section: content = ' '.join(x['text'] for x in section['descriptionSnippet']['runs']) - results.append({ - 'url': base_youtube_url + section['videoId'], - 'title': ' '.join(x['text'] for x in section['title']['runs']), - 'content': content, - 'author': section['ownerText']['runs'][0]['text'], - 'length': section['lengthText']['simpleText'], - 'template': 'videos.html', - 'embedded': embedded_url.format(videoid=section['videoId']), - 'thumbnail': section['thumbnail']['thumbnails'][-1]['url'], - }) + results.append( + { + 'url': base_youtube_url + section['videoId'], + 'title': ' '.join(x['text'] for x in section['title']['runs']), + 'content': content, + 'author': section['ownerText']['runs'][0]['text'], + 'length': section['lengthText']['simpleText'], + 'template': 'videos.html', + 'embedded': embedded_url.format(videoid=section['videoId']), + 'thumbnail': section['thumbnail']['thumbnails'][-1]['url'], + } + ) try: - token = result_json['onResponseReceivedCommands'][0]\ - .get('appendContinuationItemsAction')['continuationItems'][1]\ - .get('continuationItemRenderer')['continuationEndpoint']\ + token = ( + result_json['onResponseReceivedCommands'][0] + .get('appendContinuationItemsAction')['continuationItems'][1] + .get('continuationItemRenderer')['continuationEndpoint'] .get('continuationCommand')['token'] - results.append({ - "engine_data": token, - "key": "next_page_token", - }) + ) + results.append( + { + "engine_data": token, + "key": "next_page_token", + } + ) except: pass @@ -107,26 +116,32 @@ def parse_next_page_response(response_text): def parse_first_page_response(response_text): results = [] - results_data = response_text[response_text.find('ytInitialData'):] - results_data = results_data[results_data.find('{'):results_data.find(';</script>')] + results_data = response_text[response_text.find('ytInitialData') :] + results_data = results_data[results_data.find('{') : results_data.find(';</script>')] results_json = loads(results_data) if results_data else {} - sections = results_json.get('contents', {})\ - .get('twoColumnSearchResultsRenderer', {})\ - .get('primaryContents', {})\ - .get('sectionListRenderer', {})\ - .get('contents', []) + sections = ( + results_json.get('contents', {}) + .get('twoColumnSearchResultsRenderer', {}) + .get('primaryContents', {}) + .get('sectionListRenderer', {}) + .get('contents', []) + ) for section in sections: if "continuationItemRenderer" in section: - next_page_token = section["continuationItemRenderer"]\ - .get("continuationEndpoint", {})\ - .get("continuationCommand", {})\ + next_page_token = ( + section["continuationItemRenderer"] + .get("continuationEndpoint", {}) + .get("continuationCommand", {}) .get("token", "") + ) if next_page_token: - results.append({ - "engine_data": next_page_token, - "key": "next_page_token", - }) + results.append( + { + "engine_data": next_page_token, + "key": "next_page_token", + } + ) for video_container in section.get('itemSectionRenderer', {}).get('contents', []): video = video_container.get('videoRenderer', {}) videoid = video.get('videoId') @@ -140,14 +155,18 @@ def parse_first_page_response(response_text): length = get_text_from_json(video.get('lengthText', {})) # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'author': author, - 'length': length, - 'template': 'videos.html', - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'author': author, + 'length': length, + 'template': 'videos.html', + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/zlibrary.py b/searx/engines/zlibrary.py index 180e9e355..81d93ac84 100644 --- a/searx/engines/zlibrary.py +++ b/searx/engines/zlibrary.py @@ -31,25 +31,23 @@ categories = ['files'] paging = True base_url = '' + def init(engine_settings=None): - global base_url # pylint: disable=global-statement + global base_url # pylint: disable=global-statement if "base_url" not in engine_settings: resp = http_get('https://z-lib.org', timeout=5.0) if resp.ok: dom = html.fromstring(resp.text) - base_url = "https:" + extract_text(eval_xpath(dom, - './/a[contains(@class, "domain-check-link") and @data-mode="books"]/@href' - )) + base_url = "https:" + extract_text( + eval_xpath(dom, './/a[contains(@class, "domain-check-link") and @data-mode="books"]/@href') + ) logger.debug("using base_url: %s" % base_url) def request(query, params): search_url = base_url + '/s/{search_query}/?page={pageno}' - params['url'] = search_url.format( - search_query=quote(query), - pageno=params['pageno'] - ) + params['url'] = search_url.format(search_query=quote(query), pageno=params['pageno']) return params @@ -60,36 +58,34 @@ def response(resp): for item in dom.xpath('//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'): result = {} - result["url"] = base_url + \ - item.xpath('(.//a[starts-with(@href, "/book/")])[1]/@href')[0] + result["url"] = base_url + item.xpath('(.//a[starts-with(@href, "/book/")])[1]/@href')[0] result["title"] = extract_text(eval_xpath(item, './/*[@itemprop="name"]')) - year = extract_text(eval_xpath( - item, './/div[contains(@class, "property_year")]//div[contains(@class, "property_value")]')) + year = extract_text( + eval_xpath(item, './/div[contains(@class, "property_year")]//div[contains(@class, "property_value")]') + ) if year: year = '(%s) ' % year - result["content"] = "{year}{authors}. {publisher}. Language: {language}. {file_type}. \ + result[ + "content" + ] = "{year}{authors}. {publisher}. Language: {language}. {file_type}. \ Book rating: {book_rating}, book quality: {book_quality}".format( - year = year, - authors = extract_text(eval_xpath(item, './/div[@class="authors"]')), - publisher = extract_text(eval_xpath(item, './/div[@title="Publisher"]')), - file_type = extract_text( - eval_xpath( - item, - './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]')), - language = extract_text( - eval_xpath( - item, - './/div[contains(@class, "property_language")]//div[contains(@class, "property_value")]')), - book_rating = extract_text( - eval_xpath( - item, './/span[contains(@class, "book-rating-interest-score")]')), - book_quality = extract_text( - eval_xpath( - item, './/span[contains(@class, "book-rating-quality-score")]')), - ) + year=year, + authors=extract_text(eval_xpath(item, './/div[@class="authors"]')), + publisher=extract_text(eval_xpath(item, './/div[@title="Publisher"]')), + file_type=extract_text( + eval_xpath(item, './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]') + ), + language=extract_text( + eval_xpath( + item, './/div[contains(@class, "property_language")]//div[contains(@class, "property_value")]' + ) + ), + book_rating=extract_text(eval_xpath(item, './/span[contains(@class, "book-rating-interest-score")]')), + book_quality=extract_text(eval_xpath(item, './/span[contains(@class, "book-rating-quality-score")]')), + ) result["img_src"] = extract_text(eval_xpath(item, './/img[contains(@class, "cover")]/@data-src')) diff --git a/searx/exceptions.py b/searx/exceptions.py index 67a282da2..1b106d40c 100644 --- a/searx/exceptions.py +++ b/searx/exceptions.py @@ -21,7 +21,6 @@ class SearxException(Exception): class SearxParameterException(SearxException): - def __init__(self, name, value): if value == '' or value is None: message = 'Empty ' + name + ' parameter' diff --git a/searx/external_urls.py b/searx/external_urls.py index 11c6a32d9..2657dba4b 100644 --- a/searx/external_urls.py +++ b/searx/external_urls.py @@ -8,7 +8,7 @@ IMDB_PREFIX_TO_URL_ID = { 'mn': 'imdb_name', 'ch': 'imdb_character', 'co': 'imdb_company', - 'ev': 'imdb_event' + 'ev': 'imdb_event', } HTTP_WIKIMEDIA_IMAGE = 'http://commons.wikimedia.org/wiki/Special:FilePath/' @@ -20,9 +20,9 @@ def get_imdb_url_id(imdb_item_id): def get_wikimedia_image_id(url): if url.startswith(HTTP_WIKIMEDIA_IMAGE): - return url[len(HTTP_WIKIMEDIA_IMAGE):] + return url[len(HTTP_WIKIMEDIA_IMAGE) :] if url.startswith('File:'): - return url[len('File:'):] + return url[len('File:') :] return url @@ -52,10 +52,12 @@ def get_external_url(url_id, item_id, alternative="default"): def get_earth_coordinates_url(latitude, longitude, osm_zoom, alternative='default'): - url = get_external_url('map', None, alternative)\ - .replace('${latitude}', str(latitude))\ - .replace('${longitude}', str(longitude))\ + url = ( + get_external_url('map', None, alternative) + .replace('${latitude}', str(latitude)) + .replace('${longitude}', str(longitude)) .replace('${zoom}', str(osm_zoom)) + ) return url diff --git a/searx/flaskfix.py b/searx/flaskfix.py index 47aabfa53..326c4b981 100644 --- a/searx/flaskfix.py +++ b/searx/flaskfix.py @@ -29,6 +29,7 @@ class ReverseProxyPathFix: :param wsgi_app: the WSGI application ''' + # pylint: disable=too-few-public-methods def __init__(self, wsgi_app): @@ -58,7 +59,7 @@ class ReverseProxyPathFix: environ['SCRIPT_NAME'] = script_name path_info = environ['PATH_INFO'] if path_info.startswith(script_name): - environ['PATH_INFO'] = path_info[len(script_name):] + environ['PATH_INFO'] = path_info[len(script_name) :] scheme = self.scheme or environ.get('HTTP_X_SCHEME', '') if scheme: diff --git a/searx/languages.py b/searx/languages.py index c44eb0b9e..1f157e517 100644 --- a/searx/languages.py +++ b/searx/languages.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- # list of language codes # this file is generated automatically by utils/fetch_languages.py -language_codes = \ -( ('af-ZA', 'Afrikaans', '', 'Afrikaans'), +language_codes = ( + ('af-ZA', 'Afrikaans', '', 'Afrikaans'), ('ar-EG', 'العربية', '', 'Arabic'), ('be-BY', 'Беларуская', '', 'Belarusian'), ('bg-BG', 'Български', '', 'Bulgarian'), @@ -74,4 +74,5 @@ language_codes = \ ('zh', '中文', '', 'Chinese'), ('zh-CN', '中文', '中国', 'Chinese'), ('zh-HK', '中文', '中國香港特別行政區', 'Chinese'), - ('zh-TW', '中文', '台灣', 'Chinese'))
\ No newline at end of file + ('zh-TW', '中文', '台灣', 'Chinese'), +) diff --git a/searx/locales.py b/searx/locales.py index b791f35f3..62f64204f 100644 --- a/searx/locales.py +++ b/searx/locales.py @@ -51,11 +51,10 @@ def _get_locale_name(locale, locale_name): def initialize_locales(directory): - """Initialize global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`. - """ + """Initialize global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.""" for dirname in sorted(os.listdir(directory)): # Based on https://flask-babel.tkte.ch/_modules/flask_babel.html#Babel.list_translations - if not os.path.isdir( os.path.join(directory, dirname, 'LC_MESSAGES') ): + if not os.path.isdir(os.path.join(directory, dirname, 'LC_MESSAGES')): continue locale_name = dirname.replace('_', '-') info = LOCALE_NAMES.get(locale_name) diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py index 995f182af..37f0ba121 100644 --- a/searx/metrics/__init__.py +++ b/searx/metrics/__init__.py @@ -12,11 +12,19 @@ from searx.engines import engines from .models import HistogramStorage, CounterStorage from .error_recorder import count_error, count_exception, errors_per_engines -__all__ = ["initialize", - "get_engines_stats", "get_engine_errors", - "histogram", "histogram_observe", "histogram_observe_time", - "counter", "counter_inc", "counter_add", - "count_error", "count_exception"] +__all__ = [ + "initialize", + "get_engines_stats", + "get_engine_errors", + "histogram", + "histogram_observe", + "histogram_observe_time", + "counter", + "counter_inc", + "counter_add", + "count_error", + "count_exception", +] ENDPOINTS = {'search'} @@ -72,7 +80,7 @@ def initialize(engine_names=None): # max_timeout = max of all the engine.timeout max_timeout = 2 - for engine_name in (engine_names or engines): + for engine_name in engine_names or engines: if engine_name in engines: max_timeout = max(max_timeout, engines[engine_name].timeout) @@ -81,7 +89,7 @@ def initialize(engine_names=None): histogram_size = int(1.5 * max_timeout / histogram_width) # engines - for engine_name in (engine_names or engines): + for engine_name in engine_names or engines: # search count counter_storage.configure('engine', engine_name, 'search', 'count', 'sent') counter_storage.configure('engine', engine_name, 'search', 'count', 'successful') @@ -112,17 +120,19 @@ def get_engine_errors(engline_name_list): r = [] for context, count in sorted_context_count_list: percentage = round(20 * count / sent_search_count) * 5 - r.append({ - 'filename': context.filename, - 'function': context.function, - 'line_no': context.line_no, - 'code': context.code, - 'exception_classname': context.exception_classname, - 'log_message': context.log_message, - 'log_parameters': context.log_parameters, - 'secondary': context.secondary, - 'percentage': percentage, - }) + r.append( + { + 'filename': context.filename, + 'function': context.function, + 'line_no': context.line_no, + 'code': context.code, + 'exception_classname': context.exception_classname, + 'log_message': context.log_message, + 'log_parameters': context.log_parameters, + 'secondary': context.secondary, + 'percentage': percentage, + } + ) result[engine_name] = sorted(r, reverse=True, key=lambda d: d['percentage']) return result diff --git a/searx/metrics/error_recorder.py b/searx/metrics/error_recorder.py index 37594e5e8..76d27f64f 100644 --- a/searx/metrics/error_recorder.py +++ b/searx/metrics/error_recorder.py @@ -3,8 +3,12 @@ import inspect from json import JSONDecodeError from urllib.parse import urlparse from httpx import HTTPError, HTTPStatusError -from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException, - SearxEngineAccessDeniedException) +from searx.exceptions import ( + SearxXPathSyntaxException, + SearxEngineXPathException, + SearxEngineAPIException, + SearxEngineAccessDeniedException, +) from searx import searx_parent_dir from searx.engines import engines @@ -14,8 +18,16 @@ errors_per_engines = {} class ErrorContext: - __slots__ = ('filename', 'function', 'line_no', 'code', 'exception_classname', - 'log_message', 'log_parameters', 'secondary') + __slots__ = ( + 'filename', + 'function', + 'line_no', + 'code', + 'exception_classname', + 'log_message', + 'log_parameters', + 'secondary', + ) def __init__(self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary): self.filename = filename @@ -30,19 +42,41 @@ class ErrorContext: def __eq__(self, o) -> bool: if not isinstance(o, ErrorContext): return False - return self.filename == o.filename and self.function == o.function and self.line_no == o.line_no\ - and self.code == o.code and self.exception_classname == o.exception_classname\ - and self.log_message == o.log_message and self.log_parameters == o.log_parameters \ + return ( + self.filename == o.filename + and self.function == o.function + and self.line_no == o.line_no + and self.code == o.code + and self.exception_classname == o.exception_classname + and self.log_message == o.log_message + and self.log_parameters == o.log_parameters and self.secondary == o.secondary + ) def __hash__(self): - return hash((self.filename, self.function, self.line_no, self.code, self.exception_classname, self.log_message, - self.log_parameters, self.secondary)) + return hash( + ( + self.filename, + self.function, + self.line_no, + self.code, + self.exception_classname, + self.log_message, + self.log_parameters, + self.secondary, + ) + ) def __repr__(self): - return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}) {!r}".\ - format(self.filename, self.line_no, self.code, self.exception_classname, self.log_message, - self.log_parameters, self.secondary) + return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}) {!r}".format( + self.filename, + self.line_no, + self.code, + self.exception_classname, + self.log_message, + self.log_parameters, + self.secondary, + ) def add_error_context(engine_name: str, error_context: ErrorContext) -> None: @@ -68,8 +102,9 @@ def get_hostname(exc: HTTPError) -> typing.Optional[None]: return urlparse(url).netloc -def get_request_exception_messages(exc: HTTPError)\ - -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]: +def get_request_exception_messages( + exc: HTTPError, +) -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]: url = None status_code = None reason = None @@ -90,11 +125,11 @@ def get_request_exception_messages(exc: HTTPError)\ def get_messages(exc, filename) -> typing.Tuple: if isinstance(exc, JSONDecodeError): - return (exc.msg, ) + return (exc.msg,) if isinstance(exc, TypeError): - return (str(exc), ) + return (str(exc),) if isinstance(exc, ValueError) and 'lxml' in filename: - return (str(exc), ) + return (str(exc),) if isinstance(exc, HTTPError): return get_request_exception_messages(exc) if isinstance(exc, SearxXPathSyntaxException): @@ -102,9 +137,9 @@ def get_messages(exc, filename) -> typing.Tuple: if isinstance(exc, SearxEngineXPathException): return (exc.xpath_str, exc.message) if isinstance(exc, SearxEngineAPIException): - return (str(exc.args[0]), ) + return (str(exc.args[0]),) if isinstance(exc, SearxEngineAccessDeniedException): - return (exc.message, ) + return (exc.message,) return () @@ -121,7 +156,7 @@ def get_error_context(framerecords, exception_classname, log_message, log_parame searx_frame = get_trace(framerecords) filename = searx_frame.filename if filename.startswith(searx_parent_dir): - filename = filename[len(searx_parent_dir) + 1:] + filename = filename[len(searx_parent_dir) + 1 :] function = searx_frame.function line_no = searx_frame.lineno code = searx_frame.code_context[0].strip() @@ -140,8 +175,9 @@ def count_exception(engine_name: str, exc: Exception, secondary: bool = False) - del framerecords -def count_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, - secondary: bool = False) -> None: +def count_error( + engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, secondary: bool = False +) -> None: framerecords = list(reversed(inspect.stack()[1:])) try: error_context = get_error_context(framerecords, None, log_message, log_parameters or (), secondary) diff --git a/searx/metrics/models.py b/searx/metrics/models.py index 8936a51e3..d42569b7f 100644 --- a/searx/metrics/models.py +++ b/searx/metrics/models.py @@ -58,7 +58,7 @@ class Histogram: @property def quartile_percentage(self): - ''' Quartile in percentage ''' + '''Quartile in percentage''' with self._lock: if self._count > 0: return [int(q * 100 / self._count) for q in self._quartiles] diff --git a/searx/network/__init__.py b/searx/network/__init__.py index 82959e355..7d02a0014 100644 --- a/searx/network/__init__.py +++ b/searx/network/__init__.py @@ -35,7 +35,7 @@ except ImportError: self._count.release() def get(self): - if not self._count.acquire(True): #pylint: disable=consider-using-with + if not self._count.acquire(True): # pylint: disable=consider-using-with raise Empty return self._queue.popleft() @@ -43,6 +43,7 @@ except ImportError: THREADLOCAL = threading.local() """Thread-local data is data for thread specific values.""" + def reset_time_for_thread(): THREADLOCAL.total_time = 0 @@ -187,10 +188,7 @@ async def stream_chunk_to_queue(network, queue, method, url, **kwargs): def _stream_generator(method, url, **kwargs): queue = SimpleQueue() network = get_context_network() - future = asyncio.run_coroutine_threadsafe( - stream_chunk_to_queue(network, queue, method, url, **kwargs), - get_loop() - ) + future = asyncio.run_coroutine_threadsafe(stream_chunk_to_queue(network, queue, method, url, **kwargs), get_loop()) # yield chunks obj_or_exception = queue.get() @@ -203,10 +201,7 @@ def _stream_generator(method, url, **kwargs): def _close_response_method(self): - asyncio.run_coroutine_threadsafe( - self.aclose(), - get_loop() - ) + asyncio.run_coroutine_threadsafe(self.aclose(), get_loop()) # reach the end of _self.generator ( _stream_generator ) to an avoid memory leak. # it makes sure that : # * the httpx response is closed (see the stream_chunk_to_queue function) diff --git a/searx/network/client.py b/searx/network/client.py index a6cec352d..cd1e41460 100644 --- a/searx/network/client.py +++ b/searx/network/client.py @@ -10,12 +10,7 @@ import anyio import httpcore import httpx from httpx_socks import AsyncProxyTransport -from python_socks import ( - parse_proxy_url, - ProxyConnectionError, - ProxyTimeoutError, - ProxyError -) +from python_socks import parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, ProxyError from searx import logger @@ -41,9 +36,7 @@ TRANSPORT_KWARGS = { # pylint: disable=protected-access -async def close_connections_for_url( - connection_pool: httpcore.AsyncConnectionPool, url: httpcore._utils.URL -): +async def close_connections_for_url(connection_pool: httpcore.AsyncConnectionPool, url: httpcore._utils.URL): origin = httpcore._utils.url_to_origin(url) logger.debug('Drop connections for %r', origin) @@ -54,6 +47,8 @@ async def close_connections_for_url( await connection.aclose() except httpx.NetworkError as e: logger.warning('Error closing an existing connection', exc_info=e) + + # pylint: enable=protected-access @@ -67,9 +62,7 @@ def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport): """Block HTTP request""" - async def handle_async_request( - self, method, url, headers=None, stream=None, extensions=None - ): + async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): raise httpx.UnsupportedProtocol('HTTP protocol is disabled') @@ -83,9 +76,7 @@ class AsyncProxyTransportFixed(AsyncProxyTransport): Note: AsyncProxyTransport inherit from AsyncConnectionPool """ - async def handle_async_request( - self, method, url, headers=None, stream=None, extensions=None - ): + async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): retry = 2 while retry > 0: retry -= 1 @@ -116,9 +107,7 @@ class AsyncProxyTransportFixed(AsyncProxyTransport): class AsyncHTTPTransportFixed(httpx.AsyncHTTPTransport): """Fix httpx.AsyncHTTPTransport""" - async def handle_async_request( - self, method, url, headers=None, stream=None, extensions=None - ): + async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): retry = 2 while retry > 0: retry -= 1 @@ -152,14 +141,17 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit rdns = False socks5h = 'socks5h://' if proxy_url.startswith(socks5h): - proxy_url = 'socks5://' + proxy_url[len(socks5h):] + proxy_url = 'socks5://' + proxy_url[len(socks5h) :] rdns = True proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url) verify = get_sslcontexts(proxy_url, None, True, False, http2) if verify is True else verify return AsyncProxyTransportFixed( - proxy_type=proxy_type, proxy_host=proxy_host, proxy_port=proxy_port, - username=proxy_username, password=proxy_password, + proxy_type=proxy_type, + proxy_host=proxy_host, + proxy_port=proxy_port, + username=proxy_username, + password=proxy_password, rdns=rdns, loop=get_loop(), verify=verify, @@ -169,7 +161,7 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit max_keepalive_connections=limit.max_keepalive_connections, keepalive_expiry=limit.keepalive_expiry, retries=retries, - **TRANSPORT_KWARGS + **TRANSPORT_KWARGS, ) @@ -183,36 +175,40 @@ def get_transport(verify, http2, local_address, proxy_url, limit, retries): proxy=httpx._config.Proxy(proxy_url) if proxy_url else None, limits=limit, retries=retries, - **TRANSPORT_KWARGS + **TRANSPORT_KWARGS, ) def new_client( - # pylint: disable=too-many-arguments - enable_http, verify, enable_http2, - max_connections, max_keepalive_connections, keepalive_expiry, - proxies, local_address, retries, max_redirects, hook_log_response ): + # pylint: disable=too-many-arguments + enable_http, + verify, + enable_http2, + max_connections, + max_keepalive_connections, + keepalive_expiry, + proxies, + local_address, + retries, + max_redirects, + hook_log_response, +): limit = httpx.Limits( max_connections=max_connections, max_keepalive_connections=max_keepalive_connections, - keepalive_expiry=keepalive_expiry + keepalive_expiry=keepalive_expiry, ) # See https://www.python-httpx.org/advanced/#routing mounts = {} for pattern, proxy_url in proxies.items(): if not enable_http and pattern.startswith('http://'): continue - if (proxy_url.startswith('socks4://') - or proxy_url.startswith('socks5://') - or proxy_url.startswith('socks5h://') - ): + if proxy_url.startswith('socks4://') or proxy_url.startswith('socks5://') or proxy_url.startswith('socks5h://'): mounts[pattern] = get_transport_for_socks_proxy( verify, enable_http2, local_address, proxy_url, limit, retries ) else: - mounts[pattern] = get_transport( - verify, enable_http2, local_address, proxy_url, limit, retries - ) + mounts[pattern] = get_transport(verify, enable_http2, local_address, proxy_url, limit, retries) if not enable_http: mounts['http://'] = AsyncHTTPTransportNoHttp() @@ -221,7 +217,7 @@ def new_client( event_hooks = None if hook_log_response: - event_hooks = {'response': [ hook_log_response ]} + event_hooks = {'response': [hook_log_response]} return httpx.AsyncClient( transport=transport, diff --git a/searx/network/network.py b/searx/network/network.py index 613b9ff27..9e14e14bd 100644 --- a/searx/network/network.py +++ b/searx/network/network.py @@ -31,39 +31,49 @@ PROXY_PATTERN_MAPPING = { 'socks5h:': 'socks5h://', } -ADDRESS_MAPPING = { - 'ipv4': '0.0.0.0', - 'ipv6': '::' -} +ADDRESS_MAPPING = {'ipv4': '0.0.0.0', 'ipv6': '::'} class Network: __slots__ = ( - 'enable_http', 'verify', 'enable_http2', - 'max_connections', 'max_keepalive_connections', 'keepalive_expiry', - 'local_addresses', 'proxies', 'using_tor_proxy', 'max_redirects', 'retries', 'retry_on_http_error', - '_local_addresses_cycle', '_proxies_cycle', '_clients', '_logger' + 'enable_http', + 'verify', + 'enable_http2', + 'max_connections', + 'max_keepalive_connections', + 'keepalive_expiry', + 'local_addresses', + 'proxies', + 'using_tor_proxy', + 'max_redirects', + 'retries', + 'retry_on_http_error', + '_local_addresses_cycle', + '_proxies_cycle', + '_clients', + '_logger', ) _TOR_CHECK_RESULT = {} def __init__( - # pylint: disable=too-many-arguments - self, - enable_http=True, - verify=True, - enable_http2=False, - max_connections=None, - max_keepalive_connections=None, - keepalive_expiry=None, - proxies=None, - using_tor_proxy=False, - local_addresses=None, - retries=0, - retry_on_http_error=None, - max_redirects=30, - logger_name=None): + # pylint: disable=too-many-arguments + self, + enable_http=True, + verify=True, + enable_http2=False, + max_connections=None, + max_keepalive_connections=None, + keepalive_expiry=None, + proxies=None, + using_tor_proxy=False, + local_addresses=None, + retries=0, + retry_on_http_error=None, + max_redirects=30, + logger_name=None, + ): self.enable_http = enable_http self.verify = verify @@ -144,9 +154,7 @@ class Network: response_line = f"{response.http_version} {status}" content_type = response.headers.get("Content-Type") content_type = f' ({content_type})' if content_type else '' - self._logger.debug( - f'HTTP Request: {request.method} {request.url} "{response_line}"{content_type}' - ) + self._logger.debug(f'HTTP Request: {request.method} {request.url} "{response_line}"{content_type}') @staticmethod async def check_tor_proxy(client: httpx.AsyncClient, proxies) -> bool: @@ -187,7 +195,7 @@ class Network: local_address, 0, max_redirects, - hook_log_response + hook_log_response, ) if self.using_tor_proxy and not await self.check_tor_proxy(client, proxies): await client.aclose() @@ -201,6 +209,7 @@ class Network: await client.aclose() except httpx.HTTPError: pass + await asyncio.gather(*[close_client(client) for client in self._clients.values()], return_exceptions=False) @staticmethod @@ -214,7 +223,8 @@ class Network: def is_valid_respones(self, response): # pylint: disable=too-many-boolean-expressions - if ((self.retry_on_http_error is True and 400 <= response.status_code <= 599) + if ( + (self.retry_on_http_error is True and 400 <= response.status_code <= 599) or (isinstance(self.retry_on_http_error, list) and response.status_code in self.retry_on_http_error) or (isinstance(self.retry_on_http_error, int) and response.status_code == self.retry_on_http_error) ): @@ -269,6 +279,7 @@ def check_network_configuration(): network._logger.exception('Error') # pylint: disable=protected-access exception_count += 1 return exception_count + future = asyncio.run_coroutine_threadsafe(check(), get_loop()) exception_count = future.result() if exception_count > 0: @@ -279,6 +290,7 @@ def initialize(settings_engines=None, settings_outgoing=None): # pylint: disable=import-outside-toplevel) from searx.engines import engines from searx import settings + # pylint: enable=import-outside-toplevel) settings_engines = settings_engines or settings['engines'] diff --git a/searx/network/raise_for_httperror.py b/searx/network/raise_for_httperror.py index a2f554614..414074977 100644 --- a/searx/network/raise_for_httperror.py +++ b/searx/network/raise_for_httperror.py @@ -10,13 +10,14 @@ from searx.exceptions import ( SearxEngineAccessDeniedException, ) + def is_cloudflare_challenge(resp): if resp.status_code in [429, 503]: - if (('__cf_chl_jschl_tk__=' in resp.text) - or ('/cdn-cgi/challenge-platform/' in resp.text - and 'orchestrate/jsch/v1' in resp.text - and 'window._cf_chl_enter(' in resp.text - )): + if ('__cf_chl_jschl_tk__=' in resp.text) or ( + '/cdn-cgi/challenge-platform/' in resp.text + and 'orchestrate/jsch/v1' in resp.text + and 'window._cf_chl_enter(' in resp.text + ): return True if resp.status_code == 403 and '__cf_chl_captcha_tk__=' in resp.text: return True @@ -32,21 +33,14 @@ def raise_for_cloudflare_captcha(resp): if is_cloudflare_challenge(resp): # https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha- # suspend for 2 weeks - raise SearxEngineCaptchaException( - message='Cloudflare CAPTCHA', - suspended_time=3600 * 24 * 15 - ) + raise SearxEngineCaptchaException(message='Cloudflare CAPTCHA', suspended_time=3600 * 24 * 15) if is_cloudflare_firewall(resp): - raise SearxEngineAccessDeniedException( - message='Cloudflare Firewall', suspended_time=3600 * 24 - ) + raise SearxEngineAccessDeniedException(message='Cloudflare Firewall', suspended_time=3600 * 24) def raise_for_recaptcha(resp): - if (resp.status_code == 503 - and '"https://www.google.com/recaptcha/' in resp.text - ): + if resp.status_code == 503 and '"https://www.google.com/recaptcha/' in resp.text: raise SearxEngineCaptchaException(message='ReCAPTCHA', suspended_time=3600 * 24 * 7) @@ -71,8 +65,7 @@ def raise_for_httperror(resp): raise_for_captcha(resp) if resp.status_code in (402, 403): raise SearxEngineAccessDeniedException( - message='HTTP error ' + str(resp.status_code), - suspended_time=3600 * 24 + message='HTTP error ' + str(resp.status_code), suspended_time=3600 * 24 ) if resp.status_code == 429: raise SearxEngineTooManyRequestsException() diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index 91636fe33..7815c2099 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -17,15 +17,19 @@ from searx import logger, settings logger = logger.getChild("plugins") required_attrs = ( + # fmt: off ("name", str), ("description", str), ("default_on", bool) + # fmt: on ) optional_attrs = ( + # fmt: off ("js_dependencies", tuple), ("css_dependencies", tuple), ("preference_section", str), + # fmt: on ) @@ -47,11 +51,7 @@ def sync_resource(base_path, resource_path, name, target_dir, plugin_dir): dep_stat = stat(dep_path) utime(resource_path, ns=(dep_stat.st_atime_ns, dep_stat.st_mtime_ns)) except IOError: - logger.critical( - "failed to copy plugin resource {0} for plugin {1}".format( - file_name, name - ) - ) + logger.critical("failed to copy plugin resource {0} for plugin {1}".format(file_name, name)) sys.exit(3) # returning with the web path of the resource @@ -62,36 +62,28 @@ def prepare_package_resources(plugin, plugin_module_name): plugin_base_path = dirname(abspath(plugin.__file__)) plugin_dir = plugin_module_name - target_dir = join( - settings["ui"]["static_path"], "plugins/external_plugins", plugin_dir - ) + target_dir = join(settings["ui"]["static_path"], "plugins/external_plugins", plugin_dir) try: makedirs(target_dir, exist_ok=True) except IOError: - logger.critical( - "failed to create resource directory {0} for plugin {1}".format( - target_dir, plugin_module_name - ) - ) + logger.critical("failed to create resource directory {0} for plugin {1}".format(target_dir, plugin_module_name)) sys.exit(3) resources = [] if hasattr(plugin, "js_dependencies"): resources.extend(map(basename, plugin.js_dependencies)) - plugin.js_dependencies = ([ - sync_resource( - plugin_base_path, x, plugin_module_name, target_dir, plugin_dir - ) for x in plugin.js_dependencies - ]) + plugin.js_dependencies = [ + sync_resource(plugin_base_path, x, plugin_module_name, target_dir, plugin_dir) + for x in plugin.js_dependencies + ] if hasattr(plugin, "css_dependencies"): resources.extend(map(basename, plugin.css_dependencies)) - plugin.css_dependencies = ([ - sync_resource( - plugin_base_path, x, plugin_module_name, target_dir, plugin_dir - ) for x in plugin.css_dependencies - ]) + plugin.css_dependencies = [ + sync_resource(plugin_base_path, x, plugin_module_name, target_dir, plugin_dir) + for x in plugin.css_dependencies + ] for f in listdir(target_dir): if basename(f) not in resources: @@ -100,9 +92,7 @@ def prepare_package_resources(plugin, plugin_module_name): remove(resource_path) except IOError: logger.critical( - "failed to remove unused resource file {0} for plugin {1}".format( - resource_path, plugin_module_name - ) + "failed to remove unused resource file {0} for plugin {1}".format(resource_path, plugin_module_name) ) sys.exit(3) @@ -133,9 +123,7 @@ def load_plugin(plugin_module_name, external): for plugin_attr, plugin_attr_type in required_attrs: if not hasattr(plugin, plugin_attr): - logger.critical( - '%s: missing attribute "%s", cannot load plugin', plugin, plugin_attr - ) + logger.critical('%s: missing attribute "%s", cannot load plugin', plugin, plugin_attr) sys.exit(3) attr = getattr(plugin, plugin_attr) if not isinstance(attr, plugin_attr_type): @@ -148,9 +136,7 @@ def load_plugin(plugin_module_name, external): sys.exit(3) for plugin_attr, plugin_attr_type in optional_attrs: - if not hasattr(plugin, plugin_attr) or not isinstance( - getattr(plugin, plugin_attr), plugin_attr_type - ): + if not hasattr(plugin, plugin_attr) or not isinstance(getattr(plugin, plugin_attr), plugin_attr_type): setattr(plugin, plugin_attr, plugin_attr_type()) if not hasattr(plugin, "preference_section"): @@ -160,19 +146,12 @@ def load_plugin(plugin_module_name, external): if plugin.preference_section == "query": for plugin_attr in ("query_keywords", "query_examples"): if not hasattr(plugin, plugin_attr): - logger.critical( - 'missing attribute "{0}", cannot load plugin: {1}'.format( - plugin_attr, plugin - ) - ) + logger.critical('missing attribute "{0}", cannot load plugin: {1}'.format(plugin_attr, plugin)) sys.exit(3) if settings.get("enabled_plugins"): # searx compatibility: plugin.name in settings['enabled_plugins'] - plugin.default_on = ( - plugin.name in settings["enabled_plugins"] - or plugin.id in settings["enabled_plugins"] - ) + plugin.default_on = plugin.name in settings["enabled_plugins"] or plugin.id in settings["enabled_plugins"] # copy ressources if this is an external plugin if external: @@ -189,9 +168,7 @@ def load_and_initialize_plugin(plugin_module_name, external, init_args): try: return plugin if plugin.init(*init_args) else None except Exception: # pylint: disable=broad-except - plugin.logger.exception( - "Exception while calling init, the plugin is disabled" - ) + plugin.logger.exception("Exception while calling init, the plugin is disabled") return None return plugin diff --git a/searx/plugins/oa_doi_rewrite.py b/searx/plugins/oa_doi_rewrite.py index 2dcc01e05..54d28bc9a 100644 --- a/searx/plugins/oa_doi_rewrite.py +++ b/searx/plugins/oa_doi_rewrite.py @@ -39,7 +39,7 @@ def on_result(request, search, result): if doi and len(doi) < 50: for suffix in ('/', '.pdf', '.xml', '/full', '/meta', '/abstract'): if doi.endswith(suffix): - doi = doi[:-len(suffix)] + doi = doi[: -len(suffix)] result['url'] = get_doi_resolver(request.preferences) + doi result['parsed_url'] = urlparse(result['url']) return True diff --git a/searx/plugins/search_on_category_select.py b/searx/plugins/search_on_category_select.py index 2a38cac78..48d537cee 100644 --- a/searx/plugins/search_on_category_select.py +++ b/searx/plugins/search_on_category_select.py @@ -15,9 +15,11 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2015 by Adam Tauber, <asciimoo@gmail.com> ''' from flask_babel import gettext + name = gettext('Search on category select') -description = gettext('Perform search immediately if a category selected. ' - 'Disable to select multiple categories. (JavaScript required)') +description = gettext( + 'Perform search immediately if a category selected. ' 'Disable to select multiple categories. (JavaScript required)' +) default_on = True preference_section = 'ui' diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py index 053899483..29bd5ca5c 100644 --- a/searx/plugins/self_info.py +++ b/searx/plugins/self_info.py @@ -16,6 +16,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. ''' from flask_babel import gettext import re + name = gettext('Self Informations') description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".') default_on = True diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index 98ddddbcd..42c58e524 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -19,10 +19,12 @@ from flask_babel import gettext import re from urllib.parse import urlunparse, parse_qsl, urlencode -regexes = {re.compile(r'utm_[^&]+'), - re.compile(r'(wkey|wemail)[^&]*'), - re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'), - re.compile(r'&$')} +regexes = { + re.compile(r'utm_[^&]+'), + re.compile(r'(wkey|wemail)[^&]*'), + re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'), + re.compile(r'&$'), +} name = gettext('Tracker URL remover') description = gettext('Remove trackers arguments from the returned URL') diff --git a/searx/plugins/vim_hotkeys.py b/searx/plugins/vim_hotkeys.py index 47b830c79..fb61d413b 100644 --- a/searx/plugins/vim_hotkeys.py +++ b/searx/plugins/vim_hotkeys.py @@ -1,9 +1,11 @@ from flask_babel import gettext name = gettext('Vim-like hotkeys') -description = gettext('Navigate search results with Vim-like hotkeys ' - '(JavaScript required). ' - 'Press "h" key on main or result page to get help.') +description = gettext( + 'Navigate search results with Vim-like hotkeys ' + '(JavaScript required). ' + 'Press "h" key on main or result page to get help.' +) default_on = False preference_section = 'ui' diff --git a/searx/preferences.py b/searx/preferences.py index 4d0cc5c0a..2a9b0af0c 100644 --- a/searx/preferences.py +++ b/searx/preferences.py @@ -21,14 +21,12 @@ DOI_RESOLVERS = list(settings['doi_resolvers']) class MissingArgumentException(Exception): - """Exption from ``cls._post_init`` when a argument is missed. - """ + """Exption from ``cls._post_init`` when a argument is missed.""" class ValidationException(Exception): - """Exption from ``cls._post_init`` when configuration value is invalid. - """ + """Exption from ``cls._post_init`` when configuration value is invalid.""" class Setting: @@ -84,8 +82,7 @@ class EnumStringSetting(Setting): raise ValidationException('Invalid value: "{0}"'.format(selection)) def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" self._validate_selection(data) self.value = data @@ -104,8 +101,7 @@ class MultipleChoiceSetting(EnumStringSetting): self._validate_selections(self.value) def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" if data == '': self.value = [] return @@ -124,25 +120,23 @@ class MultipleChoiceSetting(EnumStringSetting): self.value.append(choice) def save(self, name, resp): - """Save cookie ``name`` in the HTTP reponse obect - """ + """Save cookie ``name`` in the HTTP reponse obect""" resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE) class SetSetting(Setting): - """Setting of values of type ``set`` (comma separated string) """ + """Setting of values of type ``set`` (comma separated string)""" + def _post_init(self): if not hasattr(self, 'values'): self.values = set() def get_value(self): - """Returns a string with comma separated values. - """ + """Returns a string with comma separated values.""" return ','.join(self.values) def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" if data == '': self.values = set() # pylint: disable=attribute-defined-outside-init return @@ -159,8 +153,7 @@ class SetSetting(Setting): self.values = set(elements) # pylint: disable=attribute-defined-outside-init def save(self, name, resp): - """Save cookie ``name`` in the HTTP reponse obect - """ + """Save cookie ``name`` in the HTTP reponse obect""" resp.set_cookie(name, ','.join(self.values), max_age=COOKIE_MAX_AGE) @@ -172,8 +165,7 @@ class SearchLanguageSetting(EnumStringSetting): raise ValidationException('Invalid language code: "{0}"'.format(selection)) def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" if data not in self.choices and data != self.value: # pylint: disable=no-member # hack to give some backwards compatibility with old language cookies data = str(data).replace('_', '-') @@ -199,8 +191,7 @@ class MapSetting(Setting): raise ValidationException('Invalid default value') def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" # pylint: disable=no-member if data not in self.map: raise ValidationException('Invalid choice: {0}'.format(data)) @@ -208,14 +199,13 @@ class MapSetting(Setting): self.key = data # pylint: disable=attribute-defined-outside-init def save(self, name, resp): - """Save cookie ``name`` in the HTTP reponse obect - """ + """Save cookie ``name`` in the HTTP reponse obect""" if hasattr(self, 'key'): resp.set_cookie(name, self.key, max_age=COOKIE_MAX_AGE) class SwitchableSetting(Setting): - """ Base class for settings that can be turned on && off""" + """Base class for settings that can be turned on && off""" def _post_init(self): self.disabled = set() @@ -244,7 +234,7 @@ class SwitchableSetting(Setting): items = self.transform_form_items(items) self.disabled = set() # pylint: disable=attribute-defined-outside-init - self.enabled = set() # pylint: disable=attribute-defined-outside-init + self.enabled = set() # pylint: disable=attribute-defined-outside-init for choice in self.choices: # pylint: disable=no-member if choice['default_on']: if choice['id'] in items: @@ -254,8 +244,7 @@ class SwitchableSetting(Setting): self.enabled.add(choice['id']) def save(self, resp): # pylint: disable=arguments-differ - """Save cookie in the HTTP reponse obect - """ + """Save cookie in the HTTP reponse obect""" resp.set_cookie('disabled_{0}'.format(self.value), ','.join(self.disabled), max_age=COOKIE_MAX_AGE) resp.set_cookie('enabled_{0}'.format(self.value), ','.join(self.enabled), max_age=COOKIE_MAX_AGE) @@ -289,7 +278,7 @@ class EnginesSetting(SwitchableSetting): self.choices = transformed_choices def transform_form_items(self, items): - return [item[len('engine_'):].replace('_', ' ').replace(' ', '__') for item in items] + return [item[len('engine_') :].replace('_', ' ').replace(' ', '__') for item in items] def transform_values(self, values): if len(values) == 1 and next(iter(values)) == '': @@ -315,7 +304,7 @@ class PluginsSetting(SwitchableSetting): self.choices = transformed_choices def transform_form_items(self, items): - return [item[len('plugin_'):] for item in items] + return [item[len('plugin_') :] for item in items] class Preferences: @@ -325,6 +314,7 @@ class Preferences: super().__init__() self.key_value_settings = { + # fmt: off 'categories': MultipleChoiceSetting( ['general'], is_locked('categories'), @@ -422,6 +412,7 @@ class Preferences: 'False': False } ), + # fmt: on } self.engines = EnginesSetting('engines', choices=engines) @@ -466,19 +457,18 @@ class Preferences: continue self.key_value_settings[user_setting_name].parse(user_setting) elif user_setting_name == 'disabled_engines': - self.engines.parse_cookie((input_data.get('disabled_engines', ''), - input_data.get('enabled_engines', ''))) + self.engines.parse_cookie( + (input_data.get('disabled_engines', ''), input_data.get('enabled_engines', '')) + ) elif user_setting_name == 'disabled_plugins': - self.plugins.parse_cookie((input_data.get('disabled_plugins', ''), - input_data.get('enabled_plugins', ''))) + self.plugins.parse_cookie( + (input_data.get('disabled_plugins', ''), input_data.get('enabled_plugins', '')) + ) elif user_setting_name == 'tokens': self.tokens.parse(user_setting) - elif not any(user_setting_name.startswith(x) for x in [ - 'enabled_', - 'disabled_', - 'engine_', - 'category_', - 'plugin_']): + elif not any( + user_setting_name.startswith(x) for x in ['enabled_', 'disabled_', 'engine_', 'category_', 'plugin_'] + ): self.unknown_params[user_setting_name] = user_setting def parse_form(self, input_data): @@ -492,7 +482,7 @@ class Preferences: elif user_setting_name.startswith('engine_'): disabled_engines.append(user_setting_name) elif user_setting_name.startswith('category_'): - enabled_categories.append(user_setting_name[len('category_'):]) + enabled_categories.append(user_setting_name[len('category_') :]) elif user_setting_name.startswith('plugin_'): disabled_plugins.append(user_setting_name) elif user_setting_name == 'tokens': @@ -505,8 +495,7 @@ class Preferences: # cannot be used in case of engines or plugins def get_value(self, user_setting_name): - """Returns the value for ``user_setting_name`` - """ + """Returns the value for ``user_setting_name``""" ret_val = None if user_setting_name in self.key_value_settings: ret_val = self.key_value_settings[user_setting_name].get_value() @@ -515,8 +504,7 @@ class Preferences: return ret_val def save(self, resp): - """Save cookie in the HTTP reponse obect - """ + """Save cookie in the HTTP reponse obect""" for user_setting_name, user_setting in self.key_value_settings.items(): # pylint: disable=unnecessary-dict-index-lookup if self.key_value_settings[user_setting_name].locked: @@ -542,8 +530,7 @@ class Preferences: def is_locked(setting_name): - """Checks if a given setting name is locked by settings.yml - """ + """Checks if a given setting name is locked by settings.yml""" if 'preferences' not in settings: return False if 'lock' not in settings['preferences']: diff --git a/searx/query.py b/searx/query.py index 7f252e93f..b7f64fe82 100644 --- a/searx/query.py +++ b/searx/query.py @@ -40,7 +40,6 @@ class QueryPartParser(ABC): class TimeoutParser(QueryPartParser): - @staticmethod def check(raw_value): return raw_value[0] == '<' @@ -70,7 +69,6 @@ class TimeoutParser(QueryPartParser): class LanguageParser(QueryPartParser): - @staticmethod def check(raw_value): return raw_value[0] == ':' @@ -92,11 +90,9 @@ class LanguageParser(QueryPartParser): # if correct language-code is found # set it as new search-language - if (value == lang_id - or value == lang_name - or value == english_name - or value.replace('-', ' ') == country)\ - and value not in self.raw_text_query.languages: + if ( + value == lang_id or value == lang_name or value == english_name or value.replace('-', ' ') == country + ) and value not in self.raw_text_query.languages: found = True lang_parts = lang_id.split('-') if len(lang_parts) == 2: @@ -152,7 +148,6 @@ class LanguageParser(QueryPartParser): class ExternalBangParser(QueryPartParser): - @staticmethod def check(raw_value): return raw_value.startswith('!!') @@ -180,7 +175,6 @@ class ExternalBangParser(QueryPartParser): class BangParser(QueryPartParser): - @staticmethod def check(raw_value): return raw_value[0] == '!' or raw_value[0] == '?' @@ -208,9 +202,11 @@ class BangParser(QueryPartParser): if value in categories: # using all engines for that search, which # are declared under that categorie name - self.raw_text_query.enginerefs.extend(EngineRef(engine.name, value) - for engine in categories[value] - if (engine.name, value) not in self.raw_text_query.disabled_engines) + self.raw_text_query.enginerefs.extend( + EngineRef(engine.name, value) + for engine in categories[value] + if (engine.name, value) not in self.raw_text_query.disabled_engines + ) return True return False @@ -246,7 +242,7 @@ class RawTextQuery: TimeoutParser, # this force the timeout LanguageParser, # this force a language ExternalBangParser, # external bang (must be before BangParser) - BangParser # this force a engine or category + BangParser, # this force a engine or category ] def __init__(self, query, disabled_engines): @@ -281,8 +277,7 @@ class RawTextQuery: for i, query_part in enumerate(raw_query_parts): # part does only contain spaces, skip - if query_part.isspace()\ - or query_part == '': + if query_part.isspace() or query_part == '': continue # parse special commands @@ -324,14 +319,16 @@ class RawTextQuery: return self.getFullQuery() def __repr__(self): - return f"<{self.__class__.__name__} " \ - + f"query={self.query!r} " \ - + f"disabled_engines={self.disabled_engines!r}\n " \ - + f"languages={self.languages!r} " \ - + f"timeout_limit={self.timeout_limit!r} "\ - + f"external_bang={self.external_bang!r} " \ - + f"specific={self.specific!r} " \ - + f"enginerefs={self.enginerefs!r}\n " \ - + f"autocomplete_list={self.autocomplete_list!r}\n " \ - + f"query_parts={self.query_parts!r}\n " \ - + f"user_query_parts={self.user_query_parts!r} >" + return ( + f"<{self.__class__.__name__} " + + f"query={self.query!r} " + + f"disabled_engines={self.disabled_engines!r}\n " + + f"languages={self.languages!r} " + + f"timeout_limit={self.timeout_limit!r} " + + f"external_bang={self.external_bang!r} " + + f"specific={self.specific!r} " + + f"enginerefs={self.enginerefs!r}\n " + + f"autocomplete_list={self.autocomplete_list!r}\n " + + f"query_parts={self.query_parts!r}\n " + + f"user_query_parts={self.user_query_parts!r} >" + ) diff --git a/searx/results.py b/searx/results.py index 10a26aa3f..6ab751c56 100644 --- a/searx/results.py +++ b/searx/results.py @@ -47,12 +47,8 @@ def compare_urls(url_a, url_b): return False # remove / from the end of the url if required - path_a = url_a.path[:-1]\ - if url_a.path.endswith('/')\ - else url_a.path - path_b = url_b.path[:-1]\ - if url_b.path.endswith('/')\ - else url_b.path + path_a = url_a.path[:-1] if url_a.path.endswith('/') else url_a.path + path_b = url_b.path[:-1] if url_b.path.endswith('/') else url_b.path return unquote(path_a) == unquote(path_b) @@ -83,8 +79,9 @@ def merge_two_infoboxes(infobox1, infobox2): parsed_url2 = urlparse(url2.get('url', '')) entity_url2 = url2.get('entity') for url1 in urls1: - if (entity_url2 is not None and url1.get('entity') == entity_url2)\ - or compare_urls(urlparse(url1.get('url', '')), parsed_url2): + if (entity_url2 is not None and url1.get('entity') == entity_url2) or compare_urls( + urlparse(url1.get('url', '')), parsed_url2 + ): unique_url = False break if unique_url: @@ -115,8 +112,7 @@ def merge_two_infoboxes(infobox1, infobox2): attributeSet.add(entity) for attribute in infobox2.get('attributes', []): - if attribute.get('label') not in attributeSet\ - and attribute.get('entity') not in attributeSet: + if attribute.get('label') not in attributeSet and attribute.get('entity') not in attributeSet: attributes1.append(attribute) if 'content' in infobox2: @@ -144,9 +140,22 @@ def result_score(result): class ResultContainer: """docstring for ResultContainer""" - __slots__ = '_merged_results', 'infoboxes', 'suggestions', 'answers', 'corrections', '_number_of_results',\ - '_closed', 'paging', 'unresponsive_engines', 'timings', 'redirect_url', 'engine_data', 'on_result',\ - '_lock' + __slots__ = ( + '_merged_results', + 'infoboxes', + 'suggestions', + 'answers', + 'corrections', + '_number_of_results', + '_closed', + 'paging', + 'unresponsive_engines', + 'timings', + 'redirect_url', + 'engine_data', + 'on_result', + '_lock', + ) def __init__(self): super().__init__() @@ -208,8 +217,7 @@ class ResultContainer: if engine_name in engines: histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count') - if not self.paging and standard_result_count > 0 and engine_name in engines\ - and engines[engine_name].paging: + if not self.paging and standard_result_count > 0 and engine_name in engines and engines[engine_name].paging: self.paging = True def _merge_infobox(self, infobox): @@ -248,8 +256,7 @@ class ResultContainer: return True def _normalize_url_result(self, result): - """Return True if the result is valid - """ + """Return True if the result is valid""" result['parsed_url'] = urlparse(result['url']) # if the result has no scheme, use http as default @@ -280,8 +287,9 @@ class ResultContainer: for merged_result in self._merged_results: if 'parsed_url' not in merged_result: continue - if compare_urls(result['parsed_url'], merged_result['parsed_url'])\ - and result_template == merged_result.get('template'): + if compare_urls(result['parsed_url'], merged_result['parsed_url']) and result_template == merged_result.get( + 'template' + ): if result_template != 'images.html': # not an image, same template, same url : it's a duplicate return merged_result @@ -294,8 +302,7 @@ class ResultContainer: def __merge_duplicated_http_result(self, duplicated, result, position): # using content with more text - if result_content_len(result.get('content', '')) >\ - result_content_len(duplicated.get('content', '')): + if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')): duplicated['content'] = result['content'] # merge all result's parameters not found in duplicate @@ -341,18 +348,20 @@ class ResultContainer: res['category'] = engine.categories[0] if len(engine.categories) > 0 else '' # FIXME : handle more than one category per engine - category = res['category']\ - + ':' + res.get('template', '')\ - + ':' + ('img_src' if 'img_src' in res or 'thumbnail' in res else '') + category = ( + res['category'] + + ':' + + res.get('template', '') + + ':' + + ('img_src' if 'img_src' in res or 'thumbnail' in res else '') + ) - current = None if category not in categoryPositions\ - else categoryPositions[category] + current = None if category not in categoryPositions else categoryPositions[category] # group with previous results using the same category # if the group can accept more result and is not too far # from the current position - if current is not None and (current['count'] > 0)\ - and (len(gresults) - current['index'] < 20): + if current is not None and (current['count'] > 0) and (len(gresults) - current['index'] < 20): # group with the previous results using # the same category with this one index = current['index'] diff --git a/searx/search/__init__.py b/searx/search/__init__.py index 0a3c5b3ac..d66f3362d 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -123,8 +123,11 @@ class Search: # Max & user query: From user query except if above max actual_timeout = min(query_timeout, max_request_timeout) - logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})" - .format(actual_timeout, default_timeout, query_timeout, max_request_timeout)) + logger.debug( + "actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})".format( + actual_timeout, default_timeout, query_timeout, max_request_timeout + ) + ) return requests, actual_timeout diff --git a/searx/search/checker/__main__.py b/searx/search/checker/__main__.py index 4ce4ca76b..1311288f3 100644 --- a/searx/search/checker/__main__.py +++ b/searx/search/checker/__main__.py @@ -37,12 +37,12 @@ else: stdout = io.TextIOWrapper( # pylint: disable=consider-using-with open(sys.stdout.fileno(), 'wb', 0), - write_through=True + write_through=True, ) stderr = io.TextIOWrapper( # pylint: disable=consider-using-with - open(sys.stderr.fileno(), 'wb', 0) - , write_through=True + open(sys.stderr.fileno(), 'wb', 0), + write_through=True, ) @@ -91,12 +91,21 @@ def run(engine_name_list, verbose): # call by setup.py def main(): parser = argparse.ArgumentParser(description='Check searx engines.') - parser.add_argument('engine_name_list', metavar='engine name', type=str, nargs='*', - help='engines name or shortcut list. Empty for all engines.') - parser.add_argument('--verbose', '-v', - action='store_true', dest='verbose', - help='Display details about the test results', - default=False) + parser.add_argument( + 'engine_name_list', + metavar='engine name', + type=str, + nargs='*', + help='engines name or shortcut list. Empty for all engines.', + ) + parser.add_argument( + '--verbose', + '-v', + action='store_true', + dest='verbose', + help='Display details about the test results', + default=False, + ) args = parser.parse_args() run(args.engine_name_list, args.verbose) diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py index d9f11a71c..ff005dd91 100644 --- a/searx/search/checker/background.py +++ b/searx/search/checker/background.py @@ -23,10 +23,12 @@ running = threading.Lock() def _get_interval(every, error_msg): if isinstance(every, int): every = (every, every) - if not isinstance(every, (tuple, list))\ - or len(every) != 2\ - or not isinstance(every[0], int)\ - or not isinstance(every[1], int): + if ( + not isinstance(every, (tuple, list)) + or len(every) != 2 + or not isinstance(every[0], int) + or not isinstance(every[1], int) + ): raise SearxSettingsException(error_msg, None) return every @@ -50,14 +52,11 @@ def _set_result(result, include_timestamp=True): def run(): - if not running.acquire(blocking=False): # pylint: disable=consider-using-with + if not running.acquire(blocking=False): # pylint: disable=consider-using-with return try: logger.info('Starting checker') - result = { - 'status': 'ok', - 'engines': {} - } + result = {'status': 'ok', 'engines': {}} for name, processor in PROCESSORS.items(): logger.debug('Checking %s engine', name) checker = Checker(processor) diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py index 626aa8ce0..c0dd966d0 100644 --- a/searx/search/checker/impl.py +++ b/searx/search/checker/impl.py @@ -25,6 +25,7 @@ from searx.metrics import counter_inc logger = logger.getChild('searx.search.checker') HTML_TAGS = [ + # fmt: off 'embed', 'iframe', 'object', 'param', 'picture', 'source', 'svg', 'math', 'canvas', 'noscript', 'script', 'del', 'ins', 'area', 'audio', 'img', 'map', 'track', 'video', 'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn', 'em', 'i', 'kdb', 'mark', 'q', 'rb', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'small', @@ -32,6 +33,7 @@ HTML_TAGS = [ 'figcaption', 'figure', 'hr', 'li', 'ol', 'p', 'pre', 'ul', 'button', 'datalist', 'fieldset', 'form', 'input', 'label', 'legend', 'meter', 'optgroup', 'option', 'output', 'progress', 'select', 'textarea', 'applet', 'frame', 'frameset' + # fmt: on ] @@ -72,17 +74,23 @@ def _download_and_check_if_image(image_url: str) -> bool: try: # use "image_proxy" (avoid HTTP/2) network.set_context_network_name('image_proxy') - stream = network.stream('GET', image_url, timeout=10.0, allow_redirects=True, headers={ - 'User-Agent': gen_useragent(), - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US;q=0.5,en;q=0.3', - 'Accept-Encoding': 'gzip, deflate, br', - 'DNT': '1', - 'Connection': 'keep-alive', - 'Upgrade-Insecure-Requests': '1', - 'Sec-GPC': '1', - 'Cache-Control': 'max-age=0' - }) + stream = network.stream( + 'GET', + image_url, + timeout=10.0, + allow_redirects=True, + headers={ + 'User-Agent': gen_useragent(), + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'en-US;q=0.5,en;q=0.3', + 'Accept-Encoding': 'gzip, deflate, br', + 'DNT': '1', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + 'Sec-GPC': '1', + 'Cache-Control': 'max-age=0', + }, + ) r = next(stream) r.close() if r.status_code == 200: @@ -102,8 +110,7 @@ def _download_and_check_if_image(image_url: str) -> bool: def _is_url_image(image_url) -> bool: - """Normalize image_url - """ + """Normalize image_url""" if not isinstance(image_url, str): return False @@ -129,8 +136,9 @@ def _search_query_to_dict(search_query: SearchQuery) -> typing.Dict[str, typing. } -def _search_query_diff(sq1: SearchQuery, sq2: SearchQuery)\ - -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]: +def _search_query_diff( + sq1: SearchQuery, sq2: SearchQuery +) -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]: param1 = _search_query_to_dict(sq1) param2 = _search_query_to_dict(sq2) common = {} @@ -180,11 +188,9 @@ class ResultContainerTests: __slots__ = 'test_name', 'search_query', 'result_container', 'languages', 'stop_test', 'test_results' - def __init__(self, - test_results: TestResults, - test_name: str, - search_query: SearchQuery, - result_container: ResultContainer): + def __init__( + self, test_results: TestResults, test_name: str, search_query: SearchQuery, result_container: ResultContainer + ): self.test_name = test_name self.search_query = search_query self.result_container = result_container @@ -324,10 +330,9 @@ class CheckerTests: __slots__ = 'test_results', 'test_name', 'result_container_tests_list' - def __init__(self, - test_results: TestResults, - test_name: str, - result_container_tests_list: typing.List[ResultContainerTests]): + def __init__( + self, test_results: TestResults, test_name: str, result_container_tests_list: typing.List[ResultContainerTests] + ): self.test_results = test_results self.test_name = test_name self.result_container_tests_list = result_container_tests_list @@ -340,14 +345,17 @@ class CheckerTests: for i, urls_i in enumerate(urls_list): for j, urls_j in enumerate(urls_list): if i < j and urls_i == urls_j: - common, diff = _search_query_diff(self.result_container_tests_list[i].search_query, - self.result_container_tests_list[j].search_query) + common, diff = _search_query_diff( + self.result_container_tests_list[i].search_query, + self.result_container_tests_list[j].search_query, + ) common_str = ' '.join(['{}={!r}'.format(k, v) for k, v in common.items()]) - diff1_str = ', ' .join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()]) - diff2_str = ', ' .join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()]) - self.test_results.add_error(self.test_name, - 'results are identitical for {} and {} ({})' - .format(diff1_str, diff2_str, common_str)) + diff1_str = ', '.join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()]) + diff2_str = ', '.join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()]) + self.test_results.add_error( + self.test_name, + 'results are identitical for {} and {} ({})'.format(diff1_str, diff2_str, common_str), + ) class Checker: @@ -393,9 +401,10 @@ class Checker: elif isinstance(method, types.FunctionType): method(*args) else: - self.test_results.add_error(obj.test_name, - 'method {!r} ({}) not found for {}' - .format(method, method.__class__.__name__, obj.__class__.__name__)) + self.test_results.add_error( + obj.test_name, + 'method {!r} ({}) not found for {}'.format(method, method.__class__.__name__, obj.__class__.__name__), + ) def call_tests(self, obj, test_descriptions): for test_description in test_descriptions: diff --git a/searx/search/models.py b/searx/search/models.py index e48cb3611..ff5897966 100644 --- a/searx/search/models.py +++ b/searx/search/models.py @@ -25,19 +25,30 @@ class EngineRef: class SearchQuery: """container for all the search parameters (query, language, etc...)""" - __slots__ = 'query', 'engineref_list', 'lang', 'safesearch', 'pageno', 'time_range',\ - 'timeout_limit', 'external_bang', 'engine_data' - - def __init__(self, - query: str, - engineref_list: typing.List[EngineRef], - lang: str='all', - safesearch: int=0, - pageno: int=1, - time_range: typing.Optional[str]=None, - timeout_limit: typing.Optional[float]=None, - external_bang: typing.Optional[str]=None, - engine_data: typing.Optional[typing.Dict[str, str]]=None): + __slots__ = ( + 'query', + 'engineref_list', + 'lang', + 'safesearch', + 'pageno', + 'time_range', + 'timeout_limit', + 'external_bang', + 'engine_data', + ) + + def __init__( + self, + query: str, + engineref_list: typing.List[EngineRef], + lang: str = 'all', + safesearch: int = 0, + pageno: int = 1, + time_range: typing.Optional[str] = None, + timeout_limit: typing.Optional[float] = None, + external_bang: typing.Optional[str] = None, + engine_data: typing.Optional[typing.Dict[str, str]] = None, + ): self.query = query self.engineref_list = engineref_list self.lang = lang @@ -53,20 +64,39 @@ class SearchQuery: return list(set(map(lambda engineref: engineref.category, self.engineref_list))) def __repr__(self): - return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\ - format(self.query, self.engineref_list, self.lang, self.safesearch, - self.pageno, self.time_range, self.timeout_limit, self.external_bang) + return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".format( + self.query, + self.engineref_list, + self.lang, + self.safesearch, + self.pageno, + self.time_range, + self.timeout_limit, + self.external_bang, + ) def __eq__(self, other): - return self.query == other.query\ - and self.engineref_list == other.engineref_list\ - and self.lang == other.lang\ - and self.safesearch == other.safesearch\ - and self.pageno == other.pageno\ - and self.time_range == other.time_range\ - and self.timeout_limit == other.timeout_limit\ + return ( + self.query == other.query + and self.engineref_list == other.engineref_list + and self.lang == other.lang + and self.safesearch == other.safesearch + and self.pageno == other.pageno + and self.time_range == other.time_range + and self.timeout_limit == other.timeout_limit and self.external_bang == other.external_bang + ) def __hash__(self): - return hash((self.query, tuple(self.engineref_list), self.lang, self.safesearch, self.pageno, self.time_range, - self.timeout_limit, self.external_bang)) + return hash( + ( + self.query, + tuple(self.engineref_list), + self.lang, + self.safesearch, + self.pageno, + self.time_range, + self.timeout_limit, + self.external_bang, + ) + ) diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py index 8108f8dfa..966b990ec 100644 --- a/searx/search/processors/__init__.py +++ b/searx/search/processors/__init__.py @@ -29,6 +29,7 @@ logger = logger.getChild('search.processors') PROCESSORS = {} """Cache request processores, stored by *engine-name* (:py:func:`initialize`)""" + def get_processor_class(engine_type): """Return processor class according to the ``engine_type``""" for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]: diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index b5fa063fd..732b55d52 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -19,6 +19,7 @@ from searx.utils import get_engine_from_settings logger = logger.getChild('searx.search.processor') SUSPENDED_STATUS = {} + class SuspendedStatus: """Class to handle suspend state.""" @@ -39,8 +40,10 @@ class SuspendedStatus: # update continuous_errors / suspend_end_time self.continuous_errors += 1 if suspended_time is None: - suspended_time = min(settings['search']['max_ban_time_on_fail'], - self.continuous_errors * settings['search']['ban_time_on_fail']) + suspended_time = min( + settings['search']['max_ban_time_on_fail'], + self.continuous_errors * settings['search']['ban_time_on_fail'], + ) self.suspend_end_time = default_timer() + suspended_time self.suspend_reason = suspend_reason logger.debug('Suspend for %i seconds', suspended_time) @@ -127,9 +130,9 @@ class EngineProcessor(ABC): def extend_container_if_suspended(self, result_container): if self.suspended_status.is_suspended: - result_container.add_unresponsive_engine(self.engine_name, - self.suspended_status.suspend_reason, - suspended=True) + result_container.add_unresponsive_engine( + self.engine_name, self.suspended_status.suspend_reason, suspended=True + ) return True return False diff --git a/searx/search/processors/offline.py b/searx/search/processors/offline.py index ec7a4a36e..13f077cb1 100644 --- a/searx/search/processors/offline.py +++ b/searx/search/processors/offline.py @@ -23,6 +23,6 @@ class OfflineProcessor(EngineProcessor): except ValueError as e: # do not record the error self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e)) - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except self.handle_exception(result_container, e) self.logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index c4ee58e11..8d8275df1 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -23,6 +23,7 @@ from .abstract import EngineProcessor def default_request_params(): """Default request parameters for ``online`` engines.""" return { + # fmt: off 'method': 'GET', 'headers': {}, 'data': {}, @@ -30,6 +31,7 @@ def default_request_params(): 'cookies': {}, 'verify': True, 'auth': None + # fmt: on } @@ -64,10 +66,7 @@ class OnlineProcessor(EngineProcessor): # create dictionary which contain all # informations about the request request_args = dict( - headers=params['headers'], - cookies=params['cookies'], - verify=params['verify'], - auth=params['auth'] + headers=params['headers'], cookies=params['cookies'], verify=params['verify'], auth=params['auth'] ) # max_redirects @@ -103,10 +102,12 @@ class OnlineProcessor(EngineProcessor): status_code = str(response.status_code or '') reason = response.reason_phrase or '' hostname = response.url.host - count_error(self.engine_name, - '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), - (status_code, reason, hostname), - secondary=True) + count_error( + self.engine_name, + '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), + (status_code, reason, hostname), + secondary=True, + ) return response @@ -145,22 +146,16 @@ class OnlineProcessor(EngineProcessor): # requests timeout (connect or read) self.handle_exception(result_container, e, suspend=True) self.logger.error( - "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}" - .format( - default_timer() - start_time, - timeout_limit, - e.__class__.__name__ + "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}".format( + default_timer() - start_time, timeout_limit, e.__class__.__name__ ) ) except (httpx.HTTPError, httpx.StreamError) as e: # other requests exception self.handle_exception(result_container, e, suspend=True) self.logger.exception( - "requests exception (search duration : {0} s, timeout: {1} s) : {2}" - .format( - default_timer() - start_time, - timeout_limit, - e + "requests exception (search duration : {0} s, timeout: {1} s) : {2}".format( + default_timer() - start_time, timeout_limit, e ) ) except SearxEngineCaptchaException as e: @@ -186,10 +181,9 @@ class OnlineProcessor(EngineProcessor): if getattr(self.engine, 'paging', False): tests['paging'] = { - 'matrix': {'query': 'time', - 'pageno': (1, 2, 3)}, + 'matrix': {'query': 'time', 'pageno': (1, 2, 3)}, 'result_container': ['not_empty'], - 'test': ['unique_results'] + 'test': ['unique_results'], } if 'general' in self.engine.categories: # avoid documentation about HTML tags (<time> and <input type="time">) @@ -197,10 +191,9 @@ class OnlineProcessor(EngineProcessor): if getattr(self.engine, 'time_range', False): tests['time_range'] = { - 'matrix': {'query': 'news', - 'time_range': (None, 'day')}, + 'matrix': {'query': 'news', 'time_range': (None, 'day')}, 'result_container': ['not_empty'], - 'test': ['unique_results'] + 'test': ['unique_results'], } if getattr(self.engine, 'supported_languages', []): @@ -214,10 +207,6 @@ class OnlineProcessor(EngineProcessor): } if getattr(self.engine, 'safesearch', False): - tests['safesearch'] = { - 'matrix': {'query': 'porn', - 'safesearch': (0, 2)}, - 'test': ['unique_results'] - } + tests['safesearch'] = {'matrix': {'query': 'porn', 'safesearch': (0, 2)}, 'test': ['unique_results']} return tests diff --git a/searx/search/processors/online_currency.py b/searx/search/processors/online_currency.py index 4e5c57264..6bd891b1d 100644 --- a/searx/search/processors/online_currency.py +++ b/searx/search/processors/online_currency.py @@ -12,11 +12,13 @@ from .online import OnlineProcessor parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) + def normalize_name(name): name = name.lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() + def name_to_iso4217(name): name = normalize_name(name) currency = CURRENCIES['names'].get(name, [name]) @@ -24,9 +26,11 @@ def name_to_iso4217(name): return currency return currency[0] + def iso4217_to_name(iso4217, language): return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217) + class OnlineCurrencyProcessor(OnlineProcessor): """Processor class used by ``online_currency`` engines.""" diff --git a/searx/search/processors/online_dictionary.py b/searx/search/processors/online_dictionary.py index 72941d57a..3e7f6ed59 100644 --- a/searx/search/processors/online_dictionary.py +++ b/searx/search/processors/online_dictionary.py @@ -11,6 +11,7 @@ from .online import OnlineProcessor parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) + class OnlineDictionaryProcessor(OnlineProcessor): """Processor class used by ``online_dictionary`` engines.""" @@ -44,10 +45,9 @@ class OnlineDictionaryProcessor(OnlineProcessor): if getattr(self.engine, 'paging', False): tests['translation_paging'] = { - 'matrix': {'query': 'en-es house', - 'pageno': (1, 2, 3)}, + 'matrix': {'query': 'en-es house', 'pageno': (1, 2, 3)}, 'result_container': ['not_empty', ('one_title_contains', 'house')], - 'test': ['unique_results'] + 'test': ['unique_results'], } else: tests['translation'] = { diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py index db020db7d..9c4711bfc 100644 --- a/searx/settings_defaults.py +++ b/searx/settings_defaults.py @@ -53,29 +53,24 @@ SEARX_ENVIRON_VARIABLES = { } - class SettingsValue: - """Check and update a setting value - """ - - def __init__(self, - type_definition: typing.Union[None, typing.Any, typing.Tuple[typing.Any]]=None, - default: typing.Any=None, - environ_name: str=None): + """Check and update a setting value""" + + def __init__( + self, + type_definition: typing.Union[None, typing.Any, typing.Tuple[typing.Any]] = None, + default: typing.Any = None, + environ_name: str = None, + ): self.type_definition = ( - type_definition - if type_definition is None or isinstance(type_definition, tuple) - else (type_definition,) + type_definition if type_definition is None or isinstance(type_definition, tuple) else (type_definition,) ) self.default = default self.environ_name = environ_name @property def type_definition_repr(self): - types_str = [ - t.__name__ if isinstance(t, type) else repr(t) - for t in self.type_definition - ] + types_str = [t.__name__ if isinstance(t, type) else repr(t) for t in self.type_definition] return ', '.join(types_str) def check_type_definition(self, value: typing.Any) -> None: @@ -83,9 +78,7 @@ class SettingsValue: return type_list = tuple(t for t in self.type_definition if isinstance(t, type)) if not isinstance(value, type_list): - raise ValueError( - 'The value has to be one of these types/values: {}'.format( - self.type_definition_repr)) + raise ValueError('The value has to be one of these types/values: {}'.format(self.type_definition_repr)) def __call__(self, value: typing.Any) -> typing.Any: if value == _UNDEFINED: @@ -101,8 +94,7 @@ class SettingsValue: class SettingSublistValue(SettingsValue): - """Check the value is a sublist of type definition. - """ + """Check the value is a sublist of type definition.""" def check_type_definition(self, value: typing.Any) -> typing.Any: if not isinstance(value, list): @@ -111,9 +103,9 @@ class SettingSublistValue(SettingsValue): if not item in self.type_definition[0]: raise ValueError('{} not in {}'.format(item, self.type_definition)) + class SettingsDirectoryValue(SettingsValue): - """Check and update a setting value that is a directory path - """ + """Check and update a setting value that is a directory path""" def check_type_definition(self, value: typing.Any) -> typing.Any: super().check_type_definition(value) @@ -159,7 +151,7 @@ SCHEMA = { 'wiki_url': SettingsValue(str, 'https://github.com/searxng/searxng/wiki'), }, 'search': { - 'safe_search': SettingsValue((0,1,2), 0), + 'safe_search': SettingsValue((0, 1, 2), 0), 'autocomplete': SettingsValue(str, ''), 'default_lang': SettingsValue(tuple(LANGUAGE_CODES + ['']), ''), 'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES), @@ -168,7 +160,7 @@ SCHEMA = { 'formats': SettingsValue(list, OUTPUT_FORMATS), }, 'server': { - 'port': SettingsValue((int,str), 8888, 'SEARXNG_PORT'), + 'port': SettingsValue((int, str), 8888, 'SEARXNG_PORT'), 'bind_address': SettingsValue(str, '127.0.0.1', 'SEARXNG_BIND_ADDRESS'), 'secret_key': SettingsValue(str, environ_name='SEARXNG_SECRET'), 'base_url': SettingsValue((False, str), False), @@ -213,8 +205,7 @@ SCHEMA = { # Tor configuration 'using_tor_proxy': SettingsValue(bool, False), 'extra_proxy_timeout': SettingsValue(int, 0), - 'networks': { - }, + 'networks': {}, }, 'plugins': SettingsValue(list, []), 'enabled_plugins': SettingsValue((None, list), None), @@ -222,10 +213,10 @@ SCHEMA = { 'off_when_debug': SettingsValue(bool, True), }, 'engines': SettingsValue(list, []), - 'doi_resolvers': { - }, + 'doi_resolvers': {}, } + def settings_set_defaults(settings): # compatibility with searx variables for searx, searxng in SEARX_ENVIRON_VARIABLES.items(): diff --git a/searx/settings_loader.py b/searx/settings_loader.py index f688be8ba..14ca8b4aa 100644 --- a/searx/settings_loader.py +++ b/searx/settings_loader.py @@ -125,8 +125,7 @@ def load_settings(load_user_setttings=True): user_settings_path = get_user_settings_path() if user_settings_path is None or not load_user_setttings: # no user settings - return (load_yaml(default_settings_path), - 'load the default settings from {}'.format(default_settings_path)) + return (load_yaml(default_settings_path), 'load the default settings from {}'.format(default_settings_path)) # user settings user_settings = load_yaml(user_settings_path) @@ -134,10 +133,12 @@ def load_settings(load_user_setttings=True): # the user settings are merged with the default configuration default_settings = load_yaml(default_settings_path) update_settings(default_settings, user_settings) - return (default_settings, - 'merge the default settings ( {} ) and the user setttings ( {} )' - .format(default_settings_path, user_settings_path)) + return ( + default_settings, + 'merge the default settings ( {} ) and the user setttings ( {} )'.format( + default_settings_path, user_settings_path + ), + ) # the user settings, fully replace the default configuration - return (user_settings, - 'load the user settings from {}'.format(user_settings_path)) + return (user_settings, 'load the user settings from {}'.format(user_settings_path)) diff --git a/searx/shared/__init__.py b/searx/shared/__init__.py index cbe24d239..98c9a11c2 100644 --- a/searx/shared/__init__.py +++ b/searx/shared/__init__.py @@ -9,6 +9,7 @@ try: except: # no uwsgi from .shared_simple import SimpleSharedDict as SharedDict, schedule + logger.info('Use shared_simple implementation') else: try: @@ -17,15 +18,19 @@ else: raise Exception() except: # uwsgi.ini configuration problem: disable all scheduling - logger.error('uwsgi.ini configuration error, add this line to your uwsgi.ini\n' - 'cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1') + logger.error( + 'uwsgi.ini configuration error, add this line to your uwsgi.ini\n' + 'cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1' + ) from .shared_simple import SimpleSharedDict as SharedDict def schedule(delay, func, *args): return False + else: # uwsgi from .shared_uwsgi import UwsgiCacheSharedDict as SharedDict, schedule + logger.info('Use shared_uwsgi implementation') storage = SharedDict() diff --git a/searx/shared/shared_abstract.py b/searx/shared/shared_abstract.py index b1c72aabe..b4b15bea6 100644 --- a/searx/shared/shared_abstract.py +++ b/searx/shared/shared_abstract.py @@ -3,7 +3,6 @@ from abc import ABC, abstractmethod class SharedDict(ABC): - @abstractmethod def get_int(self, key): pass diff --git a/searx/shared/shared_simple.py b/searx/shared/shared_simple.py index 48d8cb822..0bf13a2a6 100644 --- a/searx/shared/shared_simple.py +++ b/searx/shared/shared_simple.py @@ -7,7 +7,7 @@ from . import shared_abstract class SimpleSharedDict(shared_abstract.SharedDict): - __slots__ = 'd', + __slots__ = ('d',) def __init__(self): self.d = {} diff --git a/searx/shared/shared_uwsgi.py b/searx/shared/shared_uwsgi.py index a6dba9f59..592e24a4b 100644 --- a/searx/shared/shared_uwsgi.py +++ b/searx/shared/shared_uwsgi.py @@ -9,7 +9,6 @@ _last_signal = 10 class UwsgiCacheSharedDict(shared_abstract.SharedDict): - def get_int(self, key): value = uwsgi.cache_get(key) if value is None: diff --git a/searx/unixthreadname.py b/searx/unixthreadname.py index 0f1f54936..3c2a68917 100644 --- a/searx/unixthreadname.py +++ b/searx/unixthreadname.py @@ -11,10 +11,12 @@ except ImportError: pass else: import threading + old_thread_init = threading.Thread.__init__ def new_thread_init(self, *args, **kwargs): # pylint: disable=protected-access, disable=c-extension-no-member old_thread_init(self, *args, **kwargs) setproctitle.setthreadtitle(self._name) + threading.Thread.__init__ = new_thread_init diff --git a/searx/utils.py b/searx/utils.py index 163892e93..d44bb73ea 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -23,8 +23,7 @@ from searx import logger logger = logger.getChild('utils') -blocked_tags = ('script', - 'style') +blocked_tags = ('script', 'style') ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE) ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE) @@ -43,8 +42,8 @@ NOTSET = NotSetClass() def searx_useragent(): """Return the searx User Agent""" return 'searx/{searx_version} {suffix}'.format( - searx_version=VERSION_TAG, - suffix=settings['outgoing']['useragent_suffix']).strip() + searx_version=VERSION_TAG, suffix=settings['outgoing']['useragent_suffix'] + ).strip() def gen_useragent(os=None): @@ -60,7 +59,6 @@ class HTMLTextExtractorException(Exception): class HTMLTextExtractor(HTMLParser): # pylint: disable=W0223 # (see https://bugs.python.org/issue31844) - def __init__(self): HTMLParser.__init__(self) self.result = [] @@ -135,10 +133,10 @@ def html_to_text(html_str): def extract_text(xpath_results, allow_none=False): """Extract text from a lxml result - * if xpath_results is list, extract the text from each result and concat the list - * if xpath_results is a xml element, extract all the text node from it - ( text_content() method from lxml ) - * if xpath_results is a string element, then it's already done + * if xpath_results is list, extract the text from each result and concat the list + * if xpath_results is a xml element, extract all the text node from it + ( text_content() method from lxml ) + * if xpath_results is a string element, then it's already done """ if isinstance(xpath_results, list): # it's list of result : concat everything using recursive call @@ -148,9 +146,7 @@ def extract_text(xpath_results, allow_none=False): return result.strip() elif isinstance(xpath_results, ElementBase): # it's a element - text = html.tostring( - xpath_results, encoding='unicode', method='text', with_tail=False - ) + text = html.tostring(xpath_results, encoding='unicode', method='text', with_tail=False) text = text.strip().replace('\n', ' ') return ' '.join(text.split()) elif isinstance(xpath_results, (_ElementStringResult, _ElementUnicodeResult, str, Number, bool)): @@ -344,7 +340,7 @@ def is_valid_lang(lang): """ if isinstance(lang, bytes): lang = lang.decode() - is_abbr = (len(lang) == 2) + is_abbr = len(lang) == 2 lang = lang.lower() if is_abbr: for l in language_codes: diff --git a/searx/version.py b/searx/version.py index ac42834d9..9b3fcc35e 100644 --- a/searx/version.py +++ b/searx/version.py @@ -81,16 +81,12 @@ def get_git_version(): # add "-dirty" suffix if there are uncommited changes except searx/settings.yml try: - subprocess_run( - "git diff --quiet -- . ':!searx/settings.yml' ':!utils/brand.env'" - ) + subprocess_run("git diff --quiet -- . ':!searx/settings.yml' ':!utils/brand.env'") except subprocess.CalledProcessError as e: if e.returncode == 1: git_version += "-dirty" else: - logger.warning( - '"%s" returns an unexpected return code %i', e.returncode, e.cmd - ) + logger.warning('"%s" returns an unexpected return code %i', e.returncode, e.cmd) return git_version, tag_version @@ -125,9 +121,7 @@ VERSION_TAG = "{VERSION_TAG}" GIT_URL = "{GIT_URL}" GIT_BRANCH = "{GIT_BRANCH}" """ - with open( - os.path.join(os.path.dirname(__file__), "version_frozen.py"), - "w", encoding="utf8") as f: + with open(os.path.join(os.path.dirname(__file__), "version_frozen.py"), "w", encoding="utf8") as f: f.write(python_code) print(f"{f.name} created") else: diff --git a/searx/webadapter.py b/searx/webadapter.py index 58cbf1d90..4fd18cee9 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -15,8 +15,9 @@ def deduplicate_engineref_list(engineref_list: List[EngineRef]) -> List[EngineRe return list(engineref_dict.values()) -def validate_engineref_list(engineref_list: List[EngineRef], preferences: Preferences)\ - -> Tuple[List[EngineRef], List[EngineRef], List[EngineRef]]: +def validate_engineref_list( + engineref_list: List[EngineRef], preferences: Preferences +) -> Tuple[List[EngineRef], List[EngineRef], List[EngineRef]]: """Validate query_engines according to the preferences Returns: @@ -154,9 +155,11 @@ def get_selected_categories(preferences: Preferences, form: Optional[Dict[str, s def get_engineref_from_category_list(category_list: List[str], disabled_engines: List[str]) -> List[EngineRef]: result = [] for categ in category_list: - result.extend(EngineRef(engine.name, categ) - for engine in categories[categ] - if (engine.name, categ) not in disabled_engines) + result.extend( + EngineRef(engine.name, categ) + for engine in categories[categ] + if (engine.name, categ) not in disabled_engines + ) return result @@ -170,8 +173,11 @@ def parse_generic(preferences: Preferences, form: Dict[str, str], disabled_engin # parse the form only if the categories are not locked for pd_name, pd in form.items(): if pd_name == 'engines': - pd_engines = [EngineRef(engine_name, engines[engine_name].categories[0]) - for engine_name in map(str.strip, pd.split(',')) if engine_name in engines] + pd_engines = [ + EngineRef(engine_name, engines[engine_name].categories[0]) + for engine_name in map(str.strip, pd.split(',')) + if engine_name in engines + ] if pd_engines: query_engineref_list.extend(pd_engines) explicit_engine_list = True @@ -206,8 +212,9 @@ def parse_engine_data(form): return engine_data -def get_search_query_from_webapp(preferences: Preferences, form: Dict[str, str])\ - -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef]]: +def get_search_query_from_webapp( + preferences: Preferences, form: Dict[str, str] +) -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef]]: # no text for the query ? if not form.get('q'): raise SearxParameterException('q', '') @@ -239,12 +246,23 @@ def get_search_query_from_webapp(preferences: Preferences, form: Dict[str, str]) query_engineref_list = parse_generic(preferences, form, disabled_engines) query_engineref_list = deduplicate_engineref_list(query_engineref_list) - query_engineref_list, query_engineref_list_unknown, query_engineref_list_notoken =\ - validate_engineref_list(query_engineref_list, preferences) - - return (SearchQuery(query, query_engineref_list, query_lang, query_safesearch, query_pageno, - query_time_range, query_timeout, external_bang=external_bang, - engine_data=engine_data), - raw_text_query, - query_engineref_list_unknown, - query_engineref_list_notoken) + query_engineref_list, query_engineref_list_unknown, query_engineref_list_notoken = validate_engineref_list( + query_engineref_list, preferences + ) + + return ( + SearchQuery( + query, + query_engineref_list, + query_lang, + query_safesearch, + query_pageno, + query_time_range, + query_timeout, + external_bang=external_bang, + engine_data=engine_data, + ), + raw_text_query, + query_engineref_list_unknown, + query_engineref_list_notoken, + ) diff --git a/searx/webapp.py b/searx/webapp.py index b6dc18937..a7812f181 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -146,11 +146,7 @@ STATS_SORT_PARAMETERS = { } # Flask app -app = Flask( - __name__, - static_folder=settings['ui']['static_path'], - template_folder=templates_path -) +app = Flask(__name__, static_folder=settings['ui']['static_path'], template_folder=templates_path) app.jinja_env.trim_blocks = True app.jinja_env.lstrip_blocks = True @@ -171,14 +167,10 @@ _category_names = ( gettext('news'), gettext('map'), gettext('onions'), - gettext('science') + gettext('science'), ) -_simple_style = ( - gettext('auto'), - gettext('light'), - gettext('dark') -) +_simple_style = (gettext('auto'), gettext('light'), gettext('dark')) # timeout_text = gettext('timeout') @@ -214,11 +206,15 @@ exception_classname_to_text = { # monkey patch for flask_babel.get_translations _flask_babel_get_translations = flask_babel.get_translations + + def _get_translations(): if has_request_context() and request.form.get('use-translation') == 'oc': babel_ext = flask_babel.current_app.extensions['babel'] return Translations.load(next(babel_ext.translation_directories), 'oc') return _flask_babel_get_translations() + + flask_babel.get_translations = _get_translations @@ -286,13 +282,10 @@ def code_highlighter(codelines, language=None): line_code_start = line # new codeblock is detected - if last_line is not None and\ - last_line + 1 != line: + if last_line is not None and last_line + 1 != line: # highlight last codepart - formatter = HtmlFormatter( - linenos='inline', linenostart=line_code_start, cssclass="code-highlight" - ) + formatter = HtmlFormatter(linenos='inline', linenostart=line_code_start, cssclass="code-highlight") html_code = html_code + highlight(tmp_code, lexer, formatter) # reset conditions for next codepart @@ -355,16 +348,9 @@ def proxify(url): url_params = dict(mortyurl=url.encode()) if settings['result_proxy'].get('key'): - url_params['mortyhash'] = hmac.new( - settings['result_proxy']['key'], - url.encode(), - hashlib.sha256 - ).hexdigest() - - return '{0}?{1}'.format( - settings['result_proxy']['url'], - urlencode(url_params) - ) + url_params['mortyhash'] = hmac.new(settings['result_proxy']['key'], url.encode(), hashlib.sha256).hexdigest() + + return '{0}?{1}'.format(settings['result_proxy']['url'], urlencode(url_params)) def image_proxify(url): @@ -377,10 +363,12 @@ def image_proxify(url): if url.startswith('data:image/'): # 50 is an arbitrary number to get only the beginning of the image. - partial_base64 = url[len('data:image/'):50].split(';') - if len(partial_base64) == 2 \ - and partial_base64[0] in ['gif', 'png', 'jpeg', 'pjpeg', 'webp', 'tiff', 'bmp']\ - and partial_base64[1].startswith('base64,'): + partial_base64 = url[len('data:image/') : 50].split(';') + if ( + len(partial_base64) == 2 + and partial_base64[0] in ['gif', 'png', 'jpeg', 'pjpeg', 'webp', 'tiff', 'bmp'] + and partial_base64[1].startswith('base64,') + ): return url return None @@ -389,8 +377,7 @@ def image_proxify(url): h = new_hmac(settings['server']['secret_key'], url.encode()) - return '{0}?{1}'.format(url_for('image_proxy'), - urlencode(dict(url=url.encode(), h=h))) + return '{0}?{1}'.format(url_for('image_proxy'), urlencode(dict(url=url.encode(), h=h))) def get_translations(): @@ -412,7 +399,8 @@ def _get_enable_categories(all_categories): disabled_engines = request.preferences.engines.get_disabled() enabled_categories = set( # pylint: disable=consider-using-dict-items - category for engine_name in engines + category + for engine_name in engines for category in engines[engine_name].categories if (engine_name, category) not in disabled_engines ) @@ -423,10 +411,7 @@ def get_pretty_url(parsed_url): path = parsed_url.path path = path[:-1] if len(path) > 0 and path[-1] == '/' else path path = path.replace("/", " › ") - return [ - parsed_url.scheme + "://" + parsed_url.netloc, - path - ] + return [parsed_url.scheme + "://" + parsed_url.netloc, path] def render(template_name, override_theme=None, **kwargs): @@ -448,7 +433,7 @@ def render(template_name, override_theme=None, **kwargs): kwargs['categories'] = _get_enable_categories(kwargs['all_categories']) # i18n - kwargs['language_codes'] = [ l for l in languages if l[0] in settings['search']['languages'] ] + kwargs['language_codes'] = [l for l in languages if l[0] in settings['search']['languages']] kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':')) locale = request.preferences.get_value('locale') @@ -458,12 +443,11 @@ def render(template_name, override_theme=None, **kwargs): kwargs['rtl'] = True if 'current_language' not in kwargs: kwargs['current_language'] = match_language( - request.preferences.get_value('language'), settings['search']['languages'] ) + request.preferences.get_value('language'), settings['search']['languages'] + ) # values from settings - kwargs['search_formats'] = [ - x for x in settings['search']['formats'] if x != 'html' - ] + kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html'] kwargs['instance_name'] = get_setting('general.instance_name') kwargs['searx_version'] = VERSION_STRING kwargs['searx_git_url'] = GIT_URL @@ -477,9 +461,7 @@ def render(template_name, override_theme=None, **kwargs): kwargs['proxify_results'] = settings.get('result_proxy', {}).get('proxify_results', True) kwargs['get_result_template'] = get_result_template kwargs['opensearch_url'] = ( - url_for('opensearch') - + '?' - + urlencode({'method': kwargs['method'], 'autocomplete': kwargs['autocomplete']}) + url_for('opensearch') + '?' + urlencode({'method': kwargs['method'], 'autocomplete': kwargs['autocomplete']}) ) # scripts from plugins @@ -495,8 +477,7 @@ def render(template_name, override_theme=None, **kwargs): kwargs['styles'].add(css) start_time = default_timer() - result = render_template( - '{}/{}'.format(kwargs['theme'], template_name), **kwargs) + result = render_template('{}/{}'.format(kwargs['theme'], template_name), **kwargs) request.render_time += default_timer() - start_time # pylint: disable=assigning-non-slot return result @@ -541,7 +522,7 @@ def pre_request(): # language is defined neither in settings nor in preferences # use browser headers if not preferences.get_value("language"): - language = _get_browser_language(request, settings['search']['languages']) + language = _get_browser_language(request, settings['search']['languages']) preferences.parse_dict({"language": language}) # locale is defined neither in settings nor in preferences @@ -555,8 +536,7 @@ def pre_request(): allowed_plugins = preferences.plugins.get_enabled() disabled_plugins = preferences.plugins.get_disabled() for plugin in plugins: - if ((plugin.default_on and plugin.id not in disabled_plugins) - or plugin.id in allowed_plugins): + if (plugin.default_on and plugin.id not in disabled_plugins) or plugin.id in allowed_plugins: request.user_plugins.append(plugin) @@ -573,17 +553,20 @@ def add_default_headers(response): @app.after_request def post_request(response): total_time = default_timer() - request.start_time - timings_all = ['total;dur=' + str(round(total_time * 1000, 3)), - 'render;dur=' + str(round(request.render_time * 1000, 3))] + timings_all = [ + 'total;dur=' + str(round(total_time * 1000, 3)), + 'render;dur=' + str(round(request.render_time * 1000, 3)), + ] if len(request.timings) > 0: timings = sorted(request.timings, key=lambda v: v['total']) timings_total = [ - 'total_' + str(i) + '_' + v['engine'] + ';dur=' + str(round(v['total'] * 1000, 3)) + 'total_' + str(i) + '_' + v['engine'] + ';dur=' + str(round(v['total'] * 1000, 3)) for i, v in enumerate(timings) ] timings_load = [ 'load_' + str(i) + '_' + v['engine'] + ';dur=' + str(round(v['load'] * 1000, 3)) - for i, v in enumerate(timings) if v.get('load') + for i, v in enumerate(timings) + if v.get('load') ] timings_all = timings_all + timings_total + timings_load response.headers.add('Server-Timing', ', '.join(timings_all)) @@ -592,10 +575,7 @@ def post_request(response): def index_error(output_format, error_message): if output_format == 'json': - return Response( - json.dumps({'error': error_message}), - mimetype='application/json' - ) + return Response(json.dumps({'error': error_message}), mimetype='application/json') if output_format == 'csv': response = Response('', mimetype='application/csv') cont_disp = 'attachment;Filename=searx.csv' @@ -616,8 +596,10 @@ def index_error(output_format, error_message): # html request.errors.append(gettext('search error')) return render( + # fmt: off 'index.html', selected_categories=get_selected_categories(request.preferences, request.form), + # fmt: on ) @@ -631,8 +613,10 @@ def index(): return redirect(url_for('search') + query, 308) return render( + # fmt: off 'index.html', selected_categories=get_selected_categories(request.preferences, request.form), + # fmt: on ) @@ -662,8 +646,10 @@ def search(): if not request.form.get('q'): if output_format == 'html': return render( + # fmt: off 'index.html', selected_categories=get_selected_categories(request.preferences, request.form), + # fmt: on ) return index_error(output_format, 'No query'), 400 @@ -672,9 +658,7 @@ def search(): raw_text_query = None result_container = None try: - search_query, raw_text_query, _, _ = get_search_query_from_webapp( - request.preferences, request.form - ) + search_query, raw_text_query, _, _ = get_search_query_from_webapp(request.preferences, request.form) # search = Search(search_query) # without plugins search = SearchWithPlugins(search_query, request.user_plugins, request) # pylint: disable=redefined-outer-name @@ -730,10 +714,9 @@ def search(): if hours == 0: result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes) else: - result['publishedDate'] = gettext( - '{hours} hour(s), {minutes} minute(s) ago').format( - hours=hours, minutes=minutes - ) + result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format( + hours=hours, minutes=minutes + ) else: result['publishedDate'] = format_date(result['publishedDate']) @@ -746,11 +729,9 @@ def search(): 'corrections': list(result_container.corrections), 'infoboxes': result_container.infoboxes, 'suggestions': list(result_container.suggestions), - 'unresponsive_engines': __get_translated_errors(result_container.unresponsive_engines) + 'unresponsive_engines': __get_translated_errors(result_container.unresponsive_engines), } - response = json.dumps( - x, default = lambda item: list(item) if isinstance(item, set) else item - ) + response = json.dumps(x, default=lambda item: list(item) if isinstance(item, set) else item) return Response(response, mimetype='application/json') if output_format == 'csv': @@ -794,23 +775,20 @@ def search(): # suggestions: use RawTextQuery to get the suggestion URLs with the same bang suggestion_urls = list( map( - lambda suggestion: { - 'url': raw_text_query.changeQuery(suggestion).getFullQuery(), - 'title': suggestion - }, - result_container.suggestions - )) + lambda suggestion: {'url': raw_text_query.changeQuery(suggestion).getFullQuery(), 'title': suggestion}, + result_container.suggestions, + ) + ) correction_urls = list( map( - lambda correction: { - 'url': raw_text_query.changeQuery(correction).getFullQuery(), - 'title': correction - }, - result_container.corrections - )) + lambda correction: {'url': raw_text_query.changeQuery(correction).getFullQuery(), 'title': correction}, + result_container.corrections, + ) + ) return render( + # fmt: off 'results.html', results = results, q=request.form['q'], @@ -835,6 +813,7 @@ def search(): theme = get_current_theme_name(), favicons = global_favicons[themes.index(get_current_theme_name())], timeout_limit = request.form.get('timeout_limit', None) + # fmt: on ) @@ -891,9 +870,7 @@ def autocompleter(): language = language.split('-')[0] # run autocompletion - raw_results = search_autocomplete( - request.preferences.get_value('autocomplete'), sug_prefix, language - ) + raw_results = search_autocomplete(request.preferences.get_value('autocomplete'), sug_prefix, language) for result in raw_results: # attention: this loop will change raw_text_query object and this is # the reason why the sug_prefix was stored before (see above) @@ -944,16 +921,11 @@ def preferences(): allowed_plugins = request.preferences.plugins.get_enabled() # stats for preferences page - filtered_engines = dict( - filter( - lambda kv: (kv[0], request.preferences.validate_token(kv[1])), - engines.items() - ) - ) + filtered_engines = dict(filter(lambda kv: (kv[0], request.preferences.validate_token(kv[1])), engines.items())) engines_by_category = {} - for c in categories: # pylint: disable=consider-using-dict-items + for c in categories: # pylint: disable=consider-using-dict-items engines_by_category[c] = [e for e in categories[c] if e.name in filtered_engines] # sort the engines alphabetically since the order in settings.yml is meaningless. list.sort(engines_by_category[c], key=lambda e: e.name) @@ -988,8 +960,9 @@ def preferences(): reliabilities = {} engine_errors = get_engine_errors(filtered_engines) checker_results = checker_get_result() - checker_results = checker_results['engines'] \ - if checker_results['status'] == 'ok' and 'engines' in checker_results else {} + checker_results = ( + checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {} + ) for _, e in filtered_engines.items(): checker_result = checker_results.get(e.name, {}) checker_success = checker_result.get('success', True) @@ -1045,6 +1018,7 @@ def preferences(): } return render( + # fmt: off 'preferences.html', selected_categories = get_selected_categories(request.preferences, request.form), locales = LOCALE_NAMES, @@ -1071,6 +1045,7 @@ def preferences(): preferences_url_params = request.preferences.get_as_url_params(), locked_preferences = settings['preferences']['lock'], preferences = True + # fmt: on ) @@ -1079,10 +1054,7 @@ def _is_selected_language_supported(engine, preferences): # pylint: disable=red if language == 'all': return True x = match_language( - language, - getattr(engine, 'supported_languages', []), - getattr(engine, 'language_aliases', {}), - None + language, getattr(engine, 'supported_languages', []), getattr(engine, 'language_aliases', {}), None ) return bool(x) @@ -1111,15 +1083,9 @@ def image_proxy(): 'DNT': '1', } set_context_network_name('image_proxy') - resp, stream = http_stream( - method = 'GET', - url = url, - headers = request_headers - ) + resp, stream = http_stream(method='GET', url=url, headers=request_headers) content_length = resp.headers.get('Content-Length') - if (content_length - and content_length.isdigit() - and int(content_length) > maximum_size ): + if content_length and content_length.isdigit() and int(content_length) > maximum_size: return 'Max size', 400 if resp.status_code != 200: @@ -1155,15 +1121,8 @@ def image_proxy(): logger.debug('Exception while closing response', e) try: - headers = dict_subset( - resp.headers, - {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'} - ) - response = Response( - stream, - mimetype=resp.headers['Content-Type'], - headers=headers, - direct_passthrough=True) + headers = dict_subset(resp.headers, {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'}) + response = Response(stream, mimetype=resp.headers['Content-Type'], headers=headers, direct_passthrough=True) response.call_on_close(close_stream) return response except httpx.HTTPError: @@ -1179,11 +1138,11 @@ def engine_descriptions(): for engine, description in ENGINE_DESCRIPTIONS.get(locale, {}).items(): result[engine] = description for engine, description in result.items(): - if len(description) ==2 and description[1] == 'ref': + if len(description) == 2 and description[1] == 'ref': ref_engine, ref_lang = description[0].split(':') description = ENGINE_DESCRIPTIONS[ref_lang][ref_engine] if isinstance(description, str): - description = [ description, 'wikipedia' ] + description = [description, 'wikipedia'] result[engine] = description return jsonify(result) @@ -1194,11 +1153,7 @@ def stats(): sort_order = request.args.get('sort', default='name', type=str) selected_engine_name = request.args.get('engine', default=None, type=str) - filtered_engines = dict( - filter( - lambda kv: (kv[0], request.preferences.validate_token(kv[1])), - engines.items() - )) + filtered_engines = dict(filter(lambda kv: (kv[0], request.preferences.validate_token(kv[1])), engines.items())) if selected_engine_name: if selected_engine_name not in filtered_engines: selected_engine_name = None @@ -1207,8 +1162,7 @@ def stats(): checker_results = checker_get_result() checker_results = ( - checker_results['engines'] - if checker_results['status'] == 'ok' and 'engines' in checker_results else {} + checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {} ) engine_stats = get_engines_stats(filtered_engines) @@ -1233,22 +1187,20 @@ def stats(): engine_stats['time'] = sorted(engine_stats['time'], reverse=reverse, key=get_key) return render( + # fmt: off 'stats.html', sort_order = sort_order, engine_stats = engine_stats, engine_reliabilities = engine_reliabilities, selected_engine_name = selected_engine_name, searx_git_branch = GIT_BRANCH, + # fmt: on ) @app.route('/stats/errors', methods=['GET']) def stats_errors(): - filtered_engines = dict( - filter( - lambda kv: (kv[0], request.preferences.validate_token(kv[1])), - engines.items() - )) + filtered_engines = dict(filter(lambda kv: (kv[0], request.preferences.validate_token(kv[1])), engines.items())) result = get_engine_errors(filtered_engines) return jsonify(result) @@ -1261,13 +1213,16 @@ def stats_checker(): @app.route('/robots.txt', methods=['GET']) def robots(): - return Response("""User-agent: * + return Response( + """User-agent: * Allow: / Allow: /about Disallow: /stats Disallow: /preferences Disallow: /*?*q=* -""", mimetype='text/plain') +""", + mimetype='text/plain', + ) @app.route('/opensearch.xml', methods=['GET']) @@ -1281,34 +1236,21 @@ def opensearch(): if request.headers.get('User-Agent', '').lower().find('webkit') >= 0: method = 'get' - ret = render( - 'opensearch.xml', - opensearch_method=method, - override_theme='__common__' - ) + ret = render('opensearch.xml', opensearch_method=method, override_theme='__common__') - resp = Response( - response = ret, - status = 200, - mimetype = "application/opensearchdescription+xml" - ) + resp = Response(response=ret, status=200, mimetype="application/opensearchdescription+xml") return resp @app.route('/favicon.ico') def favicon(): return send_from_directory( - os.path.join( - app.root_path, - settings['ui']['static_path'], - 'themes', - get_current_theme_name(), - 'img' - ), + os.path.join(app.root_path, settings['ui']['static_path'], 'themes', get_current_theme_name(), 'img'), 'favicon.png', - mimetype = 'image/vnd.microsoft.icon' + mimetype='image/vnd.microsoft.icon', ) + @app.route('/clear_cookies') def clear_cookies(): resp = make_response(redirect(url_for('index', _external=True))) @@ -1329,43 +1271,47 @@ def config(): if isinstance(engine.supported_languages, dict): supported_languages = list(engine.supported_languages.keys()) - _engines.append({ - 'name': name, - 'categories': engine.categories, - 'shortcut': engine.shortcut, - 'enabled': not engine.disabled, - 'paging': engine.paging, - 'language_support': engine.language_support, - 'supported_languages': supported_languages, - 'safesearch': engine.safesearch, - 'time_range_support': engine.time_range_support, - 'timeout': engine.timeout - }) + _engines.append( + { + 'name': name, + 'categories': engine.categories, + 'shortcut': engine.shortcut, + 'enabled': not engine.disabled, + 'paging': engine.paging, + 'language_support': engine.language_support, + 'supported_languages': supported_languages, + 'safesearch': engine.safesearch, + 'time_range_support': engine.time_range_support, + 'timeout': engine.timeout, + } + ) _plugins = [] for _ in plugins: _plugins.append({'name': _.name, 'enabled': _.default_on}) - return jsonify({ - 'categories': list(categories.keys()), - 'engines': _engines, - 'plugins': _plugins, - 'instance_name': settings['general']['instance_name'], - 'locales': LOCALE_NAMES, - 'default_locale': settings['ui']['default_locale'], - 'autocomplete': settings['search']['autocomplete'], - 'safe_search': settings['search']['safe_search'], - 'default_theme': settings['ui']['default_theme'], - 'version': VERSION_STRING, - 'brand': { - 'CONTACT_URL': get_setting('general.contact_url'), - 'GIT_URL': GIT_URL, - 'GIT_BRANCH': GIT_BRANCH, - 'DOCS_URL': get_setting('brand.docs_url'), - }, - 'doi_resolvers': list(settings['doi_resolvers'].keys()), - 'default_doi_resolver': settings['default_doi_resolver'], - }) + return jsonify( + { + 'categories': list(categories.keys()), + 'engines': _engines, + 'plugins': _plugins, + 'instance_name': settings['general']['instance_name'], + 'locales': LOCALE_NAMES, + 'default_locale': settings['ui']['default_locale'], + 'autocomplete': settings['search']['autocomplete'], + 'safe_search': settings['search']['safe_search'], + 'default_theme': settings['ui']['default_theme'], + 'version': VERSION_STRING, + 'brand': { + 'CONTACT_URL': get_setting('general.contact_url'), + 'GIT_URL': GIT_URL, + 'GIT_BRANCH': GIT_BRANCH, + 'DOCS_URL': get_setting('brand.docs_url'), + }, + 'doi_resolvers': list(settings['doi_resolvers'].keys()), + 'default_doi_resolver': settings['default_doi_resolver'], + } + ) @app.errorhandler(404) @@ -1376,9 +1322,7 @@ def page_not_found(_e): # see https://flask.palletsprojects.com/en/1.1.x/cli/ # True if "FLASK_APP=searx/webapp.py FLASK_ENV=development flask run" flask_run_development = ( - os.environ.get("FLASK_APP") is not None - and os.environ.get("FLASK_ENV") == 'development' - and is_flask_run_cmdline() + os.environ.get("FLASK_APP") is not None and os.environ.get("FLASK_ENV") == 'development' and is_flask_run_cmdline() ) # True if reload feature is activated of werkzeug, False otherwise (including uwsgi, etc..) @@ -1387,30 +1331,23 @@ flask_run_development = ( werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__main__") # initialize the engines except on the first run of the werkzeug server. -if (not werkzeug_reloader - or (werkzeug_reloader - and os.environ.get("WERKZEUG_RUN_MAIN") == "true") ): +if not werkzeug_reloader or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_MAIN") == "true"): plugin_initialize(app) search_initialize(enable_checker=True, check_network=True) def run(): - logger.debug( - 'starting webserver on %s:%s', - settings['server']['bind_address'], - settings['server']['port'] - ) + logger.debug('starting webserver on %s:%s', settings['server']['bind_address'], settings['server']['port']) app.run( - debug = searx_debug, - use_debugger = searx_debug, - port = settings['server']['port'], - host = settings['server']['bind_address'], - threaded = True, - extra_files = [ - get_default_settings_path() - ], + debug=searx_debug, + use_debugger=searx_debug, + port=settings['server']['port'], + host=settings['server']['bind_address'], + threaded=True, + extra_files=[get_default_settings_path()], ) + application = app patch_application(app) diff --git a/searx/webutils.py b/searx/webutils.py index c27324908..737e5a82f 100644 --- a/searx/webutils.py +++ b/searx/webutils.py @@ -106,8 +106,7 @@ def highlight_content(content, query): if content.lower().find(query.lower()) > -1: query_regex = '({0})'.format(re.escape(query)) - content = re.sub(query_regex, '<span class="highlight">\\1</span>', - content, flags=re.I | re.U) + content = re.sub(query_regex, '<span class="highlight">\\1</span>', content, flags=re.I | re.U) else: regex_parts = [] for chunk in query.split(): @@ -119,8 +118,7 @@ def highlight_content(content, query): else: regex_parts.append('{0}'.format(re.escape(chunk))) query_regex = '({0})'.format('|'.join(regex_parts)) - content = re.sub(query_regex, '<span class="highlight">\\1</span>', - content, flags=re.I | re.U) + content = re.sub(query_regex, '<span class="highlight">\\1</span>', content, flags=re.I | re.U) return content diff --git a/searxng_extra/standalone_searx.py b/searxng_extra/standalone_searx.py index de8a0d77f..9ac8c8af2 100755 --- a/searxng_extra/standalone_searx.py +++ b/searxng_extra/standalone_searx.py @@ -62,7 +62,7 @@ Example to run it from python: }, "suggestions": [...] } -""" # pylint: disable=line-too-long +""" # pylint: disable=line-too-long import argparse import sys @@ -80,7 +80,7 @@ EngineCategoriesVar = Optional[List[str]] def get_search_query( - args: argparse.Namespace, engine_categories: EngineCategoriesVar = None + args: argparse.Namespace, engine_categories: EngineCategoriesVar = None ) -> searx.search.SearchQuery: """Get search results for the query""" if engine_categories is None: @@ -94,14 +94,12 @@ def get_search_query( "categories": category, "pageno": str(args.pageno), "language": args.lang, - "time_range": args.timerange + "time_range": args.timerange, } - preferences = searx.preferences.Preferences( - ['oscar'], engine_categories, searx.engines.engines, []) + preferences = searx.preferences.Preferences(['oscar'], engine_categories, searx.engines.engines, []) preferences.key_value_settings['safesearch'].parse(args.safesearch) - search_query = searx.webadapter.get_search_query_from_webapp( - preferences, form)[0] + search_query = searx.webadapter.get_search_query_from_webapp(preferences, form)[0] return search_query @@ -143,14 +141,13 @@ def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]: "suggestions": list(result_container.suggestions), "answers": list(result_container.answers), "paging": result_container.paging, - "results_number": result_container.results_number() + "results_number": result_container.results_number(), } return result_container_json def parse_argument( - args: Optional[List[str]]=None, - category_choices: EngineCategoriesVar=None + args: Optional[List[str]] = None, category_choices: EngineCategoriesVar = None ) -> argparse.Namespace: """Parse command line. @@ -174,24 +171,23 @@ def parse_argument( if not category_choices: category_choices = list(searx.engines.categories.keys()) parser = argparse.ArgumentParser(description='Standalone searx.') - parser.add_argument('query', type=str, - help='Text query') - parser.add_argument('--category', type=str, nargs='?', - choices=category_choices, - default='general', - help='Search category') - parser.add_argument('--lang', type=str, nargs='?', default='all', - help='Search language') - parser.add_argument('--pageno', type=int, nargs='?', default=1, - help='Page number starting from 1') + parser.add_argument('query', type=str, help='Text query') parser.add_argument( - '--safesearch', type=str, nargs='?', - choices=['0', '1', '2'], default='0', - help='Safe content filter from none to strict') + '--category', type=str, nargs='?', choices=category_choices, default='general', help='Search category' + ) + parser.add_argument('--lang', type=str, nargs='?', default='all', help='Search language') + parser.add_argument('--pageno', type=int, nargs='?', default=1, help='Page number starting from 1') parser.add_argument( - '--timerange', type=str, - nargs='?', choices=['day', 'week', 'month', 'year'], - help='Filter by time range') + '--safesearch', + type=str, + nargs='?', + choices=['0', '1', '2'], + default='0', + help='Safe content filter from none to strict', + ) + parser.add_argument( + '--timerange', type=str, nargs='?', choices=['day', 'week', 'month', 'year'], help='Filter by time range' + ) return parser.parse_args(args) @@ -206,6 +202,4 @@ if __name__ == '__main__': searx.search.initialize_processors(settings_engines) search_q = get_search_query(prog_args, engine_categories=engine_cs) res_dict = to_dict(search_q) - sys.stdout.write(dumps( - res_dict, sort_keys=True, indent=4, ensure_ascii=False, - default=json_serial)) + sys.stdout.write(dumps(res_dict, sort_keys=True, indent=4, ensure_ascii=False, default=json_serial)) diff --git a/searxng_extra/update/update_currencies.py b/searxng_extra/update/update_currencies.py index 93ac15aa5..3373e2455 100755 --- a/searxng_extra/update/update_currencies.py +++ b/searxng_extra/update/update_currencies.py @@ -85,9 +85,7 @@ def add_currency_label(db, label, iso4217, language): def wikidata_request_result_iterator(request): - result = wikidata.send_wikidata_query( - request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL) - ) + result = wikidata.send_wikidata_query(request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)) if result is not None: for r in result['results']['bindings']: yield r @@ -151,5 +149,6 @@ def main(): with open(get_filename(), 'w', encoding='utf8') as f: json.dump(db, f, ensure_ascii=False, indent=4) + if __name__ == '__main__': main() diff --git a/searxng_extra/update/update_engine_descriptions.py b/searxng_extra/update/update_engine_descriptions.py index 59a9a72a0..51cfc7cc2 100755 --- a/searxng_extra/update/update_engine_descriptions.py +++ b/searxng_extra/update/update_engine_descriptions.py @@ -55,7 +55,10 @@ NOT_A_DESCRIPTION = [ ] SKIP_ENGINE_SOURCE = [ - ('gitlab', 'wikidata') # descriptions are about wikipedia disambiguation pages + # fmt: off + ('gitlab', 'wikidata') + # descriptions are about wikipedia disambiguation pages + # fmt: on ] LANGUAGES = LOCALE_NAMES.keys() @@ -92,10 +95,7 @@ def update_description(engine_name, lang, description, source, replace=True): def get_wikipedia_summary(lang, pageid): - params = { - 'language': lang.replace('_','-'), - 'headers': {} - } + params = {'language': lang.replace('_', '-'), 'headers': {}} searx.engines.engines['wikipedia'].request(pageid, params) try: response = searx.network.get(params['url'], headers=params['headers'], timeout=10) @@ -160,10 +160,7 @@ def initialize(): global IDS, WIKIPEDIA_LANGUAGES, LANGUAGES_SPARQL searx.search.initialize() wikipedia_engine = searx.engines.engines['wikipedia'] - WIKIPEDIA_LANGUAGES = { - language: wikipedia_engine.url_lang(language.replace('_', '-')) - for language in LANGUAGES - } + WIKIPEDIA_LANGUAGES = {language: wikipedia_engine.url_lang(language.replace('_', '-')) for language in LANGUAGES} WIKIPEDIA_LANGUAGES['nb_NO'] = 'no' LANGUAGES_SPARQL = ', '.join(f"'{l}'" for l in set(WIKIPEDIA_LANGUAGES.values())) for engine_name, engine in searx.engines.engines.items(): @@ -178,9 +175,7 @@ def initialize(): def fetch_wikidata_descriptions(): searx.network.set_timeout_for_thread(60) result = wikidata.send_wikidata_query( - SPARQL_DESCRIPTION - .replace('%IDS%', IDS) - .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL) + SPARQL_DESCRIPTION.replace('%IDS%', IDS).replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL) ) if result is not None: for binding in result['results']['bindings']: @@ -195,9 +190,7 @@ def fetch_wikidata_descriptions(): def fetch_wikipedia_descriptions(): result = wikidata.send_wikidata_query( - SPARQL_WIKIPEDIA_ARTICLE - .replace('%IDS%', IDS) - .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL) + SPARQL_WIKIPEDIA_ARTICLE.replace('%IDS%', IDS).replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL) ) if result is not None: for binding in result['results']['bindings']: @@ -224,9 +217,9 @@ def fetch_website_description(engine_name, website): # the front page can't be fetched: skip this engine return - wikipedia_languages_r = { V: K for K, V in WIKIPEDIA_LANGUAGES.items() } + wikipedia_languages_r = {V: K for K, V in WIKIPEDIA_LANGUAGES.items()} languages = ['en', 'es', 'pt', 'ru', 'tr', 'fr'] - languages = languages + [ l for l in LANGUAGES if l not in languages] + languages = languages + [l for l in LANGUAGES if l not in languages] previous_matched_lang = None previous_count = 0 @@ -279,9 +272,7 @@ def get_output(): * description (if source = "wikipedia") * [f"engine:lang", "ref"] (reference to another existing description) """ - output = { - locale: {} for locale in LOCALE_NAMES - } + output = {locale: {} for locale in LOCALE_NAMES} seen_descriptions = {} diff --git a/searxng_extra/update/update_external_bangs.py b/searxng_extra/update/update_external_bangs.py index 27882bb24..d5c6b585a 100755 --- a/searxng_extra/update/update_external_bangs.py +++ b/searxng_extra/update/update_external_bangs.py @@ -113,13 +113,13 @@ def parse_ddg_bangs(ddg_bangs): # only for the https protocol: "https://example.com" becomes "//example.com" if bang_url.startswith(HTTPS_COLON + '//'): - bang_url = bang_url[len(HTTPS_COLON):] + bang_url = bang_url[len(HTTPS_COLON) :] # - if bang_url.startswith(HTTP_COLON + '//') and bang_url[len(HTTP_COLON):] in bang_urls: + if bang_url.startswith(HTTP_COLON + '//') and bang_url[len(HTTP_COLON) :] in bang_urls: # if the bang_url uses the http:// protocol, and the same URL exists in https:// # then reuse the https:// bang definition. (written //example.com) - bang_def_output = bang_urls[bang_url[len(HTTP_COLON):]] + bang_def_output = bang_urls[bang_url[len(HTTP_COLON) :]] else: # normal use case : new http:// URL or https:// URL (without "https:", see above) bang_rank = str(bang_definition['r']) @@ -151,9 +151,6 @@ def get_bangs_filename(): if __name__ == '__main__': bangs_url, bangs_version = get_bang_url() print(f'fetch bangs from {bangs_url}') - output = { - 'version': bangs_version, - 'trie': parse_ddg_bangs(fetch_ddg_bangs(bangs_url)) - } + output = {'version': bangs_version, 'trie': parse_ddg_bangs(fetch_ddg_bangs(bangs_url))} with open(get_bangs_filename(), 'w', encoding="utf8") as fp: json.dump(output, fp, ensure_ascii=False, indent=4) diff --git a/searxng_extra/update/update_firefox_version.py b/searxng_extra/update/update_firefox_version.py index 07bcef7f6..750e955fd 100755 --- a/searxng_extra/update/update_firefox_version.py +++ b/searxng_extra/update/update_firefox_version.py @@ -19,10 +19,12 @@ NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?$') # useragents = { + # fmt: off "versions": (), "os": ('Windows NT 10.0; Win64; x64', 'X11; Linux x86_64'), - "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}" + "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}", + # fmt: on } @@ -38,7 +40,7 @@ def fetch_firefox_versions(): url = urlparse(urljoin(URL, link)) path = url.path if path.startswith(RELEASE_PATH): - version = path[len(RELEASE_PATH):-1] + version = path[len(RELEASE_PATH) : -1] if NORMAL_REGEX.match(version): versions.append(LooseVersion(version)) diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py index 2d7ffc104..526469342 100755 --- a/searxng_extra/update/update_languages.py +++ b/searxng_extra/update/update_languages.py @@ -31,8 +31,7 @@ def fetch_supported_languages(): for engine_name in names: if hasattr(engines[engine_name], 'fetch_supported_languages'): engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() - print("fetched %s languages from engine %s" % ( - len(engines_languages[engine_name]), engine_name)) + print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name)) if type(engines_languages[engine_name]) == list: engines_languages[engine_name] = sorted(engines_languages[engine_name]) @@ -60,8 +59,9 @@ def join_language_lists(engines_languages): # apply custom fixes if necessary if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values(): - lang_code = next(lc for lc, alias in engines[engine_name].language_aliases.items() - if lang_code == alias) + lang_code = next( + lc for lc, alias in engines[engine_name].language_aliases.items() if lang_code == alias + ) locale = get_locale(lang_code) @@ -85,10 +85,12 @@ def join_language_lists(engines_languages): english_name = None # add language to list - language_list[short_code] = {'name': language_name, - 'english_name': english_name, - 'counter': set(), - 'countries': dict()} + language_list[short_code] = { + 'name': language_name, + 'english_name': english_name, + 'counter': set(), + 'countries': dict(), + } # add language with country if not in list if lang_code != short_code and lang_code not in language_list[short_code]['countries']: @@ -97,8 +99,7 @@ def join_language_lists(engines_languages): # get country name from babel's Locale object country_name = locale.get_territory_name() - language_list[short_code]['countries'][lang_code] = {'country_name': country_name, - 'counter': set()} + language_list[short_code]['countries'][lang_code] = {'country_name': country_name, 'counter': set()} # count engine for both language_country combination and language alone language_list[short_code]['counter'].add(engine_name) @@ -112,17 +113,23 @@ def join_language_lists(engines_languages): def filter_language_list(all_languages): min_engines_per_lang = 13 min_engines_per_country = 7 - main_engines = [engine_name for engine_name in engines.keys() - if 'general' in engines[engine_name].categories and - engines[engine_name].supported_languages and - not engines[engine_name].disabled] + main_engines = [ + engine_name + for engine_name in engines.keys() + if 'general' in engines[engine_name].categories + and engines[engine_name].supported_languages + and not engines[engine_name].disabled + ] # filter list to include only languages supported by most engines or all default general engines - filtered_languages = {code: lang for code, lang - in all_languages.items() - if (len(lang['counter']) >= min_engines_per_lang or - all(main_engine in lang['counter'] - for main_engine in main_engines))} + filtered_languages = { + code: lang + for code, lang in all_languages.items() + if ( + len(lang['counter']) >= min_engines_per_lang + or all(main_engine in lang['counter'] for main_engine in main_engines) + ) + } def _copy_lang_data(lang, country_name=None): new_dict = dict() @@ -176,22 +183,24 @@ def write_languages_file(languages): "# -*- coding: utf-8 -*-", "# list of language codes", "# this file is generated automatically by utils/fetch_languages.py", - "language_codes =" + "language_codes =", ) - language_codes = tuple([ - ( - code, - languages[code]['name'].split(' (')[0], - languages[code].get('country_name') or '', - languages[code].get('english_name') or '' - ) for code in sorted(languages) - ]) + language_codes = tuple( + [ + ( + code, + languages[code]['name'].split(' (')[0], + languages[code].get('country_name') or '', + languages[code].get('english_name') or '', + ) + for code in sorted(languages) + ] + ) with open(languages_file, 'w') as new_file: file_content = "{file_headers} \\\n{language_codes}".format( - file_headers='\n'.join(file_headers), - language_codes=pformat(language_codes, indent=4) + file_headers='\n'.join(file_headers), language_codes=pformat(language_codes, indent=4) ) new_file.write(file_content) new_file.close() diff --git a/searxng_extra/update/update_osm_keys_tags.py b/searxng_extra/update/update_osm_keys_tags.py index 77c715ba7..2916cbff1 100755 --- a/searxng_extra/update/update_osm_keys_tags.py +++ b/searxng_extra/update/update_osm_keys_tags.py @@ -84,9 +84,8 @@ PRESET_KEYS = { ('internet_access', 'ssid'): {'en': 'Wi-Fi'}, } -INCLUDED_KEYS = { - ('addr', ) -} +INCLUDED_KEYS = {('addr',)} + def get_preset_keys(): results = collections.OrderedDict() @@ -97,6 +96,7 @@ def get_preset_keys(): r.setdefault('*', value) return results + def get_keys(): results = get_preset_keys() response = wikidata.send_wikidata_query(SPARQL_KEYS_REQUEST) @@ -110,18 +110,16 @@ def get_keys(): # label for the key "contact.email" is "Email" # whatever the language r = results.setdefault('contact', {}) - r[keys[1]] = { - '*': { - 'en': keys[1] - } - } + r[keys[1]] = {'*': {'en': keys[1]}} continue if tuple(keys) in PRESET_KEYS: # skip presets (already set above) continue - if get_key_rank(':'.join(keys)) is None\ - and ':'.join(keys) not in VALUE_TO_LINK\ - and tuple(keys) not in INCLUDED_KEYS: + if ( + get_key_rank(':'.join(keys)) is None + and ':'.join(keys) not in VALUE_TO_LINK + and tuple(keys) not in INCLUDED_KEYS + ): # keep only keys that will be displayed by openstreetmap.py continue label = key['itemLabel']['value'].lower() @@ -160,6 +158,7 @@ def get_tags(): results.setdefault(tag_category, {}).setdefault(tag_type, {}).setdefault(lang, label) return results + def optimize_data_lang(translations): language_to_delete = [] # remove "zh-hk" entry if the value is the same as "zh" @@ -184,12 +183,14 @@ def optimize_data_lang(translations): for language in language_to_delete: del translations[language] + def optimize_tags(data): for v in data.values(): for translations in v.values(): optimize_data_lang(translations) return data + def optimize_keys(data): for k, v in data.items(): if k == '*': @@ -198,9 +199,11 @@ def optimize_keys(data): optimize_keys(v) return data + def get_osm_tags_filename(): return Path(searx_dir) / "data" / "osm_keys_tags.json" + if __name__ == '__main__': set_timeout_for_thread(60) diff --git a/searxng_extra/update/update_pygments.py b/searxng_extra/update/update_pygments.py index 3b1c525a5..68aaad0f7 100755 --- a/searxng_extra/update/update_pygments.py +++ b/searxng_extra/update/update_pygments.py @@ -26,61 +26,63 @@ class LogicodevStyle(Style): # pylint: disable=R0903 background_color = '#282C34' styles = { - Comment: "#556366 italic", - Comment.Multiline: "#556366 italic", - Comment.Preproc: "#BC7A00", - Comment.Single: "#556366 italic", - Comment.Special: "#556366 italic", - Error: "border:#ff0000", - Generic.Deleted: "#A00000", - Generic.Emph: "italic", - Generic.Error: "#FF0000", - Generic.Heading: "#000080 bold", - Generic.Inserted: "#00A000", - Generic.Output: "#888888", - Generic.Prompt: "#000080 bold", - Generic.Strong: "bold", - Generic.Subheading: "#800080 bold", - Generic.Traceback: "#0044DD", - Keyword: "#BE74D5 bold", - Keyword.Constant: "#BE74D5 bold", - Keyword.Declaration: "#BE74D5 bold", - Keyword.Namespace: "#BE74D5 bold", - Keyword.Pseudo: "#BE74D5", - Keyword.Reserved: "#BE74D5 bold", - Keyword.Type: "#D46C72", - Literal.Number: "#D19A66", - Literal.String: "#86C372", - Literal.String.Backtick:"#86C372", - Literal.String.Char: "#86C372", - Literal.String.Doc: "#86C372 italic", - Literal.String.Double: "#86C372", - Literal.String.Escape: "#BB6622 bold", - Literal.String.Heredoc: "#86C372", - Literal.String.Interpol:"#BB6688 bold", - Literal.String.Other: "#BE74D5", - Literal.String.Regex: "#BB6688", - Literal.String.Single: "#86C372", - Literal.String.Symbol: "#DFC06F", - Name.Attribute: "#7D9029", - Name.Builtin: "#BE74D5", - Name.Builtin.Pseudo: "#BE74D5", - Name.Class: "#61AFEF bold", - Name.Constant: "#D19A66", - Name.Decorator: "#AA22FF", - Name.Entity: "#999999 bold", - Name.Exception: "#D2413A bold", - Name.Function: "#61AFEF", - Name.Label: "#A0A000", - Name.Namespace: "#61AFEF bold", - Name.Tag: "#BE74D5 bold", - Name.Variable: "#DFC06F", - Name.Variable.Class: "#DFC06F", - Name.Variable.Global: "#DFC06F", - Name.Variable.Instance: "#DFC06F", - Operator: "#D19A66", - Operator.Word: "#AA22FF bold", - Text.Whitespace: "#D7DAE0", + # fmt: off + Comment: "#556366 italic", + Comment.Multiline: "#556366 italic", + Comment.Preproc: "#BC7A00", + Comment.Single: "#556366 italic", + Comment.Special: "#556366 italic", + Error: "border:#ff0000", + Generic.Deleted: "#A00000", + Generic.Emph: "italic", + Generic.Error: "#FF0000", + Generic.Heading: "#000080 bold", + Generic.Inserted: "#00A000", + Generic.Output: "#888888", + Generic.Prompt: "#000080 bold", + Generic.Strong: "bold", + Generic.Subheading: "#800080 bold", + Generic.Traceback: "#0044DD", + Keyword: "#BE74D5 bold", + Keyword.Constant: "#BE74D5 bold", + Keyword.Declaration: "#BE74D5 bold", + Keyword.Namespace: "#BE74D5 bold", + Keyword.Pseudo: "#BE74D5", + Keyword.Reserved: "#BE74D5 bold", + Keyword.Type: "#D46C72", + Literal.Number: "#D19A66", + Literal.String: "#86C372", + Literal.String.Backtick: "#86C372", + Literal.String.Char: "#86C372", + Literal.String.Doc: "#86C372 italic", + Literal.String.Double: "#86C372", + Literal.String.Escape: "#BB6622 bold", + Literal.String.Heredoc: "#86C372", + Literal.String.Interpol: "#BB6688 bold", + Literal.String.Other: "#BE74D5", + Literal.String.Regex: "#BB6688", + Literal.String.Single: "#86C372", + Literal.String.Symbol: "#DFC06F", + Name.Attribute: "#7D9029", + Name.Builtin: "#BE74D5", + Name.Builtin.Pseudo: "#BE74D5", + Name.Class: "#61AFEF bold", + Name.Constant: "#D19A66", + Name.Decorator: "#AA22FF", + Name.Entity: "#999999 bold", + Name.Exception: "#D2413A bold", + Name.Function: "#61AFEF", + Name.Label: "#A0A000", + Name.Namespace: "#61AFEF bold", + Name.Tag: "#BE74D5 bold", + Name.Variable: "#DFC06F", + Name.Variable.Class: "#DFC06F", + Name.Variable.Global: "#DFC06F", + Name.Variable.Instance: "#DFC06F", + Operator: "#D19A66", + Operator.Word: "#AA22FF bold", + Text.Whitespace: "#D7DAE0", + # fmt: on } @@ -118,7 +120,7 @@ def get_css(cssclass, style): css_text = HtmlFormatter(style=style).get_style_defs(cssclass) result += cssclass + RULE_CODE_LINENOS + '\n\n' for line in css_text.splitlines(): - if ' ' in line and not line.startswith(cssclass): + if ' ' in line and not line.startswith(cssclass): line = cssclass + ' ' + line result += line + '\n' return result diff --git a/tests/__init__.py b/tests/__init__.py index c823cec87..8399f0604 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -50,5 +50,6 @@ class SearxTestCase(aiounittest.AsyncTestCase): def cleanup_patch(): setattr(obj, attr, previous_value) + self.addCleanup(cleanup_patch) setattr(obj, attr, value) diff --git a/tests/robot/__main__.py b/tests/robot/__main__.py index d4d6642a9..758a521ee 100644 --- a/tests/robot/__main__.py +++ b/tests/robot/__main__.py @@ -16,7 +16,7 @@ import tests as searx_tests from tests.robot import test_webapp -class SearxRobotLayer(): +class SearxRobotLayer: """Searx Robot Test Layer""" def setUp(self): @@ -42,9 +42,7 @@ class SearxRobotLayer(): # run the server self.server = subprocess.Popen( # pylint: disable=consider-using-with - [exe, webapp], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT + [exe, webapp], stdout=subprocess.PIPE, stderr=subprocess.STDOUT ) if hasattr(self.server.stdout, 'read1'): print(self.server.stdout.read1(1024).decode()) diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index d1c97ec81..9094e836a 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -2,5 +2,4 @@ import os from os.path import dirname, sep, abspath # In unit tests the user settings from unit/settings/test_settings.yml are used. -os.environ['SEARXNG_SETTINGS_PATH'] = abspath( - dirname(__file__) + sep + 'settings' + sep + 'test_settings.yml') +os.environ['SEARXNG_SETTINGS_PATH'] = abspath(dirname(__file__) + sep + 'settings' + sep + 'test_settings.yml') diff --git a/tests/unit/engines/test_command.py b/tests/unit/engines/test_command.py index d2bb123f7..7876dd55e 100644 --- a/tests/unit/engines/test_command.py +++ b/tests/unit/engines/test_command.py @@ -114,7 +114,6 @@ INFO:werkzeug: * Debugger PIN: 299-578-362''' 'template': 'key-value.html', 'level': 'DEBUG', }, - ], [ { @@ -136,7 +135,6 @@ INFO:werkzeug: * Debugger PIN: 299-578-362''' 'level': 'INFO', }, ], - ] for i in [0, 1]: @@ -171,7 +169,7 @@ commit ''' 'commit': '\w{40}', 'author': '[\w* ]* <\w*@?\w*\.?\w*>', 'date': 'Date: .*', - 'message': '\n\n.*$' + 'message': '\n\n.*$', } expected_results = [ { @@ -195,7 +193,6 @@ commit ''' 'message': '\n\nthird interesting message', 'template': 'key-value.html', }, - ] results = git_log_engine.search(''.encode('utf-8'), {'pageno': 1}) diff --git a/tests/unit/engines/test_xpath.py b/tests/unit/engines/test_xpath.py index 287beeab4..e616ff025 100644 --- a/tests/unit/engines/test_xpath.py +++ b/tests/unit/engines/test_xpath.py @@ -6,7 +6,6 @@ from tests import SearxTestCase class TestXpathEngine(SearxTestCase): - def test_request(self): xpath.search_url = 'https://url.com/{query}' xpath.categories = [] diff --git a/tests/unit/network/test_network.py b/tests/unit/network/test_network.py index 02628760b..d25a0d77b 100644 --- a/tests/unit/network/test_network.py +++ b/tests/unit/network/test_network.py @@ -9,7 +9,6 @@ from tests import SearxTestCase class TestNetwork(SearxTestCase): - def setUp(self): initialize() @@ -51,23 +50,23 @@ class TestNetwork(SearxTestCase): network = Network(proxies='http://localhost:1337') self.assertEqual(next(network._proxies_cycle), (('all://', 'http://localhost:1337'),)) - network = Network(proxies={ - 'https': 'http://localhost:1337', - 'http': 'http://localhost:1338' - }) - self.assertEqual(next(network._proxies_cycle), - (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338'))) - self.assertEqual(next(network._proxies_cycle), - (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338'))) - - network = Network(proxies={ - 'https': ['http://localhost:1337', 'http://localhost:1339'], - 'http': 'http://localhost:1338' - }) - self.assertEqual(next(network._proxies_cycle), - (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338'))) - self.assertEqual(next(network._proxies_cycle), - (('https://', 'http://localhost:1339'), ('http://', 'http://localhost:1338'))) + network = Network(proxies={'https': 'http://localhost:1337', 'http': 'http://localhost:1338'}) + self.assertEqual( + next(network._proxies_cycle), (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338')) + ) + self.assertEqual( + next(network._proxies_cycle), (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338')) + ) + + network = Network( + proxies={'https': ['http://localhost:1337', 'http://localhost:1339'], 'http': 'http://localhost:1338'} + ) + self.assertEqual( + next(network._proxies_cycle), (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338')) + ) + self.assertEqual( + next(network._proxies_cycle), (('https://', 'http://localhost:1339'), ('http://', 'http://localhost:1338')) + ) with self.assertRaises(ValueError): Network(proxies=1) @@ -134,6 +133,7 @@ class TestNetworkRequestRetries(SearxTestCase): first = False return httpx.Response(status_code=403, text=TestNetworkRequestRetries.TEXT) return httpx.Response(status_code=200, text=TestNetworkRequestRetries.TEXT) + return get_response async def test_retries_ok(self): @@ -206,6 +206,7 @@ class TestNetworkStreamRetries(SearxTestCase): first = False raise httpx.RequestError('fake exception', request=None) return httpx.Response(status_code=200, text=TestNetworkStreamRetries.TEXT) + return stream async def test_retries_ok(self): diff --git a/tests/unit/test_answerers.py b/tests/unit/test_answerers.py index 1119b697d..73148f327 100644 --- a/tests/unit/test_answerers.py +++ b/tests/unit/test_answerers.py @@ -7,7 +7,6 @@ from tests import SearxTestCase class AnswererTest(SearxTestCase): - def test_unicode_input(self): query = Mock() unicode_payload = 'árvíztűrő tükörfúrógép' diff --git a/tests/unit/test_engines_init.py b/tests/unit/test_engines_init.py index dffeaf8e8..c72f5c8e5 100644 --- a/tests/unit/test_engines_init.py +++ b/tests/unit/test_engines_init.py @@ -3,15 +3,16 @@ from tests import SearxTestCase class TestEnginesInit(SearxTestCase): - @classmethod def tearDownClass(cls): settings['outgoing']['using_tor_proxy'] = False settings['outgoing']['extra_proxy_timeout'] = 0 def test_initialize_engines_default(self): - engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1'}, - {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2'}] + engine_list = [ + {'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1'}, + {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2'}, + ] engines.load_engines(engine_list) self.assertEqual(len(engines.engines), 2) @@ -20,8 +21,10 @@ class TestEnginesInit(SearxTestCase): def test_initialize_engines_exclude_onions(self): settings['outgoing']['using_tor_proxy'] = False - engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general'}, - {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}] + engine_list = [ + {'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general'}, + {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}, + ] engines.load_engines(engine_list) self.assertEqual(len(engines.engines), 1) @@ -31,9 +34,17 @@ class TestEnginesInit(SearxTestCase): def test_initialize_engines_include_onions(self): settings['outgoing']['using_tor_proxy'] = True settings['outgoing']['extra_proxy_timeout'] = 100.0 - engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general', - 'timeout': 20.0, 'onion_url': 'http://engine1.onion'}, - {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}] + engine_list = [ + { + 'engine': 'dummy', + 'name': 'engine1', + 'shortcut': 'e1', + 'categories': 'general', + 'timeout': 20.0, + 'onion_url': 'http://engine1.onion', + }, + {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}, + ] engines.load_engines(engine_list) self.assertEqual(len(engines.engines), 2) diff --git a/tests/unit/test_external_bangs.py b/tests/unit/test_external_bangs.py index 68b3b5a78..698ce36c6 100644 --- a/tests/unit/test_external_bangs.py +++ b/tests/unit/test_external_bangs.py @@ -18,9 +18,9 @@ TEST_DB = { 's': { 'on': 'season' + chr(2) + chr(1) + '0', 'capes': 'seascape' + chr(2) + chr(1) + '0', - } + }, }, - 'error': ['error in external_bangs.json'] + 'error': ['error in external_bangs.json'], } } @@ -57,7 +57,6 @@ class TestGetNode(SearxTestCase): class TestResolveBangDefinition(SearxTestCase): - def test_https(self): url, rank = resolve_bang_definition('//example.com/' + chr(2) + chr(1) + '42', 'query') self.assertEqual(url, 'https://example.com/query') @@ -70,7 +69,6 @@ class TestResolveBangDefinition(SearxTestCase): class TestGetBangDefinitionAndAutocomplete(SearxTestCase): - def test_found(self): bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('exam', external_bangs_db=TEST_DB) self.assertEqual(bang_definition, TEST_DB['trie']['exam']['*']) @@ -103,7 +101,6 @@ class TestGetBangDefinitionAndAutocomplete(SearxTestCase): class TestExternalBangJson(SearxTestCase): - def test_no_external_bang_query(self): result = get_bang_url(SearchQuery('test', engineref_list=[EngineRef('wikipedia', 'general')])) self.assertEqual(result, None) diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 5bad4e5c4..28df835e5 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -6,18 +6,16 @@ from tests import SearxTestCase def get_search_mock(query, **kwargs): - return Mock(search_query=Mock(query=query, **kwargs), - result_container=Mock(answers=dict())) + return Mock(search_query=Mock(query=query, **kwargs), result_container=Mock(answers=dict())) -class PluginMock(): +class PluginMock: default_on = False name = 'Default plugin' description = 'Default plugin description' class PluginStoreTest(SearxTestCase): - def test_PluginStore_init(self): store = plugins.PluginStore() self.assertTrue(isinstance(store.plugins, list) and len(store.plugins) == 0) @@ -44,7 +42,6 @@ class PluginStoreTest(SearxTestCase): class SelfIPTest(SearxTestCase): - def test_PluginStore_init(self): plugin = plugins.load_and_initialize_plugin('searx.plugins.self_info', False, (None, {})) store = plugins.PluginStore() @@ -93,7 +90,6 @@ class SelfIPTest(SearxTestCase): class HashPluginTest(SearxTestCase): - def test_PluginStore_init(self): store = plugins.PluginStore() plugin = plugins.load_and_initialize_plugin('searx.plugins.hash_plugin', False, (None, {})) @@ -107,8 +103,9 @@ class HashPluginTest(SearxTestCase): # MD5 search = get_search_mock(query='md5 test', pageno=1) store.call(store.plugins, 'post_search', request, search) - self.assertTrue('md5 hash digest: 098f6bcd4621d373cade4e832627b4f6' - in search.result_container.answers['hash']['answer']) + self.assertTrue( + 'md5 hash digest: 098f6bcd4621d373cade4e832627b4f6' in search.result_container.answers['hash']['answer'] + ) search = get_search_mock(query=b'md5 test', pageno=2) store.call(store.plugins, 'post_search', request, search) @@ -117,31 +114,41 @@ class HashPluginTest(SearxTestCase): # SHA1 search = get_search_mock(query='sha1 test', pageno=1) store.call(store.plugins, 'post_search', request, search) - self.assertTrue('sha1 hash digest: a94a8fe5ccb19ba61c4c0873d391e9879' - '82fbbd3' in search.result_container.answers['hash']['answer']) + self.assertTrue( + 'sha1 hash digest: a94a8fe5ccb19ba61c4c0873d391e9879' + '82fbbd3' in search.result_container.answers['hash']['answer'] + ) # SHA224 search = get_search_mock(query='sha224 test', pageno=1) store.call(store.plugins, 'post_search', request, search) - self.assertTrue('sha224 hash digest: 90a3ed9e32b2aaf4c61c410eb9254261' - '19e1a9dc53d4286ade99a809' in search.result_container.answers['hash']['answer']) + self.assertTrue( + 'sha224 hash digest: 90a3ed9e32b2aaf4c61c410eb9254261' + '19e1a9dc53d4286ade99a809' in search.result_container.answers['hash']['answer'] + ) # SHA256 search = get_search_mock(query='sha256 test', pageno=1) store.call(store.plugins, 'post_search', request, search) - self.assertTrue('sha256 hash digest: 9f86d081884c7d659a2feaa0c55ad015a' - '3bf4f1b2b0b822cd15d6c15b0f00a08' in search.result_container.answers['hash']['answer']) + self.assertTrue( + 'sha256 hash digest: 9f86d081884c7d659a2feaa0c55ad015a' + '3bf4f1b2b0b822cd15d6c15b0f00a08' in search.result_container.answers['hash']['answer'] + ) # SHA384 search = get_search_mock(query='sha384 test', pageno=1) store.call(store.plugins, 'post_search', request, search) - self.assertTrue('sha384 hash digest: 768412320f7b0aa5812fce428dc4706b3c' - 'ae50e02a64caa16a782249bfe8efc4b7ef1ccb126255d196047dfedf1' - '7a0a9' in search.result_container.answers['hash']['answer']) + self.assertTrue( + 'sha384 hash digest: 768412320f7b0aa5812fce428dc4706b3c' + 'ae50e02a64caa16a782249bfe8efc4b7ef1ccb126255d196047dfedf1' + '7a0a9' in search.result_container.answers['hash']['answer'] + ) # SHA512 search = get_search_mock(query='sha512 test', pageno=1) store.call(store.plugins, 'post_search', request, search) - self.assertTrue('sha512 hash digest: ee26b0dd4af7e749aa1a8ee3c10ae9923f6' - '18980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5' - 'fa9ad8e6f57f50028a8ff' in search.result_container.answers['hash']['answer']) + self.assertTrue( + 'sha512 hash digest: ee26b0dd4af7e749aa1a8ee3c10ae9923f6' + '18980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5' + 'fa9ad8e6f57f50028a8ff' in search.result_container.answers['hash']['answer'] + ) diff --git a/tests/unit/test_preferences.py b/tests/unit/test_preferences.py index 903b9b54d..1ffed5c1a 100644 --- a/tests/unit/test_preferences.py +++ b/tests/unit/test_preferences.py @@ -1,10 +1,16 @@ -from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, SearchLanguageSetting, - MultipleChoiceSetting, PluginsSetting, ValidationException) +from searx.preferences import ( + EnumStringSetting, + MapSetting, + MissingArgumentException, + SearchLanguageSetting, + MultipleChoiceSetting, + PluginsSetting, + ValidationException, +) from tests import SearxTestCase class PluginStub: - def __init__(self, plugin_id, default_on): self.id = plugin_id self.default_on = default_on @@ -121,20 +127,23 @@ class TestSettings(SearxTestCase): class TestPreferences(SearxTestCase): - def test_encode(self): from searx.preferences import Preferences + pref = Preferences(['oscar'], ['general'], {}, []) - url_params = 'eJx1VMmO2zAM_Zr6YrTocujJh6JF0QEKzKAz7VVgJNohLIseUU7ivy-VcWy5yyGOTVGP73GLKJNPYjiYgGeT4NB8BS9YOSY' \ - 'TUdifMDYM-vmGY1d5CN0EHTYOK88W_PXNkcDBozOjnzoK0vyi4bWnHs2RU4-zvHr_-RF9a-5Cy3GARByy7X7EkKMoBeMp9CuPQ-SzYMx' \ - '8Vr9P1qKI-XJ_p1fOkRJWNCgVM0a-zAttmBJbHkaPSZlNts-_jiuBFgUh2mPztkpHHLBhsRArDHvm356eHh5vATS0Mqagr0ZsZO_V8hT' \ - 'B9srt54_v6jewJugqL4Nn_hYSdhxnI-jRpi05GDQCStOT7UGVmJY8ZnltRKyF23SGiLWjqNcygKGkpyeGZIywJfD1gI5AjRTAmBM55Aw' \ - 'Q0Tn626lj7jzWo4e5hnEsIlprX6dTgdBRpyRBFKTDgBF8AasVyT4gvSTEoXRpXWRyG3CYQYld65I_V6lboILTMAlZY65_ejRDcHgp0Tv' \ - 'EPtGAsqTiBf3m76g7pP9B84mwjPvuUtASRDei1nDF2ix_JXW91UJkXrPh6RAhznVmKyQl7dwJdMJ6bz1QOmgzYlrEzHDMcEUuo44AgS1' \ - 'CvkjaOb2Q2AyY5oGDTs_OLXE_c2I5cg9hk3kEJZ0fu4SuktsIA2RhuJwP86AdripThCBeO9uVUejyPGmFSxPrqEYcuWi25zOEXV9tc1m' \ - '_KP1nafYtdfv6Q9hKfWmGm9A_3G635UwiVndLGdFCiLWkONk0xUxGLGGweGWTa2nZYZ0fS1YKlE3Uuw8fPl52E5U8HJYbC7sbjXUsrnT' \ + url_params = ( + 'eJx1VMmO2zAM_Zr6YrTocujJh6JF0QEKzKAz7VVgJNohLIseUU7ivy-VcWy5yyGOTVGP73GLKJNPYjiYgGeT4NB8BS9YOSY' + 'TUdifMDYM-vmGY1d5CN0EHTYOK88W_PXNkcDBozOjnzoK0vyi4bWnHs2RU4-zvHr_-RF9a-5Cy3GARByy7X7EkKMoBeMp9CuPQ-SzYMx' + '8Vr9P1qKI-XJ_p1fOkRJWNCgVM0a-zAttmBJbHkaPSZlNts-_jiuBFgUh2mPztkpHHLBhsRArDHvm356eHh5vATS0Mqagr0ZsZO_V8hT' + 'B9srt54_v6jewJugqL4Nn_hYSdhxnI-jRpi05GDQCStOT7UGVmJY8ZnltRKyF23SGiLWjqNcygKGkpyeGZIywJfD1gI5AjRTAmBM55Aw' + 'Q0Tn626lj7jzWo4e5hnEsIlprX6dTgdBRpyRBFKTDgBF8AasVyT4gvSTEoXRpXWRyG3CYQYld65I_V6lboILTMAlZY65_ejRDcHgp0Tv' + 'EPtGAsqTiBf3m76g7pP9B84mwjPvuUtASRDei1nDF2ix_JXW91UJkXrPh6RAhznVmKyQl7dwJdMJ6bz1QOmgzYlrEzHDMcEUuo44AgS1' + 'CvkjaOb2Q2AyY5oGDTs_OLXE_c2I5cg9hk3kEJZ0fu4SuktsIA2RhuJwP86AdripThCBeO9uVUejyPGmFSxPrqEYcuWi25zOEXV9tc1m' + '_KP1nafYtdfv6Q9hKfWmGm9A_3G635UwiVndLGdFCiLWkONk0xUxGLGGweGWTa2nZYZ0fS1YKlE3Uuw8fPl52E5U8HJYbC7sbjXUsrnT' 'XHXRbELfO-1fGSqskiGnMK7B0dV3t8Lq08pbdtYpuVdoKWA2Yjuyah_vHp2rZWjo0zXL8Gw8DTj0=' + ) pref.parse_encoded_data(url_params) self.assertEqual( vars(pref.key_value_settings['categories']), - {'value': ['general'], 'locked': False, 'choices': ['general', 'none']}) + {'value': ['general'], 'locked': False, 'choices': ['general', 'none']}, + ) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index edb0a18f7..9a53f8f47 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -17,7 +17,6 @@ TEST_ENGINES = [ class TestQuery(SearxTestCase): - def test_simple_query(self): query_text = 'the query' query = RawTextQuery(query_text, []) @@ -58,7 +57,6 @@ class TestQuery(SearxTestCase): class TestLanguageParser(SearxTestCase): - def test_language_code(self): language = 'es-ES' query_text = 'the query' @@ -136,7 +134,6 @@ class TestLanguageParser(SearxTestCase): class TestTimeoutParser(SearxTestCase): - def test_timeout_below100(self): query_text = '<3 the query' query = RawTextQuery(query_text, []) @@ -189,7 +186,6 @@ class TestTimeoutParser(SearxTestCase): class TestExternalBangParser(SearxTestCase): - def test_external_bang(self): query_text = '!!ddg the query' query = RawTextQuery(query_text, []) diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index 07d170130..113e9cd3c 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -4,21 +4,21 @@ from searx.results import ResultContainer from tests import SearxTestCase -def fake_result(url='https://aa.bb/cc?dd=ee#ff', - title='aaa', - content='bbb', - engine='wikipedia', **kwargs): - result = {'url': url, - 'title': title, - 'content': content, - 'engine': engine} +def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs): + result = { + # fmt: off + 'url': url, + 'title': title, + 'content': content, + 'engine': engine, + # fmt: on + } result.update(kwargs) return result # TODO class ResultContainerTestCase(SearxTestCase): - def test_empty(self): c = ResultContainer() self.assertEqual(c.get_ordered_results(), []) diff --git a/tests/unit/test_search.py b/tests/unit/test_search.py index c7f15a681..fa16947be 100644 --- a/tests/unit/test_search.py +++ b/tests/unit/test_search.py @@ -22,11 +22,11 @@ TEST_ENGINES = [ class SearchQueryTestCase(SearxTestCase): - def test_repr(self): s = SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g') - self.assertEqual(repr(s), - "SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g')") # noqa + self.assertEqual( + repr(s), "SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g')" + ) # noqa def test_eq(self): s = SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, None, None, None) @@ -36,64 +36,80 @@ class SearchQueryTestCase(SearxTestCase): class SearchTestCase(SearxTestCase): - @classmethod def setUpClass(cls): searx.search.initialize(TEST_ENGINES) def test_timeout_simple(self): settings['outgoing']['max_request_timeout'] = None - search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, None) + search_query = SearchQuery( + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, None + ) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 3.0) def test_timeout_query_above_default_nomax(self): settings['outgoing']['max_request_timeout'] = None - search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, 5.0) + search_query = SearchQuery( + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 5.0 + ) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 3.0) def test_timeout_query_below_default_nomax(self): settings['outgoing']['max_request_timeout'] = None - search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, 1.0) + search_query = SearchQuery( + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 1.0 + ) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 1.0) def test_timeout_query_below_max(self): settings['outgoing']['max_request_timeout'] = 10.0 - search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, 5.0) + search_query = SearchQuery( + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 5.0 + ) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 5.0) def test_timeout_query_above_max(self): settings['outgoing']['max_request_timeout'] = 10.0 - search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, 15.0) + search_query = SearchQuery( + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 15.0 + ) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 10.0) def test_external_bang(self): - search_query = SearchQuery('yes yes', - [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, None, - external_bang="yt") + search_query = SearchQuery( + 'yes yes', + [EngineRef(PUBLIC_ENGINE_NAME, 'general')], + 'en-US', + SAFESEARCH, + PAGENO, + None, + None, + external_bang="yt", + ) search = searx.search.Search(search_query) results = search.search() # For checking if the user redirected with the youtube external bang self.assertTrue(results.redirect_url is not None) - search_query = SearchQuery('youtube never gonna give you up', - [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, None) + search_query = SearchQuery( + 'youtube never gonna give you up', + [EngineRef(PUBLIC_ENGINE_NAME, 'general')], + 'en-US', + SAFESEARCH, + PAGENO, + None, + None, + ) search = searx.search.Search(search_query) results = search.search() diff --git a/tests/unit/test_settings_loader.py b/tests/unit/test_settings_loader.py index 9e04df653..13a2d4f37 100644 --- a/tests/unit/test_settings_loader.py +++ b/tests/unit/test_settings_loader.py @@ -12,7 +12,6 @@ test_dir = abspath(dirname(__file__)) class TestLoad(SearxTestCase): - def test_load_zero(self): with self.assertRaises(SearxSettingsException): settings_loader.load_yaml('/dev/zero') @@ -31,7 +30,6 @@ class TestLoad(SearxTestCase): class TestDefaultSettings(SearxTestCase): - def test_load(self): settings, msg = settings_loader.load_settings(load_user_setttings=False) self.assertTrue(msg.startswith('load the default settings from')) @@ -46,7 +44,6 @@ class TestDefaultSettings(SearxTestCase): class TestUserSettings(SearxTestCase): - def test_is_use_default_settings(self): self.assertFalse(settings_loader.is_use_default_settings({})) self.assertTrue(settings_loader.is_use_default_settings({'use_default_settings': True})) @@ -57,23 +54,24 @@ class TestUserSettings(SearxTestCase): self.assertFalse(settings_loader.is_use_default_settings({'use_default_settings': 0})) def test_user_settings_not_found(self): - with patch.dict(settings_loader.environ, - {'SEARXNG_SETTINGS_PATH': '/dev/null'}): + with patch.dict(settings_loader.environ, {'SEARXNG_SETTINGS_PATH': '/dev/null'}): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('load the default settings from')) self.assertEqual(settings['server']['secret_key'], "ultrasecretkey") def test_user_settings(self): - with patch.dict(settings_loader.environ, - {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_simple.yml')}): + with patch.dict( + settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_simple.yml')} + ): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('merge the default settings')) self.assertEqual(settings['server']['secret_key'], "user_secret_key") self.assertEqual(settings['server']['default_http_headers']['Custom-Header'], "Custom-Value") def test_user_settings_remove(self): - with patch.dict(settings_loader.environ, - {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove.yml')}): + with patch.dict( + settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove.yml')} + ): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('merge the default settings')) self.assertEqual(settings['server']['secret_key'], "user_secret_key") @@ -84,8 +82,9 @@ class TestUserSettings(SearxTestCase): self.assertIn('wikipedia', engine_names) def test_user_settings_remove2(self): - with patch.dict(settings_loader.environ, - {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove2.yml')}): + with patch.dict( + settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove2.yml')} + ): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('merge the default settings')) self.assertEqual(settings['server']['secret_key'], "user_secret_key") @@ -101,8 +100,9 @@ class TestUserSettings(SearxTestCase): self.assertEqual(newengine[0]['engine'], 'dummy') def test_user_settings_keep_only(self): - with patch.dict(settings_loader.environ, - {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_keep_only.yml')}): + with patch.dict( + settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_keep_only.yml')} + ): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('merge the default settings')) engine_names = [engine['name'] for engine in settings['engines']] @@ -111,8 +111,9 @@ class TestUserSettings(SearxTestCase): self.assertEqual(len(settings['engines'][2]), 1) def test_custom_settings(self): - with patch.dict(settings_loader.environ, - {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings.yml')}): + with patch.dict( + settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings.yml')} + ): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('load the user settings from')) self.assertEqual(settings['server']['port'], 9000) diff --git a/tests/unit/test_standalone_searx.py b/tests/unit/test_standalone_searx.py index c81598160..a3d8b4d4f 100644 --- a/tests/unit/test_standalone_searx.py +++ b/tests/unit/test_standalone_searx.py @@ -23,8 +23,7 @@ class StandaloneSearx(SearxTestCase): def test_parse_argument_no_args(self): """Test parse argument without args.""" - with patch.object(sys, 'argv', ['standalone_searx']), \ - self.assertRaises(SystemExit): + with patch.object(sys, 'argv', ['standalone_searx']), self.assertRaises(SystemExit): sys.stderr = io.StringIO() sas.parse_argument() sys.stdout = sys.__stderr__ @@ -33,8 +32,13 @@ class StandaloneSearx(SearxTestCase): """Test parse argument with basic args.""" query = 'red box' exp_dict = { - 'query': query, 'category': 'general', 'lang': 'all', 'pageno': 1, - 'safesearch': '0', 'timerange': None} + 'query': query, + 'category': 'general', + 'lang': 'all', + 'pageno': 1, + 'safesearch': '0', + 'timerange': None, + } args = ['standalone_searx', query] with patch.object(sys, 'argv', args): res = sas.parse_argument() @@ -45,16 +49,16 @@ class StandaloneSearx(SearxTestCase): def test_to_dict(self): """test to_dict.""" self.assertEqual( - sas.to_dict( - sas.get_search_query(sas.parse_argument(['red box']))), + sas.to_dict(sas.get_search_query(sas.parse_argument(['red box']))), { - 'search': { - 'q': 'red box', 'pageno': 1, 'lang': 'all', - 'safesearch': 0, 'timerange': None - }, - 'results': [], 'infoboxes': [], 'suggestions': [], - 'answers': [], 'paging': False, 'results_number': 0 - } + 'search': {'q': 'red box', 'pageno': 1, 'lang': 'all', 'safesearch': 0, 'timerange': None}, + 'results': [], + 'infoboxes': [], + 'suggestions': [], + 'answers': [], + 'paging': False, + 'results_number': 0, + }, ) def test_to_dict_with_mock(self): @@ -77,30 +81,28 @@ class StandaloneSearx(SearxTestCase): 'safesearch': m_sq.safesearch, 'timerange': m_sq.time_range, }, - 'suggestions': [] - } + 'suggestions': [], + }, ) def test_get_search_query(self): """test get_search_query.""" - args = sas.parse_argument(['rain', ]) + args = sas.parse_argument( + [ + 'rain', + ] + ) search_q = sas.get_search_query(args) self.assertTrue(search_q) - self.assertEqual(search_q, SearchQuery('rain', [EngineRef('engine1', 'general')], - 'all', 0, 1, None, None, None)) + self.assertEqual( + search_q, SearchQuery('rain', [EngineRef('engine1', 'general')], 'all', 0, 1, None, None, None) + ) def test_no_parsed_url(self): """test no_parsed_url func""" - self.assertEqual( - sas.no_parsed_url([{'parsed_url': 'http://example.com'}]), - [{}] - ) + self.assertEqual(sas.no_parsed_url([{'parsed_url': 'http://example.com'}]), [{}]) - @params( - (datetime.datetime(2020, 1, 1), '2020-01-01T00:00:00'), - ('a'.encode('utf8'), 'a'), - (set([1]), [1]) - ) + @params((datetime.datetime(2020, 1, 1), '2020-01-01T00:00:00'), ('a'.encode('utf8'), 'a'), (set([1]), [1])) def test_json_serial(self, arg, exp_res): """test json_serial func""" self.assertEqual(sas.json_serial(arg), exp_res) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index bea28c0cc..3b79797e3 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -9,7 +9,6 @@ from tests import SearxTestCase class TestUtils(SearxTestCase): - def test_gen_useragent(self): self.assertIsInstance(utils.gen_useragent(), str) self.assertIsNotNone(utils.gen_useragent()) @@ -73,6 +72,7 @@ class TestUtils(SearxTestCase): def test_extract_url(self): def f(html_str, search_url): return utils.extract_url(html.fromstring(html_str), search_url) + self.assertEqual(f('<span id="42">https://example.com</span>', 'http://example.com/'), 'https://example.com/') self.assertEqual(f('https://example.com', 'http://example.com/'), 'https://example.com/') self.assertEqual(f('//example.com', 'http://example.com/'), 'http://example.com/') @@ -122,14 +122,11 @@ class TestUtils(SearxTestCase): def test_ecma_unscape(self): self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space') - self.assertEqual(utils.ecma_unescape('text using %xx: %F3'), - 'text using %xx: ó') - self.assertEqual(utils.ecma_unescape('text using %u: %u5409, %u4E16%u754c'), - 'text using %u: 吉, 世界') + self.assertEqual(utils.ecma_unescape('text using %xx: %F3'), 'text using %xx: ó') + self.assertEqual(utils.ecma_unescape('text using %u: %u5409, %u4E16%u754c'), 'text using %u: 吉, 世界') class TestHTMLTextExtractor(SearxTestCase): - def setUp(self): self.html_text_extractor = utils.HTMLTextExtractor() diff --git a/tests/unit/test_webadapter.py b/tests/unit/test_webadapter.py index 9d8ff5f28..975d846cc 100644 --- a/tests/unit/test_webadapter.py +++ b/tests/unit/test_webadapter.py @@ -25,7 +25,6 @@ SEARCHQUERY = [EngineRef(PRIVATE_ENGINE_NAME, 'general')] class ValidateQueryCase(SearxTestCase): - @classmethod def setUpClass(cls): searx.search.initialize(TEST_ENGINES) diff --git a/tests/unit/test_webapp.py b/tests/unit/test_webapp.py index 43b631cb8..920a346a5 100644 --- a/tests/unit/test_webapp.py +++ b/tests/unit/test_webapp.py @@ -10,11 +10,11 @@ from tests import SearxTestCase class ViewsTestCase(SearxTestCase): - def setUp(self): # skip init function (no external HTTP request) def dummy(*args, **kwargs): pass + self.setattr4test(searx.search.processors, 'initialize_processor', dummy) from searx import webapp # pylint disable=import-outside-toplevel @@ -30,43 +30,39 @@ class ViewsTestCase(SearxTestCase): 'url': 'http://first.test.xyz', 'engines': ['youtube', 'startpage'], 'engine': 'startpage', - 'parsed_url': ParseResult(scheme='http', netloc='first.test.xyz', path='/', params='', query='', fragment=''), # noqa - }, { + 'parsed_url': ParseResult( + scheme='http', netloc='first.test.xyz', path='/', params='', query='', fragment='' + ), # noqa + }, + { 'content': 'second test content', 'title': 'Second Test', 'url': 'http://second.test.xyz', 'engines': ['youtube', 'startpage'], 'engine': 'youtube', - 'parsed_url': ParseResult(scheme='http', netloc='second.test.xyz', path='/', params='', query='', fragment=''), # noqa + 'parsed_url': ParseResult( + scheme='http', netloc='second.test.xyz', path='/', params='', query='', fragment='' + ), # noqa }, ] - timings = [ - { - 'engine': 'startpage', - 'total': 0.8, - 'load': 0.7 - }, - { - 'engine': 'youtube', - 'total': 0.9, - 'load': 0.6 - } - ] + timings = [{'engine': 'startpage', 'total': 0.8, 'load': 0.7}, {'engine': 'youtube', 'total': 0.9, 'load': 0.6}] def search_mock(search_self, *args): - search_self.result_container = Mock(get_ordered_results=lambda: test_results, - answers=dict(), - corrections=set(), - suggestions=set(), - infoboxes=[], - unresponsive_engines=set(), - results=test_results, - results_number=lambda: 3, - results_length=lambda: len(test_results), - get_timings=lambda: timings, - redirect_url=None, - engine_data={}) + search_self.result_container = Mock( + get_ordered_results=lambda: test_results, + answers=dict(), + corrections=set(), + suggestions=set(), + infoboxes=[], + unresponsive_engines=set(), + results=test_results, + results_number=lambda: 3, + results_length=lambda: len(test_results), + get_timings=lambda: timings, + redirect_url=None, + engine_data={}, + ) self.setattr4test(Search, 'search', search_mock) @@ -82,9 +78,12 @@ class ViewsTestCase(SearxTestCase): def test_index_empty(self): result = self.app.post('/') self.assertEqual(result.status_code, 200) - self.assertIn(b'<div class="text-hide center-block" id="main-logo">' - + b'<img class="center-block img-responsive" src="/static/themes/oscar/img/searxng.svg"' - + b' alt="searx logo" />SearXNG</div>', result.data) + self.assertIn( + b'<div class="text-hide center-block" id="main-logo">' + + b'<img class="center-block img-responsive" src="/static/themes/oscar/img/searxng.svg"' + + b' alt="searx logo" />SearXNG</div>', + result.data, + ) def test_index_html_post(self): result = self.app.post('/', data={'q': 'test'}) @@ -120,11 +119,10 @@ class ViewsTestCase(SearxTestCase): b'<h4 class="result_header" id="result-2"><img width="32" height="32" class="favicon"' + b' src="/static/themes/oscar/img/icons/youtube.png" alt="youtube" /><a href="http://second.test.xyz"' + b' rel="noreferrer" aria-labelledby="result-2">Second <span class="highlight">Test</span></a></h4>', # noqa - result.data + result.data, ) self.assertIn( - b'<p class="result-content">second <span class="highlight">test</span> content</p>', # noqa - result.data + b'<p class="result-content">second <span class="highlight">test</span> content</p>', result.data # noqa ) def test_index_json(self): @@ -151,7 +149,7 @@ class ViewsTestCase(SearxTestCase): b'title,url,content,host,engine,score,type\r\n' b'First Test,http://first.test.xyz,first test content,first.test.xyz,startpage,,result\r\n' # noqa b'Second Test,http://second.test.xyz,second test content,second.test.xyz,youtube,,result\r\n', # noqa - result.data + result.data, ) def test_index_rss(self): @@ -161,30 +159,15 @@ class ViewsTestCase(SearxTestCase): def test_search_rss(self): result = self.app.post('/search', data={'q': 'test', 'format': 'rss'}) - self.assertIn( - b'<description>Search results for "test" - searx</description>', - result.data - ) + self.assertIn(b'<description>Search results for "test" - searx</description>', result.data) - self.assertIn( - b'<opensearch:totalResults>3</opensearch:totalResults>', - result.data - ) + self.assertIn(b'<opensearch:totalResults>3</opensearch:totalResults>', result.data) - self.assertIn( - b'<title>First Test</title>', - result.data - ) + self.assertIn(b'<title>First Test</title>', result.data) - self.assertIn( - b'<link>http://first.test.xyz</link>', - result.data - ) + self.assertIn(b'<link>http://first.test.xyz</link>', result.data) - self.assertIn( - b'<description>first test content</description>', - result.data - ) + self.assertIn(b'<description>first test content</description>', result.data) def test_about(self): result = self.app.get('/about') @@ -199,18 +182,9 @@ class ViewsTestCase(SearxTestCase): def test_preferences(self): result = self.app.get('/preferences') self.assertEqual(result.status_code, 200) - self.assertIn( - b'<form method="post" action="/preferences" id="search_form">', - result.data - ) - self.assertIn( - b'<label class="col-sm-3 col-md-2" for="categories">Default categories</label>', - result.data - ) - self.assertIn( - b'<label class="col-sm-3 col-md-2" for="locale">Interface language</label>', - result.data - ) + self.assertIn(b'<form method="post" action="/preferences" id="search_form">', result.data) + self.assertIn(b'<label class="col-sm-3 col-md-2" for="categories">Default categories</label>', result.data) + self.assertIn(b'<label class="col-sm-3 col-md-2" for="locale">Interface language</label>', result.data) def test_browser_locale(self): result = self.app.get('/preferences', headers={'Accept-Language': 'zh-tw;q=0.8'}) @@ -218,30 +192,26 @@ class ViewsTestCase(SearxTestCase): self.assertIn( b'<option value="zh-Hant-TW" selected="selected">', result.data, - 'Interface locale ignored browser preference.' + 'Interface locale ignored browser preference.', ) self.assertIn( b'<option value="zh-Hant-TW" selected="selected">', result.data, - 'Search language ignored browser preference.' + 'Search language ignored browser preference.', ) def test_brower_empty_locale(self): result = self.app.get('/preferences', headers={'Accept-Language': ''}) self.assertEqual(result.status_code, 200) self.assertIn( - b'<option value="en" selected="selected">', - result.data, - 'Interface locale ignored browser preference.' + b'<option value="en" selected="selected">', result.data, 'Interface locale ignored browser preference.' ) def test_locale_occitan(self): result = self.app.get('/preferences?locale=oc') self.assertEqual(result.status_code, 200) self.assertIn( - b'<option value="oc" selected="selected">', - result.data, - 'Interface locale ignored browser preference.' + b'<option value="oc" selected="selected">', result.data, 'Interface locale ignored browser preference.' ) def test_stats(self): diff --git a/tests/unit/test_webutils.py b/tests/unit/test_webutils.py index 6da39a071..2b7c6fe5a 100644 --- a/tests/unit/test_webutils.py +++ b/tests/unit/test_webutils.py @@ -5,12 +5,13 @@ from tests import SearxTestCase class TestWebUtils(SearxTestCase): - def test_prettify_url(self): - data = (('https://searx.me/', 'https://searx.me/'), - ('https://searx.me/ű', 'https://searx.me/ű'), - ('https://searx.me/' + (100 * 'a'), 'https://searx.me/[...]aaaaaaaaaaaaaaaaa'), - ('https://searx.me/' + (100 * 'ű'), 'https://searx.me/[...]űűűűűűűűűűűűűűűűű')) + data = ( + ('https://searx.me/', 'https://searx.me/'), + ('https://searx.me/ű', 'https://searx.me/ű'), + ('https://searx.me/' + (100 * 'a'), 'https://searx.me/[...]aaaaaaaaaaaaaaaaa'), + ('https://searx.me/' + (100 * 'ű'), 'https://searx.me/[...]űűűűűűűűűűűűűűűűű'), + ) for test_url, expected in data: self.assertEqual(webutils.prettify_url(test_url, max_length=32), expected) @@ -21,10 +22,7 @@ class TestWebUtils(SearxTestCase): self.assertEqual(webutils.highlight_content('', None), None) self.assertEqual(webutils.highlight_content(False, None), None) - contents = [ - '<html></html>' - 'not<' - ] + contents = ['<html></html>not<'] for content in contents: self.assertEqual(webutils.highlight_content(content, None), content) @@ -35,30 +33,35 @@ class TestWebUtils(SearxTestCase): self.assertEqual(webutils.highlight_content(content, query), content) data = ( - ('" test "', - 'a test string', - 'a <span class="highlight">test</span> string'), - ('"a"', - 'this is a test string', - 'this is<span class="highlight"> a </span>test string'), - ('a test', - 'this is a test string that matches entire query', - 'this is <span class="highlight">a test</span> string that matches entire query'), - ('this a test', - 'this is a string to test.', - ('<span class="highlight">this</span> is<span class="highlight"> a </span>' - 'string to <span class="highlight">test</span>.')), - ('match this "exact phrase"', - 'this string contains the exact phrase we want to match', - ('<span class="highlight">this</span> string contains the <span class="highlight">exact</span>' - ' <span class="highlight">phrase</span> we want to <span class="highlight">match</span>')) + ('" test "', 'a test string', 'a <span class="highlight">test</span> string'), + ('"a"', 'this is a test string', 'this is<span class="highlight"> a </span>test string'), + ( + 'a test', + 'this is a test string that matches entire query', + 'this is <span class="highlight">a test</span> string that matches entire query', + ), + ( + 'this a test', + 'this is a string to test.', + ( + '<span class="highlight">this</span> is<span class="highlight"> a </span>' + 'string to <span class="highlight">test</span>.' + ), + ), + ( + 'match this "exact phrase"', + 'this string contains the exact phrase we want to match', + ( + '<span class="highlight">this</span> string contains the <span class="highlight">exact</span>' + ' <span class="highlight">phrase</span> we want to <span class="highlight">match</span>' + ), + ), ) for query, content, expected in data: self.assertEqual(webutils.highlight_content(content, query), expected) class TestUnicodeWriter(SearxTestCase): - def setUp(self): self.unicode_writer = webutils.UnicodeWriter(mock.MagicMock()) @@ -74,7 +77,6 @@ class TestUnicodeWriter(SearxTestCase): class TestNewHmac(SearxTestCase): - def test_bytes(self): for secret_key in ['secret', b'secret', 1]: if secret_key == 1: @@ -82,6 +84,4 @@ class TestNewHmac(SearxTestCase): webutils.new_hmac(secret_key, b'http://example.com') continue res = webutils.new_hmac(secret_key, b'http://example.com') - self.assertEqual( - res, - '23e2baa2404012a5cc8e4a18b4aabf0dde4cb9b56f679ddc0fd6d7c24339d819') + self.assertEqual(res, '23e2baa2404012a5cc8e4a18b4aabf0dde4cb9b56f679ddc0fd6d7c24339d819') |