diff options
| -rw-r--r-- | searx/engines/json_engine.py | 8 | ||||
| -rw-r--r-- | searx/engines/wikidata.py | 9 | ||||
| -rw-r--r-- | searx/locales.py | 130 | ||||
| -rw-r--r-- | searx/settings.yml | 24 | ||||
| -rwxr-xr-x | searx/webapp.py | 57 | ||||
| -rw-r--r-- | tests/unit/test_preferences.py | 3 |
6 files changed, 161 insertions, 70 deletions
diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index f53bc0bf4..2dd3bc55e 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -16,6 +16,11 @@ paging = False suggestion_query = '' results_query = '' +cookies = {} +headers = {} +'''Some engines might offer different result based on cookies or headers. +Possible use-case: To set safesearch cookie or header to moderate.''' + # parameters for engines with paging support # # number of results on each page @@ -88,6 +93,9 @@ def request(query, params): if paging and search_url.find('{pageno}') >= 0: fp['pageno'] = (params['pageno'] - 1) * page_size + first_page_num + params['cookies'].update(cookies) + params['headers'].update(headers) + params['url'] = search_url.format(**fp) params['query'] = query diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index b7c318e53..d828f4be8 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -65,6 +65,7 @@ WHERE mwapi:language "%LANGUAGE%". ?item wikibase:apiOutputItem mwapi:item. } + hint:Prior hint:runFirst "true". %WHERE% @@ -93,6 +94,12 @@ WHERE { } """ +# see the property "dummy value" of https://www.wikidata.org/wiki/Q2013 (Wikidata) +# hard coded here to avoid to an additional SPARQL request when the server starts +DUMMY_ENTITY_URLS = set( + "http://www.wikidata.org/entity/" + wid for wid in ("Q4115189", "Q13406268", "Q15397819", "Q17339402") +) + # https://www.w3.org/TR/sparql11-query/#rSTRING_LITERAL1 # https://lists.w3.org/Archives/Public/public-rdf-dawg/2011OctDec/0175.html @@ -177,7 +184,7 @@ def response(resp): for result in jsonresponse.get('results', {}).get('bindings', []): attribute_result = {key: value['value'] for key, value in result.items()} entity_url = attribute_result['item'] - if entity_url not in seen_entities: + if entity_url not in seen_entities and entity_url not in DUMMY_ENTITY_URLS: seen_entities.add(entity_url) results += get_results(attribute_result, attributes, language) else: diff --git a/searx/locales.py b/searx/locales.py index 677b13334..fbdf305ad 100644 --- a/searx/locales.py +++ b/searx/locales.py @@ -9,61 +9,139 @@ import os import pathlib from babel import Locale +from babel.support import Translations +import flask_babel +import flask +from flask.ctx import has_request_context +from searx import logger -LOCALE_NAMES = { +logger = logger.getChild('locales') + + +# safe before monkey patching flask_babel.get_translations +_flask_babel_get_translations = flask_babel.get_translations + +LOCALE_NAMES = {} +"""Mapping of locales and their description. Locales e.g. 'fr' or 'pt-BR' (see +:py:obj:`locales_initialize`).""" + +RTL_LOCALES: Set[str] = set() +"""List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (see +:py:obj:`locales_initialize`).""" + +ADDITIONAL_TRANSLATIONS = { "oc": "Occitan", - "nl-BE": "Vlaams (Dutch, Belgium)", "szl": "Ślōnski (Silesian)", } -"""Mapping of locales and their description. Locales e.g. 'fr' or 'pt-BR' -(delimiter is *underline* '-')""" +"""Additional languages SearXNG has translations for but not supported by +python-babel (see :py:obj:`locales_initialize`).""" -RTL_LOCALES: Set[str] = set() -"""List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (delimiter is -*underline* '-')""" +LOCALE_BEST_MATCH = { + "oc": 'fr-FR', + "szl": "pl", + "nl-BE": "nl", + "zh-HK": "zh-Hant-TW", +} +"""Map a locale we do not have a translations for to a locale we have a +translation for. By example: use Taiwan version of the translation for Hong +Kong.""" -def _get_name(locale, language_code): - language_name = locale.get_language_name(language_code).capitalize() - if language_name and ('a' <= language_name[0] <= 'z'): - language_name = language_name.capitalize() - terrirtory_name = locale.get_territory_name(language_code) - return language_name, terrirtory_name +def localeselector(): + locale = 'en' + if has_request_context(): + value = flask.request.preferences.get_value('locale') + if value: + locale = value + + # first, set the language that is not supported by babel + if locale in ADDITIONAL_TRANSLATIONS: + flask.request.form['use-translation'] = locale + + # second, map locale to a value python-babel supports + locale = LOCALE_BEST_MATCH.get(locale, locale) + + if locale == '': + # if there is an error loading the preferences + # the locale is going to be '' + locale = 'en' + + # babel uses underscore instead of hyphen. + locale = locale.replace('-', '_') + return locale + + +def get_translations(): + """Monkey patch of flask_babel.get_translations""" + if has_request_context() and flask.request.form.get('use-translation') == 'oc': + babel_ext = flask_babel.current_app.extensions['babel'] + return Translations.load(next(babel_ext.translation_directories), 'oc') + if has_request_context() and flask.request.form.get('use-translation') == 'szl': + babel_ext = flask_babel.current_app.extensions['babel'] + return Translations.load(next(babel_ext.translation_directories), 'szl') + return _flask_babel_get_translations() -def _get_locale_name(locale, locale_name): +def get_locale_descr(locale, locale_name): """Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR' :param locale: instance of :py:class:`Locale` :param locale_name: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*) """ - native_language, native_territory = _get_name(locale, locale_name) - english_language, english_territory = _get_name(locale, 'en') + + native_language, native_territory = _get_locale_descr(locale, locale_name) + english_language, english_territory = _get_locale_descr(locale, 'en') + if native_territory == english_territory: english_territory = None + if not native_territory and not english_territory: if native_language == english_language: return native_language return native_language + ' (' + english_language + ')' + result = native_language + ', ' + native_territory + ' (' + english_language if english_territory: return result + ', ' + english_territory + ')' return result + ')' -def initialize_locales(directory): - """Initialize global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.""" +def _get_locale_descr(locale, language_code): + language_name = locale.get_language_name(language_code).capitalize() + if language_name and ('a' <= language_name[0] <= 'z'): + language_name = language_name.capitalize() + terrirtory_name = locale.get_territory_name(language_code) + return language_name, terrirtory_name + + +def locales_initialize(directory=None): + """Initialize locales environment of the SearXNG session. + + - monkey patch :py:obj:`flask_babel.get_translations` by :obj:py:`get_translations` + - init global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES` + """ + + directory = directory or pathlib.Path(__file__).parent / 'translations' + logger.debug("locales_initialize: %s", directory) + flask_babel.get_translations = get_translations + + for tag, descr in ADDITIONAL_TRANSLATIONS.items(): + LOCALE_NAMES[tag] = descr + + for tag in LOCALE_BEST_MATCH: + descr = LOCALE_NAMES.get(tag) + if not descr: + locale = Locale.parse(tag, sep='-') + LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_')) + for dirname in sorted(os.listdir(directory)): # Based on https://flask-babel.tkte.ch/_modules/flask_babel.html#Babel.list_translations if not os.path.isdir(os.path.join(directory, dirname, 'LC_MESSAGES')): continue - locale_name = dirname.replace('_', '-') - info = LOCALE_NAMES.get(locale_name) - if not info: + tag = dirname.replace('_', '-') + descr = LOCALE_NAMES.get(tag) + if not descr: locale = Locale.parse(dirname) - LOCALE_NAMES[locale_name] = _get_locale_name(locale, dirname) + LOCALE_NAMES[tag] = get_locale_descr(locale, dirname) if locale.text_direction == 'rtl': - RTL_LOCALES.add(locale_name) - - -initialize_locales(pathlib.Path(__file__).parent / 'translations') + RTL_LOCALES.add(tag) diff --git a/searx/settings.yml b/searx/settings.yml index 6a2142b83..34c1bb0b1 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -402,6 +402,30 @@ engines: require_api_key: false results: JSON + - name: yep + engine: json_engine + shortcut: yep + categories: general + disabled: true + paging: false + page_size: 10 + content_html_to_text: true + title_html_to_text: true + search_url: https://api.yep.com/fs/1/?type=web&q={query}&no_correct=false + results_query: 1/results + title_query: title + url_query: url + content_query: snippet + timeout: 12.0 + headers: + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8' + 'Sec-Fetch-Dest': 'document' + about: + website: https://yep.com + use_official_api: false + require_api_key: false + results: JSON + - name: currency engine: currency_convert categories: general diff --git a/searx/webapp.py b/searx/webapp.py index 493468a22..2ec2f7edd 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -40,11 +40,8 @@ from flask import ( send_from_directory, ) from flask.wrappers import Response -from flask.ctx import has_request_context from flask.json import jsonify -from babel.support import Translations -import flask_babel from flask_babel import ( Babel, gettext, @@ -114,11 +111,16 @@ from searx.metrics import ( ) from searx.flaskfix import patch_application -# renaming names from searx imports ... +from searx.locales import ( + LOCALE_NAMES, + RTL_LOCALES, + localeselector, + locales_initialize, +) +# renaming names from searx imports ... from searx.autocomplete import search_autocomplete, backends as autocomplete_backends from searx.languages import language_codes as languages -from searx.locales import LOCALE_NAMES, RTL_LOCALES from searx.search import SearchWithPlugins, initialize as search_initialize from searx.network import stream as http_stream, set_context_network_name from searx.search.checker import get_result as checker_get_result @@ -148,7 +150,6 @@ STATS_SORT_PARAMETERS = { 'time': (False, 'total', 0), 'reliability': (False, 'reliability', 100), } -_INFO_PAGES = infopage.InfoPageSet() # Flask app app = Flask(__name__, static_folder=settings['ui']['static_path'], template_folder=templates_path) @@ -192,10 +193,6 @@ exception_classname_to_text = { } -# monkey patch for flask_babel.get_translations -_flask_babel_get_translations = flask_babel.get_translations - - class ExtendedRequest(flask.Request): """This class is never initialized and only used for type checking.""" @@ -211,40 +208,9 @@ class ExtendedRequest(flask.Request): request = typing.cast(ExtendedRequest, flask.request) -def _get_translations(): - if has_request_context() and request.form.get('use-translation') == 'oc': - babel_ext = flask_babel.current_app.extensions['babel'] - return Translations.load(next(babel_ext.translation_directories), 'oc') - if has_request_context() and request.form.get('use-translation') == 'szl': - babel_ext = flask_babel.current_app.extensions['babel'] - return Translations.load(next(babel_ext.translation_directories), 'szl') - return _flask_babel_get_translations() - - -flask_babel.get_translations = _get_translations - - @babel.localeselector def get_locale(): - locale = 'en' - - if has_request_context(): - value = request.preferences.get_value('locale') - if value: - locale = value - - if locale == 'oc': - request.form['use-translation'] = 'oc' - locale = 'fr_FR' - if locale == 'szl': - request.form['use-translation'] = 'szl' - locale = 'pl' - if locale == '': - # if there is an error loading the preferences - # the locale is going to be '' - locale = 'en' - # babel uses underscore instead of hyphen. - locale = locale.replace('-', '_') + locale = localeselector() logger.debug("%s uses locale `%s`", urllib.parse.quote(request.url), locale) return locale @@ -564,12 +530,14 @@ def pre_request(): if not preferences.get_value("language"): language = _get_browser_language(request, settings['search']['languages']) preferences.parse_dict({"language": language}) + logger.debug('set language %s (from browser)', preferences.get_value("language")) # locale is defined neither in settings nor in preferences # use browser headers if not preferences.get_value("locale"): locale = _get_browser_language(request, LOCALE_NAMES.keys()) preferences.parse_dict({"locale": locale}) + logger.debug('set locale %s (from browser)', preferences.get_value("locale")) # request.user_plugins request.user_plugins = [] # pylint: disable=assigning-non-slot @@ -941,7 +909,8 @@ def autocompleter(): for result in raw_results: # attention: this loop will change raw_text_query object and this is # the reason why the sug_prefix was stored before (see above) - results.append(raw_text_query.changeQuery(result).getFullQuery()) + if result != sug_prefix: + results.append(raw_text_query.changeQuery(result).getFullQuery()) if len(raw_text_query.autocomplete_list) > 0: for autocomplete_text in raw_text_query.autocomplete_list: @@ -1415,6 +1384,8 @@ werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__mai # initialize the engines except on the first run of the werkzeug server. if not werkzeug_reloader or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_MAIN") == "true"): + locales_initialize() + _INFO_PAGES = infopage.InfoPageSet() plugin_initialize(app) search_initialize(enable_checker=True, check_network=True, enable_metrics=settings['general']['enable_metrics']) diff --git a/tests/unit/test_preferences.py b/tests/unit/test_preferences.py index 4fc6007d9..a33c78a44 100644 --- a/tests/unit/test_preferences.py +++ b/tests/unit/test_preferences.py @@ -1,3 +1,4 @@ +from searx.locales import locales_initialize from searx.preferences import ( EnumStringSetting, MapSetting, @@ -8,6 +9,8 @@ from searx.preferences import ( ) from tests import SearxTestCase +locales_initialize() + class PluginStub: def __init__(self, plugin_id, default_on): |