def remove_pua_from_str(string: str) -> str:
    """Remove Unicode *Private Use Area* (PUA_) characters from a string.

    A character is dropped when its code point lies in one of the three
    private-use ranges (bounds inclusive):

    - ``U+E000``   .. ``U+F8FF``
    - ``U+F0000``  .. ``U+FFFFD``
    - ``U+100000`` .. ``U+10FFFD``

    All other characters are kept, in their original order.

    :param string: text that may contain private-use characters.
    :return: a new string without private-use characters.

    .. _PUA: https://en.wikipedia.org/wiki/Private_Use_Areas
    """
    pua_ranges = ((0xE000, 0xF8FF), (0xF0000, 0xFFFFD), (0x100000, 0x10FFFD))
    return "".join(
        char
        for char in string
        if not any(first <= ord(char) <= last for first, last in pua_ranges)
    )