diff options
| author | Bnyro <bnyro@tutanota.com> | 2025-02-13 11:43:45 +0100 |
|---|---|---|
| committer | Markus Heiser <markus.heiser@darmarIT.de> | 2025-02-20 13:44:28 +0100 |
| commit | 0f2fc5879dfd789467609bc0611a6e4d7b6a7ab5 (patch) | |
| tree | ff40351c2498e2465be8c1cef67d7e5331883c6b /searx/utils.py | |
| parent | feb15e3878920ee7bf6e3d726fac0fcd1f89a896 (diff) | |
[feat] startpage: support for news and images
Diffstat (limited to 'searx/utils.py')
| -rw-r--r-- | searx/utils.py | 15 |
1 files changed, 15 insertions, 0 deletions
```diff
diff --git a/searx/utils.py b/searx/utils.py
index 4fcbd9e3c..c7a579451 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -470,6 +470,21 @@ def ecma_unescape(string: str) -> str:
     return string
 
 
+def remove_pua_from_str(string):
+    """Removes unicode's "PRIVATE USE CHARACTER"s (PUA_) from a string.
+
+    _PUA: https://en.wikipedia.org/wiki/Private_Use_Areas
+    """
+    pua_ranges = ((0xE000, 0xF8FF), (0xF0000, 0xFFFFD), (0x100000, 0x10FFFD))
+    s = []
+    for c in string:
+        i = ord(c)
+        if any(a <= i <= b for (a, b) in pua_ranges):
+            continue
+        s.append(c)
+    return "".join(s)
+
+
 def get_string_replaces_function(replaces: Dict[str, str]) -> Callable[[str], str]:
     rep = {re.escape(k): v for k, v in replaces.items()}
     pattern = re.compile("|".join(rep.keys()))
```