diff options
Diffstat (limited to 'searx/utils.py')
| -rw-r--r-- | searx/utils.py | 15 |
1 files changed, 15 insertions, 0 deletions
# Added to searx/utils.py by this diff (hunk "@@ -470,6 +470,21 @@"), placed
# between ecma_unescape() and get_string_replaces_function().
def remove_pua_from_str(string: str) -> str:
    """Remove Unicode "PRIVATE USE CHARACTER"s (PUA_) from a string.

    Every character whose code point lies in one of the three inclusive
    ranges U+E000..U+F8FF, U+F0000..U+FFFFD or U+100000..U+10FFFD is dropped.
    All other characters are kept, in their original order.  An empty string
    is returned unchanged.

    .. _PUA: https://en.wikipedia.org/wiki/Private_Use_Areas
    """
    # Inclusive (first, last) code point pairs of the private use areas.
    pua_ranges = ((0xE000, 0xF8FF), (0xF0000, 0xFFFFD), (0x100000, 0x10FFFD))
    return "".join(
        c
        for c in string
        if not any(first <= ord(c) <= last for (first, last) in pua_ranges)
    )