summaryrefslogtreecommitdiff
path: root/searx/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'searx/utils.py')
-rw-r--r--searx/utils.py15
1 files changed, 15 insertions, 0 deletions
diff --git a/searx/utils.py b/searx/utils.py
index 4fcbd9e3c..c7a579451 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -470,6 +470,21 @@ def ecma_unescape(string: str) -> str:
return string
def remove_pua_from_str(string: str) -> str:
    """Removes unicode's "PRIVATE USE CHARACTER"s (PUA_) from a string.

    Every character whose code point lies in one of the three Private Use
    Areas (U+E000..U+F8FF, U+F0000..U+FFFFD, U+100000..U+10FFFD, bounds
    inclusive) is dropped.  All other characters are kept, in their original
    order.

    :param string: text to filter
    :return: copy of ``string`` without private use characters

    .. _PUA: https://en.wikipedia.org/wiki/Private_Use_Areas
    """
    # (first, last) code point of each Private Use Area, both ends inclusive
    pua_ranges = ((0xE000, 0xF8FF), (0xF0000, 0xFFFFD), (0x100000, 0x10FFFD))
    return "".join(
        [c for c in string if not any(a <= ord(c) <= b for (a, b) in pua_ranges)]
    )
+
+
def get_string_replaces_function(replaces: Dict[str, str]) -> Callable[[str], str]:
rep = {re.escape(k): v for k, v in replaces.items()}
pattern = re.compile("|".join(rep.keys()))