summaryrefslogtreecommitdiff
path: root/searx/utils.py
diff options
context:
space:
mode:
authorBnyro <bnyro@tutanota.com>2025-02-13 11:43:45 +0100
committerMarkus Heiser <markus.heiser@darmarIT.de>2025-02-20 13:44:28 +0100
commit0f2fc5879dfd789467609bc0611a6e4d7b6a7ab5 (patch)
treeff40351c2498e2465be8c1cef67d7e5331883c6b /searx/utils.py
parentfeb15e3878920ee7bf6e3d726fac0fcd1f89a896 (diff)
[feat] startpage: support for news and images
Diffstat (limited to 'searx/utils.py')
-rw-r--r--searx/utils.py15
1 files changed, 15 insertions, 0 deletions
diff --git a/searx/utils.py b/searx/utils.py
index 4fcbd9e3c..c7a579451 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -470,6 +470,21 @@ def ecma_unescape(string: str) -> str:
return string
def remove_pua_from_str(string: str) -> str:
    """Removes unicode's "PRIVATE USE CHARACTER"s (PUA_) from a string.

    A character is dropped when its code point lies in one of these inclusive
    ranges; every other character is kept, in its original order:

    - ``U+E000``   .. ``U+F8FF``
    - ``U+F0000``  .. ``U+FFFFD``
    - ``U+100000`` .. ``U+10FFFD``

    :param string: The text to filter.
    :return: A new string without any private use characters.

    .. _PUA: https://en.wikipedia.org/wiki/Private_Use_Areas
    """
    pua_ranges = ((0xE000, 0xF8FF), (0xF0000, 0xFFFFD), (0x100000, 0x10FFFD))

    def is_pua(char: str) -> bool:
        # Compute the code point once and test it against each inclusive range.
        code_point = ord(char)
        return any(low <= code_point <= high for (low, high) in pua_ranges)

    return "".join(char for char in string if not is_pua(char))
+
+
def get_string_replaces_function(replaces: Dict[str, str]) -> Callable[[str], str]:
rep = {re.escape(k): v for k, v in replaces.items()}
pattern = re.compile("|".join(rep.keys()))