From dd4662978dd74c0dce089790689fe0a8a4f9bb16 Mon Sep 17 00:00:00 2001 From: Matej Cotman Date: Sun, 19 Jan 2014 22:59:01 +0100 Subject: fix: robot fw, entry points, some flake8, package searx egg --- searx/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'searx/utils.py') diff --git a/searx/utils.py b/searx/utils.py index 416055dfa..4b8cb615c 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -5,10 +5,12 @@ import codecs import cStringIO import re + def gen_useragent(): # TODO return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0" + def highlight_content(content, query): if not content: @@ -34,10 +36,11 @@ def highlight_content(content, query): return content + class HTMLTextExtractor(HTMLParser): def __init__(self): HTMLParser.__init__(self) - self.result = [ ] + self.result = [] def handle_data(self, d): self.result.append(d) @@ -54,6 +57,7 @@ class HTMLTextExtractor(HTMLParser): def get_text(self): return u''.join(self.result) + def html_to_text(html): s = HTMLTextExtractor() s.feed(html) -- cgit v1.2.3