diff options
| author | Adam Tauber <adam.tauber@balabit.com> | 2015-02-02 09:37:12 +0100 |
|---|---|---|
| committer | Adam Tauber <adam.tauber@balabit.com> | 2015-02-02 09:37:12 +0100 |
| commit | 0e6f8393ab8b29b2e85d1fafdc7442455767f753 (patch) | |
| tree | 60e9acb27577968a41136c04f248c24871e83860 /searx/utils.py | |
| parent | 03137eebd9fdfaa57452cb364c1bc9f31b243f67 (diff) | |
| parent | 7f865356f9a6c1b40d0c668c59b3d081de618bac (diff) | |
Merge branch 'Cqoicebordel-unit-tests'
Diffstat (limited to 'searx/utils.py')
| -rw-r--r-- | searx/utils.py | 4 |
1 files changed, 3 insertions, 1 deletions
```diff
diff --git a/searx/utils.py b/searx/utils.py
index c47d3be17..c0afc94cb 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -115,10 +115,12 @@ class HTMLTextExtractor(HTMLParser):
         self.result.append(name)
 
     def get_text(self):
-        return u''.join(self.result)
+        return u''.join(self.result).strip()
 
 
 def html_to_text(html):
+    html = html.replace('\n', ' ')
+    html = ' '.join(html.split())
     s = HTMLTextExtractor()
     s.feed(html)
     return s.get_text()
```