diff options
| author | asciimoo <asciimoo@gmail.com> | 2013-11-15 18:55:18 +0100 |
|---|---|---|
| committer | asciimoo <asciimoo@gmail.com> | 2013-11-15 18:55:18 +0100 |
| commit | a192438e9a59b14290b8e719fe4d5679a6371a25 (patch) | |
| tree | 7a1c0051208acfab4c2e3e6a6edb472d6707228c /searx/utils.py | |
| parent | 1297d4109d7a1fd15c9c41f192d74ae6b0b9b179 (diff) | |
[enh] csv output support
Diffstat (limited to 'searx/utils.py')
| -rw-r--r-- | searx/utils.py | 33 |
1 files changed, 33 insertions, 0 deletions
```diff
diff --git a/searx/utils.py b/searx/utils.py
index b6c448566..8e3b10d39 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -1,5 +1,8 @@
 from HTMLParser import HTMLParser
 import htmlentitydefs
+import csv
+import codecs
+import cStringIO
 
 class HTMLTextExtractor(HTMLParser):
     def __init__(self):
@@ -24,3 +27,33 @@ def html_to_text(html):
     s = HTMLTextExtractor()
     s.feed(html)
     return s.get_text()
+
+
+class UnicodeWriter:
+    """
+    A CSV writer which will write rows to CSV file "f",
+    which is encoded in the given encoding.
+    """
+
+    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+        # Redirect output to a queue
+        self.queue = cStringIO.StringIO()
+        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
+        self.stream = f
+        self.encoder = codecs.getincrementalencoder(encoding)()
+
+    def writerow(self, row):
+        self.writer.writerow([(s.encode("utf-8").strip() if type(s) == str or type(s) == unicode else str(s)) for s in row])
+        # Fetch UTF-8 output from the queue ...
+        data = self.queue.getvalue()
+        data = data.decode("utf-8")
+        # ... and reencode it into the target encoding
+        data = self.encoder.encode(data)
+        # write to the target stream
+        self.stream.write(data)
+        # empty queue
+        self.queue.truncate(0)
+
+    def writerows(self, rows):
+        for row in rows:
+            self.writerow(row)
```