aboutsummaryrefslogtreecommitdiffstats
path: root/account/csv_unicode.py
diff options
context:
space:
mode:
Diffstat (limited to 'account/csv_unicode.py')
-rw-r--r--account/csv_unicode.py72
1 files changed, 72 insertions, 0 deletions
diff --git a/account/csv_unicode.py b/account/csv_unicode.py
new file mode 100644
index 0000000..5cc7f4e
--- /dev/null
+++ b/account/csv_unicode.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+#
+# Python 2's csv module does not support Unicode input and output.
+# Use the UnicodeReader and UnicodeWriter classes to provide
+# Unicode CSV I/O in Django.
+#
+# Ref:
+# https://docs.djangoproject.com/en/dev/howto/outputting-csv/
+# https://docs.python.org/2/library/csv.html
+#
+
+import csv, codecs, cStringIO
+
class UTF8Recoder(object):
    """
    Iterator that reads an encoded byte stream and re-encodes it to UTF-8.

    Each iteration step yields one line of the wrapped stream as a UTF-8
    encoded byte string.

    :param f: binary file-like object to read from.
    :param encoding: name of the codec the bytes of ``f`` are encoded in.
    """

    def __init__(self, f, encoding):
        # The codec's stream reader decodes the raw bytes of ``f`` to text.
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        """
        Return the next line of the stream, re-encoded as UTF-8 bytes.

        Raises StopIteration once the wrapped stream is exhausted.
        """
        # The built-in next() dispatches to the stream reader's ``next``
        # (Python 2) or ``__next__`` (Python 3) as appropriate.
        return next(self.reader).encode('utf-8')

    # Python 3 looks up ``__next__``; ``next`` stays for Python 2 callers.
    __next__ = next
+
class UnicodeReader(object):
    """
    A CSV reader which iterates over the rows of the CSV file "f",
    which is encoded in the given encoding.

    Every row is returned as a list; text cells are returned as Unicode
    strings. Cells that the csv module has already converted to a
    non-string value (floats under ``csv.QUOTE_NONNUMERIC``) are passed
    through unchanged.

    :param f: binary file-like object containing the CSV data.
    :param dialect: csv dialect, forwarded to ``csv.reader``.
    :param encoding: name of the codec the bytes of ``f`` are encoded in.
    :param kwargs: extra formatting parameters forwarded to ``csv.reader``.
    """

    def __init__(self, f, dialect='excel', encoding='utf-8', **kwargs):
        if str is bytes:
            # Python 2: csv.reader only handles byte strings, so feed it
            # lines that have been re-encoded to UTF-8.
            lines = UTF8Recoder(f, encoding)
        else:
            # Python 3: csv.reader requires text, so feed it decoded lines.
            lines = codecs.getreader(encoding)(f)
        self.reader = csv.reader(lines, dialect=dialect, **kwargs)

    def next(self):
        """
        Return the next row as a list of cells.

        Raises StopIteration when the input is exhausted.
        """
        row = next(self.reader)
        # Only byte cells (Python 2) still need decoding; text and numeric
        # cells are already in their final form.
        return [
            cell.decode('utf-8') if isinstance(cell, bytes) else cell
            for cell in row
        ]

    # Python 3 looks up ``__next__``; ``next`` stays for Python 2 callers.
    __next__ = next

    def __iter__(self):
        return self
+
class UnicodeWriter(object):
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.

    Rows are first formatted by ``csv.writer`` into an in-memory buffer,
    then encoded to the target encoding and written to ``f``.

    :param f: binary file-like object that receives the encoded CSV data.
    :param dialect: csv dialect, forwarded to ``csv.writer``.
    :param encoding: name of the codec used for the bytes written to ``f``.
    :param kwargs: extra formatting parameters forwarded to ``csv.writer``.
    """

    def __init__(self, f, dialect='excel', encoding='utf-8', **kwargs):
        # Imported here so the class does not depend on a module that only
        # exists on Python 2.
        try:
            from cStringIO import StringIO
        except ImportError:
            from io import StringIO
        # Redirect csv output to an in-memory queue
        self.queue = StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwargs)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    @staticmethod
    def _prepare_cell(value):
        """Return ``value`` in the form the csv writer of this Python expects."""
        # On Python 2 the csv module cannot handle ``unicode`` objects, so
        # they are encoded to UTF-8 bytes. Everything else (byte strings,
        # numbers, None, and all values on Python 3) is left for
        # csv.writer to format itself.
        if str is bytes and isinstance(value, type(u'')):
            return value.encode('utf-8')
        return value

    def writerow(self, row):
        """
        Format ``row`` as one CSV record and write it to the target stream.

        :param row: iterable of cell values; cells need not be strings.
        """
        self.writer.writerow([self._prepare_cell(cell) for cell in row])
        # Fetch the formatted record from the queue ...
        data = self.queue.getvalue()
        # ... and empty the queue immediately, so that a failure while
        # encoding or writing cannot leak this record into the next one.
        # seek(0) is needed because io.StringIO.truncate() does not move
        # the write position.
        self.queue.seek(0)
        self.queue.truncate(0)
        if isinstance(data, bytes):
            # Python 2: the queue holds UTF-8 bytes.
            data = data.decode('utf-8')
        # Re-encode into the target encoding and write to the target stream
        self.stream.write(self.encoder.encode(data))

    def writerows(self, rows):
        """Write every row of the iterable ``rows`` using ``writerow``."""
        for row in rows:
            self.writerow(row)
+