Implements naive csv reader / writer (#2)
This commit is contained in:
@ -6,10 +6,9 @@ try:
|
||||
except ImportError as e:
|
||||
import logging
|
||||
|
||||
logging.exception(
|
||||
'You must install Jupyter to use the bonobo Jupyter extension. Easiest way is to install the '
|
||||
'optional "jupyter" dependencies with «pip install bonobo[jupyter]», but you can also install a '
|
||||
'specific version by yourself.')
|
||||
logging.exception('You must install Jupyter to use the bonobo Jupyter extension. Easiest way is to install the '
|
||||
'optional "jupyter" dependencies with «pip install bonobo[jupyter]», but you can also install a '
|
||||
'specific version by yourself.')
|
||||
|
||||
|
||||
class JupyterOutputPlugin(Plugin):
|
||||
|
||||
90
bonobo/io/csv.py
Normal file
90
bonobo/io/csv.py
Normal file
@ -0,0 +1,90 @@
|
||||
import csv
|
||||
from copy import copy
|
||||
|
||||
from .file import FileReader, FileWriter, FileHandler
|
||||
|
||||
|
||||
class CsvHandler(FileHandler):
    """Shared base for the CSV components, holding the dialect options common to reader and writer."""

    # Default CSV dialect; subclasses let constructor arguments override these.
    delimiter = ';'
    quotechar = '"'
    # Explicit column names; None means "derive from the data" (first row, or first dict's keys).
    headers = None
|
||||
|
||||
|
||||
class CsvReader(CsvHandler, FileReader):
    """
    Reads a CSV and yield the values as dicts.

    .. attribute:: delimiter

        The CSV delimiter.

    .. attribute:: quotechar

        The CSV quote character.

    .. attribute:: headers

        The list of column names, if the CSV does not contain it as its first line.

    .. attribute:: skip

        The amount of lines to skip before it actually yield output.

    """

    skip = 0

    def __init__(self, path_or_buf, delimiter=None, quotechar=None, headers=None, skip=None):
        super().__init__(path_or_buf)

        # Fall back on the class-level defaults for any option not provided.
        self.delimiter = str(delimiter or self.delimiter)
        self.quotechar = quotechar or self.quotechar
        self.headers = headers or self.headers
        self.skip = skip or self.skip

    @property
    def has_headers(self):
        # True when explicit column names were configured; otherwise the first
        # row of the file is consumed as the header line in read().
        return bool(self.headers)

    def read(self, ctx):
        """Yield one dict per CSV row read from ctx.file.

        Raises ValueError when a row's field count does not match the headers.
        """
        reader = csv.reader(ctx.file, delimiter=self.delimiter, quotechar=self.quotechar)

        # Use the configured headers, or consume the first row as headers.
        headers = self.headers if self.has_headers else next(reader)
        field_count = len(headers)

        # Skip the requested number of data lines (after the header line, if
        # any); negative values skip nothing, as before.
        for _ in range(max(self.skip, 0)):
            next(reader)

        for row in reader:
            if len(row) != field_count:
                raise ValueError('Got a line with %d fields, expecting %d.' % (
                    len(row),
                    field_count, ))

            yield dict(zip(headers, row))
|
||||
|
||||
|
||||
class CsvWriter(CsvHandler, FileWriter):
    """Writes dict rows out as CSV, emitting a header line before the first data row."""

    def __init__(self, path_or_buf, delimiter=None, quotechar=None, headers=None):
        super().__init__(path_or_buf)

        # Constructor arguments take precedence over the class-level defaults.
        self.delimiter = str(delimiter if delimiter else self.delimiter)
        self.quotechar = quotechar if quotechar else self.quotechar
        self.headers = headers if headers else self.headers

    def initialize(self, ctx):
        """Attach a csv.writer and per-run header state to the execution context."""
        super().initialize(ctx)
        ctx.first = True
        ctx.headers = copy(self.headers)
        ctx.writer = csv.writer(ctx.file, delimiter=self.delimiter, quotechar=self.quotechar)

    def write(self, ctx, row):
        """Write one row; on the first call, resolve column names and emit the header line."""
        if ctx.first:
            # No explicit headers configured: the first row's keys become the columns.
            ctx.headers = ctx.headers or row.keys()
            ctx.writer.writerow(ctx.headers)
            ctx.first = False
        ctx.writer.writerow(row[column] for column in ctx.headers)

    def finalize(self, ctx):
        """Drop the per-run state from the context before the file is closed."""
        del ctx.headers, ctx.writer, ctx.first
        super().finalize(ctx)
|
||||
65
tests/io/test_csv.py
Normal file
65
tests/io/test_csv.py
Normal file
@ -0,0 +1,65 @@
|
||||
import pytest
|
||||
|
||||
from bonobo import Bag
|
||||
from bonobo.core.contexts import ComponentExecutionContext
|
||||
from bonobo.io.csv import CsvReader, CsvWriter
|
||||
from bonobo.util.testing import CapturingComponentExecutionContext
|
||||
from bonobo.util.tokens import BEGIN, END
|
||||
|
||||
|
||||
def test_write_csv_to_file(tmpdir):
    """End-to-end: CsvWriter emits a header line, then one line per bag, ignoring extra keys."""
    # NOTE(review): fixture name says .json but this writes CSV — presumably copy-pasted.
    output = tmpdir.join('output.json')
    component = CsvWriter(str(output))
    context = ComponentExecutionContext(component, None)

    context.initialize()
    context.recv(BEGIN, Bag({'foo': 'bar'}), Bag({'foo': 'baz', 'ignore': 'this'}), END)
    context.step()
    context.step()
    context.finalize()

    assert output.read() == 'foo\nbar\nbaz\n'

    # finalize() must have released the file handle from the context.
    with pytest.raises(AttributeError):
        getattr(context, 'file')
|
||||
|
||||
|
||||
def test_write_json_without_initializer_should_not_work(tmpdir):
    # NOTE(review): name says "json" but this exercises CsvWriter — presumably
    # copy-pasted from the json test module; consider renaming to "csv".
    file = tmpdir.join('output.json')
    writer = CsvWriter(str(file))

    # Calling the component without initialize() must fail: the context has no
    # writer/headers/first state attached yet.
    context = ComponentExecutionContext(writer, None)
    with pytest.raises(AttributeError):
        writer(context, {'foo': 'bar'})
|
||||
|
||||
|
||||
def test_read_csv_from_file(tmpdir):
    """End-to-end: CsvReader with a ',' delimiter maps each data row onto the header names."""
    source = tmpdir.join('input.csv')
    source.write('a,b,c\na foo,b foo,c foo\na bar,b bar,c bar')

    reader = CsvReader(str(source), delimiter=',')

    context = CapturingComponentExecutionContext(reader, None)

    context.initialize()
    context.recv(BEGIN, Bag(), END)
    context.step()
    context.finalize()

    # Two data rows -> two sends.
    assert len(context.send.mock_calls) == 2

    expected_rows = [
        {'a': 'a foo', 'b': 'b foo', 'c': 'c foo'},
        {'a': 'a bar', 'b': 'b bar', 'c': 'c bar'},
    ]
    # Each send carries exactly one positional bag, no keyword arguments, and
    # the bag's payload is the row mapped onto the header names.
    for call, expected in zip(context.send.call_args_list, expected_rows):
        args, kwargs = call
        assert len(args) == 1 and not len(kwargs)
        assert args[0].args[0] == expected
|
||||
Reference in New Issue
Block a user