Implements naive csv reader / writer (#2)
This commit is contained in:
@ -6,8 +6,7 @@ try:
|
|||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
logging.exception(
|
logging.exception('You must install Jupyter to use the bonobo Jupyter extension. Easiest way is to install the '
|
||||||
'You must install Jupyter to use the bonobo Jupyter extension. Easiest way is to install the '
|
|
||||||
'optional "jupyter" dependencies with «pip install bonobo[jupyter]», but you can also install a '
|
'optional "jupyter" dependencies with «pip install bonobo[jupyter]», but you can also install a '
|
||||||
'specific version by yourself.')
|
'specific version by yourself.')
|
||||||
|
|
||||||
|
|||||||
90
bonobo/io/csv.py
Normal file
90
bonobo/io/csv.py
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
import csv
|
||||||
|
from copy import copy
|
||||||
|
|
||||||
|
from .file import FileReader, FileWriter, FileHandler
|
||||||
|
|
||||||
|
|
||||||
|
class CsvHandler(FileHandler):
    """
    Base class holding the CSV settings shared by the CSV reader and writer.

    .. attribute:: delimiter

        The CSV delimiter.

    .. attribute:: quotechar

        The CSV quote character.

    .. attribute:: headers

        The list of column names, or None when none were configured.

    """

    # No column names are configured by default.
    headers = None

    # Default dialect: semicolon-separated fields, double-quote quoting.
    quotechar = '"'
    delimiter = ';'
|
||||||
|
|
||||||
|
|
||||||
|
class CsvReader(CsvHandler, FileReader):
    """
    Reads a CSV and yields each row as a dict keyed by column name.

    .. attribute:: delimiter

        The CSV delimiter.

    .. attribute:: quotechar

        The CSV quote character.

    .. attribute:: headers

        The list of column names, if the CSV does not contain it as its first line.

    .. attribute:: skip

        The number of lines to skip (after the header line, when it is read from the file) before rows are
        actually yielded.

    """

    skip = 0

    def __init__(self, path_or_buf, delimiter=None, quotechar=None, headers=None, skip=None):
        """
        Configure the reader; any falsy argument falls back to the class-level default.

        :param path_or_buf: forwarded unchanged to the parent initializer.
        :param delimiter: the CSV delimiter (coerced to ``str``).
        :param quotechar: the CSV quote character.
        :param headers: explicit column names; when not given, the first line of the file is used.
        :param skip: number of lines to discard before yielding rows.
        """
        super().__init__(path_or_buf)

        self.delimiter = str(delimiter or self.delimiter)
        self.quotechar = quotechar or self.quotechar
        self.headers = headers or self.headers
        self.skip = skip or self.skip

    @property
    def has_headers(self):
        """Whether explicit column names were configured (non-empty ``headers``)."""
        return bool(self.headers)

    def read(self, ctx):
        """
        Yield one dict per CSV row read from ``ctx.file``.

        Column names come from ``self.headers`` when set, otherwise from the first line of the file. An input
        that ends before the header line or while skipping lines simply yields nothing.

        :raises ValueError: when a row does not have as many fields as there are column names.
        """
        reader = csv.reader(ctx.file, delimiter=self.delimiter, quotechar=self.quotechar)

        if self.has_headers:
            headers = self.headers
        else:
            # A bare next() here would leak StopIteration out of this generator on an empty file, which
            # Python 3.7+ turns into a RuntimeError (PEP 479). End the iteration cleanly instead.
            try:
                headers = next(reader)
            except StopIteration:
                return
        field_count = len(headers)

        if self.skip and self.skip > 0:
            for _ in range(self.skip):
                # Same PEP 479 concern: fewer lines than requested to skip means there is nothing to yield.
                try:
                    next(reader)
                except StopIteration:
                    return

        for row in reader:
            if len(row) != field_count:
                raise ValueError('Got a line with %d fields, expecting %d.' % (
                    len(row),
                    field_count, ))

            yield dict(zip(headers, row))
|
||||||
|
|
||||||
|
|
||||||
|
class CsvWriter(CsvHandler, FileWriter):
    """
    Writes incoming dict rows to a CSV file, preceded by one header line.

    .. attribute:: delimiter

        The CSV delimiter.

    .. attribute:: quotechar

        The CSV quote character.

    .. attribute:: headers

        The list of column names to write. When not set, the keys of the first row are used.

    """

    def __init__(self, path_or_buf, delimiter=None, quotechar=None, headers=None):
        """
        Configure the writer; any falsy argument falls back to the class-level default.

        :param path_or_buf: forwarded unchanged to the parent initializer.
        :param delimiter: the CSV delimiter (coerced to ``str``).
        :param quotechar: the CSV quote character.
        :param headers: explicit column names; when not given, they are taken from the first row.
        """
        super().__init__(path_or_buf)

        self.delimiter = str(delimiter or self.delimiter)
        self.quotechar = quotechar or self.quotechar
        self.headers = headers or self.headers

    def initialize(self, ctx):
        """Run the parent initialization, then attach the csv writer and header state to ``ctx``."""
        super().initialize(ctx)
        ctx.writer = csv.writer(ctx.file, delimiter=self.delimiter, quotechar=self.quotechar)
        # Work on a copy so the per-context header list is independent of the component's configuration.
        ctx.headers = copy(self.headers)
        ctx.first = True

    def write(self, ctx, row):
        """
        Write ``row`` (a mapping of column name to value) as one CSV line.

        On the first call the header line is written first. Keys of ``row`` that are not in the headers are
        ignored; a header missing from ``row`` raises ``KeyError``.
        """
        if ctx.first:
            # Snapshot the keys as a list: ``row.keys()`` is a live view, so keeping it would let a later
            # mutation of the first row silently change the column set used for every following row.
            ctx.headers = ctx.headers or list(row.keys())
            ctx.writer.writerow(ctx.headers)
            ctx.first = False
        ctx.writer.writerow([row[header] for header in ctx.headers])

    def finalize(self, ctx):
        """Remove the attributes set by :meth:`initialize` from ``ctx``, then run the parent finalization."""
        del ctx.headers, ctx.writer, ctx.first
        super().finalize(ctx)
|
||||||
65
tests/io/test_csv.py
Normal file
65
tests/io/test_csv.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from bonobo import Bag
|
||||||
|
from bonobo.core.contexts import ComponentExecutionContext
|
||||||
|
from bonobo.io.csv import CsvReader, CsvWriter
|
||||||
|
from bonobo.util.testing import CapturingComponentExecutionContext
|
||||||
|
from bonobo.util.tokens import BEGIN, END
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_csv_to_file(tmpdir):
    """Two rows pushed through a CsvWriter produce a header line plus one line per row."""
    target = tmpdir.join('output.json')
    context = ComponentExecutionContext(CsvWriter(str(target)), None)

    context.initialize()
    context.recv(BEGIN, Bag({'foo': 'bar'}), Bag({'foo': 'baz', 'ignore': 'this'}), END)
    # One step per queued row.
    for _ in range(2):
        context.step()
    context.finalize()

    # The extra 'ignore' key of the second row is not part of the headers, so it is not written.
    assert target.read() == 'foo\nbar\nbaz\n'

    # After finalization the context no longer exposes the file attribute.
    with pytest.raises(AttributeError):
        _ = context.file
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_json_without_initializer_should_not_work(tmpdir):
    """Calling the writer on a context that was never initialized raises AttributeError."""
    writer = CsvWriter(str(tmpdir.join('output.json')))
    uninitialized_context = ComponentExecutionContext(writer, None)

    with pytest.raises(AttributeError):
        writer(uninitialized_context, {'foo': 'bar'})
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_csv_from_file(tmpdir):
    """A comma-separated file with a header line is read back as one dict per data row."""
    source = tmpdir.join('input.csv')
    source.write('a,b,c\na foo,b foo,c foo\na bar,b bar,c bar')

    context = CapturingComponentExecutionContext(CsvReader(str(source), delimiter=','), None)

    context.initialize()
    context.recv(BEGIN, Bag(), END)
    context.step()
    context.finalize()

    assert len(context.send.mock_calls) == 2

    # Each send() call must carry exactly one positional argument and no keyword arguments.
    sent = []
    for index in (0, 1):
        args, kwargs = context.send.call_args_list[index]
        assert len(args) == 1 and not kwargs
        sent.append(args[0])

    expected_rows = (
        {
            'a': 'a foo',
            'b': 'b foo',
            'c': 'c foo',
        },
        {
            'a': 'a bar',
            'b': 'b bar',
            'c': 'c bar',
        },
    )
    for bag, expected in zip(sent, expected_rows):
        assert bag.args[0] == expected
|
||||||
Reference in New Issue
Block a user