[stdlib] Adds Update(...) and FixedWindow(...) the the standard nodes provided with bonobo.

This commit is contained in:
Romain Dorgueil
2017-11-12 10:06:15 +01:00
parent a97b1c5d2e
commit c2f17296f6
15 changed files with 147 additions and 38 deletions

View File

@ -0,0 +1,58 @@
import pytest
from bonobo import CsvReader, CsvWriter, settings
from bonobo.execution.contexts.node import NodeExecutionContext
from bonobo.util.testing import FilesystemTester, BufferingNodeExecutionContext
csv_tester = FilesystemTester('csv')
csv_tester.input_data = 'a,b,c\na foo,b foo,c foo\na bar,b bar,c bar'
def test_write_csv_ioformat_arg0(tmpdir):
fs, filename, services = csv_tester.get_services_for_writer(tmpdir)
with pytest.raises(ValueError):
CsvWriter(path=filename, ioformat=settings.IOFORMAT_ARG0)
with pytest.raises(ValueError):
CsvReader(path=filename, delimiter=',', ioformat=settings.IOFORMAT_ARG0),
@pytest.mark.parametrize('add_kwargs', (
{},
{
'ioformat': settings.IOFORMAT_KWARGS,
},
))
def test_write_csv_to_file_kwargs(tmpdir, add_kwargs):
fs, filename, services = csv_tester.get_services_for_writer(tmpdir)
with NodeExecutionContext(CsvWriter(filename, **add_kwargs), services=services) as context:
context.write_sync({'foo': 'bar'}, {'foo': 'baz', 'ignore': 'this'})
with fs.open(filename) as fp:
assert fp.read() == 'foo\nbar\nbaz\n'
with pytest.raises(AttributeError):
getattr(context, 'file')
def test_read_csv_from_file_kwargs(tmpdir):
fs, filename, services = csv_tester.get_services_for_reader(tmpdir)
with BufferingNodeExecutionContext(
CsvReader(path=filename, delimiter=','),
services=services,
) as context:
context.write_sync(())
assert context.get_buffer() == [
{
'a': 'a foo',
'b': 'b foo',
'c': 'c foo',
}, {
'a': 'a bar',
'b': 'b bar',
'c': 'c bar',
}
]

View File

@ -0,0 +1,50 @@
import pytest
from bonobo import Bag, FileReader, FileWriter
from bonobo.constants import BEGIN, END
from bonobo.execution.contexts.node import NodeExecutionContext
from bonobo.util.testing import BufferingNodeExecutionContext, FilesystemTester
txt_tester = FilesystemTester('txt')
txt_tester.input_data = 'Hello\nWorld\n'
def test_file_writer_contextless(tmpdir):
fs, filename, services = txt_tester.get_services_for_writer(tmpdir)
with FileWriter(path=filename).open(fs) as fp:
fp.write('Yosh!')
with fs.open(filename) as fp:
assert fp.read() == 'Yosh!'
@pytest.mark.parametrize(
'lines,output',
[
(('ACME', ), 'ACME'), # one line...
(('Foo', 'Bar', 'Baz'), 'Foo\nBar\nBaz'), # more than one line...
]
)
def test_file_writer_in_context(tmpdir, lines, output):
fs, filename, services = txt_tester.get_services_for_writer(tmpdir)
with NodeExecutionContext(FileWriter(path=filename), services=services) as context:
context.write(BEGIN, *map(Bag, lines), END)
for _ in range(len(lines)):
context.step()
with fs.open(filename) as fp:
assert fp.read() == output
def test_file_reader(tmpdir):
fs, filename, services = txt_tester.get_services_for_reader(tmpdir)
with BufferingNodeExecutionContext(FileReader(path=filename), services=services) as context:
context.write_sync(Bag())
output = context.get_buffer()
assert len(output) == 2
assert output[0] == 'Hello'
assert output[1] == 'World'

View File

@ -0,0 +1,66 @@
import pytest
from bonobo import JsonReader, JsonWriter, settings
from bonobo import LdjsonReader, LdjsonWriter
from bonobo.execution.contexts.node import NodeExecutionContext
from bonobo.util.testing import FilesystemTester, BufferingNodeExecutionContext
json_tester = FilesystemTester('json')
json_tester.input_data = '''[{"x": "foo"},{"x": "bar"}]'''
def test_write_json_ioformat_arg0(tmpdir):
fs, filename, services = json_tester.get_services_for_writer(tmpdir)
with pytest.raises(ValueError):
JsonWriter(filename, ioformat=settings.IOFORMAT_ARG0)
with pytest.raises(ValueError):
JsonReader(filename, ioformat=settings.IOFORMAT_ARG0),
@pytest.mark.parametrize('add_kwargs', (
{},
{
'ioformat': settings.IOFORMAT_KWARGS,
},
))
def test_write_json_kwargs(tmpdir, add_kwargs):
fs, filename, services = json_tester.get_services_for_writer(tmpdir)
with NodeExecutionContext(JsonWriter(filename, **add_kwargs), services=services) as context:
context.write_sync({'foo': 'bar'})
with fs.open(filename) as fp:
assert fp.read() == '[{"foo": "bar"}]'
stream_json_tester = FilesystemTester('json')
stream_json_tester.input_data = '''{"foo": "bar"}\n{"baz": "boz"}'''
def test_read_stream_json(tmpdir):
fs, filename, services = stream_json_tester.get_services_for_reader(tmpdir)
with BufferingNodeExecutionContext(LdjsonReader(filename), services=services) as context:
context.write_sync(tuple())
actual = context.get_buffer()
expected = [{"foo": "bar"}, {"baz": "boz"}]
assert expected == actual
def test_write_stream_json(tmpdir):
fs, filename, services = stream_json_tester.get_services_for_reader(tmpdir)
with BufferingNodeExecutionContext(LdjsonWriter(filename), services=services) as context:
context.write_sync(
{
'foo': 'bar'
},
{'baz': 'boz'},
)
expected = '''{"foo": "bar"}\n{"baz": "boz"}\n'''
with fs.open(filename) as fin:
actual = fin.read()
assert expected == actual

View File

@ -0,0 +1,43 @@
import pickle
import pytest
from bonobo import Bag, PickleReader, PickleWriter
from bonobo.execution.contexts.node import NodeExecutionContext
from bonobo.util.testing import BufferingNodeExecutionContext, FilesystemTester
pickle_tester = FilesystemTester('pkl', mode='wb')
pickle_tester.input_data = pickle.dumps([['a', 'b', 'c'], ['a foo', 'b foo', 'c foo'], ['a bar', 'b bar', 'c bar']])
def test_write_pickled_dict_to_file(tmpdir):
fs, filename, services = pickle_tester.get_services_for_writer(tmpdir)
with NodeExecutionContext(PickleWriter(filename), services=services) as context:
context.write_sync(Bag(({'foo': 'bar'}, {})), Bag(({'foo': 'baz', 'ignore': 'this'}, {})))
with fs.open(filename, 'rb') as fp:
assert pickle.loads(fp.read()) == {'foo': 'bar'}
with pytest.raises(AttributeError):
getattr(context, 'file')
def test_read_pickled_list_from_file(tmpdir):
fs, filename, services = pickle_tester.get_services_for_reader(tmpdir)
with BufferingNodeExecutionContext(PickleReader(filename), services=services) as context:
context.write_sync(())
output = context.get_buffer()
assert len(output) == 2
assert output[0] == {
'a': 'a foo',
'b': 'b foo',
'c': 'c foo',
}
assert output[1] == {
'a': 'a bar',
'b': 'b bar',
'c': 'c bar',
}

111
tests/nodes/test_basics.py Normal file
View File

@ -0,0 +1,111 @@
from operator import methodcaller
from unittest.mock import MagicMock
import pytest
import bonobo
from bonobo.config.processors import ContextCurrifier
from bonobo.constants import NOT_MODIFIED
from bonobo.util.testing import BufferingNodeExecutionContext
def test_count():
with pytest.raises(TypeError):
bonobo.count()
context = MagicMock()
with ContextCurrifier(bonobo.count).as_contextmanager(context) as stack:
for i in range(42):
stack()
assert len(context.method_calls) == 1
bag = context.send.call_args[0][0]
assert isinstance(bag, bonobo.Bag)
assert 0 == len(bag.kwargs)
assert 1 == len(bag.args)
assert bag.args[0] == 42
def test_identity():
assert bonobo.identity(42) == 42
def test_limit():
context, results = MagicMock(), []
with ContextCurrifier(bonobo.Limit(2)).as_contextmanager(context) as stack:
for i in range(42):
results += list(stack())
assert results == [NOT_MODIFIED] * 2
def test_limit_not_there():
context, results = MagicMock(), []
with ContextCurrifier(bonobo.Limit(42)).as_contextmanager(context) as stack:
for i in range(10):
results += list(stack())
assert results == [NOT_MODIFIED] * 10
def test_limit_default():
context, results = MagicMock(), []
with ContextCurrifier(bonobo.Limit()).as_contextmanager(context) as stack:
for i in range(20):
results += list(stack())
assert results == [NOT_MODIFIED] * 10
def test_tee():
inner = MagicMock(side_effect=bonobo.identity)
tee = bonobo.Tee(inner)
results = []
for i in range(10):
results.append(tee('foo'))
assert results == [NOT_MODIFIED] * 10
assert len(inner.mock_calls) == 10
def test_noop():
assert bonobo.noop(1, 2, 3, 4, foo='bar') == NOT_MODIFIED
def test_update():
with BufferingNodeExecutionContext(bonobo.Update('a', k=True)) as context:
context.write_sync('a', ('a', {'b': 1}), ('b', {'k': False}))
assert context.get_buffer() == [
bonobo.Bag('a', 'a', k=True),
bonobo.Bag('a', 'a', b=1, k=True),
bonobo.Bag('b', 'a', k=True),
]
assert context.name == "Update('a', k=True)"
def test_fixedwindow():
with BufferingNodeExecutionContext(bonobo.FixedWindow(2)) as context:
context.write_sync(*range(10))
assert context.get_buffer() == [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]
with BufferingNodeExecutionContext(bonobo.FixedWindow(2)) as context:
context.write_sync(*range(9))
assert context.get_buffer() == [[0, 1], [2, 3], [4, 5], [6, 7], [8]]
with BufferingNodeExecutionContext(bonobo.FixedWindow(1)) as context:
context.write_sync(*range(3))
assert context.get_buffer() == [[0], [1], [2]]
def test_methodcaller():
with BufferingNodeExecutionContext(methodcaller('swapcase')) as context:
context.write_sync('aaa', 'bBb', 'CcC')
assert context.get_buffer() == ['AAA', 'BbB', 'cCc']
with BufferingNodeExecutionContext(methodcaller('zfill', 5)) as context:
context.write_sync('a', 'bb', 'ccc')
assert context.get_buffer() == ['0000a', '000bb', '00ccc']