Rewriting Bags from scratch using a namedtuple approach, along with other (less major) updates.
The new bag implementation greatly improves how bonobo works, even if this is highly backward incompatible (sorry, that's needed, and better sooner than later). * The new implementation uses the same approach as python's namedtuple, by dynamically creating the python type's code. This has drawbacks, as it does not feel like the right way, but also a lot of benefits that cannot be achieved using a regular approach, especially the hardcoded constructor parameter order. * Memory usage is now much more efficient. The "keys" memory space will be used only once per "io type", being spent in the underlying type definition instead of in the actual instances. * Transformations now need to use tuples as output, which will be bound to their "output type". The output type can be inferred from the tuple length, or explicitly set by the user using either `context.set_output_type(...)` or `context.set_output_fields(...)` (to build a bag type from a list of field names). Jupyter/Graphviz integration is tighter, allowing graphs to be easily displayed in a notebook, or the live transformation status to be displayed in an html table instead of a simple <div>. For now, context processors were hacked to keep working as before, but the current API is not satisfactory and should be replaced. This big change being unreasonable without some time to work on it properly, it is postponed to later versions (0.7, 0.8, ...). Maybe the best idea is to have some kind of "local services", which would use the same dependency injection mechanism as the execution-wide services. Services are now passed by keyword arguments only, to avoid confusion with data arguments.
This commit is contained in:
@ -1,62 +1,152 @@
|
||||
from collections import namedtuple
|
||||
from unittest import TestCase
|
||||
|
||||
import pytest
|
||||
|
||||
from bonobo import CsvReader, CsvWriter, settings
|
||||
from bonobo.execution.contexts.node import NodeExecutionContext
|
||||
from bonobo.util.testing import FilesystemTester, BufferingNodeExecutionContext
|
||||
from bonobo import CsvReader, CsvWriter
|
||||
from bonobo.constants import EMPTY
|
||||
from bonobo.util.testing import FilesystemTester, BufferingNodeExecutionContext, WriterTest, ConfigurableNodeTest, ReaderTest
|
||||
|
||||
csv_tester = FilesystemTester('csv')
|
||||
csv_tester.input_data = 'a,b,c\na foo,b foo,c foo\na bar,b bar,c bar'
|
||||
|
||||
defaults = {'lineterminator': '\n'}
|
||||
|
||||
def test_write_csv_ioformat_arg0(tmpdir):
|
||||
fs, filename, services = csv_tester.get_services_for_writer(tmpdir)
|
||||
with pytest.raises(ValueError):
|
||||
CsvWriter(path=filename, ioformat=settings.IOFORMAT_ARG0)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
CsvReader(path=filename, delimiter=',', ioformat=settings.IOFORMAT_ARG0),
|
||||
|
||||
|
||||
def test_write_csv_to_file_no_headers(tmpdir):
|
||||
fs, filename, services = csv_tester.get_services_for_writer(tmpdir)
|
||||
|
||||
with NodeExecutionContext(CsvWriter(filename), services=services) as context:
|
||||
context.write_sync(('bar', ), ('baz', 'boo'))
|
||||
|
||||
with fs.open(filename) as fp:
|
||||
assert fp.read() == 'bar\nbaz;boo\n'
|
||||
|
||||
|
||||
def test_write_csv_to_file_with_headers(tmpdir):
|
||||
fs, filename, services = csv_tester.get_services_for_writer(tmpdir)
|
||||
|
||||
with NodeExecutionContext(CsvWriter(filename, headers='foo'), services=services) as context:
|
||||
context.write_sync(('bar', ), ('baz', 'boo'))
|
||||
|
||||
with fs.open(filename) as fp:
|
||||
assert fp.read() == 'foo\nbar\nbaz\n'
|
||||
|
||||
with pytest.raises(AttributeError):
|
||||
getattr(context, 'file')
|
||||
incontext = ConfigurableNodeTest.incontext
|
||||
|
||||
|
||||
def test_read_csv_from_file_kwargs(tmpdir):
|
||||
fs, filename, services = csv_tester.get_services_for_reader(tmpdir)
|
||||
|
||||
with BufferingNodeExecutionContext(
|
||||
CsvReader(path=filename, delimiter=','),
|
||||
services=services,
|
||||
) as context:
|
||||
context.write_sync(())
|
||||
with BufferingNodeExecutionContext(CsvReader(filename, **defaults), services=services) as context:
|
||||
context.write_sync(EMPTY)
|
||||
|
||||
assert context.get_buffer_args_as_dicts() == [
|
||||
{
|
||||
'a': 'a foo',
|
||||
'b': 'b foo',
|
||||
'c': 'c foo',
|
||||
}, {
|
||||
'a': 'a bar',
|
||||
'b': 'b bar',
|
||||
'c': 'c bar',
|
||||
}
|
||||
]
|
||||
assert context.get_buffer_args_as_dicts() == [{
|
||||
'a': 'a foo',
|
||||
'b': 'b foo',
|
||||
'c': 'c foo',
|
||||
}, {
|
||||
'a': 'a bar',
|
||||
'b': 'b bar',
|
||||
'c': 'c bar',
|
||||
}]
|
||||
|
||||
|
||||
###
|
||||
# CSV Readers / Writers
|
||||
###
|
||||
|
||||
|
||||
class Csv:
|
||||
extension = 'csv'
|
||||
ReaderNodeType = CsvReader
|
||||
WriterNodeType = CsvWriter
|
||||
|
||||
|
||||
L1, L2, L3, L4 = ('a', 'hey'), ('b', 'bee'), ('c', 'see'), ('d', 'dee')
|
||||
LL = ('i', 'have', 'more', 'values')
|
||||
|
||||
|
||||
class CsvReaderTest(Csv, ReaderTest, TestCase):
|
||||
input_data = '\n'.join((
|
||||
'id,name',
|
||||
'1,John Doe',
|
||||
'2,Jane Doe',
|
||||
',DPR',
|
||||
'42,Elon Musk',
|
||||
))
|
||||
|
||||
def check_output(self, context, *, prepend=None):
|
||||
out = context.get_buffer()
|
||||
assert out == (prepend or list()) + [
|
||||
('1', 'John Doe'),
|
||||
('2', 'Jane Doe'),
|
||||
('', 'DPR'),
|
||||
('42', 'Elon Musk'),
|
||||
]
|
||||
|
||||
@incontext()
|
||||
def test_nofields(self, context):
|
||||
context.write_sync(EMPTY)
|
||||
context.stop()
|
||||
self.check_output(context)
|
||||
assert context.get_output_fields() == ('id', 'name')
|
||||
|
||||
@incontext(output_type=tuple)
|
||||
def test_output_type(self, context):
|
||||
context.write_sync(EMPTY)
|
||||
context.stop()
|
||||
self.check_output(context, prepend=[('id', 'name')])
|
||||
|
||||
@incontext(
|
||||
output_fields=(
|
||||
'x',
|
||||
'y',
|
||||
), skip=1
|
||||
)
|
||||
def test_output_fields(self, context):
|
||||
context.write_sync(EMPTY)
|
||||
context.stop()
|
||||
self.check_output(context)
|
||||
assert context.get_output_fields() == ('x', 'y')
|
||||
|
||||
|
||||
class CsvWriterTest(Csv, WriterTest, TestCase):
|
||||
@incontext()
|
||||
def test_fields(self, context):
|
||||
context.set_input_fields(['foo', 'bar'])
|
||||
context.write_sync(('a', 'b'), ('c', 'd'))
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == (
|
||||
'foo,bar',
|
||||
'a,b',
|
||||
'c,d',
|
||||
)
|
||||
|
||||
@incontext()
|
||||
def test_fields_from_type(self, context):
|
||||
context.set_input_type(namedtuple('Point', 'x y'))
|
||||
context.write_sync((1, 2), (3, 4))
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == ('x,y', '1,2', '3,4')
|
||||
|
||||
@incontext()
|
||||
def test_nofields_multiple_args(self, context):
|
||||
# multiple args are iterated onto and flattened in output
|
||||
context.write_sync((L1, L2), (L3, L4))
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == (
|
||||
'a,hey',
|
||||
'b,bee',
|
||||
'c,see',
|
||||
'd,dee',
|
||||
)
|
||||
|
||||
@incontext()
|
||||
def test_nofields_multiple_args_length_mismatch(self, context):
|
||||
# if length of input vary, then we get a TypeError (unrecoverable)
|
||||
with pytest.raises(TypeError):
|
||||
context.write_sync((L1, L2), (L3, ))
|
||||
|
||||
@incontext()
|
||||
def test_nofields_single_arg(self, context):
|
||||
# single args are just dumped, shapes can vary.
|
||||
context.write_sync((L1, ), (LL, ), (L3, ))
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == (
|
||||
'a,hey',
|
||||
'i,have,more,values',
|
||||
'c,see',
|
||||
)
|
||||
|
||||
@incontext()
|
||||
def test_nofields_empty_args(self, context):
|
||||
# empty calls are ignored
|
||||
context.write_sync(EMPTY, EMPTY, EMPTY)
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == ()
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import pytest
|
||||
|
||||
from bonobo import Bag, FileReader, FileWriter
|
||||
from bonobo.constants import BEGIN, END
|
||||
from bonobo import FileReader, FileWriter
|
||||
from bonobo.constants import EMPTY
|
||||
from bonobo.execution.contexts.node import NodeExecutionContext
|
||||
from bonobo.util.testing import BufferingNodeExecutionContext, FilesystemTester
|
||||
|
||||
@ -30,9 +30,7 @@ def test_file_writer_in_context(tmpdir, lines, output):
|
||||
fs, filename, services = txt_tester.get_services_for_writer(tmpdir)
|
||||
|
||||
with NodeExecutionContext(FileWriter(path=filename), services=services) as context:
|
||||
context.write(BEGIN, *map(Bag, lines), END)
|
||||
for _ in range(len(lines)):
|
||||
context.step()
|
||||
context.write_sync(*lines)
|
||||
|
||||
with fs.open(filename) as fp:
|
||||
assert fp.read() == output
|
||||
@ -42,9 +40,9 @@ def test_file_reader(tmpdir):
|
||||
fs, filename, services = txt_tester.get_services_for_reader(tmpdir)
|
||||
|
||||
with BufferingNodeExecutionContext(FileReader(path=filename), services=services) as context:
|
||||
context.write_sync(Bag())
|
||||
output = context.get_buffer()
|
||||
context.write_sync(EMPTY)
|
||||
|
||||
output = context.get_buffer()
|
||||
assert len(output) == 2
|
||||
assert output[0] == 'Hello'
|
||||
assert output[1] == 'World'
|
||||
assert output[0] == ('Hello', )
|
||||
assert output[1] == ('World', )
|
||||
|
||||
@ -1,66 +1,300 @@
|
||||
import json
|
||||
from collections import OrderedDict, namedtuple
|
||||
from unittest import TestCase
|
||||
|
||||
import pytest
|
||||
|
||||
from bonobo import JsonReader, JsonWriter, settings
|
||||
from bonobo import JsonReader, JsonWriter
|
||||
from bonobo import LdjsonReader, LdjsonWriter
|
||||
from bonobo.execution.contexts.node import NodeExecutionContext
|
||||
from bonobo.util.testing import FilesystemTester, BufferingNodeExecutionContext
|
||||
from bonobo.constants import EMPTY
|
||||
from bonobo.util.testing import WriterTest, ReaderTest, ConfigurableNodeTest
|
||||
|
||||
json_tester = FilesystemTester('json')
|
||||
json_tester.input_data = '''[{"x": "foo"},{"x": "bar"}]'''
|
||||
FOOBAR = {'foo': 'bar'}
|
||||
OD_ABC = OrderedDict((('a', 'A'), ('b', 'B'), ('c', 'C')))
|
||||
FOOBAZ = {'foo': 'baz'}
|
||||
|
||||
incontext = ConfigurableNodeTest.incontext
|
||||
|
||||
###
|
||||
# Standard JSON Readers / Writers
|
||||
###
|
||||
|
||||
|
||||
def test_write_json_ioformat_arg0(tmpdir):
|
||||
fs, filename, services = json_tester.get_services_for_writer(tmpdir)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
JsonWriter(filename, ioformat=settings.IOFORMAT_ARG0)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
JsonReader(filename, ioformat=settings.IOFORMAT_ARG0),
|
||||
class Json:
|
||||
extension = 'json'
|
||||
ReaderNodeType = JsonReader
|
||||
WriterNodeType = JsonWriter
|
||||
|
||||
|
||||
@pytest.mark.parametrize('add_kwargs', (
|
||||
{},
|
||||
{
|
||||
'ioformat': settings.IOFORMAT_KWARGS,
|
||||
},
|
||||
))
|
||||
def test_write_json_kwargs(tmpdir, add_kwargs):
|
||||
fs, filename, services = json_tester.get_services_for_writer(tmpdir)
|
||||
class JsonReaderDictsTest(Json, ReaderTest, TestCase):
|
||||
input_data = '[{"foo": "bar"},\n{"baz": "boz"}]'
|
||||
|
||||
with NodeExecutionContext(JsonWriter(filename, **add_kwargs), services=services) as context:
|
||||
context.write_sync({'foo': 'bar'})
|
||||
@incontext()
|
||||
def test_nofields(self, context):
|
||||
context.write_sync(EMPTY)
|
||||
context.stop()
|
||||
|
||||
with fs.open(filename) as fp:
|
||||
assert fp.read() == '[{"foo": "bar"}]'
|
||||
assert context.get_buffer() == [
|
||||
({
|
||||
"foo": "bar"
|
||||
}, ),
|
||||
({
|
||||
"baz": "boz"
|
||||
}, ),
|
||||
]
|
||||
|
||||
|
||||
stream_json_tester = FilesystemTester('json')
|
||||
stream_json_tester.input_data = '''{"foo": "bar"}\n{"baz": "boz"}'''
|
||||
class JsonReaderListsTest(Json, ReaderTest, TestCase):
|
||||
input_data = '[[1,2,3],\n[4,5,6]]'
|
||||
|
||||
@incontext()
|
||||
def test_nofields(self, context):
|
||||
context.write_sync(EMPTY)
|
||||
context.stop()
|
||||
|
||||
assert context.get_buffer() == [
|
||||
([1, 2, 3], ),
|
||||
([4, 5, 6], ),
|
||||
]
|
||||
|
||||
@incontext(output_type=tuple)
|
||||
def test_output_type(self, context):
|
||||
context.write_sync(EMPTY)
|
||||
context.stop()
|
||||
|
||||
assert context.get_buffer() == [
|
||||
([1, 2, 3], ),
|
||||
([4, 5, 6], ),
|
||||
]
|
||||
|
||||
|
||||
def test_read_stream_json(tmpdir):
|
||||
fs, filename, services = stream_json_tester.get_services_for_reader(tmpdir)
|
||||
with BufferingNodeExecutionContext(LdjsonReader(filename), services=services) as context:
|
||||
context.write_sync(tuple())
|
||||
actual = context.get_buffer()
|
||||
class JsonReaderStringsTest(Json, ReaderTest, TestCase):
|
||||
input_data = '[' + ',\n'.join(map(json.dumps, ('foo', 'bar', 'baz'))) + ']'
|
||||
|
||||
expected = [{"foo": "bar"}, {"baz": "boz"}]
|
||||
assert expected == actual
|
||||
@incontext()
|
||||
def test_nofields(self, context):
|
||||
context.write_sync(EMPTY)
|
||||
context.stop()
|
||||
|
||||
assert context.get_buffer() == [
|
||||
('foo', ),
|
||||
('bar', ),
|
||||
('baz', ),
|
||||
]
|
||||
|
||||
@incontext(output_type=tuple)
|
||||
def test_output_type(self, context):
|
||||
context.write_sync(EMPTY)
|
||||
context.stop()
|
||||
|
||||
assert context.get_buffer() == [
|
||||
('foo', ),
|
||||
('bar', ),
|
||||
('baz', ),
|
||||
]
|
||||
|
||||
|
||||
def test_write_stream_json(tmpdir):
|
||||
fs, filename, services = stream_json_tester.get_services_for_reader(tmpdir)
|
||||
class JsonWriterTest(Json, WriterTest, TestCase):
|
||||
@incontext()
|
||||
def test_fields(self, context):
|
||||
context.set_input_fields(['foo', 'bar'])
|
||||
context.write_sync(('a', 'b'), ('c', 'd'))
|
||||
context.stop()
|
||||
|
||||
with BufferingNodeExecutionContext(LdjsonWriter(filename), services=services) as context:
|
||||
context.write_sync(
|
||||
{
|
||||
'foo': 'bar'
|
||||
},
|
||||
{'baz': 'boz'},
|
||||
assert self.readlines() == (
|
||||
'[{"foo": "a", "bar": "b"},',
|
||||
'{"foo": "c", "bar": "d"}]',
|
||||
)
|
||||
|
||||
expected = '''{"foo": "bar"}\n{"baz": "boz"}\n'''
|
||||
with fs.open(filename) as fin:
|
||||
actual = fin.read()
|
||||
assert expected == actual
|
||||
@incontext()
|
||||
def test_fields_from_type(self, context):
|
||||
context.set_input_type(namedtuple('Point', 'x y'))
|
||||
context.write_sync((1, 2), (3, 4))
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == (
|
||||
'[{"x": 1, "y": 2},',
|
||||
'{"x": 3, "y": 4}]',
|
||||
)
|
||||
|
||||
@incontext()
|
||||
def test_nofields_multiple_args(self, context):
|
||||
# multiple args are iterated onto and flattened in output
|
||||
context.write_sync((FOOBAR, FOOBAR), (OD_ABC, FOOBAR), (FOOBAZ, FOOBAR))
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == (
|
||||
'[{"foo": "bar"},',
|
||||
'{"foo": "bar"},',
|
||||
'{"a": "A", "b": "B", "c": "C"},',
|
||||
'{"foo": "bar"},',
|
||||
'{"foo": "baz"},',
|
||||
'{"foo": "bar"}]',
|
||||
)
|
||||
|
||||
@incontext()
|
||||
def test_nofields_multiple_args_length_mismatch(self, context):
|
||||
# if length of input vary, then we get a TypeError (unrecoverable)
|
||||
with pytest.raises(TypeError):
|
||||
context.write_sync((FOOBAR, FOOBAR), (OD_ABC))
|
||||
|
||||
@incontext()
|
||||
def test_nofields_single_arg(self, context):
|
||||
# single args are just dumped, shapes can vary.
|
||||
context.write_sync(FOOBAR, OD_ABC, FOOBAZ)
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == (
|
||||
'[{"foo": "bar"},',
|
||||
'{"a": "A", "b": "B", "c": "C"},',
|
||||
'{"foo": "baz"}]',
|
||||
)
|
||||
|
||||
@incontext()
|
||||
def test_nofields_empty_args(self, context):
|
||||
# empty calls are ignored
|
||||
context.write_sync(EMPTY, EMPTY, EMPTY)
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == ('[]', )
|
||||
|
||||
|
||||
###
|
||||
# Line Delimiter JSON Readers / Writers
|
||||
###
|
||||
|
||||
|
||||
class Ldjson:
|
||||
extension = 'ldjson'
|
||||
ReaderNodeType = LdjsonReader
|
||||
WriterNodeType = LdjsonWriter
|
||||
|
||||
|
||||
class LdjsonReaderDictsTest(Ldjson, ReaderTest, TestCase):
|
||||
input_data = '{"foo": "bar"}\n{"baz": "boz"}'
|
||||
|
||||
@incontext()
|
||||
def test_nofields(self, context):
|
||||
context.write_sync(EMPTY)
|
||||
context.stop()
|
||||
|
||||
assert context.get_buffer() == [
|
||||
({
|
||||
"foo": "bar"
|
||||
}, ),
|
||||
({
|
||||
"baz": "boz"
|
||||
}, ),
|
||||
]
|
||||
|
||||
|
||||
class LdjsonReaderListsTest(Ldjson, ReaderTest, TestCase):
|
||||
input_data = '[1,2,3]\n[4,5,6]'
|
||||
|
||||
@incontext()
|
||||
def test_nofields(self, context):
|
||||
context.write_sync(EMPTY)
|
||||
context.stop()
|
||||
|
||||
assert context.get_buffer() == [
|
||||
([1, 2, 3], ),
|
||||
([4, 5, 6], ),
|
||||
]
|
||||
|
||||
@incontext(output_type=tuple)
|
||||
def test_output_type(self, context):
|
||||
context.write_sync(EMPTY)
|
||||
context.stop()
|
||||
|
||||
assert context.get_buffer() == [
|
||||
([1, 2, 3], ),
|
||||
([4, 5, 6], ),
|
||||
]
|
||||
|
||||
|
||||
class LdjsonReaderStringsTest(Ldjson, ReaderTest, TestCase):
|
||||
input_data = '\n'.join(map(json.dumps, ('foo', 'bar', 'baz')))
|
||||
|
||||
@incontext()
|
||||
def test_nofields(self, context):
|
||||
context.write_sync(EMPTY)
|
||||
context.stop()
|
||||
|
||||
assert context.get_buffer() == [
|
||||
('foo', ),
|
||||
('bar', ),
|
||||
('baz', ),
|
||||
]
|
||||
|
||||
@incontext(output_type=tuple)
|
||||
def test_output_type(self, context):
|
||||
context.write_sync(EMPTY)
|
||||
context.stop()
|
||||
|
||||
assert context.get_buffer() == [
|
||||
('foo', ),
|
||||
('bar', ),
|
||||
('baz', ),
|
||||
]
|
||||
|
||||
|
||||
class LdjsonWriterTest(Ldjson, WriterTest, TestCase):
|
||||
@incontext()
|
||||
def test_fields(self, context):
|
||||
context.set_input_fields(['foo', 'bar'])
|
||||
context.write_sync(('a', 'b'), ('c', 'd'))
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == ('{"foo": "a", "bar": "b"}', '{"foo": "c", "bar": "d"}')
|
||||
|
||||
@incontext()
|
||||
def test_fields_from_type(self, context):
|
||||
context.set_input_type(namedtuple('Point', 'x y'))
|
||||
context.write_sync((1, 2), (3, 4))
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == (
|
||||
'{"x": 1, "y": 2}',
|
||||
'{"x": 3, "y": 4}',
|
||||
)
|
||||
|
||||
@incontext()
|
||||
def test_nofields_multiple_args(self, context):
|
||||
# multiple args are iterated onto and flattened in output
|
||||
context.write_sync((FOOBAR, FOOBAR), (OD_ABC, FOOBAR), (FOOBAZ, FOOBAR))
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == (
|
||||
'{"foo": "bar"}',
|
||||
'{"foo": "bar"}',
|
||||
'{"a": "A", "b": "B", "c": "C"}',
|
||||
'{"foo": "bar"}',
|
||||
'{"foo": "baz"}',
|
||||
'{"foo": "bar"}',
|
||||
)
|
||||
|
||||
@incontext()
|
||||
def test_nofields_multiple_args_length_mismatch(self, context):
|
||||
# if length of input vary, then we get a TypeError (unrecoverable)
|
||||
with pytest.raises(TypeError):
|
||||
context.write_sync((FOOBAR, FOOBAR), (OD_ABC))
|
||||
|
||||
@incontext()
|
||||
def test_nofields_single_arg(self, context):
|
||||
# single args are just dumped, shapes can vary.
|
||||
context.write_sync(FOOBAR, OD_ABC, FOOBAZ)
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == (
|
||||
'{"foo": "bar"}',
|
||||
'{"a": "A", "b": "B", "c": "C"}',
|
||||
'{"foo": "baz"}',
|
||||
)
|
||||
|
||||
@incontext()
|
||||
def test_nofields_empty_args(self, context):
|
||||
# empty calls are ignored
|
||||
context.write_sync(EMPTY, EMPTY, EMPTY)
|
||||
context.stop()
|
||||
|
||||
assert self.readlines() == ()
|
||||
|
||||
@ -2,7 +2,8 @@ import pickle
|
||||
|
||||
import pytest
|
||||
|
||||
from bonobo import Bag, PickleReader, PickleWriter
|
||||
from bonobo import PickleReader, PickleWriter
|
||||
from bonobo.constants import EMPTY
|
||||
from bonobo.execution.contexts.node import NodeExecutionContext
|
||||
from bonobo.util.testing import BufferingNodeExecutionContext, FilesystemTester
|
||||
|
||||
@ -14,7 +15,7 @@ def test_write_pickled_dict_to_file(tmpdir):
|
||||
fs, filename, services = pickle_tester.get_services_for_writer(tmpdir)
|
||||
|
||||
with NodeExecutionContext(PickleWriter(filename), services=services) as context:
|
||||
context.write_sync(Bag(({'foo': 'bar'}, {})), Bag(({'foo': 'baz', 'ignore': 'this'}, {})))
|
||||
context.write_sync({'foo': 'bar'}, {'foo': 'baz', 'ignore': 'this'})
|
||||
|
||||
with fs.open(filename, 'rb') as fp:
|
||||
assert pickle.loads(fp.read()) == {'foo': 'bar'}
|
||||
@ -27,17 +28,11 @@ def test_read_pickled_list_from_file(tmpdir):
|
||||
fs, filename, services = pickle_tester.get_services_for_reader(tmpdir)
|
||||
|
||||
with BufferingNodeExecutionContext(PickleReader(filename), services=services) as context:
|
||||
context.write_sync(())
|
||||
output = context.get_buffer()
|
||||
context.write_sync(EMPTY)
|
||||
|
||||
assert len(output) == 2
|
||||
assert output[0] == {
|
||||
'a': 'a foo',
|
||||
'b': 'b foo',
|
||||
'c': 'c foo',
|
||||
}
|
||||
assert output[1] == {
|
||||
'a': 'a bar',
|
||||
'b': 'b bar',
|
||||
'c': 'c bar',
|
||||
}
|
||||
output = context.get_buffer()
|
||||
assert context.get_output_fields() == ('a', 'b', 'c')
|
||||
assert output == [
|
||||
('a foo', 'b foo', 'c foo'),
|
||||
('a bar', 'b bar', 'c bar'),
|
||||
]
|
||||
|
||||
@ -1,64 +1,79 @@
|
||||
from operator import methodcaller
|
||||
from unittest import TestCase
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
import bonobo
|
||||
from bonobo.config.processors import ContextCurrifier
|
||||
from bonobo.constants import NOT_MODIFIED
|
||||
from bonobo.util.testing import BufferingNodeExecutionContext
|
||||
from bonobo.constants import NOT_MODIFIED, EMPTY
|
||||
from bonobo.util import ensure_tuple, ValueHolder
|
||||
from bonobo.util.testing import BufferingNodeExecutionContext, StaticNodeTest, ConfigurableNodeTest
|
||||
|
||||
|
||||
def test_count():
|
||||
with pytest.raises(TypeError):
|
||||
bonobo.count()
|
||||
class CountTest(StaticNodeTest, TestCase):
|
||||
node = bonobo.count
|
||||
|
||||
context = MagicMock()
|
||||
def test_counter_required(self):
|
||||
with pytest.raises(TypeError):
|
||||
self.call()
|
||||
|
||||
with ContextCurrifier(bonobo.count).as_contextmanager(context) as stack:
|
||||
for i in range(42):
|
||||
stack()
|
||||
def test_manual_call(self):
|
||||
counter = ValueHolder(0)
|
||||
for i in range(3):
|
||||
self.call(counter)
|
||||
assert counter == 3
|
||||
|
||||
assert len(context.method_calls) == 1
|
||||
bag = context.send.call_args[0][0]
|
||||
assert isinstance(bag, bonobo.Bag)
|
||||
assert 0 == len(bag.kwargs)
|
||||
assert 1 == len(bag.args)
|
||||
assert bag.args[0] == 42
|
||||
def test_execution(self):
|
||||
with self.execute() as context:
|
||||
context.write_sync(*([EMPTY] * 42))
|
||||
assert context.get_buffer() == [(42, )]
|
||||
|
||||
|
||||
def test_identity():
|
||||
assert bonobo.identity(42) == 42
|
||||
class IdentityTest(StaticNodeTest, TestCase):
|
||||
node = bonobo.identity
|
||||
|
||||
def test_basic_call(self):
|
||||
assert self.call(42) == 42
|
||||
|
||||
def test_execution(self):
|
||||
object_list = [object() for _ in range(42)]
|
||||
with self.execute() as context:
|
||||
context.write_sync(*object_list)
|
||||
assert context.get_buffer() == list(map(ensure_tuple, object_list))
|
||||
|
||||
|
||||
def test_limit():
|
||||
context, results = MagicMock(), []
|
||||
class LimitTest(ConfigurableNodeTest, TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.NodeType = bonobo.Limit
|
||||
|
||||
with ContextCurrifier(bonobo.Limit(2)).as_contextmanager(context) as stack:
|
||||
for i in range(42):
|
||||
results += list(stack())
|
||||
def test_execution_default(self):
|
||||
object_list = [object() for _ in range(42)]
|
||||
with self.execute() as context:
|
||||
context.write_sync(*object_list)
|
||||
|
||||
assert results == [NOT_MODIFIED] * 2
|
||||
assert context.get_buffer() == list(map(ensure_tuple, object_list[:10]))
|
||||
|
||||
def test_execution_custom(self):
|
||||
object_list = [object() for _ in range(42)]
|
||||
with self.execute(21) as context:
|
||||
context.write_sync(*object_list)
|
||||
|
||||
def test_limit_not_there():
|
||||
context, results = MagicMock(), []
|
||||
assert context.get_buffer() == list(map(ensure_tuple, object_list[:21]))
|
||||
|
||||
with ContextCurrifier(bonobo.Limit(42)).as_contextmanager(context) as stack:
|
||||
for i in range(10):
|
||||
results += list(stack())
|
||||
def test_manual(self):
|
||||
limit = self.NodeType(5)
|
||||
buffer = []
|
||||
for x in range(10):
|
||||
buffer += list(limit(x))
|
||||
assert len(buffer) == 5
|
||||
|
||||
assert results == [NOT_MODIFIED] * 10
|
||||
|
||||
|
||||
def test_limit_default():
|
||||
context, results = MagicMock(), []
|
||||
|
||||
with ContextCurrifier(bonobo.Limit()).as_contextmanager(context) as stack:
|
||||
for i in range(20):
|
||||
results += list(stack())
|
||||
|
||||
assert results == [NOT_MODIFIED] * 10
|
||||
def test_underflow(self):
|
||||
limit = self.NodeType(10)
|
||||
buffer = []
|
||||
for x in range(5):
|
||||
buffer += list(limit(x))
|
||||
assert len(buffer) == 5
|
||||
|
||||
|
||||
def test_tee():
|
||||
@ -76,36 +91,28 @@ def test_noop():
|
||||
assert bonobo.noop(1, 2, 3, 4, foo='bar') == NOT_MODIFIED
|
||||
|
||||
|
||||
def test_update():
|
||||
with BufferingNodeExecutionContext(bonobo.Update('a', k=True)) as context:
|
||||
context.write_sync('a', ('a', {'b': 1}), ('b', {'k': False}))
|
||||
assert context.get_buffer() == [
|
||||
bonobo.Bag('a', 'a', k=True),
|
||||
bonobo.Bag('a', 'a', b=1, k=True),
|
||||
bonobo.Bag('b', 'a', k=True),
|
||||
]
|
||||
assert context.name == "Update('a', k=True)"
|
||||
|
||||
|
||||
def test_fixedwindow():
|
||||
with BufferingNodeExecutionContext(bonobo.FixedWindow(2)) as context:
|
||||
context.write_sync(*range(10))
|
||||
assert context.get_buffer() == [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]
|
||||
assert context.get_buffer() == [(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)]
|
||||
|
||||
with BufferingNodeExecutionContext(bonobo.FixedWindow(2)) as context:
|
||||
context.write_sync(*range(9))
|
||||
assert context.get_buffer() == [[0, 1], [2, 3], [4, 5], [6, 7], [8]]
|
||||
assert context.get_buffer() == [(0, 1), (2, 3), (4, 5), (6, 7), (
|
||||
8,
|
||||
None,
|
||||
)]
|
||||
|
||||
with BufferingNodeExecutionContext(bonobo.FixedWindow(1)) as context:
|
||||
context.write_sync(*range(3))
|
||||
assert context.get_buffer() == [[0], [1], [2]]
|
||||
assert context.get_buffer() == [(0, ), (1, ), (2, )]
|
||||
|
||||
|
||||
def test_methodcaller():
|
||||
with BufferingNodeExecutionContext(methodcaller('swapcase')) as context:
|
||||
context.write_sync('aaa', 'bBb', 'CcC')
|
||||
assert context.get_buffer() == ['AAA', 'BbB', 'cCc']
|
||||
assert context.get_buffer() == list(map(ensure_tuple, ['AAA', 'BbB', 'cCc']))
|
||||
|
||||
with BufferingNodeExecutionContext(methodcaller('zfill', 5)) as context:
|
||||
context.write_sync('a', 'bb', 'ccc')
|
||||
assert context.get_buffer() == ['0000a', '000bb', '00ccc']
|
||||
assert context.get_buffer() == list(map(ensure_tuple, ['0000a', '000bb', '00ccc']))
|
||||
|
||||
Reference in New Issue
Block a user