Files
bonobo/tests/nodes/io/test_json.py
Romain Dorgueil 5e0b6567cd Rewriting Bags from scratch using a namedtuple approach, along with other (less major) updates.
New bag implementation improves a lot how bonobo works, even if this is
highly backward incompatible (sorry, that's needed, and better sooner
than later).

* New implementation uses the same approach as python's namedtuple,
  by dynamically creating the python type's code. This has drawbacks, as
  it feels like not the right way, but also a lot of benefits that
  cannot be achieved using a regular approach, especially the
  constructor parameter order, hardcoded.
* Memory usage is now much more efficient. The "keys" memory space will
  be used only once per "io type", being spent in the underlying type
  definition instead of in the actual instances.
* Transformations now need to use tuples as output, which will be bound
  to their "output type". The output type can be inferred from the tuple
  length, or explicitly set by the user using either
  `context.set_output_type(...)` or `context.set_output_fields(...)` (to
  build a bag type from a list of field names).

Jupyter/Graphviz integration is tighter, making it easy to display
graphs in a notebook, or to display the live transformation status in an
html table instead of a simple <div>.

For now, context processors were hacked to stay working as before but
the current API is not satisfactory, and should be replaced. This new
big change being unreasonable without some time to work on it properly,
it is postponed for next versions (0.7, 0.8, ...). Maybe the best idea
is to have some kind of "local services", that would use the same
dependency injection mechanism as the execution-wide services.

Services are now passed by keyword arguments only, to avoid confusion
with data-arguments.
2017-11-27 00:04:51 +01:00

301 lines
7.6 KiB
Python

import json
from collections import OrderedDict, namedtuple
from unittest import TestCase
import pytest
from bonobo import JsonReader, JsonWriter
from bonobo import LdjsonReader, LdjsonWriter
from bonobo.constants import EMPTY
from bonobo.util.testing import WriterTest, ReaderTest, ConfigurableNodeTest
# Sample records shared by the writer tests in this module.
FOOBAR = dict(foo='bar')
FOOBAZ = dict(foo='baz')
# Insertion order a, b, c matches the key order the writer tests expect
# to find in the serialized output.
OD_ABC = OrderedDict([('a', 'A'), ('b', 'B'), ('c', 'C')])
# Module-level shorthand for the decorator applied to the test methods of this
# module. Presumably it runs the decorated test inside a node execution context
# that is passed in as the `context` argument (defined in bonobo.util.testing,
# not visible here -- confirm there).
incontext = ConfigurableNodeTest.incontext
###
# Standard JSON Readers / Writers
###
class Json:
    """Shared settings for the standard JSON test cases.

    Bundles the file extension with the reader and writer node classes under
    test. Presumably consumed by the ReaderTest / WriterTest helpers from
    bonobo.util.testing (not visible in this file).
    """

    ReaderNodeType = JsonReader
    WriterNodeType = JsonWriter
    extension = 'json'
class JsonReaderDictsTest(Json, ReaderTest, TestCase):
    """JsonReader test case whose input is a JSON array of two objects."""

    # Input document for the reader (presumably written to the input file by
    # ReaderTest -- not visible here).
    input_data = '[{"foo": "bar"},\n{"baz": "boz"}]'

    @incontext()
    def test_nofields(self, context):
        # Send a single EMPTY message, then stop the context before
        # inspecting what was emitted.
        context.write_sync(EMPTY)
        context.stop()

        # Each decoded object is expected wrapped in its own 1-tuple.
        expected = [
            ({"foo": "bar"}, ),
            ({"baz": "boz"}, ),
        ]
        assert context.get_buffer() == expected
class JsonReaderListsTest(Json, ReaderTest, TestCase):
    """JsonReader test case whose input is a JSON array of two lists."""

    # Input document for the reader (presumably written to the input file by
    # ReaderTest -- not visible here).
    input_data = '[[1,2,3],\n[4,5,6]]'

    @incontext()
    def test_nofields(self, context):
        # Send a single EMPTY message, then stop the context before
        # inspecting what was emitted.
        context.write_sync(EMPTY)
        context.stop()

        # Each decoded list is expected wrapped in its own 1-tuple.
        expected = [
            ([1, 2, 3], ),
            ([4, 5, 6], ),
        ]
        assert context.get_buffer() == expected

    @incontext(output_type=tuple)
    def test_output_type(self, context):
        # Same scenario with the output type explicitly set to `tuple`;
        # the expected buffer is identical to the default case.
        context.write_sync(EMPTY)
        context.stop()

        expected = [
            ([1, 2, 3], ),
            ([4, 5, 6], ),
        ]
        assert context.get_buffer() == expected
class JsonReaderStringsTest(Json, ReaderTest, TestCase):
    """JsonReader test case whose input is a JSON array of three strings."""

    # Builds '["foo",\n"bar",\n"baz"]': JSON-encoded words joined by a comma
    # and a newline, enclosed in array brackets.
    input_data = '[{}]'.format(',\n'.join(json.dumps(word) for word in ('foo', 'bar', 'baz')))

    @incontext()
    def test_nofields(self, context):
        # Send a single EMPTY message, then stop the context before
        # inspecting what was emitted.
        context.write_sync(EMPTY)
        context.stop()

        # Each decoded string is expected wrapped in its own 1-tuple.
        expected = [
            ('foo', ),
            ('bar', ),
            ('baz', ),
        ]
        assert context.get_buffer() == expected

    @incontext(output_type=tuple)
    def test_output_type(self, context):
        # Same scenario with the output type explicitly set to `tuple`;
        # the expected buffer is identical to the default case.
        context.write_sync(EMPTY)
        context.stop()

        expected = [
            ('foo', ),
            ('bar', ),
            ('baz', ),
        ]
        assert context.get_buffer() == expected
class JsonWriterTest(Json, WriterTest, TestCase):
    """JsonWriter test case: checks the lines written for various input shapes."""

    @incontext()
    def test_fields(self, context):
        # With explicit input field names, each input tuple is expected to be
        # serialized as an object keyed by those names.
        context.set_input_fields(['foo', 'bar'])
        context.write_sync(('a', 'b'), ('c', 'd'))
        context.stop()

        expected = (
            '[{"foo": "a", "bar": "b"},',
            '{"foo": "c", "bar": "d"}]',
        )
        assert self.readlines() == expected

    @incontext()
    def test_fields_from_type(self, context):
        # The field names are expected to come from the namedtuple input type.
        point_type = namedtuple('Point', ['x', 'y'])
        context.set_input_type(point_type)
        context.write_sync((1, 2), (3, 4))
        context.stop()

        expected = (
            '[{"x": 1, "y": 2},',
            '{"x": 3, "y": 4}]',
        )
        assert self.readlines() == expected

    @incontext()
    def test_nofields_multiple_args(self, context):
        # Every message is a 2-tuple; each element is expected to end up as
        # its own entry in the output, in order.
        messages = [
            (FOOBAR, FOOBAR),
            (OD_ABC, FOOBAR),
            (FOOBAZ, FOOBAR),
        ]
        context.write_sync(*messages)
        context.stop()

        expected = (
            '[{"foo": "bar"},',
            '{"foo": "bar"},',
            '{"a": "A", "b": "B", "c": "C"},',
            '{"foo": "bar"},',
            '{"foo": "baz"},',
            '{"foo": "bar"}]',
        )
        assert self.readlines() == expected

    @incontext()
    def test_nofields_multiple_args_length_mismatch(self, context):
        # Inputs of varying length must fail with a TypeError (unrecoverable).
        # NOTE(review): the second message is the bare OD_ABC mapping, not a
        # 1-tuple; presumably the context wraps it into one, which is what
        # mismatches the 2-tuple sent first -- TODO confirm.
        with pytest.raises(TypeError):
            context.write_sync((FOOBAR, FOOBAR), OD_ABC)

    @incontext()
    def test_nofields_single_arg(self, context):
        # Bare (non-tuple) messages are written one entry each; their key
        # sets are allowed to differ.
        context.write_sync(FOOBAR, OD_ABC, FOOBAZ)
        context.stop()

        expected = (
            '[{"foo": "bar"},',
            '{"a": "A", "b": "B", "c": "C"},',
            '{"foo": "baz"}]',
        )
        assert self.readlines() == expected

    @incontext()
    def test_nofields_empty_args(self, context):
        # EMPTY messages add no entries: the file holds just an empty array.
        context.write_sync(EMPTY, EMPTY, EMPTY)
        context.stop()

        expected = ('[]', )
        assert self.readlines() == expected
###
# Line-delimited JSON (LDJSON) Readers / Writers
###
class Ldjson:
    """Shared settings for the line-delimited JSON test cases.

    Bundles the file extension with the reader and writer node classes under
    test. Presumably consumed by the ReaderTest / WriterTest helpers from
    bonobo.util.testing (not visible in this file).
    """

    ReaderNodeType = LdjsonReader
    WriterNodeType = LdjsonWriter
    extension = 'ldjson'
class LdjsonReaderDictsTest(Ldjson, ReaderTest, TestCase):
    """LdjsonReader test case whose input holds one JSON object per line."""

    # Input document for the reader (presumably written to the input file by
    # ReaderTest -- not visible here).
    input_data = '{"foo": "bar"}\n{"baz": "boz"}'

    @incontext()
    def test_nofields(self, context):
        # Send a single EMPTY message, then stop the context before
        # inspecting what was emitted.
        context.write_sync(EMPTY)
        context.stop()

        # Each decoded object is expected wrapped in its own 1-tuple.
        expected = [
            ({"foo": "bar"}, ),
            ({"baz": "boz"}, ),
        ]
        assert context.get_buffer() == expected
class LdjsonReaderListsTest(Ldjson, ReaderTest, TestCase):
    """LdjsonReader test case whose input holds one JSON list per line."""

    # Input document for the reader (presumably written to the input file by
    # ReaderTest -- not visible here).
    input_data = '[1,2,3]\n[4,5,6]'

    @incontext()
    def test_nofields(self, context):
        # Send a single EMPTY message, then stop the context before
        # inspecting what was emitted.
        context.write_sync(EMPTY)
        context.stop()

        # Each decoded list is expected wrapped in its own 1-tuple.
        expected = [
            ([1, 2, 3], ),
            ([4, 5, 6], ),
        ]
        assert context.get_buffer() == expected

    @incontext(output_type=tuple)
    def test_output_type(self, context):
        # Same scenario with the output type explicitly set to `tuple`;
        # the expected buffer is identical to the default case.
        context.write_sync(EMPTY)
        context.stop()

        expected = [
            ([1, 2, 3], ),
            ([4, 5, 6], ),
        ]
        assert context.get_buffer() == expected
class LdjsonReaderStringsTest(Ldjson, ReaderTest, TestCase):
    """LdjsonReader test case whose input holds one JSON string per line."""

    # Builds '"foo"\n"bar"\n"baz"': one JSON-encoded word per line.
    input_data = '\n'.join(json.dumps(word) for word in ('foo', 'bar', 'baz'))

    @incontext()
    def test_nofields(self, context):
        # Send a single EMPTY message, then stop the context before
        # inspecting what was emitted.
        context.write_sync(EMPTY)
        context.stop()

        # Each decoded string is expected wrapped in its own 1-tuple.
        expected = [
            ('foo', ),
            ('bar', ),
            ('baz', ),
        ]
        assert context.get_buffer() == expected

    @incontext(output_type=tuple)
    def test_output_type(self, context):
        # Same scenario with the output type explicitly set to `tuple`;
        # the expected buffer is identical to the default case.
        context.write_sync(EMPTY)
        context.stop()

        expected = [
            ('foo', ),
            ('bar', ),
            ('baz', ),
        ]
        assert context.get_buffer() == expected
class LdjsonWriterTest(Ldjson, WriterTest, TestCase):
    """LdjsonWriter test case: checks the lines written for various input shapes."""

    @incontext()
    def test_fields(self, context):
        # With explicit input field names, each input tuple is expected to be
        # serialized as an object keyed by those names, one per line.
        context.set_input_fields(['foo', 'bar'])
        context.write_sync(('a', 'b'), ('c', 'd'))
        context.stop()

        expected = (
            '{"foo": "a", "bar": "b"}',
            '{"foo": "c", "bar": "d"}',
        )
        assert self.readlines() == expected

    @incontext()
    def test_fields_from_type(self, context):
        # The field names are expected to come from the namedtuple input type.
        point_type = namedtuple('Point', ['x', 'y'])
        context.set_input_type(point_type)
        context.write_sync((1, 2), (3, 4))
        context.stop()

        expected = (
            '{"x": 1, "y": 2}',
            '{"x": 3, "y": 4}',
        )
        assert self.readlines() == expected

    @incontext()
    def test_nofields_multiple_args(self, context):
        # Every message is a 2-tuple; each element is expected to end up on
        # its own output line, in order.
        messages = [
            (FOOBAR, FOOBAR),
            (OD_ABC, FOOBAR),
            (FOOBAZ, FOOBAR),
        ]
        context.write_sync(*messages)
        context.stop()

        expected = (
            '{"foo": "bar"}',
            '{"foo": "bar"}',
            '{"a": "A", "b": "B", "c": "C"}',
            '{"foo": "bar"}',
            '{"foo": "baz"}',
            '{"foo": "bar"}',
        )
        assert self.readlines() == expected

    @incontext()
    def test_nofields_multiple_args_length_mismatch(self, context):
        # Inputs of varying length must fail with a TypeError (unrecoverable).
        # NOTE(review): the second message is the bare OD_ABC mapping, not a
        # 1-tuple; presumably the context wraps it into one, which is what
        # mismatches the 2-tuple sent first -- TODO confirm.
        with pytest.raises(TypeError):
            context.write_sync((FOOBAR, FOOBAR), OD_ABC)

    @incontext()
    def test_nofields_single_arg(self, context):
        # Bare (non-tuple) messages are written one line each; their key
        # sets are allowed to differ.
        context.write_sync(FOOBAR, OD_ABC, FOOBAZ)
        context.stop()

        expected = (
            '{"foo": "bar"}',
            '{"a": "A", "b": "B", "c": "C"}',
            '{"foo": "baz"}',
        )
        assert self.readlines() == expected

    @incontext()
    def test_nofields_empty_args(self, context):
        # EMPTY messages add no lines: nothing is read back from the file.
        context.write_sync(EMPTY, EMPTY, EMPTY)
        context.stop()

        expected = ()
        assert self.readlines() == expected