feat, wip: refactoring and simplification of how casts are made.

This commit is contained in:
Romain Dorgueil
2018-07-29 11:59:47 +01:00
parent f5ebe1b1e7
commit 980a76399b
17 changed files with 251 additions and 65 deletions

View File

@ -10,7 +10,7 @@ to another is maximal.
from bonobo.execution.strategies import create_strategy from bonobo.execution.strategies import create_strategy
from bonobo.nodes import * from bonobo.nodes import *
from bonobo.nodes import __all__ as _all_nodes from bonobo.nodes import __all__ as _all_nodes
from bonobo.structs import Graph from bonobo.structs.graphs import Graph
from bonobo.util.api import ApiHelper from bonobo.util.api import ApiHelper
from bonobo.util.environ import parse_args, get_argument_parser from bonobo.util.environ import parse_args, get_argument_parser
from bonobo.registry import create_reader, create_writer from bonobo.registry import create_reader, create_writer

View File

@ -20,30 +20,13 @@
""" """
from bonobo.structs.tokens import Token
from bonobo.util.envelopes import UnchangedEnvelope
class Token:
def __init__(self, name):
self.__name__ = name
def __repr__(self):
return '<{}>'.format(self.__name__)
class Flag(Token):
must_be_first = False
must_be_last = False
allows_data = True
BEGIN = Token('Begin') BEGIN = Token('Begin')
END = Token('End') END = Token('End')
INHERIT = Flag('Inherit') NOT_MODIFIED = UnchangedEnvelope()
NOT_MODIFIED = Flag('NotModified')
NOT_MODIFIED.must_be_first = True
NOT_MODIFIED.must_be_last = True
NOT_MODIFIED.allows_data = False
EMPTY = tuple() EMPTY = tuple()

View File

@ -4,7 +4,7 @@ from queue import Empty
from time import sleep from time import sleep
from bonobo.config import create_container from bonobo.config import create_container
from bonobo.constants import BEGIN, END from bonobo.constants import BEGIN, END, EMPTY
from bonobo.errors import InactiveReadableError from bonobo.errors import InactiveReadableError
from bonobo.execution import events from bonobo.execution import events
from bonobo.execution.contexts.base import BaseContext from bonobo.execution.contexts.base import BaseContext
@ -120,6 +120,10 @@ class GraphExecutionContext(BaseContext):
except InactiveReadableError: except InactiveReadableError:
nodes.discard(node) nodes.discard(node)
def run_until_complete(self):
self.write(BEGIN, EMPTY, END)
self.loop()
def stop(self, stopper=None): def stop(self, stopper=None):
super(GraphExecutionContext, self).stop() super(GraphExecutionContext, self).stop()

View File

@ -7,12 +7,14 @@ from types import GeneratorType
from bonobo.config import create_container from bonobo.config import create_container
from bonobo.config.processors import ContextCurrifier from bonobo.config.processors import ContextCurrifier
from bonobo.constants import NOT_MODIFIED, BEGIN, END, TICK_PERIOD, Token, Flag, INHERIT from bonobo.constants import BEGIN, END, TICK_PERIOD
from bonobo.errors import InactiveReadableError, UnrecoverableError, UnrecoverableTypeError from bonobo.errors import InactiveReadableError, UnrecoverableError, UnrecoverableTypeError
from bonobo.execution.contexts.base import BaseContext from bonobo.execution.contexts.base import BaseContext
from bonobo.structs.inputs import Input from bonobo.structs.inputs import Input
from bonobo.util import get_name, isconfigurabletype, ensure_tuple from bonobo.structs.tokens import Token, Flag
from bonobo.util import get_name, isconfigurabletype, ensure_tuple, deprecated
from bonobo.util.bags import BagType from bonobo.util.bags import BagType
from bonobo.util.envelopes import isenvelope, F_NOT_MODIFIED, F_INHERIT
from bonobo.util.statistics import WithStatistics from bonobo.util.statistics import WithStatistics
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -329,17 +331,29 @@ class NodeExecutionContext(BaseContext, WithStatistics):
:return: Bag :return: Bag
""" """
tokens, _output = split_token(_output) if isenvelope(_output):
_output, _flags, _options = _output.unfold()
else:
_flags, _options = [], {}
if NOT_MODIFIED in tokens: if len(_flags):
return ensure_tuple(_input, cls=(self.output_type or tuple)) # TODO: parse flags to check constraints are respected (like not modified alone, etc.)
if INHERIT in tokens: if F_NOT_MODIFIED in _flags:
return _input
if F_INHERIT in _flags:
if self._output_type is None: if self._output_type is None:
self._output_type = concat_types(self._input_type, self._input_length, self._output_type, len(_output)) self._output_type = concat_types(
self._input_type, self._input_length, self._output_type, len(_output)
)
_output = _input + ensure_tuple(_output) _output = _input + ensure_tuple(_output)
return ensure_tuple(_output, cls=(self._output_type or tuple)) if not self._output_type:
if issubclass(type(_output), tuple):
self._output_type = type(_output)
return ensure_tuple(_output, cls=self._output_type)
def _send(self, value, _control=False): def _send(self, value, _control=False):
""" """
@ -367,6 +381,7 @@ def isflag(param):
return isinstance(param, Flag) return isinstance(param, Flag)
@deprecated
def split_token(output): def split_token(output):
""" """
Split an output into token tuple, real output tuple. Split an output into token tuple, real output tuple.
@ -392,6 +407,7 @@ def split_token(output):
output = output[i:] output = output[i:]
if not data_allowed and len(output): if not data_allowed and len(output):
raise ValueError('Output data provided after a flag that does not allow data.') raise ValueError('Output data provided after a flag that does not allow data.')
return flags, output return flags, output

View File

@ -1,5 +0,0 @@
from bonobo.structs.graphs import Graph
__all__ = [
'Graph',
]

12
bonobo/structs/tokens.py Normal file
View File

@ -0,0 +1,12 @@
class Token:
def __init__(self, name):
self.__name__ = name
def __repr__(self):
return '<{}>'.format(self.__name__)
class Flag(Token):
must_be_first = False
must_be_last = False
allows_data = True

View File

@ -3,7 +3,17 @@ import functools
class sortedlist(list): class sortedlist(list):
"""
A list with an insort() method that wan be used to maintain sorted lists. The list by itself is not sorted, it's
up to the user to not insert unsorted elements.
"""
def insort(self, x): def insort(self, x):
"""
If the list is sorted, insert the element in the right place. Otherwise, unpredictable behaviour.
:param x:
"""
bisect.insort(self, x) bisect.insort(self, x)

32
bonobo/util/envelopes.py Normal file
View File

@ -0,0 +1,32 @@
from bonobo.structs.tokens import Flag
F_INHERIT = Flag('Inherit')
F_NOT_MODIFIED = Flag('NotModified')
F_NOT_MODIFIED.must_be_first = True
F_NOT_MODIFIED.must_be_last = True
F_NOT_MODIFIED.allows_data = False
class Envelope:
def __init__(self, content, *, flags=None, **options):
self._content = content
self._flags = set(flags or ())
self._options = options
def unfold(self):
return self._content, self._flags, self._options
class AppendingEnvelope(Envelope):
def __init__(self, content, **options):
super().__init__(content, flags={F_INHERIT}, **options)
class UnchangedEnvelope(Envelope):
def __init__(self, **options):
super().__init__(None, flags={F_NOT_MODIFIED}, **options)
def isenvelope(mixed):
return isinstance(mixed, Envelope)

View File

@ -11,7 +11,7 @@ import pytest
from bonobo import open_fs, __main__, get_examples_path from bonobo import open_fs, __main__, get_examples_path
from bonobo.commands import entrypoint from bonobo.commands import entrypoint
from bonobo.constants import Token from bonobo.structs.tokens import Token
from bonobo.execution.contexts.graph import GraphExecutionContext from bonobo.execution.contexts.graph import GraphExecutionContext
from bonobo.execution.contexts.node import NodeExecutionContext from bonobo.execution.contexts.node import NodeExecutionContext

View File

@ -0,0 +1,62 @@
from collections import namedtuple
import bonobo
from bonobo.config import use_raw_input
from bonobo.execution.contexts import GraphExecutionContext
from bonobo.util.bags import BagType
Extracted = namedtuple('Extracted', ['id', 'name', 'value'])
ExtractedBT = BagType('ExtractedBT', ['id', 'name', 'value'])
def extract_nt():
yield Extracted(id=1, name='Guido', value='.py')
yield Extracted(id=2, name='Larry', value='.pl')
yield Extracted(id=3, name='Dennis', value='.c')
yield Extracted(id=4, name='Yukihiro', value='.rb')
def extract_bt():
yield ExtractedBT(id=1, name='Guido', value='.py')
yield ExtractedBT(id=2, name='Larry', value='.pl')
yield ExtractedBT(id=3, name='Dennis', value='.c')
yield ExtractedBT(id=4, name='Yukihiro', value='.rb')
def transform_using_args(id, name, value):
yield Extracted(id=id * 2, name=name, value=name.lower() + value)
@use_raw_input
def transform_nt(row):
yield row._replace(name=row.name.upper())
def StoreInList(buffer: list):
def store_in_list(*args, buffer=buffer):
buffer.append(args)
return store_in_list
def test_execution():
graph = bonobo.Graph()
result_args = []
result_nt = []
result_bt = []
graph.add_chain(extract_nt, transform_using_args, StoreInList(result_args))
graph.add_chain(transform_nt, StoreInList(result_nt), _input=extract_nt)
graph.add_chain(extract_bt, transform_using_args, StoreInList(result_bt))
with GraphExecutionContext(graph) as context:
context.run_until_complete()
assert result_args == [(2, 'Guido', 'guido.py'), (4, 'Larry', 'larry.pl'), (6, 'Dennis', 'dennis.c'),
(8, 'Yukihiro', 'yukihiro.rb')]
assert result_nt == [(1, 'GUIDO', '.py'), (2, 'LARRY', '.pl'), (3, 'DENNIS', '.c'), (4, 'YUKIHIRO', '.rb')]
assert result_bt == [(2, 'Guido', 'guido.py'), (4, 'Larry', 'larry.pl'), (6, 'Dennis', 'dennis.c'),
(8, 'Yukihiro', 'yukihiro.rb')]

View File

@ -3,9 +3,10 @@ from unittest.mock import MagicMock
import pytest import pytest
from bonobo import Graph from bonobo import Graph
from bonobo.constants import EMPTY, NOT_MODIFIED, INHERIT from bonobo.constants import EMPTY
from bonobo.execution.contexts.node import NodeExecutionContext, split_token from bonobo.execution.contexts.node import NodeExecutionContext, split_token
from bonobo.execution.strategies import NaiveStrategy from bonobo.execution.strategies import NaiveStrategy
from bonobo.util.envelopes import F_NOT_MODIFIED, F_INHERIT
from bonobo.util.testing import BufferingNodeExecutionContext, BufferingGraphExecutionContext from bonobo.util.testing import BufferingNodeExecutionContext, BufferingGraphExecutionContext
@ -227,32 +228,36 @@ def test_node_lifecycle_with_kill():
def test_split_token(): def test_split_token():
with pytest.deprecated_call():
assert split_token(('foo', 'bar')) == (set(), ('foo', 'bar')) assert split_token(('foo', 'bar')) == (set(), ('foo', 'bar'))
assert split_token(()) == (set(), ()) assert split_token(()) == (set(), ())
assert split_token('') == (set(), ('', )) assert split_token('') == (set(), ('', ))
def test_split_token_duplicate(): def test_split_token_duplicate():
with pytest.deprecated_call():
with pytest.raises(ValueError): with pytest.raises(ValueError):
split_token((NOT_MODIFIED, NOT_MODIFIED)) split_token((F_NOT_MODIFIED, F_NOT_MODIFIED))
with pytest.raises(ValueError): with pytest.raises(ValueError):
split_token((INHERIT, INHERIT)) split_token((F_INHERIT, F_INHERIT))
with pytest.raises(ValueError): with pytest.raises(ValueError):
split_token((INHERIT, NOT_MODIFIED, INHERIT)) split_token((F_INHERIT, F_NOT_MODIFIED, F_INHERIT))
def test_split_token_not_modified(): def test_split_token_not_modified():
with pytest.deprecated_call():
with pytest.raises(ValueError): with pytest.raises(ValueError):
split_token((NOT_MODIFIED, 'foo', 'bar')) split_token((F_NOT_MODIFIED, 'foo', 'bar'))
with pytest.raises(ValueError): with pytest.raises(ValueError):
split_token((NOT_MODIFIED, INHERIT)) split_token((F_NOT_MODIFIED, F_INHERIT))
with pytest.raises(ValueError): with pytest.raises(ValueError):
split_token((INHERIT, NOT_MODIFIED)) split_token((F_INHERIT, F_NOT_MODIFIED))
assert split_token(NOT_MODIFIED) == ({NOT_MODIFIED}, ()) assert split_token(F_NOT_MODIFIED) == ({F_NOT_MODIFIED}, ())
assert split_token((NOT_MODIFIED, )) == ({NOT_MODIFIED}, ()) assert split_token((F_NOT_MODIFIED, )) == ({F_NOT_MODIFIED}, ())
def test_split_token_inherit(): def test_split_token_inherit():
assert split_token(INHERIT) == ({INHERIT}, ()) with pytest.deprecated_call():
assert split_token((INHERIT, )) == ({INHERIT}, ()) assert split_token(F_INHERIT) == ({F_INHERIT}, ())
assert split_token((INHERIT, 'foo', 'bar')) == ({INHERIT}, ('foo', 'bar')) assert split_token((F_INHERIT, )) == ({F_INHERIT}, ())
assert split_token((F_INHERIT, 'foo', 'bar')) == ({F_INHERIT}, ('foo', 'bar'))

View File

@ -1,4 +1,4 @@
from bonobo.constants import INHERIT from bonobo.util.envelopes import AppendingEnvelope
from bonobo.util.testing import BufferingNodeExecutionContext from bonobo.util.testing import BufferingNodeExecutionContext
messages = [ messages = [
@ -8,7 +8,7 @@ messages = [
def append(*args): def append(*args):
return INHERIT, '!' return AppendingEnvelope('!')
def test_inherit(): def test_inherit():

View File

@ -15,4 +15,6 @@ def test_not_modified():
with BufferingNodeExecutionContext(useless) as context: with BufferingNodeExecutionContext(useless) as context:
context.write_sync(*input_messages) context.write_sync(*input_messages)
assert context.get_buffer() == input_messages result = context.get_buffer()
print(result)
assert result == input_messages

65
tests/nodes/test_casts.py Normal file
View File

@ -0,0 +1,65 @@
from collections import namedtuple
from typing import Callable
import pytest
from bonobo.constants import EMPTY
from bonobo.util.bags import BagType
from bonobo.util.envelopes import Envelope
from bonobo.util.testing import BufferingNodeExecutionContext
MyTuple = namedtuple('MyTuple', ['a', 'b', 'c'])
MyBag = BagType('MyBag', ['a', 'b', 'c'])
class MyCustomType():
def __init__(self, *args):
self.args = args
def as_tuple(self):
return MyBag(*self.args)
@pytest.mark.parametrize(['factory', 'expected', 'expected_item0'], [
[lambda: (1, 2, 3), tuple, int],
[lambda: Envelope((1, 2, 3)), tuple, int],
[lambda: MyTuple(1, 2, 3), MyTuple, int],
[lambda: Envelope(MyTuple(1, 2, 3)), MyTuple, int],
[lambda: MyBag(1, 2, 3), MyBag, int],
[lambda: Envelope(MyBag(1, 2, 3)), MyBag, int],
[lambda: MyCustomType(1, 2, 3), tuple, MyCustomType],
[lambda: Envelope(MyCustomType(1, 2, 3)), tuple, MyCustomType],
])
def test_casts_after_output(factory: Callable, expected, expected_item0):
def transform():
yield factory()
yield factory()
with BufferingNodeExecutionContext(transform) as context:
context.write_sync(EMPTY)
result = context.get_buffer()
assert expected == type(result[0])
assert expected_item0 == type(result[0][0])
assert expected == type(result[1])
assert expected_item0 == type(result[1][0])
def test_cast_after_returning_custom_type():
def transform():
yield MyCustomType(1, 2, 3)
yield MyCustomType(4, 5, 6)
with BufferingNodeExecutionContext(transform) as context:
context.write_sync(EMPTY)
result = context.get_buffer()
assert tuple == type(result[0])
assert tuple == type(result[1])
assert MyCustomType == type(result[0][0])
assert MyCustomType == type(result[1][0])
with BufferingNodeExecutionContext(MyCustomType.as_tuple) as context:
context.write_sync(*result)
result = context.get_buffer()
assert MyBag == type(result[0])
assert MyBag == type(result[1])

View File

@ -3,7 +3,7 @@ import pytest
from unittest.mock import sentinel from unittest.mock import sentinel
from bonobo.constants import BEGIN from bonobo.constants import BEGIN
from bonobo.structs import Graph from bonobo.structs.graphs import Graph
identity = lambda x: x identity = lambda x: x

View File

@ -1,4 +1,4 @@
from bonobo.constants import Token from bonobo.structs.tokens import Token
def test_token_repr(): def test_token_repr():

View File

@ -2,7 +2,7 @@ from bonobo.config.processors import use_context_processor
from bonobo.constants import BEGIN, END from bonobo.constants import BEGIN, END
from bonobo.execution.contexts.graph import GraphExecutionContext from bonobo.execution.contexts.graph import GraphExecutionContext
from bonobo.execution.strategies import NaiveStrategy from bonobo.execution.strategies import NaiveStrategy
from bonobo.structs import Graph from bonobo.structs.graphs import Graph
def generate_integers(): def generate_integers():