Minor documentation tuning, moving get_pseudo_nodes to bonobo.util.testing.

This commit is contained in:
Romain Dorgueil
2019-06-02 08:40:14 +02:00
parent e84440df8c
commit caef022b56
6 changed files with 116 additions and 47 deletions

View File

@ -1,3 +1,4 @@
import inspect
import os import os
from jinja2 import DictLoader, Environment from jinja2 import DictLoader, Environment
@ -30,7 +31,6 @@ class Module:
return os.path.join(__path__, apidoc_root, *self.name.split(".")) + ".rst" return os.path.join(__path__, apidoc_root, *self.name.split(".")) + ".rst"
import inspect
bonobo = __import__("bonobo") bonobo = __import__("bonobo")
assert bonobo.__version__ assert bonobo.__version__

View File

@ -59,7 +59,11 @@ class PartialGraph:
class Graph: class Graph:
""" """
Represents a directed graph of nodes. Core structure representing a directed graph of nodes. It will be used to create data streaming queues between your
objects during the job execution.
This is how the data flows are defined.
""" """
name = "" name = ""
@ -75,7 +79,9 @@ class Graph:
yield from self.nodes yield from self.nodes
def __len__(self): def __len__(self):
"""Node count. """
The graph length is defined as its node count.
""" """
return len(self.nodes) return len(self.nodes)
@ -92,6 +98,10 @@ class Graph:
return self.get_cursor().__rshift__(other) return self.get_cursor().__rshift__(other)
def get_cursor(self, ref=BEGIN): def get_cursor(self, ref=BEGIN):
"""
Create a `GraphCursor` to use the operator-based syntax to build the graph, starting at `ref`.
"""
return GraphCursor(self, last=self.index_of(ref)) return GraphCursor(self, last=self.index_of(ref))
def index_of(self, mixed): def index_of(self, mixed):
@ -115,10 +125,16 @@ class Graph:
raise ValueError("Cannot find node matching {!r}.".format(mixed)) raise ValueError("Cannot find node matching {!r}.".format(mixed))
def indexes_of(self, *things): def indexes_of(self, *things):
"""
Returns the set of indexes of the things passed as arguments.
"""
return set(map(self.index_of, things)) return set(map(self.index_of, things))
def outputs_of(self, idx_or_node, create=False): def outputs_of(self, idx_or_node, create=False):
"""Get a set of the outputs for a given node, node index or name. """
Get a set of the outputs for a given node, node index or name.
""" """
idx_or_node = self.index_of(idx_or_node) idx_or_node = self.index_of(idx_or_node)
@ -127,8 +143,10 @@ class Graph:
return self.edges[idx_or_node] return self.edges[idx_or_node]
def add_node(self, new_node, *, _name=None): def add_node(self, new_node, *, _name=None):
"""Add a node without connections in this graph and returns its index. """
Add a node without connections in this graph and returns its index.
If _name is specified, name this node (string reference for further usage). If _name is specified, name this node (string reference for further usage).
""" """
idx = len(self.nodes) idx = len(self.nodes)
self.edges[idx] = set() self.edges[idx] = set()
@ -149,7 +167,8 @@ class Graph:
return self.add_node(new_node, _name=_name) return self.add_node(new_node, _name=_name)
def add_chain(self, *nodes, _input=BEGIN, _output=None, _name=None, use_existing_nodes=False): def add_chain(self, *nodes, _input=BEGIN, _output=None, _name=None, use_existing_nodes=False):
"""Add `nodes` as a chain in this graph. """
Add `nodes` as a chain in this graph.
**Input rules** **Input rules**
@ -222,7 +241,9 @@ class Graph:
@property @property
def topologically_sorted_indexes(self): def topologically_sorted_indexes(self):
"""Iterate in topological order, based on networkx's topological_sort() function. """
Iterate in topological order, based on networkx's topological_sort() function.
""" """
try: try:
return self._topologcally_sorted_indexes_cache return self._topologcally_sorted_indexes_cache

View File

@ -5,7 +5,7 @@ import os
import runpy import runpy
import sys import sys
from contextlib import contextmanager, redirect_stderr, redirect_stdout from contextlib import contextmanager, redirect_stderr, redirect_stdout
from unittest.mock import patch from unittest.mock import patch, sentinel
import pytest import pytest
@ -14,6 +14,7 @@ from bonobo.commands import entrypoint
from bonobo.execution.contexts.graph import GraphExecutionContext from bonobo.execution.contexts.graph import GraphExecutionContext
from bonobo.execution.contexts.node import NodeExecutionContext from bonobo.execution.contexts.node import NodeExecutionContext
from bonobo.structs.tokens import Token from bonobo.structs.tokens import Token
from bonobo.util import tuplize
@contextmanager @contextmanager
@ -26,6 +27,11 @@ def optional_contextmanager(cm, *, ignore=False):
class FilesystemTester: class FilesystemTester:
"""
Helper that creates a temporary filesystem service to be used in unit tests.
"""
def __init__(self, extension="txt", mode="w", *, input_data=""): def __init__(self, extension="txt", mode="w", *, input_data=""):
self.extension = extension self.extension = extension
self.input_data = input_data self.input_data = input_data
@ -43,6 +49,12 @@ class FilesystemTester:
class QueueList(list): class QueueList(list):
"""
A list that behaves like a queue (or is it the opposite?).
The datastructure is not smart at all, but it's quite useful for testing.
"""
def append(self, item): def append(self, item):
if not isinstance(item, Token): if not isinstance(item, Token):
super(QueueList, self).append(item) super(QueueList, self).append(item)
@ -51,6 +63,11 @@ class QueueList(list):
class BufferingContext: class BufferingContext:
"""
Base class to add a buffer to a context.
"""
def __init__(self, buffer=None): def __init__(self, buffer=None):
if buffer is None: if buffer is None:
buffer = QueueList() buffer = QueueList()
@ -64,12 +81,22 @@ class BufferingContext:
class BufferingNodeExecutionContext(BufferingContext, NodeExecutionContext): class BufferingNodeExecutionContext(BufferingContext, NodeExecutionContext):
"""
Node execution context that actually stores the node outputs in a buffer, so one can test it afterward.
"""
def __init__(self, *args, buffer=None, **kwargs): def __init__(self, *args, buffer=None, **kwargs):
BufferingContext.__init__(self, buffer) BufferingContext.__init__(self, buffer)
NodeExecutionContext.__init__(self, *args, **kwargs, _outputs=[self.buffer]) NodeExecutionContext.__init__(self, *args, **kwargs, _outputs=[self.buffer])
class BufferingGraphExecutionContext(BufferingContext, GraphExecutionContext): class BufferingGraphExecutionContext(BufferingContext, GraphExecutionContext):
"""
Graph execution context that uses buffering node execution contexts, all nodes buffering to the same buffer.
"""
NodeExecutionContextType = BufferingNodeExecutionContext NodeExecutionContextType = BufferingNodeExecutionContext
def __init__(self, *args, buffer=None, **kwargs): def __init__(self, *args, buffer=None, **kwargs):
@ -192,7 +219,10 @@ class ConfigurableNodeTest:
class ReaderTest(ConfigurableNodeTest): class ReaderTest(ConfigurableNodeTest):
""" Helper class to test reader transformations. """ """
Helper class to test reader transformations.
"""
ReaderNodeType = None ReaderNodeType = None
@ -232,7 +262,10 @@ class ReaderTest(ConfigurableNodeTest):
class WriterTest(ConfigurableNodeTest): class WriterTest(ConfigurableNodeTest):
""" Helper class to test writer transformations. """ """
Helper class to test writer transformations.
"""
WriterNodeType = None WriterNodeType = None
@ -255,3 +288,15 @@ class WriterTest(ConfigurableNodeTest):
def readlines(self): def readlines(self):
with self.fs.open(self.filename) as fp: with self.fs.open(self.filename) as fp:
return tuple(map(str.strip, fp.readlines())) return tuple(map(str.strip, fp.readlines()))
@tuplize
def get_pseudo_nodes(*names):
"""
Generates a series of named sentinels to test graph APIs.
>>> a, b, c = get_pseudo_nodes(*"abc")
"""
for name in names:
yield getattr(sentinel, name)

View File

@ -201,8 +201,11 @@ positional parameters as you want.
.. note:: .. note::
As of |bonobo| 0.7, a new syntax is available that we believe is more powerful and more readable than the legacy As of |bonobo| 0.7, a new syntax is available that we believe is more powerful and more readable than the legacy
`add_chain` method. The former API is here to stay and it's perfectly safe to use it, but if it is an option, you `add_chain` method. The former API is here to stay and it's perfectly safe to use it (in fact, the new syntax uses
should consider the new syntax. During the transition period, we'll document both. `add_chain` under the hood).
If it is an option for you, we suggest you consider the new syntax. During the transition period, we'll document
both, but the new syntax will eventually become the default.
.. code-block:: python .. code-block:: python

View File

@ -4,6 +4,7 @@ import pytest
from bonobo.constants import BEGIN from bonobo.constants import BEGIN
from bonobo.structs.graphs import Graph from bonobo.structs.graphs import Graph
from bonobo.util.testing import get_pseudo_nodes
identity = lambda x: x identity = lambda x: x
@ -26,19 +27,21 @@ def test_graph_outputs_of():
def test_graph_index_of(): def test_graph_index_of():
g = Graph() g = Graph()
g.add_node(sentinel.foo) foo, bar, not_there = get_pseudo_nodes("foo", "bar", "not_there")
g.add_node(sentinel.bar)
g.add_node(foo)
g.add_node(bar)
# sequential, can resolve objects # sequential, can resolve objects
assert g.index_of(sentinel.foo) == 0 assert g.index_of(foo) == 0
assert g.index_of(sentinel.bar) == 1 assert g.index_of(bar) == 1
# calling on an index should return the index # calling on an index should return the index
assert g.index_of(sentinel.bar) == g.index_of(g.index_of(sentinel.bar)) assert g.index_of(bar) == g.index_of(g.index_of(bar))
# not existing should raise value error # not existing should raise value error
with pytest.raises(ValueError): with pytest.raises(ValueError):
g.index_of(sentinel.not_there) g.index_of(not_there)
# tokens resolve to themselves # tokens resolve to themselves
assert g.index_of(BEGIN) == BEGIN assert g.index_of(BEGIN) == BEGIN
@ -58,15 +61,16 @@ def test_graph_add_component():
def test_invalid_graph_usage(): def test_invalid_graph_usage():
g = Graph() g = Graph()
foo, bar = get_pseudo_nodes("foo", "bar")
with pytest.raises(ValueError): with pytest.raises(ValueError):
g.add_chain() g.add_chain()
g.add_node(sentinel.foo) g.add_node(foo)
g.add_node(sentinel.bar) g.add_node(bar)
with pytest.raises(RuntimeError): with pytest.raises(RuntimeError):
g.add_chain(_input=sentinel.bar, _output=sentinel.foo, _name="this_is_not_possible") g.add_chain(_input=bar, _output=foo, _name="this_is_not_possible")
def test_graph_add_chain(): def test_graph_add_chain():
@ -81,48 +85,51 @@ def test_graph_add_chain():
def test_graph_topological_sort(): def test_graph_topological_sort():
g = Graph() g = Graph()
a1, a2, a3, b1, b2 = get_pseudo_nodes("a1", "a2", "a3", "b1", "b2")
g.add_chain(sentinel.a1, sentinel.a2, sentinel.a3, _input=None, _output=None) g.add_chain(a1, a2, a3, _input=None, _output=None)
assert g.topologically_sorted_indexes == (0, 1, 2) assert g.topologically_sorted_indexes == (0, 1, 2)
assert g[0] == sentinel.a1 assert g[0] == a1
assert g[1] == sentinel.a2 assert g[1] == a2
assert g[2] == sentinel.a3 assert g[2] == a3
g.add_chain(sentinel.b1, sentinel.b2, _output=sentinel.a2) g.add_chain(b1, b2, _output=a2)
assert g.topologically_sorted_indexes[-2:] == (1, 2) assert g.topologically_sorted_indexes[-2:] == (1, 2)
assert g.topologically_sorted_indexes.index(3) < g.topologically_sorted_indexes.index(4) assert g.topologically_sorted_indexes.index(3) < g.topologically_sorted_indexes.index(4)
assert g[3] == sentinel.b1 assert g[3] == b1
assert g[4] == sentinel.b2 assert g[4] == b2
def test_connect_two_chains(): def test_connect_two_chains():
g = Graph() g = Graph()
a1, a2, b1, b2 = get_pseudo_nodes("a1", "a2", "b1", "b2")
g.add_chain(sentinel.a1, sentinel.a2, _input=None, _output=None) g.add_chain(a1, a2, _input=None, _output=None)
g.add_chain(sentinel.b1, sentinel.b2, _input=None, _output=None) g.add_chain(b1, b2, _input=None, _output=None)
assert len(g.outputs_of(sentinel.a2)) == 0 assert len(g.outputs_of(a2)) == 0
g.add_chain(_input=sentinel.a2, _output=sentinel.b1) g.add_chain(_input=a2, _output=b1)
assert g.outputs_of(sentinel.a2) == {g.index_of(sentinel.b1)} assert g.outputs_of(a2) == g.indexes_of(b1)
def test_connect_two_anonymous_nodes(): def test_connect_two_anonymous_nodes():
g = Graph() g = Graph()
a, b = get_pseudo_nodes(*"ab")
# Create two "anonymous" nodes # Create two "anonymous" nodes
g.add_node(sentinel.a) g.add_node(a)
g.add_node(sentinel.b) g.add_node(b)
# Connect them # Connect them
g.add_chain(_input=sentinel.a, _output=sentinel.b) g.add_chain(_input=a, _output=b)
def test_named_nodes(): def test_named_nodes():
g = Graph() g = Graph()
a, b, c, d, e, f = sentinel.a, sentinel.b, sentinel.c, sentinel.d, sentinel.e, sentinel.f a, b, c, d, e, f = get_pseudo_nodes(*"abcdef")
# Here we mark _input to None, so normalize won't get the "begin" impulsion. # Here we mark _input to None, so normalize won't get the "begin" impulsion.
g.add_chain(e, f, _input=None, _name="load") g.add_chain(e, f, _input=None, _name="load")

View File

@ -1,17 +1,10 @@
from operator import attrgetter from operator import attrgetter
from unittest.mock import sentinel
import pytest import pytest
from bonobo.constants import BEGIN from bonobo.constants import BEGIN
from bonobo.structs.graphs import Graph, GraphCursor from bonobo.structs.graphs import Graph, GraphCursor
from bonobo.util import tuplize from bonobo.util.testing import get_pseudo_nodes
@tuplize
def get_pseudo_nodes(*names):
for name in names:
yield getattr(sentinel, name)
def test_get_cursor(): def test_get_cursor():