diff --git a/bin/update_apidoc.py b/bin/update_apidoc.py index efb0563..2677900 100644 --- a/bin/update_apidoc.py +++ b/bin/update_apidoc.py @@ -1,3 +1,4 @@ +import inspect import os from jinja2 import DictLoader, Environment @@ -30,7 +31,6 @@ class Module: return os.path.join(__path__, apidoc_root, *self.name.split(".")) + ".rst" -import inspect bonobo = __import__("bonobo") assert bonobo.__version__ diff --git a/bonobo/structs/graphs.py b/bonobo/structs/graphs.py index aaf3fd6..8c0a22c 100644 --- a/bonobo/structs/graphs.py +++ b/bonobo/structs/graphs.py @@ -59,7 +59,11 @@ class PartialGraph: class Graph: """ - Represents a directed graph of nodes. + Core structure representing a directed graph of nodes. It will be used to create data streaming queues between your + objects during the job execution. + + This is how the data flows are defined. + """ name = "" @@ -75,7 +79,9 @@ class Graph: yield from self.nodes def __len__(self): - """Node count. + """ + The graph length is defined as its node count. + """ return len(self.nodes) @@ -92,6 +98,10 @@ class Graph: return self.get_cursor().__rshift__(other) def get_cursor(self, ref=BEGIN): + """ + Create a `GraphCursor` to use the operator-based syntax to build graph, starting at `ref`. + + """ return GraphCursor(self, last=self.index_of(ref)) def index_of(self, mixed): @@ -115,10 +125,16 @@ class Graph: raise ValueError("Cannot find node matching {!r}.".format(mixed)) def indexes_of(self, *things): + """ + Returns the set of indexes of the things passed as arguments. + + """ return set(map(self.index_of, things)) def outputs_of(self, idx_or_node, create=False): - """Get a set of the outputs for a given node, node index or name. + """ + Get a set of the outputs for a given node, node index or name. 
+ """ idx_or_node = self.index_of(idx_or_node) @@ -127,8 +143,10 @@ class Graph: return self.edges[idx_or_node] def add_node(self, new_node, *, _name=None): - """Add a node without connections in this graph and returns its index. + """ + Add a node without connections in this graph and return its index. If _name is specified, name this node (string reference for further usage). + """ idx = len(self.nodes) self.edges[idx] = set() @@ -149,7 +167,8 @@ class Graph: return self.add_node(new_node, _name=_name) def add_chain(self, *nodes, _input=BEGIN, _output=None, _name=None, use_existing_nodes=False): - """Add `nodes` as a chain in this graph. + """ + Add `nodes` as a chain in this graph. **Input rules** @@ -222,7 +241,9 @@ class Graph: @property def topologically_sorted_indexes(self): - """Iterate in topological order, based on networkx's topological_sort() function. + """ + Iterate in topological order, based on networkx's topological_sort() function. + """ try: return self._topologcally_sorted_indexes_cache diff --git a/bonobo/util/testing.py b/bonobo/util/testing.py index 13554f8..0c80e92 100644 --- a/bonobo/util/testing.py +++ b/bonobo/util/testing.py @@ -5,7 +5,7 @@ import os import runpy import sys from contextlib import contextmanager, redirect_stderr, redirect_stdout -from unittest.mock import patch +from unittest.mock import patch, sentinel import pytest @@ -14,6 +14,7 @@ from bonobo.commands import entrypoint from bonobo.execution.contexts.graph import GraphExecutionContext from bonobo.execution.contexts.node import NodeExecutionContext from bonobo.structs.tokens import Token +from bonobo.util import tuplize @contextmanager @@ -26,6 +27,11 @@ def optional_contextmanager(cm, *, ignore=False): class FilesystemTester: + """ + Helper that creates a temporary filesystem service to be used in unit tests.
+ + """ + def __init__(self, extension="txt", mode="w", *, input_data=""): self.extension = extension self.input_data = input_data @@ -43,6 +49,12 @@ class FilesystemTester: class QueueList(list): + """ + A list that behaves like a queue (or is it the opposite?). + + The data structure is not smart at all, but it's quite useful for testing. + """ + def append(self, item): if not isinstance(item, Token): super(QueueList, self).append(item) @@ -51,6 +63,11 @@ class QueueList(list): class BufferingContext: + """ + Base class to add a buffer to a context. + + """ + def __init__(self, buffer=None): if buffer is None: buffer = QueueList() @@ -64,12 +81,22 @@ class BufferingContext: class BufferingNodeExecutionContext(BufferingContext, NodeExecutionContext): + """ + Node execution context that actually stores the node outputs in a buffer, so one can test it afterward. + + """ + def __init__(self, *args, buffer=None, **kwargs): BufferingContext.__init__(self, buffer) NodeExecutionContext.__init__(self, *args, **kwargs, _outputs=[self.buffer]) class BufferingGraphExecutionContext(BufferingContext, GraphExecutionContext): + """ + Graph execution context that uses buffering node execution contexts, all nodes buffering to the same buffer. + + """ + NodeExecutionContextType = BufferingNodeExecutionContext def __init__(self, *args, buffer=None, **kwargs): @@ -99,13 +126,13 @@ def runner(f): @runner def runner_entrypoint(args): - """ Run bonobo using the python command entrypoint directly (bonobo.commands.entrypoint). """ + """Run bonobo using the python command entrypoint directly (bonobo.commands.entrypoint).
""" return entrypoint(args) @runner def runner_module(args): - """ Run bonobo using the bonobo.__main__ file, which is equivalent as doing "python -m bonobo ...".""" + """Run bonobo using the bonobo.__main__ file, which is equivalent to doing "python -m bonobo ...".""" with patch.object(sys, "argv", ["bonobo", *args]): return runpy.run_path(__main__.__file__, run_name="__main__") @@ -192,7 +219,10 @@ class ConfigurableNodeTest: class ReaderTest(ConfigurableNodeTest): - """ Helper class to test reader transformations. """ + """ + Helper class to test reader transformations. + + """ ReaderNodeType = None @@ -232,7 +262,10 @@ class ReaderTest(ConfigurableNodeTest): class WriterTest(ConfigurableNodeTest): - """ Helper class to test writer transformations. """ + """ + Helper class to test writer transformations. + + """ WriterNodeType = None @@ -255,3 +288,15 @@ class WriterTest(ConfigurableNodeTest): def readlines(self): with self.fs.open(self.filename) as fp: return tuple(map(str.strip, fp.readlines())) + + +@tuplize +def get_pseudo_nodes(*names): + """ + Generates a series of named sentinels to test graph APIs. + + >>> a, b, c = get_pseudo_nodes(*"abc") + + """ + for name in names: + yield getattr(sentinel, name) diff --git a/docs/guide/graphs.rst b/docs/guide/graphs.rst index 67f8ce9..694ad70 100644 --- a/docs/guide/graphs.rst +++ b/docs/guide/graphs.rst @@ -201,8 +201,11 @@ positional parameters as you want. .. note:: As of |bonobo| 0.7, a new syntax is available that we believe is more powerfull and more readable than the legacy - `add_chain` method. The former API is here to stay and it's perfectly safe to use it, but if it is an option, you - should consider the new syntax. During the transition period, we'll document both. + `add_chain` method. The former API is here to stay and it's perfectly safe to use it (in fact, the new syntax uses + `add_chain` under the hood). + + If it is an option for you, we suggest you consider the new syntax.
During the transition period, we'll document + both but the new syntax will eventually become default. .. code-block:: python diff --git a/tests/structs/test_graphs.py b/tests/structs/test_graphs.py index 725ba61..5dcb10f 100644 --- a/tests/structs/test_graphs.py +++ b/tests/structs/test_graphs.py @@ -4,6 +4,7 @@ import pytest from bonobo.constants import BEGIN from bonobo.structs.graphs import Graph +from bonobo.util.testing import get_pseudo_nodes identity = lambda x: x @@ -26,19 +27,21 @@ def test_graph_outputs_of(): def test_graph_index_of(): g = Graph() - g.add_node(sentinel.foo) - g.add_node(sentinel.bar) + foo, bar, not_there = get_pseudo_nodes("foo", "bar", "not_there") + + g.add_node(foo) + g.add_node(bar) # sequential, can resolve objects - assert g.index_of(sentinel.foo) == 0 - assert g.index_of(sentinel.bar) == 1 + assert g.index_of(foo) == 0 + assert g.index_of(bar) == 1 # calling on an index should return the index - assert g.index_of(sentinel.bar) == g.index_of(g.index_of(sentinel.bar)) + assert g.index_of(bar) == g.index_of(g.index_of(bar)) # not existing should raise value error with pytest.raises(ValueError): - g.index_of(sentinel.not_there) + g.index_of(not_there) # tokens resolve to themselves assert g.index_of(BEGIN) == BEGIN @@ -58,15 +61,16 @@ def test_graph_add_component(): def test_invalid_graph_usage(): g = Graph() + foo, bar = get_pseudo_nodes("foo", "bar") with pytest.raises(ValueError): g.add_chain() - g.add_node(sentinel.foo) - g.add_node(sentinel.bar) + g.add_node(foo) + g.add_node(bar) with pytest.raises(RuntimeError): - g.add_chain(_input=sentinel.bar, _output=sentinel.foo, _name="this_is_not_possible") + g.add_chain(_input=bar, _output=foo, _name="this_is_not_possible") def test_graph_add_chain(): @@ -81,48 +85,51 @@ def test_graph_add_chain(): def test_graph_topological_sort(): g = Graph() + a1, a2, a3, b1, b2 = get_pseudo_nodes("a1", "a2", "a3", "b1", "b2") - g.add_chain(sentinel.a1, sentinel.a2, sentinel.a3, _input=None, 
_output=None) + g.add_chain(a1, a2, a3, _input=None, _output=None) assert g.topologically_sorted_indexes == (0, 1, 2) - assert g[0] == sentinel.a1 - assert g[1] == sentinel.a2 - assert g[2] == sentinel.a3 + assert g[0] == a1 + assert g[1] == a2 + assert g[2] == a3 - g.add_chain(sentinel.b1, sentinel.b2, _output=sentinel.a2) + g.add_chain(b1, b2, _output=a2) assert g.topologically_sorted_indexes[-2:] == (1, 2) assert g.topologically_sorted_indexes.index(3) < g.topologically_sorted_indexes.index(4) - assert g[3] == sentinel.b1 - assert g[4] == sentinel.b2 + assert g[3] == b1 + assert g[4] == b2 def test_connect_two_chains(): g = Graph() + a1, a2, b1, b2 = get_pseudo_nodes("a1", "a2", "b1", "b2") - g.add_chain(sentinel.a1, sentinel.a2, _input=None, _output=None) - g.add_chain(sentinel.b1, sentinel.b2, _input=None, _output=None) - assert len(g.outputs_of(sentinel.a2)) == 0 + g.add_chain(a1, a2, _input=None, _output=None) + g.add_chain(b1, b2, _input=None, _output=None) + assert len(g.outputs_of(a2)) == 0 - g.add_chain(_input=sentinel.a2, _output=sentinel.b1) - assert g.outputs_of(sentinel.a2) == {g.index_of(sentinel.b1)} + g.add_chain(_input=a2, _output=b1) + assert g.outputs_of(a2) == g.indexes_of(b1) def test_connect_two_anonymous_nodes(): g = Graph() + a, b = get_pseudo_nodes(*"ab") # Create two "anonymous" nodes - g.add_node(sentinel.a) - g.add_node(sentinel.b) + g.add_node(a) + g.add_node(b) # Connect them - g.add_chain(_input=sentinel.a, _output=sentinel.b) + g.add_chain(_input=a, _output=b) def test_named_nodes(): g = Graph() - a, b, c, d, e, f = sentinel.a, sentinel.b, sentinel.c, sentinel.d, sentinel.e, sentinel.f + a, b, c, d, e, f = get_pseudo_nodes(*"abcdef") # Here we mark _input to None, so normalize won't get the "begin" impulsion. 
g.add_chain(e, f, _input=None, _name="load") diff --git a/tests/structs/test_graphs_new_syntax.py b/tests/structs/test_graphs_new_syntax.py index 570fa47..3ceb157 100644 --- a/tests/structs/test_graphs_new_syntax.py +++ b/tests/structs/test_graphs_new_syntax.py @@ -1,17 +1,10 @@ from operator import attrgetter -from unittest.mock import sentinel import pytest from bonobo.constants import BEGIN from bonobo.structs.graphs import Graph, GraphCursor -from bonobo.util import tuplize - - -@tuplize -def get_pseudo_nodes(*names): - for name in names: - yield getattr(sentinel, name) +from bonobo.util.testing import get_pseudo_nodes def test_get_cursor():