Implements #314, make index_of() public in graph api, document node connections and let outputs_of() take anything resolvable by index_of().

2019-06-01 11:06:16 +02:00
parent 1267b91841
commit 705dea3215
7 changed files with 215 additions and 116 deletions
--- a/bonobo/init.py
+++ b/bonobo/init.py
@ -8,50 +8,13 @@
 import sys
 from pathlib import Path
 from bonobo._api import (
    CsvReader,
    CsvWriter,
    FileReader,
    FileWriter,
    Filter,
    FixedWindow,
    Format,
    Graph,
    JsonReader,
    JsonWriter,
    LdjsonReader,
    LdjsonWriter,
    Limit,
    MapFields,
    OrderFields,
    PickleReader,
    PickleWriter,
    PrettyPrinter,
    RateLimited,
    Rename,
    SetFields,
    Tee,
    UnpackItems,
    __all__,
    __doc__,
    count,
    create_reader,
    create_strategy,
    create_writer,
    get_argument_parser,
    get_examples_path,
    identity,
    inspect,
    noop,
    open_examples_fs,
    open_fs,
    parse_args,
    run,
 )
 from bonobo._version import __version__
 from bonobo._api import *
-from bonobo._api import __all__, __doc__
+from bonobo._api import (
    CsvReader, CsvWriter, FileReader, FileWriter, Filter, FixedWindow, Format, Graph, JsonReader, JsonWriter,
    LdjsonReader, LdjsonWriter, Limit, MapFields, OrderFields, PickleReader, PickleWriter, PrettyPrinter, RateLimited,
    Rename, SetFields, Tee, UnpackItems, __all__, __doc__, count, create_reader, create_strategy, create_writer,
    get_argument_parser, get_examples_path, identity, inspect, noop, open_examples_fs, open_fs, parse_args, run
 )
 from bonobo._version import __version__
 if sys.version_info < (3, 5):
--- a/bonobo/contrib/django/commands.py
+++ b/bonobo/contrib/django/commands.py
@ -2,13 +2,13 @@ from logging import getLogger
 from types import GeneratorType
 from colorama import Back, Fore, Style
 from django.core.management import BaseCommand
 from django.core.management.base import OutputWrapper
 from mondrian import term
 import bonobo
 from bonobo.plugins.console import ConsoleOutputPlugin
 from bonobo.util.term import CLEAR_EOL
 from django.core.management import BaseCommand
 from django.core.management.base import OutputWrapper
 from .utils import create_or_update
--- a/bonobo/execution/strategies/init.py
+++ b/bonobo/execution/strategies/init.py
@ -7,9 +7,7 @@ at home if you want to give it a shot.
 """
 from bonobo.execution.strategies.executor import (
-    AsyncThreadPoolExecutorStrategy,
+    AsyncThreadPoolExecutorStrategy, ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy
    ProcessPoolExecutorStrategy,
    ThreadPoolExecutorStrategy,
 )
 from bonobo.execution.strategies.naive import NaiveStrategy
--- a/bonobo/structs/graphs.py
+++ b/bonobo/structs/graphs.py
@ -59,6 +59,7 @@ class Graph:
        self.edges = {BEGIN: set()}
        self.named = {}
        self.nodes = []
        if len(chain):
            self.add_chain(*chain)
    def __iter__(self):
@ -73,41 +74,103 @@ class Graph:
        return self.nodes[key]
    def get_cursor(self, ref=BEGIN):
-        return GraphCursor(self, last=self._resolve_index(ref))
+        return GraphCursor(self, last=self.index_of(ref))
-    def outputs_of(self, idx, create=False):
+    def index_of(self, mixed):
        """ Get a set of the outputs for a given node index.
        """
-        if create and not idx in self.edges:
+        Find the index based on various strategies for a node, probably an input or output of chain. Supported
-            self.edges[idx] = set()
+        inputs are indexes, node values or names.
        return self.edges[idx]
-    def add_node(self, c):
+        """
        if mixed is None:
            return None
        if type(mixed) is int or mixed in self.edges:
            return mixed
        if isinstance(mixed, str) and mixed in self.named:
            return self.named[mixed]
        if mixed in self.nodes:
            return self.nodes.index(mixed)
        raise ValueError("Cannot find node matching {!r}.".format(mixed))
    def outputs_of(self, idx_or_node, create=False):
        """Get a set of the outputs for a given node, node index or name.
        """
        idx_or_node = self.index_of(idx_or_node)
        if create and not idx_or_node in self.edges:
            self.edges[idx_or_node] = set()
        return self.edges[idx_or_node]
    def add_node(self, c, *, _name=None):
        """Add a node without connections in this graph and returns its index.
        If _name is specified, name this node (string reference for  further usage).
        """
        idx = len(self.nodes)
        self.edges[idx] = set()
        self.nodes.append(c)
        if _name:
            if _name in self.named:
                raise KeyError("Duplicate name {!r} in graph.".format(_name))
            self.named[_name] = idx
        return idx
    def add_chain(self, *nodes, _input=BEGIN, _output=None, _name=None):
-        """ Add a chain in this graph.
+        """Add `nodes` as a chain in this graph.
        **Input rules**
        * By default, this chain will be connected to `BEGIN`, a.k.a the special node that kickstarts transformations.
        * If `_input` is set to `None`, then this chain won't receive any input unless you connect it manually to
          something.
        * If `_input` is something that can resolve to another node using `index_of` rules, then the chain will
          receive the output stream of referenced node.
        **Output rules**
        * By default, this chain won't send its output anywhere. This is, most of the time, what you want.
        * If `_output` is set to something (that can resolve to a node), then the last node in the chain will send its
          outputs to the given node. This means you can provide an object, a name, or an index.
        **Naming**
        * If a `_name` is given, the first node in the chain will be named this way (same effect as providing a `_name`
          to add_node).
        **Special cases**
        * You can use this method to connect two other chains (in fact, two nodes) by not giving any `nodes`, but
          still providing values to `_input` and `_output`.
        """
-        if len(nodes):
+        _input = self.index_of(_input)
-            _input = self._resolve_index(_input)
+        _output = self.index_of(_output)
            _output = self._resolve_index(_output)
        _first = None
        _last = None
        # Sanity checks.
        if not len(nodes):
            if _input is None or _output is None:
                raise ValueError(
                    "Using add_chain(...) without nodes is only possible if you provide both _input and _output values."
                )
            if _name is not None:
                raise RuntimeError("Using add_chain(...) without nodes does not allow to use the _name parameter.")
        for i, node in enumerate(nodes):
-                _last = self.add_node(node)
+            _last = self.add_node(node, _name=_name if not i else None)
-                if not i and _name:
+
                    if _name in self.named:
                        raise KeyError("Duplicate name {!r} in graph.".format(_name))
                    self.named[_name] = _last
            if _first is None:
                _first = _last
            self.outputs_of(_input, create=True).add(_last)
            _input = _last
        if _output is not None:
@ -117,7 +180,6 @@ class Graph:
            del self._topologcally_sorted_indexes_cache
        return GraphRange(self, _first, _last)
        return GraphRange(self, None, None)
    def copy(self):
        g = Graph()
@ -191,26 +253,6 @@ class Graph:
        except (ExecutableNotFound, FileNotFoundError) as exc:
            return "<strong>{}</strong>: {}".format(type(exc).__name__, str(exc))
    def _resolve_index(self, mixed):
        """
        Find the index based on various strategies for a node, probably an input or output of chain. Supported
        inputs are indexes, node values or names.
        """
        if mixed is None:
            return None
        if type(mixed) is int or mixed in self.edges:
            return mixed
        if isinstance(mixed, str) and mixed in self.named:
            return self.named[mixed]
        if mixed in self.nodes:
            return self.nodes.index(mixed)
        raise ValueError("Cannot find node matching {!r}.".format(mixed))
 def _get_graphviz_node_id(graph, i):
    escaped_index = str(i)
--- a/bonobo/util/init.py
+++ b/bonobo/util/init.py
@ -6,15 +6,7 @@ and inspect transformations, graphs, and nodes.
 from bonobo.util.collections import cast, ensure_tuple, sortedlist, tuplize
 from bonobo.util.compat import deprecated, deprecated_alias
 from bonobo.util.inspect import (
-    inspect_node,
+    inspect_node, isconfigurable, isconfigurabletype, iscontextprocessor, isdict, ismethod, isoption, istuple, istype
    isconfigurable,
    isconfigurabletype,
    iscontextprocessor,
    isdict,
    ismethod,
    isoption,
    istuple,
    istype,
 )
 from bonobo.util.objects import ValueHolder, get_attribute_or_create, get_name
--- a/docs/guide/graphs.rst
+++ b/docs/guide/graphs.rst
@ -213,10 +213,9 @@ Named nodes
 Using above code to create convergences often leads to code which is hard to read, because you have to define the "target" stream
 before the streams that logically goes to the beginning of the transformation graph. To overcome that, one can use
-"named" nodes:
+"named" nodes.
-    graph.add_chain(x, y, z, _name='zed')
+Please note that naming a chain is exactly the same thing as naming the first node of a chain.
    graph.add_chain(f, g, h, _input='zed')
 .. code-block:: python
@ -224,13 +223,12 @@ before the streams that logically goes to the beginning of the transformation gr
    graph = bonobo.Graph()
    # Add two different chains
    graph.add_chain(a, b, _output="load")
    graph.add_chain(f, g, _output="load")
    # Here we mark _input to None, so normalize won't get the "begin" impulsion.
    graph.add_chain(normalize, store, _input=None, _name="load")
    # Add two different chains that will output to the "load" node
    graph.add_chain(a, b, _output="load")
    graph.add_chain(f, g, _output="load")
 Resulting graph:
@ -249,6 +247,43 @@ Resulting graph:
        "normalize (load)" -> "store"
    }
 You can also create single nodes, and the api provide the same capability on single nodes. 
 .. code-block:: python
    import bonobo
    graph = bonobo.Graph()
    # Create a node without any connection, name it.
    graph.add_node(foo, _name="foo")
    # Use it somewhere else as the data source.
    graph.add_chain(..., _input="foo")
    # ... or as the data sink.
    graph.add_chain(..., _output="foo")
 Connecting two nodes
 ::::::::::::::::::::
 You may want to connect two nodes at some point. You can use `add_chain` without nodes to achieve it.
 .. code-block:: python
    import bonobo
    graph = bonobo.Graph()
    # Create two "anonymous" nodes
    graph.add_node(a)
    graph.add_node(b)
    # Connect them
    graph.add_chain(_input=a, _output=b)
 Inspecting graphs
 :::::::::::::::::
--- a/tests/structs/test_graphs.py
+++ b/tests/structs/test_graphs.py
@ -23,6 +23,27 @@ def test_graph_outputs_of():
    assert len(g.outputs_of(0)) == 0
 def test_graph_index_of():
    g = Graph()
    g.add_node(sentinel.foo)
    g.add_node(sentinel.bar)
    # sequential, can resolve objects
    assert g.index_of(sentinel.foo) == 0
    assert g.index_of(sentinel.bar) == 1
    # calling on an index should return the index
    assert g.index_of(sentinel.bar) == g.index_of(g.index_of(sentinel.bar))
    # not existing should raise value error
    with pytest.raises(ValueError):
        g.index_of(sentinel.not_there)
    # tokens resolve to themselves
    assert g.index_of(BEGIN) == BEGIN
 def test_graph_add_component():
    g = Graph()
@ -35,6 +56,19 @@ def test_graph_add_component():
    assert len(g.nodes) == 2
 def test_invalid_graph_usage():
    g = Graph()
    with pytest.raises(ValueError):
        g.add_chain()
    g.add_node(sentinel.foo)
    g.add_node(sentinel.bar)
    with pytest.raises(RuntimeError):
        g.add_chain(_input=sentinel.bar, _output=sentinel.foo, _name="this_is_not_possible")
 def test_graph_add_chain():
    g = Graph()
@ -63,6 +97,41 @@ def test_graph_topological_sort():
    assert g[4] == sentinel.b2
 def test_connect_two_chains():
    g = Graph()
    g.add_chain(sentinel.a1, sentinel.a2, _input=None, _output=None)
    g.add_chain(sentinel.b1, sentinel.b2, _input=None, _output=None)
    assert len(g.outputs_of(sentinel.a2)) == 0
    g.add_chain(_input=sentinel.a2, _output=sentinel.b1)
    assert g.outputs_of(sentinel.a2) == {g.index_of(sentinel.b1)}
 def test_connect_two_anonymous_nodes():
    g = Graph()
    # Create two "anonymous" nodes
    g.add_node(sentinel.a)
    g.add_node(sentinel.b)
    # Connect them
    g.add_chain(_input=sentinel.a, _output=sentinel.b)
 def test_named_nodes():
    g = Graph()
    a, b, c, d, e, f = sentinel.a, sentinel.b, sentinel.c, sentinel.d, sentinel.e, sentinel.f
    # Here we mark _input to None, so normalize won't get the "begin" impulsion.
    g.add_chain(e, f, _input=None, _name="load")
    # Add two different chains
    g.add_chain(a, b, _output="load")
    g.add_chain(c, d, _output="load")
 def test_copy():
    g1 = Graph()
    g2 = g1.copy()