Topological sort of a graph, allowing better console (and other) outputs.
Uses algorithm borrowed from networkx graph library to sort a graph in topological order. The method is only used by output plugins, as internal plumbery does not really care about the node order. Also includes a bonobo.util.python.require function that helps importing thing in a package-less context, or when there are conflict with site package names.
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@ -37,6 +37,7 @@
|
||||
/examples/private
|
||||
/htmlcov/
|
||||
/sdist/
|
||||
/tags
|
||||
celerybeat-schedule
|
||||
parts/
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
@ -85,3 +85,8 @@ class Configurable(metaclass=ConfigurableMeta):
|
||||
# set option values.
|
||||
for name, value in kwargs.items():
|
||||
setattr(self, name, value)
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
""" You can implement a configurable callable behaviour by implemenenting the call(...) method. Of course, it is also backward compatible with legacy __call__ override.
|
||||
"""
|
||||
return self.call(*args, **kwargs)
|
||||
|
||||
@ -5,9 +5,19 @@ def require(package, requirement=None):
|
||||
return __import__(package)
|
||||
except ImportError:
|
||||
from colorama import Fore, Style
|
||||
print(Fore.YELLOW, 'This example requires the {!r} package. Install it using:'.format(requirement),
|
||||
Style.RESET_ALL, sep='')
|
||||
print(
|
||||
Fore.YELLOW,
|
||||
'This example requires the {!r} package. Install it using:'.
|
||||
format(requirement),
|
||||
Style.RESET_ALL,
|
||||
sep=''
|
||||
)
|
||||
print()
|
||||
print(Fore.YELLOW, ' $ pip install {!s}'.format(requirement), Style.RESET_ALL, sep='')
|
||||
print(
|
||||
Fore.YELLOW,
|
||||
' $ pip install {!s}'.format(requirement),
|
||||
Style.RESET_ALL,
|
||||
sep=''
|
||||
)
|
||||
print()
|
||||
raise
|
||||
@ -87,7 +87,6 @@ class LoopingExecutionContext(Wrapper):
|
||||
finally:
|
||||
self._stopped = True
|
||||
|
||||
|
||||
def handle_error(self, exc, trace):
|
||||
return print_error(exc, trace, context=self.wrapped)
|
||||
|
||||
|
||||
@ -21,7 +21,7 @@ class GraphExecutionContext:
|
||||
|
||||
def __init__(self, graph, plugins=None, services=None):
|
||||
self.graph = graph
|
||||
self.nodes = [NodeExecutionContext(node, parent=self) for node in self.graph.nodes]
|
||||
self.nodes = [NodeExecutionContext(node, parent=self) for node in self.graph]
|
||||
self.plugins = [PluginExecutionContext(plugin, parent=self) for plugin in plugins or ()]
|
||||
self.services = Container(services) if services else Container()
|
||||
|
||||
|
||||
@ -73,19 +73,19 @@ class ConsoleOutputPlugin(Plugin):
|
||||
def write(context, prefix='', rewind=True, append=None, debug=False, profile=False):
|
||||
t_cnt = len(context)
|
||||
|
||||
for i, node in enumerate(context):
|
||||
for i in context.graph.topologically_sorted_indexes:
|
||||
node = context[i]
|
||||
if node.alive:
|
||||
_line = ''.join(
|
||||
(
|
||||
Fore.BLACK, '({})'.format(i + 1), Style.RESET_ALL, ' ', Style.BRIGHT, '+', Style.RESET_ALL, ' ',
|
||||
node.name, ' ', node.get_statistics_as_string(debug=debug,
|
||||
profile=profile), Style.RESET_ALL, ' ',
|
||||
' ', Style.BRIGHT, '+', Style.RESET_ALL, ' ', node.name, '(', str(i), ') ',
|
||||
node.get_statistics_as_string(debug=debug, profile=profile), Style.RESET_ALL, ' ',
|
||||
)
|
||||
)
|
||||
else:
|
||||
_line = ''.join(
|
||||
(
|
||||
Fore.BLACK, '({})'.format(i + 1), ' - ', node.name, ' ',
|
||||
' ', Fore.BLACK, '-', ' ', node.name, '(', str(i), ') ',
|
||||
node.get_statistics_as_string(debug=debug, profile=profile), Style.RESET_ALL, ' ',
|
||||
)
|
||||
)
|
||||
|
||||
@ -74,5 +74,3 @@ class CsvWriter(CsvHandler, FileWriter):
|
||||
writer.writerow(row[header] for header in headers.value)
|
||||
lineno.value += 1
|
||||
return NOT_MODIFIED
|
||||
|
||||
|
||||
|
||||
@ -3,35 +3,115 @@ from bonobo.constants import BEGIN
|
||||
|
||||
class Graph:
|
||||
"""
|
||||
Represents a coherent directed acyclic graph of components.
|
||||
Represents a directed graph of nodes.
|
||||
"""
|
||||
|
||||
def __init__(self, *chain):
|
||||
self.edges = {BEGIN: set()}
|
||||
self.named = {}
|
||||
self.nodes = []
|
||||
self.graph = {BEGIN: set()}
|
||||
self.add_chain(*chain)
|
||||
|
||||
def __iter__(self):
|
||||
yield from self.nodes
|
||||
|
||||
def __len__(self):
|
||||
""" Node count.
|
||||
"""
|
||||
return len(self.nodes)
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self.nodes[key]
|
||||
|
||||
def outputs_of(self, idx, create=False):
|
||||
if create and not idx in self.graph:
|
||||
self.graph[idx] = set()
|
||||
return self.graph[idx]
|
||||
""" Get a set of the outputs for a given node index.
|
||||
"""
|
||||
if create and not idx in self.edges:
|
||||
self.edges[idx] = set()
|
||||
return self.edges[idx]
|
||||
|
||||
def add_node(self, c):
|
||||
i = len(self.nodes)
|
||||
""" Add a node without connections in this graph and returns its index.
|
||||
"""
|
||||
idx = len(self.nodes)
|
||||
self.edges[idx] = set()
|
||||
self.nodes.append(c)
|
||||
return i
|
||||
return idx
|
||||
|
||||
def add_chain(self, *nodes, _input=BEGIN, _output=None):
|
||||
for node in nodes:
|
||||
def add_chain(self, *nodes, _input=BEGIN, _output=None, _name=None):
|
||||
""" Add a chain in this graph.
|
||||
"""
|
||||
if len(nodes):
|
||||
_input = self._resolve_index(_input)
|
||||
_output = self._resolve_index(_output)
|
||||
|
||||
for i, node in enumerate(nodes):
|
||||
_next = self.add_node(node)
|
||||
if not i and _name:
|
||||
if _name in self.named:
|
||||
raise KeyError('Duplicate name {!r} in graph.'.format(_name))
|
||||
self.named[_name] = _next
|
||||
self.outputs_of(_input, create=True).add(_next)
|
||||
_input = _next
|
||||
if _output:
|
||||
if not _output in self.nodes:
|
||||
raise ValueError('Output not found.')
|
||||
self.outputs_of(_input, create=True).add(self.nodes.index(_output))
|
||||
|
||||
if _output is not None:
|
||||
self.outputs_of(_input, create=True).add(_output)
|
||||
|
||||
if hasattr(self, '_topologcally_sorted_indexes_cache'):
|
||||
del self._topologcally_sorted_indexes_cache
|
||||
|
||||
return self
|
||||
|
||||
def __len__(self):
|
||||
return len(self.nodes)
|
||||
@property
|
||||
def topologically_sorted_indexes(self):
|
||||
"""Iterate in topological order, based on networkx's topological_sort() function.
|
||||
"""
|
||||
try:
|
||||
return self._topologcally_sorted_indexes_cache
|
||||
except AttributeError:
|
||||
seen = set()
|
||||
order = []
|
||||
explored = set()
|
||||
|
||||
for i in self.edges:
|
||||
if i in explored:
|
||||
continue
|
||||
fringe = [i]
|
||||
while fringe:
|
||||
w = fringe[-1] # depth first search
|
||||
if w in explored: # already looked down this branch
|
||||
fringe.pop()
|
||||
continue
|
||||
seen.add(w) # mark as seen
|
||||
# Check successors for cycles and for new nodes
|
||||
new_nodes = []
|
||||
for n in self.outputs_of(w):
|
||||
if n not in explored:
|
||||
if n in seen: # CYCLE !!
|
||||
raise RuntimeError("Graph contains a cycle.")
|
||||
new_nodes.append(n)
|
||||
if new_nodes: # Add new_nodes to fringe
|
||||
fringe.extend(new_nodes)
|
||||
else: # No new nodes so w is fully explored
|
||||
explored.add(w)
|
||||
order.append(w)
|
||||
fringe.pop() # done considering this node
|
||||
self._topologcally_sorted_indexes_cache = tuple(filter(lambda i: type(i) is int, reversed(order)))
|
||||
return self._topologcally_sorted_indexes_cache
|
||||
|
||||
def _resolve_index(self, mixed):
|
||||
""" Find the index based on various strategies for a node, probably an input or output of chain. Supported inputs are indexes, node values or names.
|
||||
"""
|
||||
if mixed is None:
|
||||
return None
|
||||
|
||||
if type(mixed) is int or mixed in self.edges:
|
||||
return mixed
|
||||
|
||||
if isinstance(mixed, str) and mixed in self.named:
|
||||
return self.named[mixed]
|
||||
|
||||
if mixed in self.nodes:
|
||||
return self.nodes.index(mixed)
|
||||
|
||||
raise ValueError('Cannot find node matching {!r}.'.format(mixed))
|
||||
|
||||
@ -200,6 +200,9 @@ class ValueHolder:
|
||||
def __invert__(self):
|
||||
return ~self.value
|
||||
|
||||
def __len__(self):
|
||||
return len(self.value)
|
||||
|
||||
|
||||
def get_attribute_or_create(obj, attr, default):
|
||||
try:
|
||||
@ -207,4 +210,3 @@ def get_attribute_or_create(obj, attr, default):
|
||||
except AttributeError:
|
||||
setattr(obj, attr, default)
|
||||
return getattr(obj, attr)
|
||||
|
||||
|
||||
22
bonobo/util/python.py
Normal file
22
bonobo/util/python.py
Normal file
@ -0,0 +1,22 @@
|
||||
import inspect
|
||||
import os
|
||||
import runpy
|
||||
|
||||
|
||||
class _RequiredModule:
|
||||
def __init__(self, dct):
|
||||
self.__dict__ = dct
|
||||
|
||||
|
||||
class _RequiredModulesRegistry(dict):
|
||||
def require(self, name):
|
||||
if name not in self:
|
||||
bits = name.split('.')
|
||||
pathname = os.path.join(os.getcwd(), os.path.dirname(inspect.getfile(inspect.stack()[1][0])))
|
||||
filename = os.path.join(pathname, *bits[:-1], bits[-1] + '.py')
|
||||
self[name] = _RequiredModule(runpy.run_path(filename, run_name=name))
|
||||
return self[name]
|
||||
|
||||
|
||||
registry = _RequiredModulesRegistry()
|
||||
require = registry.require
|
||||
31
setup.py
31
setup.py
@ -36,44 +36,41 @@ else:
|
||||
|
||||
setup(
|
||||
name='bonobo',
|
||||
description=
|
||||
('Bonobo, a simple, modern and atomic extract-transform-load toolkit for '
|
||||
description=('Bonobo, a simple, modern and atomic extract-transform-load toolkit for '
|
||||
'python 3.5+.'),
|
||||
license='Apache License, Version 2.0',
|
||||
install_requires=[
|
||||
'colorama >=0.3,<1.0', 'fs >=2.0,<3.0', 'psutil >=5.2,<6.0',
|
||||
'requests >=2.0,<3.0', 'stevedore >=1.21,<2.0'
|
||||
'colorama >=0.3,<1.0', 'fs >=2.0,<3.0', 'psutil >=5.2,<6.0', 'requests >=2.0,<3.0', 'stevedore >=1.21,<2.0'
|
||||
],
|
||||
version=version,
|
||||
long_description=long_description,
|
||||
classifiers=classifiers,
|
||||
packages=find_packages(exclude=['ez_setup', 'example', 'test']),
|
||||
include_package_data=True,
|
||||
data_files=[('share/jupyter/nbextensions/bonobo-jupyter', [
|
||||
'bonobo/ext/jupyter/static/extension.js',
|
||||
'bonobo/ext/jupyter/static/index.js',
|
||||
data_files=[
|
||||
(
|
||||
'share/jupyter/nbextensions/bonobo-jupyter', [
|
||||
'bonobo/ext/jupyter/static/extension.js', 'bonobo/ext/jupyter/static/index.js',
|
||||
'bonobo/ext/jupyter/static/index.js.map'
|
||||
])],
|
||||
]
|
||||
)
|
||||
],
|
||||
extras_require={
|
||||
'dev': [
|
||||
'coverage >=4,<5', 'pylint >=1,<2', 'pytest >=3,<4',
|
||||
'pytest-cov >=2,<3', 'pytest-timeout >=1,<2', 'sphinx',
|
||||
'coverage >=4,<5', 'pylint >=1,<2', 'pytest >=3,<4', 'pytest-cov >=2,<3', 'pytest-timeout >=1,<2', 'sphinx',
|
||||
'sphinx_rtd_theme', 'yapf'
|
||||
],
|
||||
'jupyter': ['jupyter >=1.0,<1.1', 'ipywidgets >=6.0.0.beta5']
|
||||
},
|
||||
entry_points={
|
||||
'bonobo.commands': [
|
||||
'init = bonobo.commands.init:register',
|
||||
'run = bonobo.commands.run:register',
|
||||
'init = bonobo.commands.init:register', 'run = bonobo.commands.run:register',
|
||||
'version = bonobo.commands.version:register'
|
||||
],
|
||||
'console_scripts': ['bonobo = bonobo.commands:entrypoint'],
|
||||
'edgy.project.features':
|
||||
['bonobo = '
|
||||
'edgy.project.features': ['bonobo = '
|
||||
'bonobo.ext.edgy.project.feature:BonoboFeature']
|
||||
},
|
||||
url='https://www.bonobo-project.org/',
|
||||
download_url=
|
||||
'https://github.com/python-bonobo/bonobo/tarball/{version}'.format(
|
||||
version=version), )
|
||||
download_url='https://github.com/python-bonobo/bonobo/tarball/{version}'.format(version=version),
|
||||
)
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
import pytest
|
||||
|
||||
from unittest.mock import sentinel
|
||||
|
||||
from bonobo.constants import BEGIN
|
||||
from bonobo.structs import Graph
|
||||
|
||||
@ -41,3 +43,30 @@ def test_graph_add_chain():
|
||||
g.add_chain(identity, identity, identity)
|
||||
assert len(g.nodes) == 3
|
||||
assert len(g.outputs_of(BEGIN)) == 1
|
||||
|
||||
|
||||
def test_graph_topological_sort():
|
||||
g = Graph()
|
||||
|
||||
g.add_chain(
|
||||
sentinel.a1,
|
||||
sentinel.a2,
|
||||
sentinel.a3,
|
||||
_input=None,
|
||||
_output=None,
|
||||
)
|
||||
|
||||
assert g.topologically_sorted_indexes == (0, 1, 2)
|
||||
assert g[0] == sentinel.a1
|
||||
assert g[1] == sentinel.a2
|
||||
assert g[2] == sentinel.a3
|
||||
|
||||
g.add_chain(
|
||||
sentinel.b1,
|
||||
sentinel.b2,
|
||||
_output=sentinel.a2,
|
||||
)
|
||||
|
||||
assert g.topologically_sorted_indexes == (0, 3, 4, 1, 2)
|
||||
assert g[3] == sentinel.b1
|
||||
assert g[4] == sentinel.b2
|
||||
|
||||
1
tests/util/requireable/dummy.py
Normal file
1
tests/util/requireable/dummy.py
Normal file
@ -0,0 +1 @@
|
||||
foo = 'bar'
|
||||
6
tests/util/test_python.py
Normal file
6
tests/util/test_python.py
Normal file
@ -0,0 +1,6 @@
|
||||
from bonobo.util.python import require
|
||||
|
||||
|
||||
def test_require():
|
||||
dummy = require('requireable.dummy')
|
||||
assert dummy.foo == 'bar'
|
||||
Reference in New Issue
Block a user