Implements bags, so that arbitrary args/kwargs can be passed to functions.

This commit is contained in:
Romain Dorgueil
2016-12-25 12:40:28 +01:00
parent 9c4ec68b18
commit a3adb044bf
19 changed files with 151 additions and 120 deletions

View File

@ -37,7 +37,7 @@ extras_require = {
'pytest-cov >=2.4,<2.5', 'pytest-cov >=2.4,<2.5',
'sphinx', 'sphinx',
'sphinx_rtd_theme', 'sphinx_rtd_theme',
'yapf',
], ],
} }

View File

@ -1,17 +1,10 @@
import sys import sys
from .core import Graph, NaiveStrategy, ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy, inject, service from .core import *
from .io import *
from .util import *
PY35 = (sys.version_info >= (3, 5)) PY35 = (sys.version_info >= (3, 5))
assert PY35, 'Python 3.5+ is required to use Bonobo.' assert PY35, 'Python 3.5+ is required to use Bonobo.'
__all__ = [
Graph,
NaiveStrategy,
ProcessPoolExecutorStrategy,
ThreadPoolExecutorStrategy,
inject,
service,
]
__version__ = '0.0.0' __version__ = '0.0.0'

View File

@ -1,13 +1,16 @@
from .bags import Bag, Inherit
from .graphs import Graph from .graphs import Graph
from .services import inject, service from .services import inject, service
from .strategies.executor import ThreadPoolExecutorStrategy, ProcessPoolExecutorStrategy from .strategies.executor import ThreadPoolExecutorStrategy, ProcessPoolExecutorStrategy
from .strategies.naive import NaiveStrategy from .strategies.naive import NaiveStrategy
__all__ = [ __all__ = [
Graph, 'Bag',
NaiveStrategy, 'Graph',
ProcessPoolExecutorStrategy, 'Inherit',
ThreadPoolExecutorStrategy, 'NaiveStrategy',
inject, 'ProcessPoolExecutorStrategy',
service, 'ThreadPoolExecutorStrategy',
'inject',
'service',
] ]

19
bonobo/core/bags.py Normal file
View File

@ -0,0 +1,19 @@
class Bag:
    """Hold a set of positional and keyword arguments for a later call.

    The arguments given at construction time are stored untouched on
    ``args`` (a tuple) and ``kwargs`` (a dict) and can be replayed onto any
    callable through :meth:`apply`.
    """

    def __init__(self, *args, **kwargs):
        self.args, self.kwargs = args, kwargs

    def apply(self, f, *args, **kwargs):
        """Call ``f`` with the extra arguments first, then the stored ones.

        Positional arguments passed here come before the stored positional
        arguments. Keyword arguments passed here are combined with the stored
        keyword arguments; a name present in both raises ``TypeError``, as
        with any duplicated keyword in a call.

        :param f: callable to invoke.
        :return: whatever ``f`` returns.
        """
        positional = (*args, *self.args)
        return f(*positional, **kwargs, **self.kwargs)

    def __repr__(self):
        type_name = type(self).__name__
        return '<{} *{} **{}>'.format(type_name, self.args, self.kwargs)
class Inherit(Bag):
    """A bag whose content is layered on top of another bag's content."""

    def override(self, input):
        """Merge ``input`` underneath this bag and return this bag.

        The positional arguments of ``input`` are placed before this bag's
        own positional arguments. Keyword arguments of ``input`` are used as a
        base, and this bag's keyword arguments win on name clashes.

        NOTE: this bag is modified in place; the returned object is ``self``,
        not a copy.

        :param input: object exposing ``args`` and ``kwargs`` to inherit from.
        :return: this same instance, after the merge.
        """
        # Inherited positionals come first, ours are appended after them.
        self.args = input.args + self.args
        # Later entries win, so our own keywords shadow the inherited ones.
        self.kwargs = {**input.kwargs, **self.kwargs}
        return self

View File

@ -3,11 +3,12 @@ from functools import partial
from queue import Empty from queue import Empty
from time import sleep from time import sleep
from bonobo.core.bags import Bag
from bonobo.core.errors import InactiveReadableError from bonobo.core.errors import InactiveReadableError
from bonobo.core.inputs import Input from bonobo.core.inputs import Input
from bonobo.core.stats import WithStatistics from bonobo.core.stats import WithStatistics
from bonobo.util.lifecycle import get_initializer, get_finalizer from bonobo.util.lifecycle import get_initializer, get_finalizer
from bonobo.util.tokens import BEGIN, END, NEW, RUNNING, TERMINATED from bonobo.util.tokens import Begin, End, New, Running, Terminated, NotModified
class ExecutionContext: class ExecutionContext:
@ -22,8 +23,8 @@ class ExecutionContext:
component_context.outputs = [self[j].input for j in self.graph.outputs_of(i)] component_context.outputs = [self[j].input for j in self.graph.outputs_of(i)]
except KeyError as e: except KeyError as e:
continue continue
component_context.input.on_begin = partial(component_context.send, BEGIN, _control=True) component_context.input.on_begin = partial(component_context.send, Begin, _control=True)
component_context.input.on_end = partial(component_context.send, END, _control=True) component_context.input.on_end = partial(component_context.send, End, _control=True)
def __getitem__(self, item): def __getitem__(self, item):
return self.components[item] return self.components[item]
@ -94,7 +95,7 @@ class ComponentExecutionContext(WithStatistics):
self.component = component self.component = component
self.input = Input() self.input = Input()
self.outputs = [] self.outputs = []
self.state = NEW self.state = New
self.stats = { self.stats = {
'in': 0, 'in': 0,
'out': 0, 'out': 0,
@ -132,27 +133,33 @@ class ComponentExecutionContext(WithStatistics):
self.input.put(value) self.input.put(value)
def get(self): def get(self):
row = self.input.get(timeout=1) # todo XXX if timeout, in stat is erroneous
return row self.stats['in'] += 1
return self.input.get(timeout=1)
def _call(self, row): def _call(self, bag_or_arg):
# timer = Timer() # todo add timer
# with timer: bag = bag_or_arg if hasattr(bag_or_arg, 'apply') else Bag(bag_or_arg)
args = () if row is None else (row, )
if getattr(self.component, '_with_context', False): if getattr(self.component, '_with_context', False):
return self.component(self, *args) return bag.apply(self.component, self)
return self.component(*args) return bag.apply(self.component)
def step(self): def step(self):
# Pull data from the first available input channel. # Pull data from the first available input channel.
"""Runs a transformation callable with given args/kwargs and flush the result into the right """Runs a transformation callable with given args/kwargs and flush the result into the right
output channel.""" output channel."""
row = self.get() input_row = self.get()
self.stats['in'] += 1
results = self._call(row) def _resolve(result):
nonlocal input_row
if result is NotModified:
return input_row
if hasattr(result, 'override'):
return result.override(input_row)
return result
results = self._call(input_row)
# self._exec_time += timer.duration # self._exec_time += timer.duration
# Put data onto output channels # Put data onto output channels
@ -160,7 +167,7 @@ class ComponentExecutionContext(WithStatistics):
results = iterable(results) results = iterable(results)
except TypeError: except TypeError:
if results: if results:
self.send(results) self.send(_resolve(results))
else: else:
# case with no result, an execution went through anyway, use for stats. # case with no result, an execution went through anyway, use for stats.
# self._exec_count += 1 # self._exec_count += 1
@ -171,13 +178,13 @@ class ComponentExecutionContext(WithStatistics):
result = next(results) result = next(results)
except StopIteration as e: except StopIteration as e:
break break
self.send(result) self.send(_resolve(result))
def run(self): def run(self):
assert self.state is NEW, ('A {} can only be run once, and thus is expected to be in {} state at the ' assert self.state is New, ('A {} can only be run once, and thus is expected to be in {} state at the '
'beginning of a run().').format(type(self).__name__, NEW) 'beginning of a run().').format(type(self).__name__, New)
self.state = RUNNING self.state = Running
try: try:
get_initializer(self.component)(self) get_initializer(self.component)(self)
except Exception as e: except Exception as e:
@ -197,10 +204,10 @@ class ComponentExecutionContext(WithStatistics):
except Exception as e: except Exception as e:
self.handle_error(e, traceback.format_exc()) self.handle_error(e, traceback.format_exc())
assert self.state is RUNNING, ('A {} must be in {} state when finalization starts.').format( assert self.state is Running, ('A {} must be in {} state when finalization starts.').format(
type(self).__name__, RUNNING) type(self).__name__, Running)
self.state = TERMINATED self.state = Terminated
try: try:
get_finalizer(self.component)(self) get_finalizer(self.component)(self)
except Exception as e: except Exception as e:

View File

@ -1,4 +1,4 @@
from bonobo.util.tokens import BEGIN from bonobo.util.tokens import Begin
class Graph: class Graph:
@ -8,7 +8,7 @@ class Graph:
def __init__(self): def __init__(self):
self.components = [] self.components = []
self.graph = {BEGIN: set()} self.graph = {Begin: set()}
def outputs_of(self, idx, create=False): def outputs_of(self, idx, create=False):
if create and not idx in self.graph: if create and not idx in self.graph:
@ -20,7 +20,7 @@ class Graph:
self.components.append(c) self.components.append(c)
return i return i
def add_chain(self, *components, input=BEGIN): def add_chain(self, *components, input=Begin):
for component in components: for component in components:
next = self.add_component(component) next = self.add_component(component)
self.outputs_of(input, create=True).add(next) self.outputs_of(input, create=True).add(next)

View File

@ -19,7 +19,7 @@ from queue import Queue
from bonobo.core.errors import AbstractError, InactiveWritableError, InactiveReadableError from bonobo.core.errors import AbstractError, InactiveWritableError, InactiveReadableError
from bonobo.util import noop from bonobo.util import noop
from bonobo.util.tokens import BEGIN, END from bonobo.util.tokens import Begin, End
BUFFER_SIZE = 8192 BUFFER_SIZE = 8192
@ -53,7 +53,7 @@ class Input(Queue, Readable, Writable):
def put(self, data, block=True, timeout=None): def put(self, data, block=True, timeout=None):
# Begin token is a metadata to raise the input runlevel. # Begin token is a metadata to raise the input runlevel.
if data == BEGIN: if data == Begin:
self._runlevel += 1 self._runlevel += 1
self._writable_runlevel += 1 self._writable_runlevel += 1
@ -66,7 +66,7 @@ class Input(Queue, Readable, Writable):
if self._writable_runlevel < 1: if self._writable_runlevel < 1:
raise InactiveWritableError('Cannot put() on an inactive {}.'.format(Writable.__name__)) raise InactiveWritableError('Cannot put() on an inactive {}.'.format(Writable.__name__))
if data == END: if data == End:
self._writable_runlevel -= 1 self._writable_runlevel -= 1
return Queue.put(self, data, block, timeout) return Queue.put(self, data, block, timeout)
@ -77,7 +77,7 @@ class Input(Queue, Readable, Writable):
data = Queue.get(self, block, timeout) data = Queue.get(self, block, timeout)
if data == END: if data == End:
self._runlevel -= 1 self._runlevel -= 1
# callback # callback
@ -92,7 +92,7 @@ class Input(Queue, Readable, Writable):
def empty(self): def empty(self):
self.mutex.acquire() self.mutex.acquire()
while self._qsize() and self.queue[0] == END: while self._qsize() and self.queue[0] == End:
self._runlevel -= 1 self._runlevel -= 1
Queue._get(self) Queue._get(self)
self.mutex.release() self.mutex.release()

View File

@ -3,8 +3,9 @@ from concurrent.futures import Executor
from concurrent.futures import ProcessPoolExecutor from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from bonobo.core.bags import Bag
from bonobo.core.strategies.base import Strategy from bonobo.core.strategies.base import Strategy
from bonobo.util.tokens import BEGIN, END from bonobo.util.tokens import Begin, End
class ExecutorStrategy(Strategy): class ExecutorStrategy(Strategy):
@ -19,10 +20,10 @@ class ExecutorStrategy(Strategy):
context = self.create_context(graph, plugins=plugins) context = self.create_context(graph, plugins=plugins)
executor = self.executor_factory() executor = self.executor_factory()
for i in graph.outputs_of(BEGIN): for i in graph.outputs_of(Begin):
context[i].recv(BEGIN) context[i].recv(Begin)
context[i].recv(None) context[i].recv(Bag())
context[i].recv(END) context[i].recv(End)
futures = [] futures = []

View File

@ -0,0 +1 @@
from .json import *

View File

@ -2,6 +2,8 @@ import json
from bonobo.util.lifecycle import with_context, set_initializer, set_finalizer from bonobo.util.lifecycle import with_context, set_initializer, set_finalizer
__all__ = ['to_json', ]
def to_json(path_or_buf): def to_json(path_or_buf):
# todo different cases + documentation # todo different cases + documentation

View File

@ -1,6 +1,16 @@
import functools import functools
import pprint import pprint
from .tokens import NotModified
__all__ = [
'NotModified',
'head',
'log',
'noop',
'tee',
]
def head(n=10): def head(n=10):
i = 0 i = 0

View File

@ -8,8 +8,11 @@ class Token:
return '<{}>'.format(self.__name__) return '<{}>'.format(self.__name__)
BEGIN = Token('Begin') Begin = Token('Begin')
END = Token('End') End = Token('End')
NEW = Token('New')
RUNNING = Token('Running') New = Token('New')
TERMINATED = Token('Terminated') Running = Token('Running')
Terminated = Token('Terminated')
NotModified = Token('NotModified')

View File

@ -13,13 +13,8 @@ from bonobo import __version__
# -- General configuration ------------------------------------------------ # -- General configuration ------------------------------------------------
extensions = [ extensions = [
'sphinx.ext.autodoc', 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage',
'sphinx.ext.doctest', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode'
'sphinx.ext.intersphinx',
'sphinx.ext.todo',
'sphinx.ext.coverage',
'sphinx.ext.ifconfig',
'sphinx.ext.viewcode'
] ]
# Add any paths that contain templates here, relative to this directory. # Add any paths that contain templates here, relative to this directory.
@ -86,7 +81,12 @@ html_theme_options = {
html_sidebars = { html_sidebars = {
'**': [ '**': [
'sidebarlogo.html', 'localtoc.html', 'relations.html', 'searchbox.html', 'sidebarinfos.html', 'sourcelink.html', 'sidebarlogo.html',
'localtoc.html',
'relations.html',
'searchbox.html',
'sidebarinfos.html',
'sourcelink.html',
] ]
} }
@ -124,30 +124,21 @@ latex_elements = {
# Grouping the document tree into LaTeX files. List of tuples # Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, # (source start file, target name, title,
# author, documentclass [howto, manual, or own class]). # author, documentclass [howto, manual, or own class]).
latex_documents = [ latex_documents = [(master_doc, 'Bonobo.tex', 'Bonobo Documentation', 'Romain Dorgueil', 'manual'), ]
(master_doc, 'Bonobo.tex', 'Bonobo Documentation',
'Romain Dorgueil', 'manual'),
]
# -- Options for manual page output --------------------------------------- # -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples # One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section). # (source start file, name, description, authors, manual section).
man_pages = [ man_pages = [(master_doc, 'bonobo', 'Bonobo Documentation', [author], 1)]
(master_doc, 'bonobo', 'Bonobo Documentation',
[author], 1)
]
# -- Options for Texinfo output ------------------------------------------- # -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples # Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author, # (source start file, target name, title, author,
# dir menu entry, description, category) # dir menu entry, description, category)
texinfo_documents = [ texinfo_documents = [(master_doc, 'Bonobo', 'Bonobo Documentation', author, 'Bonobo',
(master_doc, 'Bonobo', 'Bonobo Documentation', 'One line description of project.', 'Miscellaneous'), ]
author, 'Bonobo', 'One line description of project.',
'Miscellaneous'),
]
# -- Options for Epub output ---------------------------------------------- # -- Options for Epub output ----------------------------------------------

View File

@ -22,7 +22,8 @@ def _getlink(x):
def normalize(row): def normalize(row):
result = { result = {
**row, **
row,
'links': list(filter(None, map(_getlink, json.loads(row.get('links'))))), 'links': list(filter(None, map(_getlink, json.loads(row.get('links'))))),
'country': countries.get(alpha_2=row.get('country_code', '').upper()).name, 'country': countries.get(alpha_2=row.get('country_code', '').upper()).name,
} }
@ -37,11 +38,11 @@ def filter_france(row):
def display(row): def display(row):
print(t.bold(row.get('name'))) print(t.bold(row.get('name')))
address = list(filter(None, ( address = list(
filter(None, (
' '.join(filter(None, (row.get('postal_code', None), row.get('city', None)))), ' '.join(filter(None, (row.get('postal_code', None), row.get('city', None)))),
row.get('county', None), row.get('county', None),
row.get('country'), row.get('country'), )))
)))
print(' - {}: {address}'.format(t.blue('address'), address=', '.join(address))) print(' - {}: {address}'.format(t.blue('address'), address=', '.join(address)))
print(' - {}: {links}'.format(t.blue('links'), links=', '.join(row['links']))) print(' - {}: {links}'.format(t.blue('links'), links=', '.join(row['links'])))
@ -51,10 +52,10 @@ def display(row):
if __name__ == '__main__': if __name__ == '__main__':
console_run( console_run(
extract_ods(SEARCH_URL, DATASET, timezone='Europe/Paris'), extract_ods(
SEARCH_URL, DATASET, timezone='Europe/Paris'),
normalize, normalize,
filter_france, filter_france,
tee(display), tee(display),
to_json('fablabs.json'), to_json('fablabs.json'),
output=True, output=True, )
)

View File

@ -21,29 +21,30 @@ setup(
name='bonobo', name='bonobo',
description='Bonobo', description='Bonobo',
license='Apache License, Version 2.0', license='Apache License, Version 2.0',
install_requires=[], install_requires=['psutil >=5.0,<5.1', ],
version=version, version=version,
long_description=read('README.rst'), long_description=read('README.rst'),
classifiers=read('classifiers.txt', tolines), classifiers=read('classifiers.txt', tolines),
packages=find_packages(exclude=['ez_setup', 'example', 'test']), packages=find_packages(exclude=['ez_setup', 'example', 'test']),
include_package_data=True, include_package_data=True,
extras_require={'dev': ['coverage >=4.2,<4.3', extras_require={
'dev': [
'coverage >=4.2,<4.3',
'mock >=2.0,<2.1', 'mock >=2.0,<2.1',
'nose >=1.3,<1.4', 'nose >=1.3,<1.4',
'pylint >=1.6,<1.7', 'pylint >=1.6,<1.7',
'pytest >=3,<4', 'pytest >=3,<4',
'pytest-cov >=2.4,<2.5', 'pytest-cov >=2.4,<2.5',
'sphinx', 'sphinx',
'sphinx_rtd_theme'], 'sphinx_rtd_theme',
'yapf',
],
'jupyter': ['ipywidgets >=6.0.0.beta5'] 'jupyter': ['ipywidgets >=6.0.0.beta5']
}, },
data_files=[ data_files=[('share/jupyter/nbextensions/bonobo', [
('share/jupyter/nbextensions/bonobo', [
'bonobo/ext/jupyter/static/extension.js', 'bonobo/ext/jupyter/static/extension.js',
'bonobo/ext/jupyter/static/index.js', 'bonobo/ext/jupyter/static/index.js',
'bonobo/ext/jupyter/static/index.js.map', 'bonobo/ext/jupyter/static/index.js.map',
]), ]), ],
],
url='https://github.com/hartym/bonobo', url='https://github.com/hartym/bonobo',
download_url='https://github.com/hartym/bonobo'.format(version=version), download_url='https://github.com/hartym/bonobo'.format(version=version), )
)

View File

@ -1,7 +1,7 @@
import pytest import pytest
from bonobo.core.graphs import Graph from bonobo.core.graphs import Graph
from bonobo.util.tokens import BEGIN from bonobo.util.tokens import Begin
identity = lambda x: x identity = lambda x: x
@ -10,7 +10,7 @@ def test_graph_outputs_of():
g = Graph() g = Graph()
# default graph only node # default graph only node
assert len(g.outputs_of(BEGIN)) == 0 assert len(g.outputs_of(Begin)) == 0
# unexisting node # unexisting node
with pytest.raises(KeyError): with pytest.raises(KeyError):
@ -40,4 +40,4 @@ def test_graph_add_chain():
g.add_chain(identity, identity, identity) g.add_chain(identity, identity, identity)
assert len(g.components) == 3 assert len(g.components) == 3
assert len(g.outputs_of(BEGIN)) == 1 assert len(g.outputs_of(Begin)) == 1

View File

@ -20,7 +20,7 @@ import pytest
from bonobo.core.errors import InactiveWritableError, InactiveReadableError from bonobo.core.errors import InactiveWritableError, InactiveReadableError
from bonobo.core.inputs import Input from bonobo.core.inputs import Input
from bonobo.util.tokens import BEGIN, END from bonobo.util.tokens import Begin, End
def test_input_runlevels(): def test_input_runlevels():
@ -32,15 +32,15 @@ def test_input_runlevels():
q.put('hello, unborn queue.') q.put('hello, unborn queue.')
# Begin # Begin
q.put(BEGIN) q.put(Begin)
assert q.alive and q._runlevel == 1 assert q.alive and q._runlevel == 1
q.put('foo') q.put('foo')
# Second Begin # Second Begin
q.put(BEGIN) q.put(Begin)
assert q.alive and q._runlevel == 2 assert q.alive and q._runlevel == 2
q.put('bar') q.put('bar')
q.put(END) q.put(End)
# FIFO # FIFO
assert q.get() == 'foo' assert q.get() == 'foo'
@ -56,7 +56,7 @@ def test_input_runlevels():
q.put('baz') q.put('baz')
# Now kill the queue... # Now kill the queue...
q.put(END) q.put(End)
with pytest.raises(InactiveWritableError): with pytest.raises(InactiveWritableError):
q.put('foo') q.put('foo')

View File

@ -4,6 +4,7 @@ from bonobo import inject, service
class MyFoo(): class MyFoo():
pass pass
def test_service_is_singleton(): def test_service_is_singleton():
@service @service
def foo(): def foo():
@ -21,4 +22,3 @@ def test_service_is_singleton():
assert type(foo()) == type(foo2()) assert type(foo()) == type(foo2())
assert foo2() is not foo() assert foo2() is not foo()

View File

@ -5,8 +5,7 @@ class MyThingWithStats(WithStatistics):
def get_stats(self, *args, **kwargs): def get_stats(self, *args, **kwargs):
return ( return (
('foo', 42), ('foo', 42),
('bar', 69), ('bar', 69), )
)
def test_with_statistics(): def test_with_statistics():