Merge remote-tracking branch 'upstream/0.2' into 0.2
This commit is contained in:
7
Makefile
7
Makefile
@ -1,7 +1,7 @@
|
|||||||
# This file has been auto-generated.
|
# This file has been auto-generated.
|
||||||
# All changes will be lost, see Projectfile.
|
# All changes will be lost, see Projectfile.
|
||||||
#
|
#
|
||||||
# Updated at 2017-01-19 12:12:07.294619
|
# Updated at 2017-04-21 10:27:25.709949
|
||||||
|
|
||||||
PYTHON ?= $(shell which python)
|
PYTHON ?= $(shell which python)
|
||||||
PYTHON_BASENAME ?= $(shell basename $(PYTHON))
|
PYTHON_BASENAME ?= $(shell basename $(PYTHON))
|
||||||
@ -10,6 +10,7 @@ PYTHON_REQUIREMENTS_DEV_FILE ?= requirements-dev.txt
|
|||||||
QUICK ?=
|
QUICK ?=
|
||||||
VIRTUAL_ENV ?= .virtualenv-$(PYTHON_BASENAME)
|
VIRTUAL_ENV ?= .virtualenv-$(PYTHON_BASENAME)
|
||||||
PIP ?= $(VIRTUAL_ENV)/bin/pip
|
PIP ?= $(VIRTUAL_ENV)/bin/pip
|
||||||
|
PIP_INSTALL_OPTIONS ?=
|
||||||
PYTEST ?= $(VIRTUAL_ENV)/bin/pytest
|
PYTEST ?= $(VIRTUAL_ENV)/bin/pytest
|
||||||
PYTEST_OPTIONS ?= --capture=no --cov=bonobo --cov-report html
|
PYTEST_OPTIONS ?= --capture=no --cov=bonobo --cov-report html
|
||||||
SPHINX_OPTS ?=
|
SPHINX_OPTS ?=
|
||||||
@ -24,13 +25,13 @@ YAPF_OPTIONS ?= -rip
|
|||||||
# Installs the local project dependencies.
|
# Installs the local project dependencies.
|
||||||
install: $(VIRTUAL_ENV)
|
install: $(VIRTUAL_ENV)
|
||||||
if [ -z "$(QUICK)" ]; then \
|
if [ -z "$(QUICK)" ]; then \
|
||||||
$(PIP) install -U pip wheel -r $(PYTHON_REQUIREMENTS_FILE) ; \
|
$(PIP) install -U pip wheel $(PIP_INSTALL_OPTIONS) -r $(PYTHON_REQUIREMENTS_FILE) ; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Installs the local project dependencies, including development-only libraries.
|
# Installs the local project dependencies, including development-only libraries.
|
||||||
install-dev: $(VIRTUAL_ENV)
|
install-dev: $(VIRTUAL_ENV)
|
||||||
if [ -z "$(QUICK)" ]; then \
|
if [ -z "$(QUICK)" ]; then \
|
||||||
$(PIP) install -U pip wheel -r $(PYTHON_REQUIREMENTS_DEV_FILE) ; \
|
$(PIP) install -U pip wheel $(PIP_INSTALL_OPTIONS) -r $(PYTHON_REQUIREMENTS_DEV_FILE) ; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Cleans up the local mess.
|
# Cleans up the local mess.
|
||||||
|
|||||||
@ -57,11 +57,13 @@ data_files = [
|
|||||||
|
|
||||||
entry_points = {
|
entry_points = {
|
||||||
'console_scripts': [
|
'console_scripts': [
|
||||||
'bonobo = bonobo.commands:entrypoint'
|
'bonobo = bonobo.commands:entrypoint',
|
||||||
|
'bb = bonobo.commands:entrypoint',
|
||||||
],
|
],
|
||||||
'bonobo.commands': [
|
'bonobo.commands': [
|
||||||
'init = bonobo.commands.init:register',
|
'init = bonobo.commands.init:register',
|
||||||
'run = bonobo.commands.run:register',
|
'run = bonobo.commands.run:register',
|
||||||
|
'version = bonobo.commands.version:register',
|
||||||
],
|
],
|
||||||
'edgy.project.features': [
|
'edgy.project.features': [
|
||||||
'bonobo = bonobo.ext.edgy.project.feature:BonoboFeature'
|
'bonobo = bonobo.ext.edgy.project.feature:BonoboFeature'
|
||||||
|
|||||||
12
README.rst
12
README.rst
@ -3,13 +3,13 @@
|
|||||||
|
|
||||||
Data-processing. By monkeys. For humans.
|
Data-processing. By monkeys. For humans.
|
||||||
|
|
||||||
Bonobo is a data-processing library for python 3.5+ that emphasis writing
|
Bonobo is a data-processing library for python 3.5+ that emphasises writing
|
||||||
simple, atomic, plain old python functions and chaining them using a basic
|
simple, atomic, plain old python functions and chaining them using a basic
|
||||||
acyclic graph. The nodes will need a bit of plumbery to be runnable in
|
acyclic graph. The nodes will need a bit of plumbery to be runnable in
|
||||||
different means (iteratively, in threads, in processes, on different machines
|
different means (iteratively, in threads, in processes, on different machines
|
||||||
...) but that should be as transparent as possible.
|
...) but that should be as transparent as possible.
|
||||||
|
|
||||||
The only thing asked to the developer is to either write "pure" functions to
|
The only thing asked of the developer is to write "pure" functions to
|
||||||
process data (create a new dict, don't change in place, etc.), and everything
|
process data (create a new dict, don't change in place, etc.), and everything
|
||||||
should be fine from this point.
|
should be fine from this point.
|
||||||
|
|
||||||
@ -65,13 +65,13 @@ Version 0.2
|
|||||||
* Changelog
|
* Changelog
|
||||||
* Migration guide
|
* Migration guide
|
||||||
* Update documentation
|
* Update documentation
|
||||||
* Threaded does not terminate anymore
|
* Threaded does not terminate anymore (fixed ?)
|
||||||
* More tests
|
* More tests
|
||||||
|
|
||||||
Bugs:
|
Bugs:
|
||||||
|
|
||||||
- KeyboardInterrupt does not work anymore.
|
- KeyboardInterrupt does not work anymore. (fixed ?)
|
||||||
- ThreadPool does not stop anymore.
|
- ThreadPool does not stop anymore. (fixed ?)
|
||||||
|
|
||||||
Configuration
|
Configuration
|
||||||
.............
|
.............
|
||||||
@ -102,7 +102,7 @@ Random thoughts and things to do
|
|||||||
* NaiveStrategy
|
* NaiveStrategy
|
||||||
* PoolExecutionStrategy
|
* PoolExecutionStrategy
|
||||||
* ThreadPoolExecutionStrategy
|
* ThreadPoolExecutionStrategy
|
||||||
* ProcesPoolExecutionStrategy
|
* ProcessPoolExecutionStrategy
|
||||||
* ThreadExecutionStrategy
|
* ThreadExecutionStrategy
|
||||||
* ProcessExecutionStrategy
|
* ProcessExecutionStrategy
|
||||||
|
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
__version__ = '0.1.6'
|
__version__ = '0.2.0'
|
||||||
|
|||||||
9
bonobo/commands/version.py
Normal file
9
bonobo/commands/version.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
import bonobo
|
||||||
|
|
||||||
|
|
||||||
|
def execute():
|
||||||
|
print('{} v.{}'.format(bonobo.__name__, bonobo.__version__))
|
||||||
|
|
||||||
|
|
||||||
|
def register(parser):
|
||||||
|
return execute
|
||||||
@ -1,7 +1,7 @@
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
|
|
||||||
class Timer(object):
|
class Timer:
|
||||||
"""
|
"""
|
||||||
Context manager used to time execution of stuff.
|
Context manager used to time execution of stuff.
|
||||||
"""
|
"""
|
||||||
|
|||||||
3
docs/_templates/index.html
vendored
3
docs/_templates/index.html
vendored
@ -3,7 +3,8 @@
|
|||||||
{% block body %}
|
{% block body %}
|
||||||
|
|
||||||
<div style="border: 2px solid red; font-weight: bold; margin: 1em; padding: 1em">
|
<div style="border: 2px solid red; font-weight: bold; margin: 1em; padding: 1em">
|
||||||
Rewrite in progress, things may be broken for now. Please give us some time to finish painting the walls.
|
Bonobo is currently <strong>ALPHA</strong> software. That means that the doc is not finished, and that
|
||||||
|
some APIs will change.
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<h1 style="text-align: center">
|
<h1 style="text-align: center">
|
||||||
|
|||||||
1
docs/_templates/sidebarinfos.html
vendored
1
docs/_templates/sidebarinfos.html
vendored
@ -7,4 +7,5 @@
|
|||||||
<p>
|
<p>
|
||||||
<iframe src="http://ghbtns.com/github-btn.html?user=python-bonobo&repo=bonobo&type=watch&count=true&size=small"
|
<iframe src="http://ghbtns.com/github-btn.html?user=python-bonobo&repo=bonobo&type=watch&count=true&size=small"
|
||||||
allowtransparency="true" frameborder="0" scrolling="0" width="200px" height="35px"></iframe>
|
allowtransparency="true" frameborder="0" scrolling="0" width="200px" height="35px"></iframe>
|
||||||
|
<script async defer src="https://bonobo-slack.herokuapp.com/slackin.js"></script>
|
||||||
</p>
|
</p>
|
||||||
|
|||||||
@ -82,7 +82,7 @@ For example, doing the following may cause unexpected problems:
|
|||||||
'foo': compute_something()
|
'foo': compute_something()
|
||||||
})
|
})
|
||||||
# Still bad! Don't mutate the dict!
|
# Still bad! Don't mutate the dict!
|
||||||
d['bar']: compute_anotherthing()
|
d['bar'] = compute_anotherthing()
|
||||||
return d
|
return d
|
||||||
|
|
||||||
The problem is easy to understand: as **Bonobo** won't make copies of your dict, the same dict will be passed along the
|
The problem is easy to understand: as **Bonobo** won't make copies of your dict, the same dict will be passed along the
|
||||||
|
|||||||
@ -11,7 +11,7 @@ happened because of **rdc.etl**.
|
|||||||
|
|
||||||
It would have been counterproductive to migrate the same codebase:
|
It would have been counterproductive to migrate the same codebase:
|
||||||
|
|
||||||
* a lot of mistakes were impossible to fix in a backward compatible way (for example, transormations were stateful,
|
* a lot of mistakes were impossible to fix in a backward compatible way (for example, transformations were stateful,
|
||||||
making them more complicated to write and impossible to reuse, a lot of effort was used to make the components have
|
making them more complicated to write and impossible to reuse, a lot of effort was used to make the components have
|
||||||
multi-inputs and multi-outputs, although in 99% of cases it's useless, etc.).
|
multi-inputs and multi-outputs, although in 99% of cases it's useless, etc.).
|
||||||
* we also wanted to develop something that took advantage of modern python versions, hence the choice of 3.5+.
|
* we also wanted to develop something that took advantage of modern python versions, hence the choice of 3.5+.
|
||||||
|
|||||||
@ -15,7 +15,7 @@ Let's write a first data transformation
|
|||||||
We'll start with the simplest transformation possible.
|
We'll start with the simplest transformation possible.
|
||||||
|
|
||||||
In **Bonobo**, a transformation is a plain old python callable, not more, not less. Let's write one that takes a string
|
In **Bonobo**, a transformation is a plain old python callable, not more, not less. Let's write one that takes a string
|
||||||
and uppercase it.
|
and uppercases it.
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
@ -68,7 +68,7 @@ Let's chain the three transformations together and run the transformation graph:
|
|||||||
}
|
}
|
||||||
|
|
||||||
We use the :func:`bonobo.run` helper that hides the underlying object composition necessary to actually run the
|
We use the :func:`bonobo.run` helper that hides the underlying object composition necessary to actually run the
|
||||||
transformations in parralel, because it's simpler.
|
transformations in parallel, because it's simpler.
|
||||||
|
|
||||||
Depending on what you're doing, you may use the shorthand helper method, or the verbose one. Always favor the shorter,
|
Depending on what you're doing, you may use the shorthand helper method, or the verbose one. Always favor the shorter,
|
||||||
if you don't need to tune the graph or the execution strategy (see below).
|
if you don't need to tune the graph or the execution strategy (see below).
|
||||||
@ -113,12 +113,12 @@ Concepts and definitions
|
|||||||
by yielding values (a.k.a returning a generator).
|
by yielding values (a.k.a returning a generator).
|
||||||
* Transformation graph (or Graph): a set of transformations tied together in a :class:`bonobo.Graph` instance, which is a simple
|
* Transformation graph (or Graph): a set of transformations tied together in a :class:`bonobo.Graph` instance, which is a simple
|
||||||
directed acyclic graph (also referred to as a DAG, sometimes).
|
directed acyclic graph (also referred to as a DAG, sometimes).
|
||||||
* Node: a transformation within the context of a transformation graph. The node defines what to do whith a
|
* Node: a transformation within the context of a transformation graph. The node defines what to do with a
|
||||||
transformation's output, and especially what other node to feed with the output.
|
transformation's output, and especially what other nodes to feed with the output.
|
||||||
* Execution strategy (or strategy): a way to run a transformation graph. It's responsibility is mainly to parralelize
|
* Execution strategy (or strategy): a way to run a transformation graph. Its responsibility is mainly to parallelize
|
||||||
(or not) the transformations, on one or more processes and/or computers, and to set up the right queuing mechanism for
|
(or not) the transformations, on one or more processes and/or computers, and to set up the right queuing mechanism for
|
||||||
transformations' inputs and outputs.
|
transformations' inputs and outputs.
|
||||||
* Execution context (or context): a wrapper around a node that holds the state for it. If the node need the state, there
|
* Execution context (or context): a wrapper around a node that holds the state for it. If the node needs state, there
|
||||||
are tools available in bonobo to feed it to the transformation using additional call parameters, and so every
|
are tools available in bonobo to feed it to the transformation using additional call parameters, and so every
|
||||||
transformation will be atomic.
|
transformation will be atomic.
|
||||||
|
|
||||||
|
|||||||
@ -2,7 +2,7 @@ Working with files
|
|||||||
==================
|
==================
|
||||||
|
|
||||||
Bonobo would not be of any use if the aim was to uppercase small lists of strings. In fact, Bonobo should not be used
|
Bonobo would not be of any use if the aim was to uppercase small lists of strings. In fact, Bonobo should not be used
|
||||||
if you don't expect any gain from parralelization/distribution of tasks.
|
if you don't expect any gain from parallelization/distribution of tasks.
|
||||||
|
|
||||||
Let's take the following graph as an example:
|
Let's take the following graph as an example:
|
||||||
|
|
||||||
@ -19,7 +19,7 @@ the :class:`bonobo.ThreadPoolExecutorStrategy`), which allows to start running `
|
|||||||
of data, and `C` as soon as `B` yielded the first line of data, even if `A` or `B` still have data to yield.
|
of data, and `C` as soon as `B` yielded the first line of data, even if `A` or `B` still have data to yield.
|
||||||
|
|
||||||
The great thing is that you generally don't have to think about it. Just be aware that your components will be run in
|
The great thing is that you generally don't have to think about it. Just be aware that your components will be run in
|
||||||
parralel (with the default strategy), and don't worry too much about blocking components, as they won't block their
|
parallel (with the default strategy), and don't worry too much about blocking components, as they won't block their
|
||||||
siblings when run in bonobo.
|
siblings when run in bonobo.
|
||||||
|
|
||||||
That being said, let's try to write a more real-world like transformation.
|
That being said, let's try to write a more real-world like transformation.
|
||||||
|
|||||||
24
setup.py
24
setup.py
@ -10,9 +10,12 @@ tolines = lambda c: list(filter(None, map(lambda s: s.strip(), c.split('\n'))))
|
|||||||
|
|
||||||
|
|
||||||
def read(filename, flt=None):
|
def read(filename, flt=None):
|
||||||
with open(filename) as f:
|
try:
|
||||||
content = f.read().strip()
|
with open(filename) as f:
|
||||||
return flt(content) if callable(flt) else content
|
content = f.read().strip()
|
||||||
|
return flt(content) if callable(flt) else content
|
||||||
|
except EnvironmentError:
|
||||||
|
return ''
|
||||||
|
|
||||||
|
|
||||||
# Py3 compatibility hacks, borrowed from IPython.
|
# Py3 compatibility hacks, borrowed from IPython.
|
||||||
@ -26,8 +29,12 @@ except NameError:
|
|||||||
|
|
||||||
|
|
||||||
version_ns = {}
|
version_ns = {}
|
||||||
execfile(os.path.join(root_dir, 'bonobo/_version.py'), version_ns)
|
try:
|
||||||
version = version_ns.get('__version__', 'dev')
|
execfile(os.path.join(root_dir, 'bonobo/_version.py'), version_ns)
|
||||||
|
except EnvironmentError:
|
||||||
|
version = 'dev'
|
||||||
|
else:
|
||||||
|
version = version_ns.get('__version__', 'dev')
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='bonobo',
|
name='bonobo',
|
||||||
@ -58,8 +65,11 @@ setup(
|
|||||||
'jupyter': ['jupyter >=1.0,<1.1', 'ipywidgets >=6.0.0.beta5']
|
'jupyter': ['jupyter >=1.0,<1.1', 'ipywidgets >=6.0.0.beta5']
|
||||||
},
|
},
|
||||||
entry_points={
|
entry_points={
|
||||||
'bonobo.commands': ['init = bonobo.commands.init:register', 'run = bonobo.commands.run:register'],
|
'bonobo.commands': [
|
||||||
'console_scripts': ['bonobo = bonobo.commands:entrypoint'],
|
'init = bonobo.commands.init:register', 'run = bonobo.commands.run:register',
|
||||||
|
'version = bonobo.commands.version:register'
|
||||||
|
],
|
||||||
|
'console_scripts': ['bonobo = bonobo.commands:entrypoint', 'bb = bonobo.commands:entrypoint'],
|
||||||
'edgy.project.features': ['bonobo = '
|
'edgy.project.features': ['bonobo = '
|
||||||
'bonobo.ext.edgy.project.feature:BonoboFeature']
|
'bonobo.ext.edgy.project.feature:BonoboFeature']
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user