Minor fixes and update documentation. Preparing the upcoming 0.2 release.
This commit is contained in:
2
Makefile
2
Makefile
@ -1,7 +1,7 @@
|
||||
# This file has been auto-generated.
|
||||
# All changes will be lost, see Projectfile.
|
||||
#
|
||||
# Updated at 2017-01-10 23:15:21.478899
|
||||
# Updated at 2017-01-19 12:12:07.294619
|
||||
|
||||
PYTHON ?= $(shell which python)
|
||||
PYTHON_BASENAME ?= $(shell basename $(PYTHON))
|
||||
|
||||
@ -40,6 +40,7 @@ extras_require = {
|
||||
'pylint >=1.6,<1.7',
|
||||
'pytest >=3,<4',
|
||||
'pytest-cov >=2.4,<2.5',
|
||||
'pytest-timeout >=1.2,<1.3',
|
||||
'sphinx',
|
||||
'sphinx_rtd_theme',
|
||||
'yapf',
|
||||
|
||||
@ -68,6 +68,11 @@ Version 0.2
|
||||
* Threaded does not terminate anymore
|
||||
* More tests
|
||||
|
||||
Bugs:
|
||||
|
||||
- KeyboardInterrupt does not work anymore.
|
||||
- ThreadPool does not stop anymore.
|
||||
|
||||
Configuration
|
||||
.............
|
||||
|
||||
@ -119,6 +124,7 @@ Random thoughts and things to do
|
||||
def execute(graph: Graph, *, strategy: ExecutionStrategy, plugins: List[Plugin]) -> Execution:
|
||||
pass
|
||||
|
||||
* Handling console. Can we use a queue, and replace stdout / stderr ?
|
||||
|
||||
|
||||
|
||||
|
||||
@ -1,30 +1,31 @@
|
||||
""" Bonobo data-processing toolkit.
|
||||
# Bonobo data-processing toolkit.
|
||||
#
|
||||
# Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of data
|
||||
# transformations using a simple directed graph of python callables.
|
||||
#
|
||||
# Licensed under Apache License 2.0, read the LICENSE file in the root of the source tree.
|
||||
|
||||
Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of data
|
||||
transformations using a simple directed graph of python callables.
|
||||
"""Bonobo data-processing toolkit main module."""
|
||||
|
||||
Read more at http://docs.bonobo-project.org/
|
||||
|
||||
Copyright 2012-2014 Romain Dorgueil
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
assert (sys.version_info >= (3, 5)), 'Python 3.5+ is required to use Bonobo.'
|
||||
|
||||
from ._version import __version__
|
||||
from .config import __all__ as __all_config__
|
||||
from .context import __all__ as __all_context__
|
||||
from .core import __all__ as __all_core__
|
||||
from .io import __all__ as __all_io__
|
||||
from .util import __all__ as __all_util__
|
||||
|
||||
__all__ = __all_config__ + __all_context__ + __all_core__ + __all_io__ + __all_util__ + [
|
||||
'__version__',
|
||||
'create_strategy',
|
||||
'get_examples_path',
|
||||
'run',
|
||||
]
|
||||
|
||||
from .config import *
|
||||
from .context import *
|
||||
from .core import *
|
||||
@ -40,56 +41,43 @@ STRATEGIES = {
|
||||
}
|
||||
|
||||
|
||||
def run(graph, *chain, strategy=None, plugins=None):
|
||||
def get_examples_path(*pathsegments):
    """Build a path, as a string, below the ``examples`` directory next to this module.

    :param pathsegments: path components appended after the ``examples`` directory.
    :return: the resulting path converted with ``str()``.
    """
    import os
    import pathlib

    package_dir = os.path.dirname(__file__)
    examples_dir = pathlib.Path(package_dir, 'examples')
    return str(examples_dir.joinpath(*pathsegments))
|
||||
|
||||
|
||||
def create_strategy(name=None):
    """Return a strategy instance for ``name``.

    :param name: a ``Strategy`` instance (returned unchanged), a key of ``STRATEGIES``, or ``None`` to use
        ``DEFAULT_STRATEGY``.
    :return: ``name`` itself when it already is a ``Strategy``, otherwise the result of calling the factory
        registered in ``STRATEGIES``.
    :raises RuntimeError: when ``name`` is not a key of ``STRATEGIES``; the message lists the available keys.
    """
    from bonobo.core.strategies.base import Strategy
    import logging

    # An already-built strategy is handed back untouched.
    if isinstance(name, Strategy):
        return name

    strategy_name = DEFAULT_STRATEGY if name is None else name

    logging.debug('Creating strategy {}...'.format(strategy_name))

    try:
        factory = STRATEGIES[strategy_name]
    except KeyError as exc:
        available = ', '.join(sorted(STRATEGIES.keys()))
        message = 'Invalid strategy {}. Available choices: {}.'.format(repr(strategy_name), available)
        raise RuntimeError(message) from exc

    return factory()
|
||||
|
||||
|
||||
def run(graph, *chain, strategy=None, plugins=None):
    """Execute a graph with the requested strategy.

    :param graph: the graph to execute or, in the deprecated form, the first element of a chain.
    :param chain: deprecated; when given, ``Graph(graph, *chain)`` is built and a warning is emitted.
    :param strategy: anything accepted by ``create_strategy`` (strategy instance, name or ``None``).
    :param plugins: passed through unchanged to the strategy's ``execute`` method.
    :return: whatever ``strategy.execute(graph, plugins=plugins)`` returns.
    """
    # Imported locally: this module deletes its top-level ``warnings`` name once
    # it is initialised, so a global lookup at call time would raise NameError.
    import warnings

    # Strategy resolution (instance / name / default) is fully delegated.
    strategy = create_strategy(strategy)

    if chain:
        warnings.warn('DEPRECATED. You should pass a Graph instance instead of a chain.')
        from bonobo import Graph
        graph = Graph(graph, *chain)

    return strategy.execute(graph, plugins=plugins)
|
||||
|
||||
|
||||
__all__ = [
|
||||
'Bag',
|
||||
'Configurable',
|
||||
'ContextProcessor',
|
||||
'contextual',
|
||||
'CsvReader',
|
||||
'CsvWriter',
|
||||
'FileReader',
|
||||
'FileWriter',
|
||||
'Graph',
|
||||
'JsonReader',
|
||||
'JsonWriter',
|
||||
'NOT_MODIFIED',
|
||||
'NaiveStrategy',
|
||||
'Option',
|
||||
'ProcessPoolExecutorStrategy',
|
||||
'ThreadPoolExecutorStrategy',
|
||||
'__version__',
|
||||
'console_run',
|
||||
'inject',
|
||||
'jupyter_run',
|
||||
'limit',
|
||||
'log',
|
||||
'noop',
|
||||
'pprint',
|
||||
'service',
|
||||
'tee',
|
||||
]
|
||||
|
||||
del warnings
|
||||
del sys
|
||||
del warnings
|
||||
|
||||
@ -1,22 +1,25 @@
|
||||
import argparse
|
||||
|
||||
import logging
|
||||
from stevedore import ExtensionManager
|
||||
|
||||
|
||||
def entrypoint(args=None):
    """Parse the command line and dispatch to the selected bonobo sub-command.

    Sub-commands are discovered through the ``bonobo.commands`` extension namespace. Each extension's plugin is
    called once with its own sub-parser, and the callable it returns is stored under the extension name.

    :param args: list of argument strings handed to ``parse_args``; ``None`` keeps argparse's default source.
    """
    parser = argparse.ArgumentParser()

    subparsers = parser.add_subparsers(dest='command')
    subparsers.required = True

    # Maps the sub-command name to the callable returned by its plugin.
    commands = {}

    def register_extension(ext, commands=commands):
        try:
            command_parser = subparsers.add_parser(ext.name)
            commands[ext.name] = ext.plugin(command_parser)
        except Exception:
            # A broken extension is logged and skipped so the others still load.
            logging.exception('Error while loading command {}.'.format(ext.name))

    mgr = ExtensionManager(namespace='bonobo.commands', )
    mgr.map(register_extension)

    # Parse and dispatch exactly once: the remaining namespace entries become
    # keyword arguments of the selected command.
    args = parser.parse_args(args).__dict__
    commands[args.pop('command')](**args)
|
||||
|
||||
@ -1,13 +1,21 @@
|
||||
import argparse
|
||||
|
||||
from bonobo import Graph, console_run
|
||||
from bonobo import Graph, run
|
||||
|
||||
|
||||
def execute(file):
|
||||
def execute(file, quiet=False):
|
||||
with file:
|
||||
code = compile(file.read(), file.name, 'exec')
|
||||
|
||||
context = {}
|
||||
# TODO: A few special variables should be set before running the file:
|
||||
#
|
||||
# See:
|
||||
# - https://docs.python.org/3/reference/import.html#import-mod-attrs
|
||||
# - https://docs.python.org/3/library/runpy.html#runpy.run_module
|
||||
context = {
|
||||
'__name__': '__bonobo__',
|
||||
'__file__': file.name,
|
||||
}
|
||||
|
||||
try:
|
||||
exec(code, context)
|
||||
@ -16,14 +24,19 @@ def execute(file):
|
||||
|
||||
graphs = dict((k, v) for k, v in context.items() if isinstance(v, Graph))
|
||||
|
||||
assert len(graphs) == 1, 'Having more than one graph definition in one file is unsupported for now, but it is ' \
|
||||
'something that will be implemented in the future. '
|
||||
assert len(graphs) == 1, ('Having zero or more than one graph definition in one file is unsupported for now, '
|
||||
'but it is something that will be implemented in the future.\n\nExpected: 1, got: {}.').format(
|
||||
len(graphs))
|
||||
|
||||
name, graph = list(graphs.items())[0]
|
||||
|
||||
return console_run(graph)
|
||||
# todo if console and not quiet, then add the console plugin
|
||||
# todo when better console plugin, add it if console and just disable display
|
||||
|
||||
return run(graph)
|
||||
|
||||
|
||||
def register(parser):
    """Declare the arguments of this command on ``parser`` and return its handler.

    :param parser: the argument parser receiving the ``file`` positional and the ``--quiet`` flag.
    :return: the ``execute`` callable.
    """
    source_file_type = argparse.FileType()
    parser.add_argument('file', type=source_file_type)
    parser.add_argument('--quiet', action='store_true')
    return execute
|
||||
|
||||
@ -1,3 +1,8 @@
|
||||
__all__ = [
|
||||
'Configurable',
|
||||
'Option',
|
||||
]
|
||||
|
||||
class Option:
|
||||
def __init__(self, type=None, *, required=False, default=None):
|
||||
self.name = None
|
||||
|
||||
@ -23,7 +23,7 @@ class GraphExecutionContext:
|
||||
|
||||
@property
def alive(self):
    """Tell whether at least one node in ``self.nodes`` reports itself as alive."""
    return any(node.alive for node in self.nodes)
|
||||
|
||||
def __init__(self, graph, plugins=None):
|
||||
self.graph = graph
|
||||
|
||||
@ -26,3 +26,6 @@ class Graph:
|
||||
_next = self.add_node(node)
|
||||
self.outputs_of(_input, create=True).add(_next)
|
||||
_input = _next
|
||||
|
||||
def __len__(self):
    """Return the number of entries in ``self.nodes``."""
    nodes = self.nodes
    return len(nodes)
|
||||
|
||||
@ -39,11 +39,9 @@ class ExecutorStrategy(Strategy):
|
||||
futures.append(executor.submit(_runner))
|
||||
|
||||
for node_context in context.nodes:
|
||||
|
||||
def _runner(node_context=node_context):
|
||||
node_context.start()
|
||||
node_context.loop()
|
||||
|
||||
futures.append(executor.submit(_runner))
|
||||
|
||||
while context.alive:
|
||||
|
||||
0
bonobo/examples/__init__.py
Normal file
0
bonobo/examples/__init__.py
Normal file
0
bonobo/examples/datasets/__init__.py
Normal file
0
bonobo/examples/datasets/__init__.py
Normal file
@ -4,7 +4,7 @@ from bonobo import console_run
|
||||
from bonobo.ext.opendatasoft import from_opendatasoft_api
|
||||
from bonobo.io.file import FileWriter
|
||||
|
||||
OUTPUT_FILENAME = realpath(join(dirname(__file__), 'datasets/cheap_coffeeshops_in_paris.txt'))
|
||||
OUTPUT_FILENAME = realpath(join(dirname(__file__), 'coffeeshops.txt'))
|
||||
|
||||
console_run(
|
||||
from_opendatasoft_api(
|
||||
@ -3,7 +3,7 @@ import os
|
||||
|
||||
from blessings import Terminal
|
||||
|
||||
from bonobo import tee, JsonWriter, Graph
|
||||
from bonobo import Tee, JsonWriter, Graph, get_examples_path
|
||||
from bonobo.ext.opendatasoft import from_opendatasoft_api
|
||||
|
||||
try:
|
||||
@ -63,11 +63,11 @@ graph = Graph(
|
||||
),
|
||||
normalize,
|
||||
filter_france,
|
||||
tee(display),
|
||||
JsonWriter(path=os.path.join(__path__, 'datasets/coffeeshops.txt')),
|
||||
Tee(display),
|
||||
JsonWriter(path=get_examples_path('datasets/fablabs.txt')),
|
||||
)
|
||||
|
||||
if __name__ == '__main__':
|
||||
import bonobo
|
||||
from bonobo import run
|
||||
|
||||
bonobo.run(graph)
|
||||
run(graph)
|
||||
0
bonobo/examples/files/__init__.py
Normal file
0
bonobo/examples/files/__init__.py
Normal file
11
bonobo/examples/files/csv.py
Normal file
11
bonobo/examples/files/csv.py
Normal file
@ -0,0 +1,11 @@
|
||||
from bonobo import CsvReader, Graph, get_examples_path
|
||||
|
||||
graph = Graph(
|
||||
CsvReader(path=get_examples_path('datasets/coffeeshops.txt')),
|
||||
print,
|
||||
)
|
||||
|
||||
if __name__ == '__main__':
|
||||
import bonobo
|
||||
|
||||
bonobo.run(graph)
|
||||
0
bonobo/examples/tutorials/__init__.py
Normal file
0
bonobo/examples/tutorials/__init__.py
Normal file
14
bonobo/examples/tutorials/tut02_01_read.py
Normal file
14
bonobo/examples/tutorials/tut02_01_read.py
Normal file
@ -0,0 +1,14 @@
|
||||
import os
|
||||
import pathlib
|
||||
|
||||
import bonobo
|
||||
|
||||
# Directory of this tutorial module; kept as a module attribute for compatibility.
workdir = pathlib.Path(os.path.dirname(__file__))

# The dataset is resolved through the package helper, like the other examples:
# this module lives in ``examples/tutorials`` while the datasets are stored in
# ``examples/datasets``, and the helper returns a plain string path.
graph = bonobo.Graph(
    bonobo.FileReader(path=bonobo.get_examples_path('datasets/coffeeshops.txt')),
    print,
)

if __name__ == '__main__':
    bonobo.run(graph)
|
||||
7
bonobo/examples/types/__init__.py
Normal file
7
bonobo/examples/types/__init__.py
Normal file
@ -0,0 +1,7 @@
|
||||
from . import bags, dicts, strings
|
||||
|
||||
__all__ = [
|
||||
'bags',
|
||||
'dicts',
|
||||
'strings',
|
||||
]
|
||||
45
bonobo/examples/types/bags.py
Normal file
45
bonobo/examples/types/bags.py
Normal file
@ -0,0 +1,45 @@
|
||||
"""
|
||||
Example on how to use :class:`bonobo.Bag` instances to pass flexible args/kwargs to the next callable.
|
||||
|
||||
.. graphviz::
|
||||
|
||||
digraph {
|
||||
rankdir = LR;
|
||||
stylesheet = "../_static/graphs.css";
|
||||
|
||||
BEGIN [shape="point"];
|
||||
BEGIN -> "extract()" -> "transform(...)" -> "load(...)";
|
||||
}
|
||||
|
||||
"""
|
||||
|
||||
|
||||
from random import randint
|
||||
|
||||
from bonobo import Bag, Graph
|
||||
|
||||
|
||||
def extract():
    """Yield three :class:`Bag` instances, each built with a single ``topic`` keyword argument."""
    for topic in ('foo', 'bar', 'baz'):
        yield Bag(topic=topic)
|
||||
|
||||
|
||||
def transform(topic: str):
    """Return ``Bag.inherit`` called with a title-cased ``title`` and a random ``rand`` in [10, 99].

    :param topic: the topic string to title-case.
    """
    title = topic.title()
    rand = randint(10, 99)
    return Bag.inherit(title=title, rand=rand)
|
||||
|
||||
|
||||
def load(topic: str, title: str, rand: int):
    """Print one line combining the title, the topic and the random value."""
    line = '{} ({}) wait={}'.format(title, topic, rand)
    print(line)
|
||||
|
||||
|
||||
graph = Graph()
|
||||
graph.add_chain(extract, transform, load)
|
||||
|
||||
if __name__ == '__main__':
|
||||
from bonobo import run
|
||||
|
||||
run(graph)
|
||||
47
bonobo/examples/types/dicts.py
Normal file
47
bonobo/examples/types/dicts.py
Normal file
@ -0,0 +1,47 @@
|
||||
"""
|
||||
Example on how to use simple python dictionaries to communicate between transformations.
|
||||
|
||||
.. graphviz::
|
||||
|
||||
digraph {
|
||||
rankdir = LR;
|
||||
stylesheet = "../_static/graphs.css";
|
||||
|
||||
BEGIN [shape="point"];
|
||||
BEGIN -> "extract()" -> "transform(row: dict)" -> "load(row: dict)";
|
||||
}
|
||||
|
||||
"""
|
||||
|
||||
from random import randint
|
||||
|
||||
from bonobo import Graph
|
||||
|
||||
|
||||
def extract():
    """Yield three single-key dictionaries, one per sample topic."""
    for topic in ('foo', 'bar', 'baz'):
        yield {'topic': topic}
|
||||
|
||||
|
||||
def transform(row: dict):
    """Return a new dictionary with the title-cased topic and a random integer in [10, 99].

    :param row: mapping providing a ``'topic'`` string; it is not modified.
    """
    titled = row['topic'].title()
    number = randint(10, 99)
    return {'topic': titled, 'randint': number}
|
||||
|
||||
|
||||
def load(row: dict):
    """Print the received row."""
    print(row)
|
||||
|
||||
|
||||
graph = Graph(
|
||||
extract,
|
||||
transform,
|
||||
load
|
||||
)
|
||||
|
||||
if __name__ == '__main__':
|
||||
from bonobo import run
|
||||
|
||||
run(graph)
|
||||
43
bonobo/examples/types/strings.py
Normal file
43
bonobo/examples/types/strings.py
Normal file
@ -0,0 +1,43 @@
|
||||
"""
|
||||
Example on how to use simple python strings to communicate between transformations.
|
||||
|
||||
.. graphviz::
|
||||
|
||||
digraph {
|
||||
rankdir = LR;
|
||||
stylesheet = "../_static/graphs.css";
|
||||
|
||||
BEGIN [shape="point"];
|
||||
BEGIN -> "extract()" -> "transform(s: str)" -> "load(s: str)";
|
||||
}
|
||||
|
||||
"""
|
||||
from random import randint
|
||||
|
||||
from bonobo import Graph
|
||||
|
||||
|
||||
def extract():
    """Yield the three sample strings, in order."""
    yield from ('foo', 'bar', 'baz')
|
||||
|
||||
|
||||
def transform(s: str):
    """Return the title-cased string followed by a random integer in [10, 99] between parentheses."""
    titled = s.title()
    number = randint(10, 99)
    return '{} ({})'.format(titled, number)
|
||||
|
||||
|
||||
def load(s: str):
    """Print the received string."""
    print(s)
|
||||
|
||||
|
||||
graph = Graph(
|
||||
extract,
|
||||
transform,
|
||||
load
|
||||
)
|
||||
|
||||
if __name__ == '__main__':
|
||||
from bonobo import run
|
||||
|
||||
run(graph)
|
||||
0
bonobo/ext/edgy/__init__.py
Normal file
0
bonobo/ext/edgy/__init__.py
Normal file
0
bonobo/ext/edgy/project/__init__.py
Normal file
0
bonobo/ext/edgy/project/__init__.py
Normal file
26
bonobo/ext/edgy/project/feature.py
Normal file
26
bonobo/ext/edgy/project/feature.py
Normal file
@ -0,0 +1,26 @@
|
||||
try:
|
||||
import edgy.project
|
||||
except ImportError as e:
|
||||
import logging
|
||||
|
||||
logging.exception('You must install edgy.project to use this.')
|
||||
|
||||
import os
|
||||
|
||||
from edgy.project.events import subscribe
|
||||
from edgy.project.feature import Feature, SUPPORT_PRIORITY
|
||||
|
||||
|
||||
class BonoboFeature(Feature):
    """``edgy.project`` feature that renders bonobo starter files into a project."""

    requires = {'python'}

    @subscribe('edgy.project.feature.make.on_generate', priority=SUPPORT_PRIORITY)
    def on_make_generate(self, event):
        """Handle the Makefile generation event; for now it only reads ``event.makefile``."""
        # Placeholder: nothing is added to the Makefile yet.
        makefile = event.makefile

    @subscribe('edgy.project.on_start', priority=SUPPORT_PRIORITY)
    def on_start(self, event):
        """Render each starter template into the package directory derived from the project name."""
        # Dotted package name -> relative directory path.
        package_path = event.setup['name'].replace('.', os.sep)

        # One-element tuple: a bare parenthesised string would be iterated
        # character by character, rendering 'e.py', 'x.py', ... instead.
        for file in ('example_graph', ):
            # NOTE(review): templates are looked up under 'tornado' -- this looks
            # copied from another feature; confirm the template directory.
            self.render_file(os.path.join(package_path, file + '.py'), os.path.join('tornado', file + '.py.j2'))
|
||||
@ -1,6 +1,7 @@
|
||||
import csv
|
||||
|
||||
from bonobo import Option, ContextProcessor, contextual
|
||||
from bonobo.config import Option
|
||||
from bonobo.context import ContextProcessor, contextual
|
||||
from bonobo.util.objects import ValueHolder
|
||||
from .file import FileReader, FileWriter, FileHandler
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import json
|
||||
|
||||
from bonobo import ContextProcessor, contextual
|
||||
from bonobo.context import ContextProcessor, contextual
|
||||
from .file import FileWriter, FileReader
|
||||
|
||||
__all__ = ['JsonWriter', ]
|
||||
|
||||
@ -9,14 +9,14 @@ from .helpers import console_run, jupyter_run
|
||||
from .tokens import NOT_MODIFIED
|
||||
|
||||
__all__ = [
|
||||
'Limit',
|
||||
'NOT_MODIFIED',
|
||||
'PrettyPrint',
|
||||
'Tee',
|
||||
'console_run',
|
||||
'jupyter_run',
|
||||
'limit',
|
||||
'log',
|
||||
'noop',
|
||||
'pprint',
|
||||
'tee',
|
||||
]
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@ def identity(x):
|
||||
return x
|
||||
|
||||
|
||||
def limit(n=10):
|
||||
def Limit(n=10):
|
||||
i = 0
|
||||
|
||||
def _limit(*args, **kwargs):
|
||||
@ -37,7 +37,7 @@ def limit(n=10):
|
||||
return _limit
|
||||
|
||||
|
||||
def tee(f):
|
||||
def Tee(f):
|
||||
@functools.wraps(f)
|
||||
def wrapped(*args, **kwargs):
|
||||
nonlocal f
|
||||
@ -47,10 +47,10 @@ def tee(f):
|
||||
return wrapped
|
||||
|
||||
|
||||
log = tee(_pprint)
|
||||
pprint = Tee(_pprint)
|
||||
|
||||
|
||||
def pprint(title_keys=('title', 'name', 'id'), print_values=True, sort=True):
|
||||
def PrettyPrint(title_keys=('title', 'name', 'id'), print_values=True, sort=True):
|
||||
term = blessings.Terminal()
|
||||
|
||||
def _pprint(*args, **kwargs):
|
||||
@ -78,6 +78,7 @@ def pprint(title_keys=('title', 'name', 'id'), print_values=True, sort=True):
|
||||
|
||||
|
||||
'''
|
||||
Old code from rdc.etl
|
||||
|
||||
def writehr(self, label=None):
|
||||
width = t.width or 80
|
||||
@ -113,4 +114,4 @@ def pprint(title_keys=('title', 'name', 'id'), print_values=True, sort=True):
|
||||
|
||||
|
||||
def noop(*args, **kwargs):  # pylint: disable=unused-argument
    """Ignore every argument and return the ``NOT_MODIFIED`` token."""
    return NOT_MODIFIED
|
||||
|
||||
7
docs/_templates/index.html
vendored
7
docs/_templates/index.html
vendored
@ -38,14 +38,13 @@
|
||||
<table class="contentstable">
|
||||
<tr>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("tutorial/basics") }}">{% trans %}First steps{% endtrans %}</a><br/>
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("tutorial/index") }}">{% trans %}First steps{% endtrans %}</a><br/>
|
||||
<span class="linkdescr">{% trans %}quick overview of basic features{% endtrans %}</span></p>
|
||||
</td>
|
||||
<td>
|
||||
{%- if hasdoc('search') %}
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("search") }}">{% trans %}
|
||||
Search{% endtrans %}</a><br/>
|
||||
<span class="linkdescr">{% trans %}search the documentation{% endtrans %}</span></p>{%- endif %}
|
||||
<span class="linkdescr">{% trans %}search the documentation{% endtrans %}</span></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -69,7 +68,7 @@
|
||||
<span class="linkdescr">{% trans %}examples and recipes{% endtrans %}</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("changes") }}">{% trans %}
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("contribute/index") }}">{% trans %}
|
||||
Contribute{% endtrans %}</a><br/>
|
||||
<span class="linkdescr">{% trans %}contributor guide{% endtrans %}</span></p>
|
||||
</td>
|
||||
|
||||
@ -181,4 +181,7 @@ epub_copyright = copyright
|
||||
epub_exclude_files = ['search.html']
|
||||
|
||||
# Example configuration for intersphinx: refer to the Python standard library.
|
||||
intersphinx_mapping = {'https://docs.python.org/': None}
|
||||
intersphinx_mapping = {
|
||||
'python': ('https://docs.python.org/3', None)
|
||||
}
|
||||
|
||||
|
||||
18
docs/contribute/index.rst
Normal file
18
docs/contribute/index.rst
Normal file
@ -0,0 +1,18 @@
|
||||
Contributing
|
||||
============
|
||||
|
||||
Contributing to bonobo is simple. Although we don't have a complete guide on this topic for now, the best way is to fork
|
||||
the github repository and send pull requests.
|
||||
|
||||
Keep the following points in mind:
|
||||
|
||||
* Although we will ask for 100% backward compatibility starting from 1.0 (following semantic versioning principles),
|
||||
pre-1.0 versions should do their best to keep compatibility between versions. When in doubt, open a github issue
|
||||
to discuss things.
|
||||
* The core should stay as light as possible.
|
||||
* Coding standards are enforced using yapf. That means that you can code the way you want, we just ask you to run
|
||||
`make format` before committing your changes so everybody follows the same conventions.
|
||||
* General rule for anything you're not sure about is "open a github issue to discuss the point".
|
||||
* More formal proposal process will come the day we feel the need for it.
|
||||
|
||||
A very drafty roadmap is available in the readme.
|
||||
14
docs/guide/ext/docker.rst
Normal file
14
docs/guide/ext/docker.rst
Normal file
@ -0,0 +1,14 @@
|
||||
Bonobo with Docker
|
||||
==================
|
||||
|
||||
.. todo:: The `bonobo-docker` package is at a very alpha stage, and things will change. This section is here to give a
|
||||
brief overview but is neither complete nor definitive.
|
||||
|
||||
Installation
|
||||
::::::::::::
|
||||
|
||||
Overview
|
||||
::::::::
|
||||
|
||||
Details
|
||||
:::::::
|
||||
11
docs/guide/ext/jupyter.rst
Normal file
11
docs/guide/ext/jupyter.rst
Normal file
@ -0,0 +1,11 @@
|
||||
Bonobo with Jupyter
|
||||
===================
|
||||
|
||||
Installation
|
||||
::::::::::::
|
||||
|
||||
Overview
|
||||
::::::::
|
||||
|
||||
Details
|
||||
:::::::
|
||||
@ -1,8 +1,9 @@
|
||||
Web crawlers with Bonobo
|
||||
========================
|
||||
Bonobo with Selenium
|
||||
====================
|
||||
|
||||
.. todo:: The `bonobo-selenium` package is at a very alpha stage, and things will change. This section is here to give a
|
||||
brief overview but is neither complete nor definitive.
|
||||
|
||||
.. todo:: Bonobo-Selenium is at a very alpha stage, and things will change. This section is here to give a brief
|
||||
overview but is neither complete nor definitive.
|
||||
|
||||
Writing web crawlers with Bonobo and Selenium is easy.
|
||||
|
||||
@ -31,3 +32,11 @@ Where each step would do the following:
|
||||
* `details()` extract the data you're interested in.
|
||||
* ... and the writer saves it somewhere.
|
||||
|
||||
Installation
|
||||
::::::::::::
|
||||
|
||||
Overview
|
||||
::::::::
|
||||
|
||||
Details
|
||||
:::::::
|
||||
15
docs/guide/ext/sqlalchemy.rst
Normal file
15
docs/guide/ext/sqlalchemy.rst
Normal file
@ -0,0 +1,15 @@
|
||||
Bonobo with SQLAlchemy
|
||||
======================
|
||||
|
||||
.. todo:: The `bonobo-sqlalchemy` package is at a very alpha stage, and things will change. This section is here to
|
||||
give a brief overview but is neither complete nor definitive.
|
||||
|
||||
|
||||
Installation
|
||||
::::::::::::
|
||||
|
||||
Overview
|
||||
::::::::
|
||||
|
||||
Details
|
||||
:::::::
|
||||
@ -1,8 +1,25 @@
|
||||
Guides
|
||||
======
|
||||
|
||||
Concepts and best practices
|
||||
:::::::::::::::::::::::::::
|
||||
|
||||
There are a few things that you should know while writing transformation graphs with bonobo.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
purity
|
||||
crawlers
|
||||
|
||||
Third party integrations
|
||||
::::::::::::::::::::::::
|
||||
|
||||
There are a few **bonobo** extensions that ease the use of the library with third party tools. Each integration is
|
||||
available as an optional extra dependency, and the maturity stage of each extension varies.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
ext/docker
|
||||
ext/selenium
|
||||
ext/sqlalchemy
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
Pure components and space complexity
|
||||
====================================
|
||||
Pure transformations
|
||||
====================
|
||||
|
||||
The nature of components, and how the data flow from one to another, make them not so easy to write correctly.
|
||||
Hopefully, with a few hints, you will be able to understand why and how they should be written.
|
||||
@ -14,7 +14,7 @@ returning) them. Numeric values, strings and tuples being immutable in python, m
|
||||
type will already return a different instance.
|
||||
|
||||
Numbers
|
||||
=======
|
||||
:::::::
|
||||
|
||||
You can't be wrong with numbers. All of the following are correct.
|
||||
|
||||
@ -45,7 +45,7 @@ You can't be wrong with numbers. All of the following are correct.
|
||||
The same is true with other numeric types, so don't be shy. Operate like crazy, my friend.
|
||||
|
||||
Tuples
|
||||
======
|
||||
::::::
|
||||
|
||||
Tuples are immutable, so you risk nothing.
|
||||
|
||||
@ -63,12 +63,12 @@ Tuples are immutable, so you risk nothing.
|
||||
return t
|
||||
|
||||
Strings
|
||||
=======
|
||||
:::::::
|
||||
|
||||
You know the drill, strings are immutable, blablabla ... Examples left as an exercise for the reader.
|
||||
|
||||
Dicts
|
||||
=====
|
||||
:::::
|
||||
|
||||
So, now it gets interesting. Dicts are mutable. It means that you can mess things up badly here if you're not cautious.
|
||||
|
||||
|
||||
@ -8,6 +8,7 @@ Bonobo
|
||||
tutorial/index
|
||||
guide/index
|
||||
reference/index
|
||||
contribute/index
|
||||
genindex
|
||||
modindex
|
||||
|
||||
|
||||
56
docs/reference/api.rst
Normal file
56
docs/reference/api.rst
Normal file
@ -0,0 +1,56 @@
|
||||
Public API
|
||||
==========
|
||||
|
||||
All the "public api" functions, classes and other callables are available under the root :mod:`bonobo` package, even if
|
||||
they are documented within their sub-namespace, for convenience.
|
||||
|
||||
.. automodule:: bonobo
|
||||
:members: create_strategy, get_examples_path, run
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Config
|
||||
------
|
||||
|
||||
.. automodule:: bonobo.config
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Context
|
||||
-------
|
||||
|
||||
.. automodule:: bonobo.context
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Core
|
||||
----
|
||||
|
||||
.. automodule:: bonobo.core
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
IO
|
||||
--
|
||||
|
||||
.. automodule:: bonobo.io
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Util
|
||||
----
|
||||
|
||||
.. automodule:: bonobo.util
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
@ -1,22 +0,0 @@
|
||||
bonobo.compat package
|
||||
=====================
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
bonobo.compat.pandas module
|
||||
---------------------------
|
||||
|
||||
.. automodule:: bonobo.compat.pandas
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
.. automodule:: bonobo.compat
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@ -1,85 +0,0 @@
|
||||
bonobo.core package
|
||||
===================
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
|
||||
bonobo.core.strategies
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
bonobo.core.bags module
|
||||
-----------------------
|
||||
|
||||
.. automodule:: bonobo.core.bags
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.core.contexts module
|
||||
---------------------------
|
||||
|
||||
.. automodule:: bonobo.core.contexts
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.core.errors module
|
||||
-------------------------
|
||||
|
||||
.. automodule:: bonobo.core.errors
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.core.graphs module
|
||||
-------------------------
|
||||
|
||||
.. automodule:: bonobo.core.graphs
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.core.inputs module
|
||||
-------------------------
|
||||
|
||||
.. automodule:: bonobo.core.inputs
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.core.plugins module
|
||||
--------------------------
|
||||
|
||||
.. automodule:: bonobo.core.plugins
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.core.services module
|
||||
---------------------------
|
||||
|
||||
.. automodule:: bonobo.core.services
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.core.stats module
|
||||
------------------------
|
||||
|
||||
.. automodule:: bonobo.core.stats
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
.. automodule:: bonobo.core
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@ -1,38 +0,0 @@
|
||||
bonobo.core.strategies package
|
||||
==============================
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
bonobo.core.strategies.base module
|
||||
----------------------------------
|
||||
|
||||
.. automodule:: bonobo.core.strategies.base
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.core.strategies.executor module
|
||||
--------------------------------------
|
||||
|
||||
.. automodule:: bonobo.core.strategies.executor
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.core.strategies.naive module
|
||||
-----------------------------------
|
||||
|
||||
.. automodule:: bonobo.core.strategies.naive
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
.. automodule:: bonobo.core.strategies
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@ -1,30 +0,0 @@
|
||||
bonobo.io package
|
||||
=================
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
bonobo.io.file module
|
||||
---------------------
|
||||
|
||||
.. automodule:: bonobo.io.file
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.io.json module
|
||||
---------------------
|
||||
|
||||
.. automodule:: bonobo.io.json
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
.. automodule:: bonobo.io
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@ -1,21 +0,0 @@
|
||||
bonobo package
|
||||
==============
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
|
||||
bonobo.compat
|
||||
bonobo.core
|
||||
bonobo.ext
|
||||
bonobo.io
|
||||
bonobo.util
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
.. automodule:: bonobo
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@ -1,62 +0,0 @@
|
||||
bonobo.util package
|
||||
===================
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
bonobo.util.compat module
|
||||
-------------------------
|
||||
|
||||
.. automodule:: bonobo.util.compat
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.util.helpers module
|
||||
--------------------------
|
||||
|
||||
.. automodule:: bonobo.util.helpers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.util.iterators module
|
||||
----------------------------
|
||||
|
||||
.. automodule:: bonobo.util.iterators
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.util.lifecycle module
|
||||
----------------------------
|
||||
|
||||
.. automodule:: bonobo.util.lifecycle
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.util.time module
|
||||
-----------------------
|
||||
|
||||
.. automodule:: bonobo.util.time
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
bonobo.util.tokens module
|
||||
-------------------------
|
||||
|
||||
.. automodule:: bonobo.util.tokens
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
.. automodule:: bonobo.util
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
33
docs/reference/commands.rst
Normal file
33
docs/reference/commands.rst
Normal file
@ -0,0 +1,33 @@
|
||||
Commands Reference
|
||||
==================
|
||||
|
||||
Bonobo Init
|
||||
:::::::::::
|
||||
|
||||
Create an empty project, ready to use bonobo.
|
||||
|
||||
Syntax: `bonobo init`
|
||||
|
||||
Requires `edgy.project`.
|
||||
|
||||
|
||||
Bonobo Run
|
||||
::::::::::
|
||||
|
||||
Run a transformation graph.
|
||||
|
||||
Syntax: `bonobo run [-c cmd | -m mod | file | -] [arg]`
|
||||
|
||||
.. todo:: implement -m, check if -c is of any use and if yes, implement it too. Implement args, too.
|
||||
|
||||
Bonobo RunC
|
||||
:::::::::::
|
||||
|
||||
Run a transformation graph in a docker container.
|
||||
|
||||
Syntax: `bonobo runc [-c cmd | -m mod | file | -] [arg]`
|
||||
|
||||
.. todo:: implement -m, check if -c is of any use and if yes, implement it too. Implement args, too.
|
||||
|
||||
Requires `bonobo-docker`, install with `docker` extra: `pip install bonobo[docker]`.
|
||||
|
||||
36
docs/reference/examples.rst
Normal file
36
docs/reference/examples.rst
Normal file
@ -0,0 +1,36 @@
|
||||
Examples
|
||||
========
|
||||
|
||||
There are a few examples bundled with **bonobo**. You'll find them under the :mod:`bonobo.examples` package.
|
||||
|
||||
Types
|
||||
:::::
|
||||
|
||||
bonobo.examples.types.strings
|
||||
-----------------------------
|
||||
|
||||
.. automodule:: bonobo.examples.types.strings
|
||||
:members: graph, extract, transform, load
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
bonobo.examples.types.dicts
|
||||
---------------------------
|
||||
|
||||
.. automodule:: bonobo.examples.types.dicts
|
||||
:members: graph, extract, transform, load
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
bonobo.examples.types.bags
|
||||
--------------------------
|
||||
|
||||
.. automodule:: bonobo.examples.types.bags
|
||||
:members: graph, extract, transform, load
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
@ -6,8 +6,5 @@ References
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
bonobo
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
commands
|
||||
api
|
||||
|
||||
@ -1,161 +0,0 @@
|
||||
Basic concepts
|
||||
==============
|
||||
|
||||
To begin with Bonobo, you need to install it in a working python 3.5+ environment:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
$ pip install bonobo
|
||||
|
||||
See :doc:`/install` for more options.
|
||||
|
||||
Let's write a first data transformation
|
||||
:::::::::::::::::::::::::::::::::::::::
|
||||
|
||||
We'll start with the most simple components we can.
|
||||
|
||||
In **Bonobo**, a component is a plain old python callable, not more, not less. Let's write one that takes a string and
|
||||
uppercase it.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
def uppercase(x: str):
|
||||
return x.upper()
|
||||
|
||||
Pretty straightforward.
|
||||
|
||||
You could even use :func:`str.upper` directly instead of writing a wrapper, as a type's method (unbound) will take an
|
||||
instance of this type as its first parameter (what you'd call `self` in your method).
|
||||
|
||||
The type annotations written here are not used, but can make your code much more readable, and may very well be used as
|
||||
validators in the future.
|
||||
|
||||
Let's write two more components: a generator to produce the data to be transformed, and something that outputs it,
|
||||
because, yeah, feedback is cool.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
def generate_data():
|
||||
yield 'foo'
|
||||
yield 'bar'
|
||||
yield 'baz'
|
||||
|
||||
def output(x: str):
|
||||
print(x)
|
||||
|
||||
Once again, you could have skipped the pain of writing this and simply use an iterable to generate the data and the
|
||||
builtin :func:`print` for the output, but we'll stick to writing our own components for now.
|
||||
|
||||
Let's chain the three components together and run the transformation:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from bonobo import run
|
||||
|
||||
run(generate_data, uppercase, output)
|
||||
|
||||
.. graphviz::
|
||||
|
||||
digraph {
|
||||
rankdir = LR;
|
||||
stylesheet = "../_static/graphs.css";
|
||||
|
||||
BEGIN [shape="point"];
|
||||
BEGIN -> "generate_data" -> "uppercase" -> "output";
|
||||
}
|
||||
|
||||
We use the :func:`bonobo.run` helper that hides the underlying object composition necessary to actually run the
|
||||
components in parralel, because it's simpler.
|
||||
|
||||
Depending on what you're doing, you may use the shorthand helper method, or the verbose one. Always favor the shorter,
|
||||
if you don't need to tune the graph or the execution strategy (see below).
|
||||
|
||||
Diving in
|
||||
:::::::::
|
||||
|
||||
Let's rewrite it using the builtin functions :func:`str.upper` and :func:`print` instead of our own wrappers, and expand
|
||||
the :func:`bonobo.run()` helper so you see what's inside...
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from bonobo import Graph, ThreadPoolExecutorStrategy
|
||||
|
||||
# Represent our data processor as a simple directed graph of callables.
|
||||
graph = Graph()
|
||||
graph.add_chain(
|
||||
('foo', 'bar', 'baz'),
|
||||
str.upper,
|
||||
print,
|
||||
)
|
||||
|
||||
# Use a thread pool.
|
||||
executor = ThreadPoolExecutorStrategy()
|
||||
|
||||
# Run the thing.
|
||||
executor.execute(graph)
|
||||
|
||||
We also switched our generator for a tuple, **Bonobo** will wrap it as a generator itself if it's not callable but
|
||||
iterable.
|
||||
|
||||
The shorthand version with builtins would look like this:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from bonobo import run
|
||||
|
||||
run(
|
||||
('foo', 'bar', 'baz'),
|
||||
str.upper,
|
||||
print,
|
||||
)
|
||||
|
||||
Both methods are strictly equivalent (see :func:`bonobo.run`). When in doubt, prefer the shorter version.
|
||||
|
||||
Takeaways
|
||||
:::::::::
|
||||
|
||||
① The :class:`bonobo.Graph` class is used to represent a data-processing pipeline.
|
||||
|
||||
It can represent simple list-like linear graphs, like here, but it can also represent much more complex graphs, with
|
||||
branches and cycles.
|
||||
|
||||
This is what the graph we defined looks like:
|
||||
|
||||
.. graphviz::
|
||||
|
||||
digraph {
|
||||
rankdir = LR;
|
||||
"iter(['foo', 'bar', 'baz'])" -> "str.upper" -> "print";
|
||||
}
|
||||
|
||||
|
||||
② `Components` are simple python callables. Whatever can be called can be used as a `component`. Callables can
|
||||
either `return` or `yield` data to send it to the next step. Regular functions (using `return`) should be prefered if
|
||||
each call is guaranteed to return exactly one result, while generators (using `yield`) should be prefered if the
|
||||
number of output lines for a given input varies.
|
||||
|
||||
③ The `graph` is then executed using an `ExecutionStrategy`. In this tutorial, we'll only use
|
||||
:class:`bonobo.ThreadPoolExecutorStrategy`, which use an underlying `concurrent.futures.ThreadPoolExecutor` to
|
||||
schedule calls in a pool of threads, but basically this strategy is what determines the actual behaviour of execution.
|
||||
|
||||
④ Before actually executing the `components`, the `ExecutorStrategy` instance will wrap each component in a `context`,
|
||||
whose responsibility is to hold the state, to keep the `components` stateless. We'll expand on this later.
|
||||
|
||||
Concepts and definitions
|
||||
::::::::::::::::::::::::
|
||||
|
||||
* Component
|
||||
* Graph
|
||||
* Executor
|
||||
|
||||
.. todo:: Definitions, and substitute vague terms in the page by the exact term defined here
|
||||
|
||||
|
||||
Next
|
||||
::::
|
||||
|
||||
You now know all the basic concepts necessary to build (batch-like) data processors.
|
||||
|
||||
If you're confident with this part, let's get to a more real world example, using files and nice console output:
|
||||
:doc:`basics2`
|
||||
|
||||
@ -1,46 +0,0 @@
|
||||
Working with files
|
||||
==================
|
||||
|
||||
Bonobo would not be of any use if the aim was to uppercase small lists of strings. In fact, Bonobo should not be used
|
||||
if you don't expect any gain from parralelization of tasks.
|
||||
|
||||
Let's take the following graph as an example:
|
||||
|
||||
.. graphviz::
|
||||
|
||||
digraph {
|
||||
rankdir = LR;
|
||||
"A" -> "B" -> "C";
|
||||
}
|
||||
|
||||
The execution strategy does a bit of under the scene work, wrapping every component in a thread (assuming you're using
|
||||
the :class:`bonobo.ThreadPoolExecutorStrategy`), which allows to start running `B` as soon as `A` yielded the first line
|
||||
of data, and `C` as soon as `B` yielded the first line of data, even if `A` or `B` still have data to yield.
|
||||
|
||||
The great thing is that you generally don't have to think about it. Just be aware that your components will be run in
|
||||
parralel, and don't worry too much about blocking components, as they won't block their siblings.
|
||||
|
||||
That being said, let's try to write a more real-world like transformation.
|
||||
|
||||
Reading a file
|
||||
::::::::::::::
|
||||
|
||||
There are a few component builders available in **Bonobo** that let you read files. You should at least know about the following:
|
||||
|
||||
* :class:`bonobo.FileReader` (aliased as :func:`bonobo.from_file`)
|
||||
* :class:`bonobo.JsonFileReader` (aliased as :func:`bonobo.from_json`)
|
||||
* :class:`bonobo.CsvFileReader` (aliased as :func:`bonobo.from_csv`)
|
||||
|
||||
Reading a file is as simple as using one of those, and for the example, we'll use a text file that was generated using
|
||||
Bonobo from the "liste-des-cafes-a-un-euro" dataset made available by Mairie de Paris under the Open Database
|
||||
License (ODbL). You can `explore the original dataset <https://opendata.paris.fr/explore/dataset/liste-des-cafes-a-un-euro/information/>`_.
|
||||
You'll need the example dataset, available in **Bonobo**'s repository.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from bonobo import FileReader, run
|
||||
|
||||
run(
|
||||
FileReader('examples/datasets/cheap_coffeeshops_in_paris.txt'),
|
||||
print,
|
||||
)
|
||||
@ -3,12 +3,38 @@ First steps
|
||||
|
||||
We tried hard to make **Bonobo** simple. We use simple python, and we believe it should be simple to learn.
|
||||
|
||||
Tutorial
|
||||
::::::::
|
||||
|
||||
We strongly advise that even if you're an advanced python developer, you go through the whole tutorial for two
|
||||
reasons: that should be sufficient to do anything possible with **Bonobo** and that's a good moment to learn the few
|
||||
concepts you'll see everywhere in the software.
|
||||
|
||||
If you're not familiar with python, you should first read :doc:`./python`.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
basics
|
||||
basics2
|
||||
tut01
|
||||
tut02
|
||||
|
||||
Where to go next?
|
||||
:::::::::::::::::
|
||||
|
||||
When you're done with the tutorial, you may be interested in the following next steps:
|
||||
|
||||
Read the :doc:`../reference/examples`
|
||||
|
||||
Read about best development practices
|
||||
-------------------------------------
|
||||
|
||||
* :doc:`../guide/index`
|
||||
* :doc:`../guide/purity`
|
||||
|
||||
Read about integrating external tools with bonobo
|
||||
-------------------------------------------------
|
||||
|
||||
* :doc:`../guide/ext/docker`: run transformation graphs in isolated containers.
|
||||
* :doc:`../guide/ext/jupyter`: run transformations within jupyter notebooks.
|
||||
* :doc:`../guide/ext/selenium`: run
|
||||
* :doc:`../guide/ext/sqlalchemy`: everything you need to interact with SQL databases.
|
||||
|
||||
16
docs/tutorial/python.rst
Normal file
16
docs/tutorial/python.rst
Normal file
@ -0,0 +1,16 @@
|
||||
Just enough Python for Bonobo
|
||||
=============================
|
||||
|
||||
This guide is intended to help programmers or enthusiasts to grasp the python basics necessary to use Bonobo. It should
|
||||
definitely not be considered a general python introduction, nor a deep dive into details.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
python01
|
||||
python02
|
||||
python03
|
||||
python04
|
||||
python05
|
||||
|
||||
|
||||
132
docs/tutorial/tut01.rst
Normal file
132
docs/tutorial/tut01.rst
Normal file
@ -0,0 +1,132 @@
|
||||
Basic concepts
|
||||
==============
|
||||
|
||||
To begin with Bonobo, you need to install it in a working python 3.5+ environment:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
$ pip install bonobo
|
||||
|
||||
See :doc:`/install` for more options.
|
||||
|
||||
Let's write a first data transformation
|
||||
:::::::::::::::::::::::::::::::::::::::
|
||||
|
||||
We'll start with the simplest transformation possible.
|
||||
|
||||
In **Bonobo**, a transformation is a plain old python callable, not more, not less. Let's write one that takes a string
|
||||
and uppercases it.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
def uppercase(x: str):
|
||||
return x.upper()
|
||||
|
||||
Pretty straightforward.
|
||||
|
||||
You could even use :func:`str.upper` directly instead of writing a wrapper, as a type's method (unbound) will take an
|
||||
instance of this type as its first parameter (what you'd call `self` in your method).
|
||||
|
||||
The type annotations written here are not used, but can make your code much more readable, and may very well be used as
|
||||
validators in the future.
|
||||
|
||||
Let's write two more transformations: a generator to produce the data to be transformed, and something that outputs it,
|
||||
because, yeah, feedback is cool.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
def generate_data():
|
||||
yield 'foo'
|
||||
yield 'bar'
|
||||
yield 'baz'
|
||||
|
||||
def output(x: str):
|
||||
print(x)
|
||||
|
||||
Once again, you could have skipped the pain of writing this and simply use an iterable to generate the data and the
|
||||
builtin :func:`print` for the output, but we'll stick to writing our own transformations for now.
|
||||
|
||||
Let's chain the three transformations together and run the transformation graph:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import bonobo
|
||||
|
||||
graph = bonobo.Graph(generate_data, uppercase, output)
|
||||
|
||||
if __name__ == '__main__':
|
||||
bonobo.run(graph)
|
||||
|
||||
.. graphviz::
|
||||
|
||||
digraph {
|
||||
rankdir = LR;
|
||||
stylesheet = "../_static/graphs.css";
|
||||
|
||||
BEGIN [shape="point"];
|
||||
BEGIN -> "generate_data" -> "uppercase" -> "output";
|
||||
}
|
||||
|
||||
We use the :func:`bonobo.run` helper that hides the underlying object composition necessary to actually run the
|
||||
transformations in parallel, because it's simpler.
|
||||
|
||||
Depending on what you're doing, you may use the shorthand helper method, or the verbose one. Always favor the shorter,
|
||||
if you don't need to tune the graph or the execution strategy (see below).
|
||||
|
||||
Takeaways
|
||||
:::::::::
|
||||
|
||||
① The :class:`bonobo.Graph` class is used to represent a data-processing pipeline.
|
||||
|
||||
It can represent simple list-like linear graphs, like here, but it can also represent much more complex graphs, with
|
||||
branches and cycles.
|
||||
|
||||
This is what the graph we defined looks like:
|
||||
|
||||
.. graphviz::
|
||||
|
||||
digraph {
|
||||
rankdir = LR;
|
||||
BEGIN [shape="point"];
|
||||
BEGIN -> "generate_data" -> "uppercase" -> "output";
|
||||
}
|
||||
|
||||
|
||||
② `Transformations` are simple python callables. Whatever can be called can be used as a `transformation`. Callables can
|
||||
either `return` or `yield` data to send it to the next step. Regular functions (using `return`) should be preferred if
|
||||
each call is guaranteed to return exactly one result, while generators (using `yield`) should be preferred if the
|
||||
number of output lines for a given input varies.
|
||||
|
||||
③ The `Graph` instance, or `transformation graph` is then executed using an `ExecutionStrategy`. You did not use it
|
||||
directly in this tutorial, but :func:`bonobo.run` created an instance of :class:`bonobo.ThreadPoolExecutorStrategy`
|
||||
under the hood (which is the default strategy). Actual behavior of an execution will depend on the strategy chosen, but
|
||||
the default should be fine in most of the basic cases.
|
||||
|
||||
④ Before actually executing the `transformations`, the `ExecutionStrategy` instance will wrap each component in an
|
||||
`execution context`, whose responsibility is to hold the state of the transformation. It makes it possible to keep the
|
||||
`transformations` stateless, while allowing an external state to be added if required. We'll expand on this later.
|
||||
|
||||
Concepts and definitions
|
||||
::::::::::::::::::::::::
|
||||
|
||||
* Transformation: a callable that takes input (as call parameters) and returns output(s), either as its return value or
|
||||
by yielding values (a.k.a. returning a generator).
|
||||
* Transformation graph (or Graph): a set of transformations tied together in a :class:`bonobo.Graph` instance, which is a simple
|
||||
directed acyclic graph (sometimes also referred to as a DAG).
|
||||
* Node: a transformation within the context of a transformation graph. The node defines what to do with a
|
||||
transformation's output, and especially what other node to feed with the output.
|
||||
* Execution strategy (or strategy): a way to run a transformation graph. Its responsibility is mainly to parallelize
|
||||
(or not) the transformations, on one or more processes and/or computers, and to set up the right queuing mechanism for
|
||||
transformations' inputs and outputs.
|
||||
* Execution context (or context): a wrapper around a node that holds the state for it. If the node needs the state, there
|
||||
are tools available in bonobo to feed it to the transformation using additional call parameters, and so every
|
||||
transformation will be atomic.
|
||||
|
||||
Next
|
||||
::::
|
||||
|
||||
You now know all the basic concepts necessary to build (batch-like) data processors.
|
||||
|
||||
If you're confident with this part, let's get to a more real world example, using files and nice console output:
|
||||
:doc:`tut02`
|
||||
|
||||
63
docs/tutorial/tut02.rst
Normal file
63
docs/tutorial/tut02.rst
Normal file
@ -0,0 +1,63 @@
|
||||
Working with files
|
||||
==================
|
||||
|
||||
Bonobo would not be of any use if the aim was to uppercase small lists of strings. In fact, Bonobo should not be used
|
||||
if you don't expect any gain from parallelization/distribution of tasks.
|
||||
|
||||
Let's take the following graph as an example:
|
||||
|
||||
.. graphviz::
|
||||
|
||||
digraph {
|
||||
rankdir = LR;
|
||||
BEGIN [shape="point"];
|
||||
BEGIN -> "A" -> "B" -> "C";
|
||||
}
|
||||
|
||||
The execution strategy does a bit of behind-the-scenes work, wrapping every component in a thread (assuming you're using
|
||||
the :class:`bonobo.ThreadPoolExecutorStrategy`), which allows `B` to start running as soon as `A` has yielded the first line
|
||||
of data, and `C` as soon as `B` yielded the first line of data, even if `A` or `B` still have data to yield.
|
||||
|
||||
The great thing is that you generally don't have to think about it. Just be aware that your components will be run in
|
||||
parallel (with the default strategy), and don't worry too much about blocking components, as they won't block their
|
||||
siblings when run in bonobo.
|
||||
|
||||
That being said, let's try to write a more real-world like transformation.
|
||||
|
||||
Reading a file
|
||||
::::::::::::::
|
||||
|
||||
There are a few component builders available in **Bonobo** that let you read files. You should at least know about the
|
||||
following:
|
||||
|
||||
* :class:`bonobo.io.FileReader`
|
||||
* :class:`bonobo.io.JsonReader`
|
||||
* :class:`bonobo.io.CsvReader`
|
||||
|
||||
Reading a file is as simple as using one of those, and for the example, we'll use a text file that was generated using
|
||||
Bonobo from the "liste-des-cafes-a-un-euro" dataset made available by Mairie de Paris under the Open Database
|
||||
License (ODbL). You can `explore the original dataset <https://opendata.paris.fr/explore/dataset/liste-des-cafes-a-un-euro/information/>`_.
|
||||
You'll need the example dataset, available in **Bonobo**'s repository.
|
||||
|
||||
.. literalinclude:: ../../examples/tut02_01_read.py
|
||||
:language: python
|
||||
|
||||
Until now, we ran the file directly using our python interpreter, but there are other options, one of them being
|
||||
`bonobo run`. This command lets you run a graph defined by a python file, and replaces the :func:`bonobo.run`
|
||||
helper. It's the exact reason why we call :func:`bonobo.run` in the `if __name__ == '__main__'` block, to only
|
||||
instantiate it if it is run directly.
|
||||
|
||||
Using bonobo command line has a few advantages. It will look for one and only one :class:`bonobo.Graph` instance defined
|
||||
in the file given as argument, configure an execution strategy and optionally plugins, and execute it. It has the benefit
|
||||
of allowing you to tune the "artifacts" surrounding the transformation graph on command line (verbosity, plugins ...), and
|
||||
it will also ease the transition to run transformation graphs in containers, as the syntax will be the same. Of course,
|
||||
it is not required, and the containerization capabilities are provided by an optional and separate python package.
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
$ bonobo run examples/tut02_01_read.py
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -1,30 +0,0 @@
|
||||
import time
|
||||
from random import randint
|
||||
|
||||
from bonobo import Bag
|
||||
from bonobo.core.graphs import Graph
|
||||
|
||||
|
||||
def extract():
|
||||
yield Bag(topic='foo')
|
||||
yield Bag(topic='bar')
|
||||
yield Bag(topic='baz')
|
||||
|
||||
|
||||
def transform(topic: str):
|
||||
wait = randint(0, 1)
|
||||
time.sleep(wait)
|
||||
return Bag.inherit(title=topic.title(), wait=wait)
|
||||
|
||||
|
||||
def load(topic: str, title: str, wait: int):
|
||||
print('{} ({}) wait={}'.format(title, topic, wait))
|
||||
|
||||
|
||||
graph = Graph()
|
||||
graph.add_chain(extract, transform, load)
|
||||
|
||||
if __name__ == '__main__':
|
||||
from bonobo.util.helpers import run
|
||||
|
||||
run(graph)
|
||||
@ -1,32 +0,0 @@
|
||||
import time
|
||||
from random import randint
|
||||
|
||||
from bonobo.core.graphs import Graph
|
||||
|
||||
|
||||
def extract():
|
||||
yield {'topic': 'foo'}
|
||||
yield {'topic': 'bar'}
|
||||
yield {'topic': 'baz'}
|
||||
|
||||
|
||||
def transform(row):
|
||||
wait = randint(0, 1)
|
||||
time.sleep(wait)
|
||||
return {
|
||||
'topic': row['topic'].title(),
|
||||
'wait': wait,
|
||||
}
|
||||
|
||||
|
||||
def load(s):
|
||||
print(s)
|
||||
|
||||
|
||||
graph = Graph()
|
||||
graph.add_chain(extract, transform, load)
|
||||
|
||||
if __name__ == '__main__':
|
||||
from bonobo import run
|
||||
|
||||
run(graph)
|
||||
@ -1,21 +0,0 @@
|
||||
import os
|
||||
|
||||
from bonobo import CsvReader, Graph
|
||||
|
||||
__path__ = os.path.dirname(__file__)
|
||||
|
||||
|
||||
def skip_comments(line):
|
||||
if not line.startswith('#'):
|
||||
yield line
|
||||
|
||||
|
||||
graph = Graph(
|
||||
CsvReader(path=os.path.join(__path__, 'datasets/coffeeshops.txt')),
|
||||
print,
|
||||
)
|
||||
|
||||
if __name__ == '__main__':
|
||||
import bonobo
|
||||
|
||||
bonobo.run(graph)
|
||||
@ -1,29 +0,0 @@
|
||||
import time
|
||||
from random import randint
|
||||
|
||||
from bonobo.core.graphs import Graph
|
||||
|
||||
|
||||
def extract():
|
||||
yield 'foo'
|
||||
yield 'bar'
|
||||
yield 'baz'
|
||||
|
||||
|
||||
def transform(s):
|
||||
wait = randint(0, 1)
|
||||
time.sleep(wait)
|
||||
return s.title() + ' ' + str(wait)
|
||||
|
||||
|
||||
def load(s):
|
||||
print(s)
|
||||
|
||||
|
||||
graph = Graph()
|
||||
graph.add_chain(extract, transform, load)
|
||||
|
||||
if __name__ == '__main__':
|
||||
from bonobo import run
|
||||
|
||||
run(graph)
|
||||
32
setup.py
32
setup.py
@ -34,35 +34,37 @@ setup(
|
||||
description='Bonobo',
|
||||
license='Apache License, Version 2.0',
|
||||
install_requires=[
|
||||
'blessings >=1.6,<1.7', 'psutil >=5.0,<5.1', 'requests >=2.12,<2.13', 'stevedore >=1.19,<1.20',
|
||||
'toolz >=0.8,<0.9'
|
||||
'blessings >=1.6,<1.7', 'psutil >=5.0,<5.1', 'requests >=2.12,<2.13',
|
||||
'stevedore >=1.19,<1.20', 'toolz >=0.8,<0.9'
|
||||
],
|
||||
version=version,
|
||||
long_description=read('README.rst'),
|
||||
classifiers=read('classifiers.txt', tolines),
|
||||
packages=find_packages(exclude=['ez_setup', 'example', 'test']),
|
||||
include_package_data=True,
|
||||
data_files=[
|
||||
(
|
||||
'share/jupyter/nbextensions/bonobo-jupyter', [
|
||||
'bonobo/ext/jupyter/static/extension.js', 'bonobo/ext/jupyter/static/index.js',
|
||||
data_files=[('share/jupyter/nbextensions/bonobo-jupyter', [
|
||||
'bonobo/ext/jupyter/static/extension.js',
|
||||
'bonobo/ext/jupyter/static/index.js',
|
||||
'bonobo/ext/jupyter/static/index.js.map'
|
||||
]
|
||||
)
|
||||
],
|
||||
])],
|
||||
extras_require={
|
||||
'dev': [
|
||||
'coverage >=4.3,<4.4', 'mock >=2.0,<2.1', 'nose >=1.3,<1.4', 'pylint >=1.6,<1.7', 'pytest >=3,<4',
|
||||
'pytest-cov >=2.4,<2.5', 'sphinx', 'sphinx_rtd_theme', 'yapf'
|
||||
'coverage >=4.3,<4.4', 'mock >=2.0,<2.1', 'nose >=1.3,<1.4',
|
||||
'pylint >=1.6,<1.7', 'pytest >=3,<4', 'pytest-cov >=2.4,<2.5',
|
||||
'pytest-timeout >=1.2,<1.3', 'sphinx', 'sphinx_rtd_theme', 'yapf'
|
||||
],
|
||||
'jupyter': ['jupyter >=1.0,<1.1', 'ipywidgets >=6.0.0.beta5']
|
||||
},
|
||||
entry_points={
|
||||
'bonobo.commands': ['init = bonobo.commands.init:register', 'run = bonobo.commands.run:register'],
|
||||
'bonobo.commands': [
|
||||
'init = bonobo.commands.init:register',
|
||||
'run = bonobo.commands.run:register'
|
||||
],
|
||||
'console_scripts': ['bonobo = bonobo.commands:entrypoint'],
|
||||
'edgy.project.features': ['bonobo = '
|
||||
'edgy.project.features':
|
||||
['bonobo = '
|
||||
'bonobo.ext.edgy.project.feature:BonoboFeature']
|
||||
},
|
||||
url='https://bonobo-project.org/',
|
||||
download_url='https://github.com/python-bonobo/bonobo/tarball/{version}'.format(version=version),
|
||||
)
|
||||
download_url='https://github.com/python-bonobo/bonobo/tarball/{version}'.
|
||||
format(version=version), )
|
||||
|
||||
14
tests/test_basicusage.py
Normal file
14
tests/test_basicusage.py
Normal file
@ -0,0 +1,14 @@
|
||||
import pytest
|
||||
|
||||
import bonobo as bb
|
||||
|
||||
|
||||
@pytest.mark.timeout(2)
|
||||
def test_run_graph_noop():
|
||||
graph = bb.Graph(
|
||||
bb.noop
|
||||
)
|
||||
assert len(graph) == 1
|
||||
|
||||
result = bb.run(graph, strategy='threadpool')
|
||||
assert result
|
||||
32
tests/test_commands.py
Normal file
32
tests/test_commands.py
Normal file
@ -0,0 +1,32 @@
|
||||
import pkg_resources
|
||||
import pytest
|
||||
|
||||
from bonobo import get_examples_path
|
||||
from bonobo.commands import entrypoint
|
||||
|
||||
|
||||
def test_entrypoint():
|
||||
commands = {}
|
||||
|
||||
for command in pkg_resources.iter_entry_points('bonobo.commands'):
|
||||
commands[command.name] = command
|
||||
|
||||
assert 'init' in commands
|
||||
assert 'run' in commands
|
||||
|
||||
def test_no_command(capsys):
|
||||
with pytest.raises(SystemExit):
|
||||
entrypoint([])
|
||||
out, err = capsys.readouterr()
|
||||
assert 'error: the following arguments are required: command' in err
|
||||
|
||||
def test_init():
|
||||
pass # need ext dir
|
||||
|
||||
def test_run(capsys):
|
||||
entrypoint(['run', '--quiet', get_examples_path('types/strings.py')])
|
||||
out, err = capsys.readouterr()
|
||||
out = out.split('\n')
|
||||
assert out[0].startswith('Foo ')
|
||||
assert out[1].startswith('Bar ')
|
||||
assert out[2].startswith('Baz ')
|
||||
@ -1,4 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from bonobo import Configurable, Option
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user