[doc] proofreading the guides, refactoring the reference.
This commit is contained in:
2
Makefile
2
Makefile
@ -1,4 +1,4 @@
|
|||||||
# Generated by Medikit 0.4.6 on 2018-01-14.
|
# Generated by Medikit 0.4.3 on 2018-01-16.
|
||||||
# All changes will be overriden.
|
# All changes will be overriden.
|
||||||
|
|
||||||
PACKAGE ?= bonobo
|
PACKAGE ?= bonobo
|
||||||
|
|||||||
63
bin/update_apidoc.py
Normal file
63
bin/update_apidoc.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from jinja2 import Environment, DictLoader
|
||||||
|
|
||||||
|
__path__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__), '..'))
|
||||||
|
|
||||||
|
apidoc_root = 'docs/reference/api'
|
||||||
|
|
||||||
|
|
||||||
|
class Module:
    """Describe one python module for which an API reference page is rendered.

    :param str name: Dotted import path of the module (e.g. ``bonobo.config``).
    :param str title: Human readable title. When omitted, it is built from the
        title-cased components of ``name`` that follow the top-level package.
    :param list automodule_options: Option names rendered as ``:option:`` lines
        by the template (keyword-only).
    """

    def __init__(self, name, title=None, *, automodule_options=None):
        self.name = name
        # A single-component name (e.g. "bonobo") has nothing after the
        # top-level package, which used to yield an empty title; fall back to
        # the title-cased name itself in that case.
        derived_title = ' '.join(map(str.title, self.name.split('.')[1:]))
        self.title = title or derived_title or self.name.title()
        self.automodule_options = automodule_options or list()

    def __repr__(self):
        return '<{} ({})>'.format(self.title, self.name)

    def asdict(self):
        """Return the attributes as a dict, used as the template rendering context."""
        return {
            'name': self.name,
            'title': self.title,
            'automodule_options': self.automodule_options,
        }

    def get_path(self):
        """Return the path of the ``.rst`` file for this module.

        The path is built from the module-level ``__path__`` and ``apidoc_root``
        values, followed by one path component per dotted part of the name.
        """
        return os.path.join(__path__, apidoc_root, *self.name.split('.')) + '.rst'
|
||||||
|
|
||||||
|
|
||||||
|
modules = [
|
||||||
|
Module('bonobo', title='Bonobo'),
|
||||||
|
Module('bonobo.config'),
|
||||||
|
Module('bonobo.constants', automodule_options=['no-members']),
|
||||||
|
Module('bonobo.execution'),
|
||||||
|
Module('bonobo.execution.contexts'),
|
||||||
|
Module('bonobo.execution.events'),
|
||||||
|
Module('bonobo.execution.strategies'),
|
||||||
|
Module('bonobo.util'),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def underlined_filter(txt, chr):
    """Return ``txt`` followed by a newline and ``chr`` repeated ``len(txt)`` times."""
    rule = chr * len(txt)
    heading = txt + '\n'
    return heading + rule
|
||||||
|
|
||||||
|
|
||||||
|
env = Environment(loader=DictLoader({
|
||||||
|
'module': '''
|
||||||
|
{{ (':mod:`'~title~' <'~name~'>`') | underlined('=') }}
|
||||||
|
|
||||||
|
.. currentmodule:: {{ name }}
|
||||||
|
|
||||||
|
:Module: :mod:`{{ name }}`
|
||||||
|
|
||||||
|
.. automodule:: {{ name }}
|
||||||
|
{% for opt in automodule_options %} :{{ opt }}:{{ "\n" }}{% endfor %}
|
||||||
|
'''[1:-1] + '\n'}))
|
||||||
|
env.filters['underlined'] = underlined_filter
|
||||||
|
|
||||||
|
# Render the "module" template once per registered module and write the result
# to the path returned by Module.get_path().
for module in modules:
    target = module.get_path()
    # Nested modules map to sub-directories that may not exist yet.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    # Plain 'w' is enough since nothing is read back; the encoding is pinned so
    # the generated files do not depend on the platform locale.
    with open(target, 'w', encoding='utf-8') as f:
        f.write(env.get_template('module').render(module.asdict()))
|
||||||
File diff suppressed because one or more lines are too long
@ -1,8 +1,16 @@
|
|||||||
|
"""
|
||||||
|
Contains all the tools you need to get started with the framework, including (but not limited to) generic
|
||||||
|
transformations, readers, writers, and tools for writing and executing graphs and jobs.
|
||||||
|
|
||||||
|
All objects in this module are considered very safe to use, and backward compatibility when moving up from one version
|
||||||
|
to another is maximal.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
from bonobo.execution.strategies import create_strategy
|
from bonobo.execution.strategies import create_strategy
|
||||||
from bonobo.nodes import __all__ as _all_nodes
|
from bonobo.nodes import __all__ as _all_nodes
|
||||||
from bonobo.nodes import *
|
from bonobo.nodes import *
|
||||||
from bonobo.structs import Graph
|
from bonobo.structs import Graph
|
||||||
from bonobo.util import get_name
|
|
||||||
from bonobo.util.api import ApiHelper
|
from bonobo.util.api import ApiHelper
|
||||||
from bonobo.util.environ import parse_args, get_argument_parser
|
from bonobo.util.environ import parse_args, get_argument_parser
|
||||||
|
|
||||||
@ -96,7 +104,8 @@ api.register_group(create_strategy)
|
|||||||
@api.register
|
@api.register
|
||||||
def open_fs(fs_url=None, *args, **kwargs):
|
def open_fs(fs_url=None, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
Wraps :func:`fs.open_fs` function with a few candies.
|
Wraps :obj:`fs.opener.registry.Registry.open_fs`, with default to local current working directory and expanding ~ in
|
||||||
|
path.
|
||||||
|
|
||||||
:param str fs_url: A filesystem URL
|
:param str fs_url: A filesystem URL
|
||||||
:param parse_result: A parsed filesystem URL.
|
:param parse_result: A parsed filesystem URL.
|
||||||
@ -105,9 +114,9 @@ def open_fs(fs_url=None, *args, **kwargs):
|
|||||||
:param bool create: True if the filesystem should be created if it does not exist.
|
:param bool create: True if the filesystem should be created if it does not exist.
|
||||||
:param str cwd: The current working directory (generally only relevant for OS filesystems).
|
:param str cwd: The current working directory (generally only relevant for OS filesystems).
|
||||||
:param str default_protocol: The protocol to use if one is not supplied in the FS URL (defaults to ``"osfs"``).
|
:param str default_protocol: The protocol to use if one is not supplied in the FS URL (defaults to ``"osfs"``).
|
||||||
:returns: :class:`~fs.base.FS` object
|
:returns: :class:`fs.base.FS` object
|
||||||
"""
|
"""
|
||||||
from fs import open_fs as _open_fs
|
from fs.opener import open_fs as _open_fs
|
||||||
from os.path import expanduser
|
from os.path import expanduser
|
||||||
from os import getcwd
|
from os import getcwd
|
||||||
|
|
||||||
|
|||||||
@ -1,3 +1,9 @@
|
|||||||
|
"""
|
||||||
|
The Config API, located under the :mod:`bonobo.config` namespace, contains all the tools you need to create
|
||||||
|
configurable transformations, either class-based or function-based.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
from bonobo.config.configurables import Configurable
|
from bonobo.config.configurables import Configurable
|
||||||
from bonobo.config.functools import transformation_factory
|
from bonobo.config.functools import transformation_factory
|
||||||
from bonobo.config.options import Method, Option
|
from bonobo.config.options import Method, Option
|
||||||
|
|||||||
@ -68,7 +68,7 @@ class Option:
|
|||||||
if self.__doc__:
|
if self.__doc__:
|
||||||
self.__doc__ = textwrap.dedent(self.__doc__.strip('\n')).strip()
|
self.__doc__ = textwrap.dedent(self.__doc__.strip('\n')).strip()
|
||||||
if default:
|
if default:
|
||||||
self.__doc__ += '\nDefault: {!r}'.format(default)
|
self.__doc__ += '\n\nDefault: {!r}'.format(default)
|
||||||
|
|
||||||
# This hack is necessary for python3.5
|
# This hack is necessary for python3.5
|
||||||
self._creation_counter = Option._creation_counter
|
self._creation_counter = Option._creation_counter
|
||||||
|
|||||||
@ -1,6 +1,28 @@
|
|||||||
class Token:
|
"""
|
||||||
"""Factory for signal oriented queue messages or other token types."""
|
.. data:: BEGIN
|
||||||
|
|
||||||
|
**BEGIN** token marks the entrypoint of graphs, and all extractors will be connected to this node.
|
||||||
|
|
||||||
|
Without this, it would be impossible for an execution to actually start anything, as it's the marker that tells
|
||||||
|
|bonobo| which node to actually call when the execution starts.
|
||||||
|
|
||||||
|
.. data:: NOT_MODIFIED
|
||||||
|
|
||||||
|
**NOT_MODIFIED** is a special value you can return or yield from a transformation to tell bonobo to reuse
|
||||||
|
the input data as output.
|
||||||
|
|
||||||
|
As a convention, all loaders should return this, so loaders can be chained.
|
||||||
|
|
||||||
|
.. data:: EMPTY
|
||||||
|
|
||||||
|
Shortcut for "empty tuple". It's often much more clear to write (especially in a test) `write(EMPTY)` than
|
||||||
|
`write(())`, although strictly equivalent.
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class Token:
|
||||||
def __init__(self, name):
|
def __init__(self, name):
|
||||||
self.__name__ = name
|
self.__name__ = name
|
||||||
|
|
||||||
@ -8,16 +30,15 @@ class Token:
|
|||||||
return '<{}>'.format(self.__name__)
|
return '<{}>'.format(self.__name__)
|
||||||
|
|
||||||
|
|
||||||
BEGIN = Token('Begin')
|
|
||||||
END = Token('End')
|
|
||||||
|
|
||||||
|
|
||||||
class Flag(Token):
|
class Flag(Token):
|
||||||
must_be_first = False
|
must_be_first = False
|
||||||
must_be_last = False
|
must_be_last = False
|
||||||
allows_data = True
|
allows_data = True
|
||||||
|
|
||||||
|
|
||||||
|
BEGIN = Token('Begin')
|
||||||
|
END = Token('End')
|
||||||
|
|
||||||
INHERIT = Flag('Inherit')
|
INHERIT = Flag('Inherit')
|
||||||
NOT_MODIFIED = Flag('NotModified')
|
NOT_MODIFIED = Flag('NotModified')
|
||||||
NOT_MODIFIED.must_be_first = True
|
NOT_MODIFIED.must_be_first = True
|
||||||
|
|||||||
@ -1,3 +1,11 @@
|
|||||||
|
"""
|
||||||
|
This module contains all tools for Bonobo and Django to interact nicely.
|
||||||
|
|
||||||
|
* :class:`ETLCommand`
|
||||||
|
* :func:`create_or_update`
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
from .utils import create_or_update
|
from .utils import create_or_update
|
||||||
from .commands import ETLCommand
|
from .commands import ETLCommand
|
||||||
|
|
||||||
|
|||||||
@ -26,6 +26,12 @@ class ETLCommand(BaseCommand):
|
|||||||
def create_parser(self, prog_name, subcommand):
|
def create_parser(self, prog_name, subcommand):
|
||||||
return bonobo.get_argument_parser(super().create_parser(prog_name, subcommand))
|
return bonobo.get_argument_parser(super().create_parser(prog_name, subcommand))
|
||||||
|
|
||||||
|
def add_arguments(self, parser):
|
||||||
|
"""
|
||||||
|
Entry point for subclassed commands to add custom arguments.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
def get_graph(self, *args, **options):
|
def get_graph(self, *args, **options):
|
||||||
def not_implemented():
|
def not_implemented():
|
||||||
raise NotImplementedError('You must implement {}.get_graph() method.'.format(self))
|
raise NotImplementedError('You must implement {}.get_graph() method.'.format(self))
|
||||||
|
|||||||
@ -1,3 +1,10 @@
|
|||||||
|
"""
|
||||||
|
Execution logic, surrounding contexts for nodes and graphs and events.
|
||||||
|
|
||||||
|
This module is considered **internal**.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@ -1,3 +1,10 @@
|
|||||||
|
"""
|
||||||
|
Execution Contexts are objects that wrap the stateless data-structures (graphs and nodes) during a job execution to
|
||||||
|
keep an eye on their context/state (from the simplest things like i/o statistics to lifecycle and custom userland
|
||||||
|
state).
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
from bonobo.execution.contexts.graph import GraphExecutionContext
|
from bonobo.execution.contexts.graph import GraphExecutionContext
|
||||||
from bonobo.execution.contexts.node import NodeExecutionContext
|
from bonobo.execution.contexts.node import NodeExecutionContext
|
||||||
from bonobo.execution.contexts.plugin import PluginExecutionContext
|
from bonobo.execution.contexts.plugin import PluginExecutionContext
|
||||||
|
|||||||
@ -1,3 +1,30 @@
|
|||||||
|
"""
|
||||||
|
.. data:: START
|
||||||
|
|
||||||
|
Event dispatched before execution starts.
|
||||||
|
|
||||||
|
.. data:: STARTED
|
||||||
|
|
||||||
|
Event dispatched after execution starts.
|
||||||
|
|
||||||
|
.. data:: TICK
|
||||||
|
|
||||||
|
Event dispatched while execution runs, on a regular basis (on each "tick").
|
||||||
|
|
||||||
|
.. data:: STOP
|
||||||
|
|
||||||
|
Event dispatched before execution stops.
|
||||||
|
|
||||||
|
.. data:: STOPPED
|
||||||
|
|
||||||
|
Event dispatched after execution stops.
|
||||||
|
|
||||||
|
.. data:: KILL
|
||||||
|
|
||||||
|
Event dispatched when execution is killed.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
from whistle import Event
|
from whistle import Event
|
||||||
|
|
||||||
START = 'execution.start'
|
START = 'execution.start'
|
||||||
|
|||||||
@ -1,3 +1,11 @@
|
|||||||
|
"""
|
||||||
|
Execution strategies define how an actual job execution will happen. Default and recommended strategy is "threadpool",
|
||||||
|
for now, which leverages a :obj:`concurrent.futures.ThreadPoolExecutor` to run each node in a separate thread.
|
||||||
|
|
||||||
|
In the future, the two strategies that would really benefit bonobo are subprocess and dask/dask.distributed. Please be
|
||||||
|
at home if you want to give it a shot.
|
||||||
|
|
||||||
|
"""
|
||||||
from bonobo.execution.strategies.executor import ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy
|
from bonobo.execution.strategies.executor import ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy
|
||||||
from bonobo.execution.strategies.naive import NaiveStrategy
|
from bonobo.execution.strategies.naive import NaiveStrategy
|
||||||
|
|
||||||
|
|||||||
@ -1,3 +1,8 @@
|
|||||||
|
"""
|
||||||
|
The Util API, located under the :mod:`bonobo.util` namespace, contains helper functions and decorators to work with
|
||||||
|
and inspect transformations, graphs, and nodes.
|
||||||
|
|
||||||
|
"""
|
||||||
from bonobo.util.collections import cast, ensure_tuple, sortedlist, tuplize
|
from bonobo.util.collections import cast, ensure_tuple, sortedlist, tuplize
|
||||||
from bonobo.util.compat import deprecated, deprecated_alias
|
from bonobo.util.compat import deprecated, deprecated_alias
|
||||||
from bonobo.util.inspect import (
|
from bonobo.util.inspect import (
|
||||||
|
|||||||
22
docs/_static/custom.css
vendored
22
docs/_static/custom.css
vendored
@ -35,13 +35,23 @@ div.note {
|
|||||||
border: 0;
|
border: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
div.admonition {
|
|
||||||
padding: 20px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.last {
|
.last {
|
||||||
margin-bottom: 0 !important;
|
margin-bottom: 0 !important;
|
||||||
}
|
}
|
||||||
pre {
|
|
||||||
padding: 6px 20px;
|
div.admonition {
|
||||||
|
padding: 16px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pre {
|
||||||
|
padding: 16px;
|
||||||
|
border: 1px solid #ddd;
|
||||||
|
background-color: #fafafa;
|
||||||
|
}
|
||||||
|
|
||||||
|
.section > dl {
|
||||||
|
border: 1px solid #ddd;
|
||||||
|
background-color: #fafafa;
|
||||||
|
margin: 16px 0;
|
||||||
|
padding: 16px;
|
||||||
|
}
|
||||||
2
docs/_templates/base.html
vendored
2
docs/_templates/base.html
vendored
@ -12,7 +12,7 @@
|
|||||||
{{ relbar() }}
|
{{ relbar() }}
|
||||||
|
|
||||||
<div class="footer">
|
<div class="footer">
|
||||||
© 2012-2017, <a href="https://romain.dorgueil.net" target="_blank">Romain Dorgueil</a> |
|
© 2012-2018, <a href="https://romain.dorgueil.net" target="_blank">Romain Dorgueil</a> |
|
||||||
<a href="https://www.bonobo-project.org/" target="_blank">Bonobo ETL</a>
|
<a href="https://www.bonobo-project.org/" target="_blank">Bonobo ETL</a>
|
||||||
|
|
||||||
{%- if show_source and has_source and sourcename %}
|
{%- if show_source and has_source and sourcename %}
|
||||||
|
|||||||
@ -14,6 +14,7 @@ import bonobo
|
|||||||
|
|
||||||
extensions = [
|
extensions = [
|
||||||
'sphinx.ext.autodoc',
|
'sphinx.ext.autodoc',
|
||||||
|
'sphinx.ext.autosummary',
|
||||||
'sphinx.ext.doctest',
|
'sphinx.ext.doctest',
|
||||||
'sphinx.ext.intersphinx',
|
'sphinx.ext.intersphinx',
|
||||||
'sphinx.ext.todo',
|
'sphinx.ext.todo',
|
||||||
@ -60,7 +61,12 @@ language = None
|
|||||||
# This patterns also effect to html_static_path and html_extra_path
|
# This patterns also effect to html_static_path and html_extra_path
|
||||||
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
|
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
|
||||||
|
|
||||||
# The name of the Pygments (syntax highlighting) style to use.
|
|
||||||
|
autoclass_content = 'both'
|
||||||
|
autodoc_member_order = 'groupwise'
|
||||||
|
autodoc_default_flags =['members', 'undoc-members', 'show-inheritance', ]
|
||||||
|
|
||||||
|
add_module_names = False
|
||||||
pygments_style = 'sphinx'
|
pygments_style = 'sphinx'
|
||||||
|
|
||||||
# If true, `todo` and `todoList` produce output, else they produce nothing.
|
# If true, `todo` and `todoList` produce output, else they produce nothing.
|
||||||
|
|||||||
@ -1,15 +1,16 @@
|
|||||||
|
.. currentmodule:: bonobo.contrib.django
|
||||||
|
|
||||||
Working with Django
|
Working with Django
|
||||||
===================
|
===================
|
||||||
|
|
||||||
|bonobo| provides a lightweight integration with django, to allow to write management commands using |bonobo| graphs.
|
|bonobo| provides a lightweight integration with django, allowing you to include ETL pipelines in your django management
|
||||||
|
commands.
|
||||||
|
|
||||||
Management Command
|
Quick start
|
||||||
::::::::::::::::::
|
:::::::::::
|
||||||
|
|
||||||
To write a management command with |bonobo|, just extend the :class:`bonobo.contrib.django.ETLCommand` class and
|
To write a django management command that runs |bonobo| job(s), just extend :class:`ETLCommand`
|
||||||
override the `get_graph()` method.
|
instead of :class:`django.core.management.base.BaseCommand`, and override the :meth:`ETLCommand.get_graph` method:
|
||||||
|
|
||||||
Example:
|
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
@ -22,7 +23,10 @@ Example:
|
|||||||
graph.add_chain(...)
|
graph.add_chain(...)
|
||||||
return graph
|
return graph
|
||||||
|
|
||||||
You can also override the `get_services()` method.
|
Services
|
||||||
|
--------
|
||||||
|
|
||||||
|
You can override :meth:`ETLCommand.get_services` to provide your service implementations.
|
||||||
|
|
||||||
One common recipe to do so is to import it from somewhere else and override it as a :obj:`staticmethod`:
|
One common recipe to do so is to import it from somewhere else and override it as a :obj:`staticmethod`:
|
||||||
|
|
||||||
@ -39,15 +43,51 @@ One common recipe to do so is to import it from somewhere else and override it a
|
|||||||
def get_graph(...):
|
def get_graph(...):
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
|
Multiple graphs
|
||||||
|
---------------
|
||||||
|
|
||||||
|
The :meth:`ETLCommand.get_graph` method can also be implemented as a generator. In this case, each element yielded must
|
||||||
|
be a graph, and each graph will be executed in order:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
import bonobo
|
||||||
|
from bonobo.contrib.django import ETLCommand
|
||||||
|
|
||||||
|
class Command(ETLCommand):
|
||||||
|
def get_graph(self, **options):
|
||||||
|
yield bonobo.Graph(...)
|
||||||
|
yield bonobo.Graph(...)
|
||||||
|
yield bonobo.Graph(...)
|
||||||
|
|
||||||
|
This is especially helpful in two major cases:
|
||||||
|
|
||||||
|
* You must ensure that one job is finished before the next is run, and thus you can't add both graphs' nodes in the
|
||||||
|
same graph.
|
||||||
|
* You want to change which graph is run depending on command line arguments.
|
||||||
|
|
||||||
|
|
||||||
|
Command line arguments
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
Like with regular django management commands, you can add arguments to the argument parser by overriding
|
||||||
|
:meth:`ETLCommand.add_arguments`.
|
||||||
|
|
||||||
|
The only difference with django is that the provided argument parser will already have arguments added to handle
|
||||||
|
environment.
|
||||||
|
|
||||||
|
|
||||||
|
Reference
|
||||||
|
:::::::::
|
||||||
|
|
||||||
|
:mod:`bonobo.contrib.django`
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
.. automodule:: bonobo.contrib.django
|
||||||
|
|
||||||
Source code
|
Source code
|
||||||
:::::::::::
|
:::::::::::
|
||||||
|
|
||||||
https://github.com/python-bonobo/bonobo/tree/master/bonobo/contrib/django
|
https://github.com/python-bonobo/bonobo/tree/master/bonobo/contrib/django
|
||||||
|
|
||||||
Reference
|
|
||||||
:::::::::
|
|
||||||
|
|
||||||
.. automodule:: bonobo.contrib.django
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|||||||
@ -1,5 +1,7 @@
|
|||||||
Working with SQL Databases
|
.. currentmodule:: bonobo_sqlalchemy
|
||||||
==========================
|
|
||||||
|
Working with SQLAlchemy
|
||||||
|
=======================
|
||||||
|
|
||||||
.. include:: _beta.rst
|
.. include:: _beta.rst
|
||||||
|
|
||||||
@ -54,6 +56,7 @@ Let's select some data:
|
|||||||
|
|
||||||
And let's insert some data:
|
And let's insert some data:
|
||||||
|
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
import bonobo
|
import bonobo
|
||||||
@ -69,21 +72,13 @@ And let's insert some data:
|
|||||||
|
|
||||||
return graph
|
return graph
|
||||||
|
|
||||||
|
|
||||||
Reference
|
Reference
|
||||||
:::::::::
|
:::::::::
|
||||||
|
|
||||||
.. module:: bonobo_sqlalchemy
|
:mod:`bonobo_sqlalchemy`
|
||||||
|
------------------------
|
||||||
|
|
||||||
Select
|
.. automodule:: bonobo_sqlalchemy
|
||||||
------
|
|
||||||
|
|
||||||
.. autoclass:: Select
|
|
||||||
|
|
||||||
InsertOrUpdate
|
|
||||||
--------------
|
|
||||||
|
|
||||||
.. autoclass:: InsertOrUpdate
|
|
||||||
|
|
||||||
Source code
|
Source code
|
||||||
:::::::::::
|
:::::::::::
|
||||||
|
|||||||
@ -1,5 +1,45 @@
|
|||||||
Debugging
|
Debugging
|
||||||
=========
|
=========
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
This document is still being written; its content should be correct, although succinct.
|
||||||
|
|
||||||
|
Using a debugger (pdb...)
|
||||||
|
:::::::::::::::::::::::::
|
||||||
|
|
||||||
|
Using a debugger works (as in any python piece of code), but you must be aware that each node runs in a separate thread,
|
||||||
|
which means a few things:
|
||||||
|
|
||||||
|
* If a breakpoint happens in a thread, then this thread will stop, but all other threads will continue running. This
|
||||||
|
can be especially annoying if you try to use the pdb REPL for example, as your prompt will be overridden a few
|
||||||
|
times/second by the current execution statistics.
|
||||||
|
|
||||||
|
To avoid that, you can run bonobo with `QUIET=1` in environment, to hide statistics.
|
||||||
|
|
||||||
|
* If your breakpoint never happens (although it's at the very beginning of your transformation), it may mean that
|
||||||
|
something happens out of the transform. The :class:`bonobo.execution.contexts.NodeExecutionContext` instance that
|
||||||
|
surrounds your transformation may be stuck in its `while True: transform()` loop.
|
||||||
|
|
||||||
|
Break one level higher
|
||||||
|
|
||||||
|
|
||||||
|
Using printing statements
|
||||||
|
:::::::::::::::::::::::::
|
||||||
|
|
||||||
|
Of course, you can :obj:`print` things.
|
||||||
|
|
||||||
|
You can even add :obj:`print` statements in graphs, to :obj:`print` once per row.
|
||||||
|
|
||||||
|
A better :obj:`print` is available though, suitable for both flow-based data processing and human eyes.
|
||||||
|
Check :class:`bonobo.PrettyPrinter`.
|
||||||
|
|
||||||
|
|
||||||
|
Inspecting graphs
|
||||||
|
:::::::::::::::::
|
||||||
|
|
||||||
|
* Using the console: `bonobo inspect --graph`.
|
||||||
|
* Using Jupyter notebook: install the extension and just display a graph.
|
||||||
|
|
||||||
|
|
||||||
.. include:: _next.rst
|
.. include:: _next.rst
|
||||||
|
|||||||
56
docs/guide/future/services.rst
Normal file
56
docs/guide/future/services.rst
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
Services
|
||||||
|
========
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
This is a "future" document that does not exist yet; it is only kept here so as not to lose the data until we organize better
|
||||||
|
documentation versioning.
|
||||||
|
|
||||||
|
Future and proposals
|
||||||
|
::::::::::::::::::::
|
||||||
|
|
||||||
|
This is a first implementation and it will evolve. Base concepts will stay the same though.
|
||||||
|
|
||||||
|
May or may not happen, depending on discussions.
|
||||||
|
|
||||||
|
* Singleton or prototype based injection (to use spring terminology, see
|
||||||
|
https://www.tutorialspoint.com/spring/spring_bean_scopes.htm), allowing smart factory usage and efficient sharing of
|
||||||
|
resources.
|
||||||
|
* Lazily resolved parameters, eventually overridden by command line or environment, so you can for example override the
|
||||||
|
database DSN or target filesystem on command line (or with shell environment vars).
|
||||||
|
* Pool based locks that ensure that only one (or n) transformations are using a given service at the same time.
|
||||||
|
* Simple config implementation, using a python file for config (ex: bonobo run ... --services=services_prod.py).
|
||||||
|
* Default configuration for services, using an optional callable (`def get_services(args): ...`). Maybe tie default
|
||||||
|
configuration to graph, but not really a fan because this is unrelated to graph logic.
|
||||||
|
* Default implementation for a service in a transformation or in the descriptor. Maybe not a good idea, because it
|
||||||
|
tends to push forward multiple instances of the same thing, but maybe...
|
||||||
|
|
||||||
|
A few ideas on how it can be implemented, from the user perspective.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
# using call
|
||||||
|
http = Service('http.client')(requests)
|
||||||
|
|
||||||
|
# using more explicit call
|
||||||
|
http = Service('http.client').set_default_impl(requests)
|
||||||
|
|
||||||
|
# using a decorator
|
||||||
|
@Service('http.client')
|
||||||
|
def http(self, services):
|
||||||
|
import requests
|
||||||
|
return requests
|
||||||
|
|
||||||
|
# as a default in a subclass of Service
|
||||||
|
class HttpService(Service):
|
||||||
|
def get_default_impl(self, services):
|
||||||
|
import requests
|
||||||
|
return requests
|
||||||
|
|
||||||
|
# ... then use it as another service
|
||||||
|
http = HttpService('http.client')
|
||||||
|
|
||||||
|
|
||||||
|
This is under development, let us know what you think (slack may be a good place for this).
|
||||||
|
The basics already work, and you can try it.
|
||||||
|
|
||||||
83
docs/guide/future/transformations.rst
Normal file
83
docs/guide/future/transformations.rst
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
Transformations
|
||||||
|
===============
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
This is a "future" document that does not exist yet; it is only kept here so as not to lose the data until we organize better
|
||||||
|
documentation versioning.
|
||||||
|
|
||||||
|
|
||||||
|
Output
|
||||||
|
------
|
||||||
|
|
||||||
|
Let's see the rules (first to match wins).
|
||||||
|
|
||||||
|
1. A flag, eventually followed by something else, marks a special behaviour. If it supports it, the remaining part of
|
||||||
|
the output line will be interpreted using the same rules, and some flags can be combined.
|
||||||
|
|
||||||
|
**NOT_MODIFIED**
|
||||||
|
|
||||||
|
**NOT_MODIFIED** tells bonobo to use the input row unmodified as the output.
|
||||||
|
|
||||||
|
*CANNOT be combined*
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo import NOT_MODIFIED
|
||||||
|
|
||||||
|
def output_will_be_same_as_input(*args, **kwargs):
|
||||||
|
yield NOT_MODIFIED
|
||||||
|
|
||||||
|
2. Once all flags are "consumed", the remaining part is interpreted.
|
||||||
|
|
||||||
|
* If it is a :class:`bonobo.Bag` instance, then it's used directly.
|
||||||
|
* If it is a :class:`dict` then a kwargs-only :class:`bonobo.Bag` will be created.
|
||||||
|
* If it is a :class:`tuple` then an args-only :class:`bonobo.Bag` will be created, unless its last argument is a
|
||||||
|
:class:`dict` in which case a args+kwargs :class:`bonobo.Bag` will be created.
|
||||||
|
* If it's something else, it will be used to create a one-arg-only :class:`bonobo.Bag`.
|
||||||
|
|
||||||
|
**APPEND**
|
||||||
|
|
||||||
|
**APPEND** tells bonobo to append this output to the input (positional arguments will equal `input_args + output_args`,
|
||||||
|
keyword arguments will equal `{**input_kwargs, **output_kwargs}`).
|
||||||
|
|
||||||
|
*CAN be combined, but not with itself*
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo import APPEND
|
||||||
|
|
||||||
|
def output_will_be_appended_to_input(*args, **kwargs):
|
||||||
|
yield APPEND, 'foo', 'bar', {'eat_at': 'joe'}
|
||||||
|
|
||||||
|
**LOOPBACK**
|
||||||
|
|
||||||
|
**LOOPBACK** tells bonobo that this output must be looped back into our own input queue, allowing you to create the stream
|
||||||
|
processing version of recursive algorithms.
|
||||||
|
|
||||||
|
*CAN be combined, but not with itself*
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo import LOOPBACK
|
||||||
|
|
||||||
|
def output_will_be_sent_to_self(*args, **kwargs):
|
||||||
|
yield LOOPBACK, 'Hello, I am the future "you".'
|
||||||
|
|
||||||
|
**CHANNEL(...)**
|
||||||
|
|
||||||
|
**CHANNEL(...)** tells bonobo that this output does not use the default channel and is routed through another path.
|
||||||
|
This is something you should probably not use unless your data flow design is complex, and if you're not certain
|
||||||
|
about it, it probably means that it is not the feature you're looking for.
|
||||||
|
|
||||||
|
*CAN be combined, but not with itself*
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo import CHANNEL
|
||||||
|
|
||||||
|
def output_will_be_sent_to_self(*args, **kwargs):
|
||||||
|
yield CHANNEL("errors"), 'That is not cool.'
|
||||||
|
|
||||||
@ -4,7 +4,6 @@ Graphs
|
|||||||
Graphs are the glue that ties transformations together. They are the only data-structure bonobo can execute directly. Graphs
|
Graphs are the glue that ties transformations together. They are the only data-structure bonobo can execute directly. Graphs
|
||||||
must be acyclic, and can contain as many nodes as your system can handle. However, although in theory the number of nodes can be rather high, practical use cases usually do not exceed more than a few hundred nodes and only then in extreme cases.
|
must be acyclic, and can contain as many nodes as your system can handle. However, although in theory the number of nodes can be rather high, practical use cases usually do not exceed more than a few hundred nodes and only then in extreme cases.
|
||||||
|
|
||||||
|
|
||||||
Within a graph, each node are isolated and can only communicate using their
|
Within a graph, each node are isolated and can only communicate using their
|
||||||
input and output queues. For each input row, a given node will be called with
|
input and output queues. For each input row, a given node will be called with
|
||||||
the row passed as arguments. Each *return* or *yield* value will be put on the
|
the row passed as arguments. Each *return* or *yield* value will be put on the
|
||||||
@ -38,6 +37,11 @@ Handling the data-flow this way brings the following properties:
|
|||||||
Graphs are defined using :class:`bonobo.Graph` instances, as seen in the
|
Graphs are defined using :class:`bonobo.Graph` instances, as seen in the
|
||||||
previous tutorial step.
|
previous tutorial step.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
This document is currently being reviewed to check for correctness after the 0.6 release.
|
||||||
|
|
||||||
|
|
||||||
What can be a node?
|
What can be a node?
|
||||||
:::::::::::::::::::
|
:::::::::::::::::::
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,15 @@
|
|||||||
Best Practices
|
Best Practices
|
||||||
==============
|
==============
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
This document needs to be rewritten for 0.6.
|
||||||
|
|
||||||
|
In particular, `Bag()` was removed, and |bonobo| now ensures your i/o rows are either tuples or some kind of namedtuples.
|
||||||
|
|
||||||
|
Please be aware of that while reading, and eventually check `the migration guide to 0.6
|
||||||
|
<https://news.bonobo-project.org/migration-guide-for-bonobo-0-6-alpha-c1d36b0a9d35>`_.
|
||||||
|
|
||||||
The nature of components, and how the data flow from one to another, can be a bit tricky.
|
The nature of components, and how the data flow from one to another, can be a bit tricky.
|
||||||
Hopefully, they should be very easy to write with a few hints.
|
Hopefully, they should be very easy to write with a few hints.
|
||||||
|
|
||||||
|
|||||||
@ -12,6 +12,11 @@ Hardcoding those services is a good first step, but as your codebase grows, this
|
|||||||
pre-production environment, or production system. Maybe you have similar systems for different clients and want to select
|
pre-production environment, or production system. Maybe you have similar systems for different clients and want to select
|
||||||
the system at runtime, etc.
|
the system at runtime, etc.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
This document is currently being reviewed for correctness.
|
||||||
|
|
||||||
|
|
||||||
Definition of service dependencies
|
Definition of service dependencies
|
||||||
::::::::::::::::::::::::::::::::::
|
::::::::::::::::::::::::::::::::::
|
||||||
|
|
||||||
@ -103,54 +108,6 @@ use of a dependency for the time of the context manager (`with` statement)
|
|||||||
api.last_call()
|
api.last_call()
|
||||||
|
|
||||||
|
|
||||||
Future and proposals
|
|
||||||
::::::::::::::::::::
|
|
||||||
|
|
||||||
This is a first implementation and it will evolve. Base concepts will stay the same though.
|
|
||||||
|
|
||||||
May or may not happen, depending on discussions.
|
|
||||||
|
|
||||||
* Singleton or prototype based injection (to use spring terminology, see
|
|
||||||
https://www.tutorialspoint.com/spring/spring_bean_scopes.htm), allowing smart factory usage and efficient sharing of
|
|
||||||
resources.
|
|
||||||
* Lazily resolved parameters, eventually overriden by command line or environment, so you can for example override the
|
|
||||||
database DSN or target filesystem on command line (or with shell environment vars).
|
|
||||||
* Pool based locks that ensure that only one (or n) transformations are using a given service at the same time.
|
|
||||||
* Simple config implementation, using a python file for config (ex: bonobo run ... --services=services_prod.py).
|
|
||||||
* Default configuration for services, using an optional callable (`def get_services(args): ...`). Maybe tie default
|
|
||||||
configuration to graph, but not really a fan because this is unrelated to graph logic.
|
|
||||||
* Default implementation for a service in a transformation or in the descriptor. Maybe not a good idea, because it
|
|
||||||
tends to push forward multiple instances of the same thing, but maybe...
|
|
||||||
|
|
||||||
A few ideas on how it can be implemented, from the user perspective.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
# using call
|
|
||||||
http = Service('http.client')(requests)
|
|
||||||
|
|
||||||
# using more explicit call
|
|
||||||
http = Service('http.client').set_default_impl(requests)
|
|
||||||
|
|
||||||
# using a decorator
|
|
||||||
@Service('http.client')
|
|
||||||
def http(self, services):
|
|
||||||
import requests
|
|
||||||
return requests
|
|
||||||
|
|
||||||
# as a default in a subclass of Service
|
|
||||||
class HttpService(Service):
|
|
||||||
def get_default_impl(self, services):
|
|
||||||
import requests
|
|
||||||
return requests
|
|
||||||
|
|
||||||
# ... then use it as another service
|
|
||||||
http = HttpService('http.client')
|
|
||||||
|
|
||||||
|
|
||||||
This is under development, let us know what you think (slack may be a good place for this).
|
|
||||||
The basics already work, and you can try it.
|
|
||||||
|
|
||||||
|
|
||||||
Read more
|
Read more
|
||||||
:::::::::
|
:::::::::
|
||||||
|
|||||||
@ -1,44 +1,189 @@
|
|||||||
Transformations
|
Transformations
|
||||||
===============
|
===============
|
||||||
|
|
||||||
Transformations are the smallest building blocks in Bonobo ETL.
|
Transformations are the smallest building blocks in |bonobo|.
|
||||||
|
|
||||||
They are written using standard python callables (or iterables, if you're writing transformations that have no input,
|
There is no special data-structure used to represent transformations: each one is basically just a regular python callable, or
|
||||||
a.k.a extractors).
|
even an iterable object (if it requires no input data).
|
||||||
|
|
||||||
Definitions
|
Once in a graph, transformations become nodes and the data-flow between them is described using edges.
|
||||||
:::::::::::
|
|
||||||
|
|
||||||
Transformation
|
|
||||||
|
|
||||||
The base building block of Bonobo, anything you would insert in a graph as a node. Mostly, a callable or an iterable.
|
|
||||||
|
|
||||||
Extractor
|
|
||||||
|
|
||||||
Special case transformation that use no input. It will be only called once, and its purpose is to generate data,
|
|
||||||
either by itself or by requesting it from an external service.
|
|
||||||
|
|
||||||
Loader
|
|
||||||
|
|
||||||
Special case transformation that feed an external service with data. For convenience, it can also yield the data but
|
|
||||||
a "pure" loader would have no output (although yielding things should have no bad side effect).
|
|
||||||
|
|
||||||
Callable
|
|
||||||
|
|
||||||
Anything one can call, in python. Can be a function, a python builtin, or anything that implements `__call__`
|
|
||||||
|
|
||||||
Iterable
|
|
||||||
|
|
||||||
Something we can iterate on, in python, so basically anything you'd be able to use in a `for` loop.
|
|
||||||
|
|
||||||
|
|
||||||
Concepts
|
.. note::
|
||||||
::::::::
|
|
||||||
|
|
||||||
Whatever kind of transformation you want to use, there are a few common concepts you should know about.
|
In this chapter, we'll consider that anytime we need a "database", it's something we can get from the global
|
||||||
|
namespace. This practice is OK-ish for small jobs, but not at scale.
|
||||||
|
|
||||||
Input
|
You'll learn in :doc:`services` how to manage external dependencies the right way.
|
||||||
-----
|
|
||||||
|
Transformation types
|
||||||
|
::::::::::::::::::::
|
||||||
|
|
||||||
|
General case
|
||||||
|
------------
|
||||||
|
|
||||||
|
The **general case** is a transformation that yields n outputs for each input.
|
||||||
|
|
||||||
|
You can implement it using a generator:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
db = ...
|
||||||
|
|
||||||
|
def get_orders(user_id):
|
||||||
|
for order in db.get_orders(user_id):
|
||||||
|
yield user_id, order
|
||||||
|
|
||||||
|
.. graphviz::
|
||||||
|
|
||||||
|
digraph {
|
||||||
|
rankdir = LR;
|
||||||
|
stylesheet = "../_static/graphs.css";
|
||||||
|
|
||||||
|
BEFORE [shape=record label="0|1|<current>2|3|…" fontname="Courier New" fontsize=8 margin=0.03 width=0.3 style=filled fillcolor="#fafafa"];
|
||||||
|
AFTER [shape=record label="{0|order#98}|{<current>2|order#42}|{2|order#43}|{3|order#11}|{3|order#12}|{3|order#16}|{3|order#18}|…" fontname="Courier New" fontsize=8 margin=0.03 width=0.3 style=filled fillcolor="#fafafa"];
|
||||||
|
BEFORE:current -> "get_orders()" -> AFTER:current;
|
||||||
|
|
||||||
|
db [shape=cylinder label="" width=0.5 height=0.4];
|
||||||
|
db -> "get_orders()" [arrowhead=onormal];
|
||||||
|
{ rank = same; "get_orders()" db }
|
||||||
|
}
|
||||||
|
|
||||||
|
*Here, each row (containing a user id) will be transformed into a set of rows, each containing a user_id and an "order"
|
||||||
|
object.*
|
||||||
|
|
||||||
|
Extractor case
|
||||||
|
--------------
|
||||||
|
|
||||||
|
An **extractor** is a transformation that generates output without using any input. Usually, it does not generate this
|
||||||
|
data out of nowhere, but instead connects to an external system (database, api, http, files ...) to read the data from
|
||||||
|
there.
|
||||||
|
|
||||||
|
It can be implemented in two different ways.
|
||||||
|
|
||||||
|
* You can implement it using a generator, like in the general case:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
db = ...
|
||||||
|
|
||||||
|
def extract_user_ids():
|
||||||
|
yield from db.select_all_user_ids()
|
||||||
|
|
||||||
|
.. graphviz::
|
||||||
|
|
||||||
|
digraph {
|
||||||
|
rankdir = LR;
|
||||||
|
stylesheet = "../_static/graphs.css";
|
||||||
|
|
||||||
|
BEGIN [shape=point];
|
||||||
|
AFTER [shape=record label="<f0>0|1|2|3|…" fontname="Courier New" fontsize=8 margin=0.03 width=0.3 style=filled fillcolor="#fafafa"];
|
||||||
|
BEGIN -> "extract_user_ids()" -> AFTER:f0;
|
||||||
|
|
||||||
|
|
||||||
|
db [shape=cylinder label="" width=0.5 height=0.4];
|
||||||
|
db -> "extract_user_ids()" [arrowhead=onormal];
|
||||||
|
{ rank = same; "extract_user_ids()" db }
|
||||||
|
}
|
||||||
|
|
||||||
|
* You can also use an iterator directly:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
import bonobo
|
||||||
|
|
||||||
|
db = ...
|
||||||
|
|
||||||
|
def get_graph():
|
||||||
|
graph = bonobo.Graph()
|
||||||
|
graph.add_chain(
|
||||||
|
db.select_all_user_ids(),
|
||||||
|
...
|
||||||
|
)
|
||||||
|
return graph
|
||||||
|
|
||||||
|
It is very convenient in many cases, when your existing system already has an interface that gives you iterators.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
It's important to use a generative approach that yields data as it is provided and does not generate everything
|
||||||
|
at once before returning, so |bonobo| can pass the data to the next nodes as soon as it starts streaming.
|
||||||
|
|
||||||
|
Loader case
|
||||||
|
-----------
|
||||||
|
|
||||||
|
A **loader** is a transformation that sends its input into an external system. To have a perfect symmetry with
|
||||||
|
extractors, we'd like not to have any output but as a convenience and because it has a negligible cost
|
||||||
|
in |bonobo|, the convention is that all loaders return :obj:`bonobo.constants.NOT_MODIFIED`, meaning that all rows that
|
||||||
|
streamed into this node's input will also stream into its outputs, not modified. It allows you to chain transformations even
|
||||||
|
after a loader happened, and avoid using shenanigans to achieve the same thing:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo.constants import NOT_MODIFIED
|
||||||
|
|
||||||
|
analytics_db = ...
|
||||||
|
|
||||||
|
def load_into_analytics_db(user_id, order):
|
||||||
|
analytics_db.insert_or_update_order(user_id, order['id'], order['amount'])
|
||||||
|
return NOT_MODIFIED
|
||||||
|
|
||||||
|
|
||||||
|
.. graphviz::
|
||||||
|
|
||||||
|
digraph {
|
||||||
|
rankdir = LR;
|
||||||
|
stylesheet = "../_static/graphs.css";
|
||||||
|
|
||||||
|
BEFORE [shape=record label="{0|order#98}|{2|<current>order#42}|{2|order#43}|{3|order#11}|{3|order#12}|{3|order#16}|{3|order#18}|…" fontname="Courier New" fontsize=8 margin=0.03 width=0.3 style=filled fillcolor="#fafafa"];
|
||||||
|
AFTER [shape=record label="{0|order#98}|{<current>2|order#42}|{2|order#43}|{3|order#11}|{3|order#12}|{3|order#16}|{3|order#18}|…" fontname="Courier New" fontsize=8 margin=0.03 width=0.3 style=filled fillcolor="#fafafa"];
|
||||||
|
BEFORE:current -> "load_into_analytics_db()";
|
||||||
|
"load_into_analytics_db()" -> AFTER:current [label="NOT_MODIFIED" fontsize=8 fontname="Courier New"];
|
||||||
|
|
||||||
|
db [shape=cylinder label="" width=0.5 height=0.4];
|
||||||
|
db -> "load_into_analytics_db()" [arrowtail=onormal dir=back];
|
||||||
|
{ rank = same; "load_into_analytics_db()" db }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Execution Context
|
||||||
|
:::::::::::::::::
|
||||||
|
|
||||||
|
Transformations being regular functions, a bit of machinery is required to use them as nodes in a streaming flow.
|
||||||
|
|
||||||
|
When a :class:`bonobo.Graph` is executed, each node is wrapped in a
|
||||||
|
:class:`bonobo.execution.contexts.NodeExecutionContext` which is responsible for keeping the state of a node, within a
|
||||||
|
given execution.
|
||||||
|
|
||||||
|
|
||||||
|
Inputs and Outputs
|
||||||
|
::::::::::::::::::
|
||||||
|
|
||||||
|
When run in an execution context, transformations have inputs and outputs, which means that |bonobo| will pass data
|
||||||
|
that comes in the input queue as calls, and push returned / yielded values into the output queue.
|
||||||
|
|
||||||
|
|
||||||
|
.. graphviz::
|
||||||
|
|
||||||
|
digraph {
|
||||||
|
rankdir = LR;
|
||||||
|
stylesheet = "../_static/graphs.css";
|
||||||
|
|
||||||
|
"Input Queue" [shape=record label="{|||||}" margin=0.03 width=1 style=filled fillcolor="#fafafa" height=0.25];
|
||||||
|
"Output Queue" [shape=record label="{|||||}" margin=0.03 width=1 style=filled fillcolor="#fafafa" height=0.25];
|
||||||
|
|
||||||
|
"Input Queue" -> "transformation" [label="input"];
|
||||||
|
"transformation" -> "Output Queue" [label="output"];
|
||||||
|
}
|
||||||
|
|
||||||
|
For thread-based strategies, the underlying implementation of the input and output queues is the standard
|
||||||
|
:class:`queue.Queue`.
|
||||||
|
|
||||||
|
|
||||||
|
Inputs
|
||||||
|
------
|
||||||
|
|
||||||
|
.. todo:: proofread, check consistency and correctness
|
||||||
|
|
||||||
All input is retrieved via the call arguments. Each line of input means one call to the callable provided. Arguments
|
All input is retrieved via the call arguments. Each line of input means one call to the callable provided. Arguments
|
||||||
will be, in order:
|
will be, in order:
|
||||||
@ -52,133 +197,39 @@ You'll see below how to pass each of those.
|
|||||||
Output
|
Output
|
||||||
------
|
------
|
||||||
|
|
||||||
|
.. todo:: proofread, check consistency and correctness
|
||||||
|
|
||||||
Each callable can return/yield different things (all examples will use yield, but if there is only one output per input
|
Each callable can return/yield different things (all examples will use yield, but if there is only one output per input
|
||||||
line, you can also return your output row and expect the exact same behaviour).
|
line, you can also return your output row and expect the exact same behaviour).
|
||||||
|
|
||||||
Let's see the rules (first to match wins).
|
.. todo:: add rules for output parsing
|
||||||
|
|
||||||
1. A flag, eventually followed by something else, marks a special behaviour. If it supports it, the remaining part of
|
The logic is defined in this piece of code; documentation will be added soon:
|
||||||
the output line will be interpreted using the same rules, and some flags can be combined.
|
|
||||||
|
|
||||||
**NOT_MODIFIED**
|
.. literalinclude:: ../../bonobo/execution/contexts/node.py
|
||||||
|
:caption: NodeExecutionContext._cast(self, _input, _output)
|
||||||
|
:pyobject: NodeExecutionContext._cast
|
||||||
|
|
||||||
**NOT_MODIFIED** tells bonobo to use the input row unmodified as the output.
|
Basically, after checking a few flags (`NOT_MODIFIED`, then `INHERIT`), it will "cast" the data into the "output type",
|
||||||
|
which is either a tuple or a kind of namedtuple.
|
||||||
|
|
||||||
*CANNOT be combined*
|
.. todo:: document cast/input_type/output_type logic.
|
||||||
|
|
||||||
Example:
|
Class-based Transformations
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
from bonobo import NOT_MODIFIED
|
|
||||||
|
|
||||||
def output_will_be_same_as_input(*args, **kwargs):
|
|
||||||
yield NOT_MODIFIED
|
|
||||||
|
|
||||||
**APPEND**
|
|
||||||
|
|
||||||
**APPEND** tells bonobo to append this output to the input (positional arguments will equal `input_args + output_args`,
|
|
||||||
keyword arguments will equal `{**input_kwargs, **output_kwargs}`).
|
|
||||||
|
|
||||||
*CAN be combined, but not with itself*
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
from bonobo import APPEND
|
|
||||||
|
|
||||||
def output_will_be_appended_to_input(*args, **kwargs):
|
|
||||||
yield APPEND, 'foo', 'bar', {'eat_at': 'joe'}
|
|
||||||
|
|
||||||
**LOOPBACK**
|
|
||||||
|
|
||||||
**LOOPBACK** tells bonobo that this output must be looped back into our own input queue, allowing to create the stream
|
|
||||||
processing version of recursive algorithms.
|
|
||||||
|
|
||||||
*CAN be combined, but not with itself*
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
from bonobo import LOOPBACK
|
|
||||||
|
|
||||||
def output_will_be_sent_to_self(*args, **kwargs):
|
|
||||||
yield LOOPBACK, 'Hello, I am the future "you".'
|
|
||||||
|
|
||||||
**CHANNEL(...)**
|
|
||||||
|
|
||||||
**CHANNEL(...)** tells bonobo that this output does not use the default channel and is routed through another path.
|
|
||||||
This is something you should probably not use unless your data flow design is complex, and if you're not certain
|
|
||||||
about it, it probably means that it is not the feature you're looking for.
|
|
||||||
|
|
||||||
*CAN be combined, but not with itself*
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
from bonobo import CHANNEL
|
|
||||||
|
|
||||||
def output_will_be_sent_to_self(*args, **kwargs):
|
|
||||||
yield CHANNEL("errors"), 'That is not cool.'
|
|
||||||
|
|
||||||
2. Once all flags are "consumed", the remaining part is interpreted.
|
|
||||||
|
|
||||||
* If it is a :class:`bonobo.Bag` instance, then it's used directly.
|
|
||||||
* If it is a :class:`dict` then a kwargs-only :class:`bonobo.Bag` will be created.
|
|
||||||
* If it is a :class:`tuple` then an args-only :class:`bonobo.Bag` will be created, unless its last argument is a
|
|
||||||
:class:`dict` in which case a args+kwargs :class:`bonobo.Bag` will be created.
|
|
||||||
* If it's something else, it will be used to create a one-arg-only :class:`bonobo.Bag`.
|
|
||||||
|
|
||||||
Function based transformations
|
|
||||||
::::::::::::::::::::::::::::::
|
|
||||||
|
|
||||||
The most basic transformations are function-based. Which means that you define a function, and it will be used directly
|
|
||||||
in a graph.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
def get_representation(row):
|
|
||||||
return repr(row)
|
|
||||||
|
|
||||||
graph = bonobo.Graph(
|
|
||||||
[...],
|
|
||||||
get_representation,
|
|
||||||
[...],
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
It does not allow any configuration, but if it's an option, prefer it as it's simpler to write.
|
|
||||||
|
|
||||||
|
|
||||||
Class based transformations
|
|
||||||
:::::::::::::::::::::::::::
|
:::::::::::::::::::::::::::
|
||||||
|
|
||||||
For less basic use cases, you'll want to use classes to define some of your transformations. It's also a better choice
|
For use cases that are either less simple or require better reusability, you may want to use classes to define
|
||||||
to build reusable blocks, as you'll be able to create parametrizable transformations that the end user will be able to
|
some of your transformations.
|
||||||
configure at the last minute.
|
|
||||||
|
|
||||||
|
.. todo:: narrative doc
|
||||||
|
|
||||||
Configurable
|
See:
|
||||||
------------
|
|
||||||
|
|
||||||
.. autoclass:: bonobo.config.Configurable
|
* :class:`bonobo.config.Configurable`
|
||||||
|
* :class:`bonobo.config.Option`
|
||||||
Options
|
* :class:`bonobo.config.Service`
|
||||||
-------
|
* :class:`bonobo.config.Method`
|
||||||
|
* :class:`bonobo.config.ContextProcessor`
|
||||||
.. autoclass:: bonobo.config.Option
|
|
||||||
|
|
||||||
Services
|
|
||||||
--------
|
|
||||||
|
|
||||||
.. autoclass:: bonobo.config.Service
|
|
||||||
|
|
||||||
Methods
|
|
||||||
-------
|
|
||||||
|
|
||||||
.. autoclass:: bonobo.config.Method
|
|
||||||
|
|
||||||
ContextProcessors
|
|
||||||
-----------------
|
|
||||||
|
|
||||||
.. autoclass:: bonobo.config.ContextProcessor
|
|
||||||
|
|
||||||
|
|
||||||
Naming conventions
|
Naming conventions
|
||||||
|
|||||||
@ -1,12 +0,0 @@
|
|||||||
Bonobo API
|
|
||||||
==========
|
|
||||||
|
|
||||||
The Bonobo API, available directly under the :mod:`bonobo` package, contains all the tools you need to get started with
|
|
||||||
bonobo.
|
|
||||||
|
|
||||||
.. automodule:: bonobo
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
|
|
||||||
10
docs/reference/api/bonobo.rst
Normal file
10
docs/reference/api/bonobo.rst
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
:mod:`Bonobo <bonobo>`
|
||||||
|
======================
|
||||||
|
|
||||||
|
.. currentmodule:: bonobo
|
||||||
|
|
||||||
|
:Module: :mod:`bonobo`
|
||||||
|
|
||||||
|
.. automodule:: bonobo
|
||||||
|
|
||||||
|
|
||||||
10
docs/reference/api/bonobo/config.rst
Normal file
10
docs/reference/api/bonobo/config.rst
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
:mod:`Config <bonobo.config>`
|
||||||
|
=============================
|
||||||
|
|
||||||
|
.. currentmodule:: bonobo.config
|
||||||
|
|
||||||
|
:Module: :mod:`bonobo.config`
|
||||||
|
|
||||||
|
.. automodule:: bonobo.config
|
||||||
|
|
||||||
|
|
||||||
11
docs/reference/api/bonobo/constants.rst
Normal file
11
docs/reference/api/bonobo/constants.rst
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
:mod:`Constants <bonobo.constants>`
|
||||||
|
===================================
|
||||||
|
|
||||||
|
.. currentmodule:: bonobo.constants
|
||||||
|
|
||||||
|
:Module: :mod:`bonobo.constants`
|
||||||
|
|
||||||
|
.. automodule:: bonobo.constants
|
||||||
|
:no-members:
|
||||||
|
|
||||||
|
|
||||||
19
docs/reference/api/bonobo/execution.rst
Normal file
19
docs/reference/api/bonobo/execution.rst
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
:mod:`Execution <bonobo.execution>`
|
||||||
|
===================================
|
||||||
|
|
||||||
|
.. currentmodule:: bonobo.execution
|
||||||
|
|
||||||
|
:Module: :mod:`bonobo.execution`
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:caption: Submodules
|
||||||
|
:maxdepth: 1
|
||||||
|
|
||||||
|
execution/contexts
|
||||||
|
execution/events
|
||||||
|
execution/strategies
|
||||||
|
|
||||||
|
.. automodule:: bonobo.execution
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
10
docs/reference/api/bonobo/execution/contexts.rst
Normal file
10
docs/reference/api/bonobo/execution/contexts.rst
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
:mod:`Execution Contexts <bonobo.execution.contexts>`
|
||||||
|
=====================================================
|
||||||
|
|
||||||
|
.. currentmodule:: bonobo.execution.contexts
|
||||||
|
|
||||||
|
:Module: :mod:`bonobo.execution.contexts`
|
||||||
|
|
||||||
|
.. automodule:: bonobo.execution.contexts
|
||||||
|
|
||||||
|
|
||||||
10
docs/reference/api/bonobo/execution/events.rst
Normal file
10
docs/reference/api/bonobo/execution/events.rst
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
:mod:`Execution Events <bonobo.execution.events>`
|
||||||
|
=================================================
|
||||||
|
|
||||||
|
.. currentmodule:: bonobo.execution.events
|
||||||
|
|
||||||
|
:Module: :mod:`bonobo.execution.events`
|
||||||
|
|
||||||
|
.. automodule:: bonobo.execution.events
|
||||||
|
|
||||||
|
|
||||||
10
docs/reference/api/bonobo/execution/strategies.rst
Normal file
10
docs/reference/api/bonobo/execution/strategies.rst
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
:mod:`Execution Strategies <bonobo.execution.strategies>`
|
||||||
|
=========================================================
|
||||||
|
|
||||||
|
.. currentmodule:: bonobo.execution.strategies
|
||||||
|
|
||||||
|
:Module: :mod:`bonobo.execution.strategies`
|
||||||
|
|
||||||
|
.. automodule:: bonobo.execution.strategies
|
||||||
|
|
||||||
|
|
||||||
10
docs/reference/api/bonobo/util.rst
Normal file
10
docs/reference/api/bonobo/util.rst
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
:mod:`Util <bonobo.util>`
|
||||||
|
=========================
|
||||||
|
|
||||||
|
.. currentmodule:: bonobo.util
|
||||||
|
|
||||||
|
:Module: :mod:`bonobo.util`
|
||||||
|
|
||||||
|
.. automodule:: bonobo.util
|
||||||
|
|
||||||
|
|
||||||
@ -1,10 +0,0 @@
|
|||||||
Config API
|
|
||||||
==========
|
|
||||||
|
|
||||||
The Config API, located under the :mod:`bonobo.config` namespace, contains all the tools you need to create
|
|
||||||
configurable transformations, either class-based or function-based.
|
|
||||||
|
|
||||||
.. automodule:: bonobo.config
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
@ -1,10 +0,0 @@
|
|||||||
Util API
|
|
||||||
========
|
|
||||||
|
|
||||||
The Util API, located under the :mod:`bonobo.util` namespace, contains helpers functions and decorators to work with
|
|
||||||
and inspect transformations, graphs, and nodes.
|
|
||||||
|
|
||||||
.. automodule:: bonobo.util
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
@ -1,15 +1,18 @@
|
|||||||
References
|
Reference
|
||||||
==========
|
=========
|
||||||
|
|
||||||
Reference documents of all stable APIs and modules. If something is not here, please be careful about using it as it
|
Reference documents of all stable APIs and modules. If something is not here, please be careful about using it as it
|
||||||
means that the api is not yet 1.0-proof.
|
means that the api is not yet 1.0-proof.
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 4
|
:maxdepth: 2
|
||||||
|
|
||||||
api
|
api/bonobo
|
||||||
api_config
|
api/bonobo/config
|
||||||
api_util
|
api/bonobo/constants
|
||||||
|
api/bonobo/execution
|
||||||
|
api/bonobo/util
|
||||||
commands
|
commands
|
||||||
settings
|
settings
|
||||||
examples
|
examples
|
||||||
|
private
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
alabaster==0.7.10
|
alabaster==0.7.10
|
||||||
arrow==0.12.0
|
arrow==0.12.0
|
||||||
attrs==17.4.0
|
attrs==17.4.0
|
||||||
babel==2.5.1
|
babel==2.5.3
|
||||||
binaryornot==0.4.4
|
binaryornot==0.4.4
|
||||||
certifi==2017.11.5
|
certifi==2017.11.5
|
||||||
chardet==3.0.4
|
chardet==3.0.4
|
||||||
|
|||||||
@ -19,7 +19,7 @@ markupsafe==1.0
|
|||||||
mistune==0.8.3
|
mistune==0.8.3
|
||||||
nbconvert==5.3.1
|
nbconvert==5.3.1
|
||||||
nbformat==4.4.0
|
nbformat==4.4.0
|
||||||
notebook==5.3.0rc1
|
notebook==5.2.2
|
||||||
pandocfilters==1.4.2
|
pandocfilters==1.4.2
|
||||||
parso==0.1.1
|
parso==0.1.1
|
||||||
pexpect==4.3.1
|
pexpect==4.3.1
|
||||||
@ -28,14 +28,13 @@ prompt-toolkit==1.0.15
|
|||||||
ptyprocess==0.5.2
|
ptyprocess==0.5.2
|
||||||
pygments==2.2.0
|
pygments==2.2.0
|
||||||
python-dateutil==2.6.1
|
python-dateutil==2.6.1
|
||||||
pyzmq==17.0.0b3
|
pyzmq==16.0.3
|
||||||
qtconsole==4.3.1
|
qtconsole==4.3.1
|
||||||
send2trash==1.4.2
|
|
||||||
simplegeneric==0.8.1
|
simplegeneric==0.8.1
|
||||||
six==1.11.0
|
six==1.11.0
|
||||||
terminado==0.8.1
|
terminado==0.8.1
|
||||||
testpath==0.3.1
|
testpath==0.3.1
|
||||||
tornado==5.0a1
|
tornado==4.5.3
|
||||||
traitlets==4.3.2
|
traitlets==4.3.2
|
||||||
wcwidth==0.1.7
|
wcwidth==0.1.7
|
||||||
webencodings==0.5.1
|
webencodings==0.5.1
|
||||||
|
|||||||
@ -18,7 +18,7 @@ python-slugify==1.2.4
|
|||||||
pytz==2017.3
|
pytz==2017.3
|
||||||
requests==2.18.4
|
requests==2.18.4
|
||||||
six==1.11.0
|
six==1.11.0
|
||||||
sqlalchemy==1.2.0
|
sqlalchemy==1.2.1
|
||||||
stevedore==1.28.0
|
stevedore==1.28.0
|
||||||
unidecode==1.0.22
|
unidecode==1.0.22
|
||||||
urllib3==1.22
|
urllib3==1.22
|
||||||
|
|||||||
Reference in New Issue
Block a user