Merge pull request #10 from hartym/master

Updating documentation.
This commit is contained in:
Romain Dorgueil
2016-12-27 23:59:13 +01:00
committed by GitHub
45 changed files with 1114 additions and 151 deletions

View File

@ -23,4 +23,4 @@ exclude_lines =
ignore_errors = True
[html]
directory = doc/_build/html/coverage
directory = docs/_build/html/coverage

View File

@ -36,16 +36,20 @@ with open(os.path.realpath(os.path.join(os.path.dirname(__file__), '../version.t
__all__ = [
'Bag',
'FileWriter',
'Graph',
'NaiveStrategy',
'JsonFileWriter',
'NOT_MODIFIED',
'NaiveStrategy',
'ProcessPoolExecutorStrategy',
'ThreadPoolExecutorStrategy',
'console_run',
'head',
'inject',
'jupyter_run',
'log',
'noop',
'run',
'service',
'tee',
'to_json',
]

View File

9
bonobo/compat/pandas.py Normal file
View File

@ -0,0 +1,9 @@
from bonobo import FileWriter, JsonFileWriter
to_file = FileWriter
to_json = JsonFileWriter
__all__ = [
'to_json',
'to_file',
]

View File

@ -33,8 +33,8 @@ class Bag:
def flags(self):
return self._flags
def apply(self, func, *args, **kwargs):
return func(*args, *self.args, **kwargs, **self.kwargs)
def apply(self, func_or_iter, *args, **kwargs):
return func_or_iter(*args, *self.args, **kwargs, **self.kwargs)
def extend(self, *args, **kwargs):
return type(self)(*args, _parent=self, **kwargs)

View File

@ -128,7 +128,7 @@ class ComponentExecutionContext(WithStatistics, AbstractLoopContext):
@property
def name(self):
return self.component.__name__
return getattr(self.component, '__name__', getattr(type(self.component), '__name__', repr(self.component)))
def __init__(self, component, parent):
self.parent = parent

View File

@ -6,9 +6,10 @@ class Graph:
Represents a coherent directed acyclic graph (DAG) of components.
"""
def __init__(self):
def __init__(self, *chain):
self.components = []
self.graph = {BEGIN: set()}
self.add_chain(*chain)
def outputs_of(self, idx, create=False):
if create and not idx in self.graph:

View File

@ -1,7 +1,3 @@
from .helpers import console_run
from .plugin import ConsoleOutputPlugin
__all__ = [
'ConsoleOutputPlugin',
'console_run',
]
__all__ = ['ConsoleOutputPlugin', ]

View File

@ -1,9 +0,0 @@
from bonobo import Graph, ThreadPoolExecutorStrategy
from .plugin import ConsoleOutputPlugin
def console_run(*chain, output=True, plugins=None):
graph = Graph()
executor = ThreadPoolExecutorStrategy()
graph.add_chain(*chain)
return executor.execute(graph, plugins=(plugins or []) + [ConsoleOutputPlugin()] if output else [])

View File

@ -1,4 +1,4 @@
from .helpers import jupyter_run
from bonobo.util.helpers import jupyter_run
from .plugin import JupyterOutputPlugin
@ -6,7 +6,4 @@ def _jupyter_nbextension_paths():
return [{'section': 'notebook', 'src': 'static', 'dest': 'bonobo-jupyter', 'require': 'bonobo-jupyter/extension'}]
__all__ = [
'JupyterOutputPlugin',
'jupyter_run',
]
__all__ = ['JupyterOutputPlugin', ]

View File

@ -1,9 +1 @@
from bonobo import Graph, ThreadPoolExecutorStrategy
from .plugin import JupyterOutputPlugin
def jupyter_run(*chain, plugins=None):
graph = Graph()
executor = ThreadPoolExecutorStrategy()
graph.add_chain(*chain)
return executor.execute(graph, plugins=(plugins or []) + [JupyterOutputPlugin()])

View File

@ -3,11 +3,18 @@ from urllib.parse import urlencode
import requests # todo: make this a service so we can substitute it ?
def extract_ods(url, dataset, rows=100, **kwargs):
def from_opendatasoft_api(dataset=None,
endpoint='{scheme}://{netloc}{path}',
scheme='https',
netloc='data.opendatasoft.com',
path='/api/records/1.0/search/',
rows=100,
**kwargs):
path = path if path.startswith('/') else '/' + path
params = (
('dataset', dataset),
('rows', rows), ) + tuple(sorted(kwargs.items()))
base_url = url + '?' + urlencode(params)
base_url = endpoint.format(scheme=scheme, netloc=netloc, path=path) + '?' + urlencode(params)
def _extract_ods():
nonlocal base_url, rows

View File

@ -1,5 +1,9 @@
""" Readers and writers for common file formats. """
from .json import *
from .file import FileWriter
from .json import JsonFileWriter
__all__ = ['to_json', ]
__all__ = [
'FileWriter',
'JsonFileWriter',
]

35
bonobo/io/file.py Normal file
View File

@ -0,0 +1,35 @@
from bonobo.util.lifecycle import with_context
__all__ = ['FileWriter', ]
@with_context
class FileWriter:
    """Component that writes each incoming row to a file, separated by ``eol``.

    The open file handle and the "first row" flag are stored on the context
    object handed to every hook, not on the writer instance itself.
    """

    # XXX TODO implement @with_context like this ? Pros and cons ?
    class Meta:
        contextual = True

    def __init__(self, path_or_buf, eol='\n'):
        """Remember the output target and the separator written between rows."""
        self.eol = eol
        self.path_or_buf = path_or_buf

    def initialize(self, ctx):
        """Open ``path_or_buf`` in ``'w+'`` mode and flag the next row as the first.

        todo add lock file ? optional maybe ?
        """
        handle_already_set = hasattr(ctx, 'fp')
        assert not handle_already_set, 'One at a time, baby.'
        ctx.fp = open(self.path_or_buf, 'w+')
        ctx.first = True

    def write(self, fp, line, prefix=''):
        """Write ``prefix`` immediately followed by ``line`` to ``fp``."""
        payload = prefix + line
        fp.write(payload)

    def __call__(self, ctx, row):
        """Write one row; every row but the first is preceded by ``self.eol``."""
        separator = self.eol
        if ctx.first:
            # Nothing has been written yet, so there is nothing to separate from.
            separator, ctx.first = '', False
        self.write(ctx.fp, row, prefix=separator)

    def finalize(self, ctx):
        """Close the file and drop the attributes that ``initialize`` set on ``ctx``."""
        ctx.fp.close()
        del ctx.fp
        del ctx.first

View File

@ -1,40 +1,23 @@
import json
from .file import FileWriter
from bonobo.util.lifecycle import with_context
__all__ = [
'from_json',
'to_json',
]
__all__ = ['JsonFileWriter', ]
@with_context
class JsonWriter:
class JsonFileWriter(FileWriter):
def __init__(self, path_or_buf):
self.path_or_buf = path_or_buf
super().__init__(path_or_buf, eol=',\n')
def initialize(self, ctx):
assert not hasattr(ctx, 'fp'), 'One at a time, baby.'
ctx.fp = open(self.path_or_buf, 'w+')
super().initialize(ctx)
ctx.fp.write('[\n')
ctx.first = True
def __call__(self, ctx, row):
if ctx.first:
prefix = ''
ctx.first = False
else:
prefix = ',\n'
ctx.fp.write(prefix + json.dumps(row))
def write(self, fp, line, prefix=''):
fp.write(prefix + json.dumps(line))
def finalize(self, ctx):
ctx.fp.write('\n]')
ctx.fp.close()
del ctx.fp, ctx.first
def from_json(path_or_buf):
pass
to_json = JsonWriter
super().finalize(ctx)

View File

@ -4,12 +4,16 @@ import functools
import pprint
from .tokens import NOT_MODIFIED
from .helpers import run, console_run, jupyter_run
__all__ = [
'NOT_MODIFIED',
'console_run',
'head',
'jupyter_run',
'log',
'noop',
'run',
'tee',
]

20
bonobo/util/helpers.py Normal file
View File

@ -0,0 +1,20 @@
def run(*chain, plugins=None):
    """Build a graph from ``chain`` and execute it with a thread-pool strategy.

    :param chain: components passed, in order, to ``Graph.add_chain``.
    :param plugins: optional plugins handed to the strategy; a falsy value
        is replaced by an empty list.
    :return: whatever ``ThreadPoolExecutorStrategy.execute`` returns.
    """
    # Function-scope import kept from the original; presumably it avoids a
    # circular import with the ``bonobo`` package -- TODO confirm.
    from bonobo import Graph, ThreadPoolExecutorStrategy

    pipeline = Graph()
    pipeline.add_chain(*chain)
    strategy = ThreadPoolExecutorStrategy()
    if plugins:
        return strategy.execute(pipeline, plugins=plugins)
    return strategy.execute(pipeline, plugins=[])
def console_run(*chain, output=True, plugins=None):
    """Run ``chain`` through :func:`run`, optionally adding console output.

    :param chain: components forwarded unchanged to :func:`run`.
    :param output: when true, a ``ConsoleOutputPlugin`` is appended to the
        plugin list.
    :param plugins: optional iterable of extra plugins; ``None`` means none.
    :return: whatever :func:`run` returns.
    """
    # Function-scope import kept from the original; presumably it avoids a
    # circular import with the ``bonobo`` package -- TODO confirm.
    from bonobo.ext.console import ConsoleOutputPlugin

    # ``output`` must only decide whether the console plugin is added. The
    # previous one-liner ``a + b if output else []`` parsed as
    # ``(a + b) if output else []`` and silently dropped the caller's plugins
    # whenever ``output`` was falsy.
    active_plugins = list(plugins or [])
    if output:
        active_plugins.append(ConsoleOutputPlugin())
    return run(*chain, plugins=active_plugins)
def jupyter_run(*chain, plugins=None):
    """Run ``chain`` through :func:`run` with a ``JupyterOutputPlugin`` appended.

    :param chain: components forwarded unchanged to :func:`run`.
    :param plugins: optional list of extra plugins placed before the Jupyter
        plugin; a falsy value is treated as an empty list.
    :return: whatever :func:`run` returns.
    """
    from bonobo.ext.jupyter import JupyterOutputPlugin

    extra_plugins = plugins or []
    return run(*chain, plugins=extra_plugins + [JupyterOutputPlugin()])

View File

@ -1,43 +1,83 @@
{% extends "layout.html" %}
{% set title = _('Overview') %}
{% set title = _('Bonobo — Data processing for humans') %}
{% block body %}
<div style="border: 2px solid red; font-weight: bold;">
Migration in progress, things may be broken for now. Please give us some time to finish painting the walls.
<div style="border: 2px solid red; font-weight: bold; margin: 1em; padding: 1em">
Rewrite in progress, things may be broken for now. Please give us some time to finish painting the walls.
</div>
<h1>{{ _('Welcome to Bonobo\'s Documentation') }}</h1>
<div style="text-align: center;">
<img class="logo" src="{{ pathto('_static/bonobo.png', 1) }}" title="Bonobo"
<h1 style="text-align: center">
<img class="logo" src="{{ pathto('_static/bonobo.png', 1) }}" title="Bonobo" alt="Bonobo"
style=" width: 128px; height: 128px;"/>
</div>
</h1>
<p>
{% trans %}
Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of
data transformations using a simple directed graph of python callables.
<strong>Bonobo</strong> is a line-by-line data-processing toolkit for python 3.5+ emphasizing simple and
atomic data transformations defined using a directed graph of plain old python callables.
{% endtrans %}
</p>
<p>
{% trans %}
It was originally created as a programmatic ETL (extract transform load) python 2.7+ library called rdc.etl,
to process tenth of millions of retail stock informations, and served this purpose for years.
<strong>Bonobo</strong> is a full-featured Extract-Transform-Load library that won't force you to use an
ugly IDE.
{% endtrans %}
</p>
<p>
{% trans %}
Bonobo is a clean full-rewrite of rdc.etl, for python 3.5+, and is now used for many ETL as well as non-ETL
use cases. For examples, it's pretty easy to write selenium based web crawlers, or twitter bots. As long as
a use case can be represented as a graph of callables interracting, Bonobo can be used.
<strong>Bonobo</strong> is your own data-monkey army. Tedious and repetitive data-processing incoming? Give
it a try!
{% endtrans %}
</p>
<h2 style="margin-bottom: 0">{% trans %}Documentation{% endtrans %}</h2>
<table class="contentstable">
<tr>
<td>
<p class="biglink"><a class="biglink" href="{{ pathto("tutorial/basics") }}">{% trans %}First steps{% endtrans %}</a><br/>
<span class="linkdescr">{% trans %}quick overview of basic features{% endtrans %}</span></p>
</td>
<td>
{%- if hasdoc('search') %}
<p class="biglink"><a class="biglink" href="{{ pathto("search") }}">{% trans %}
Search{% endtrans %}</a><br/>
<span class="linkdescr">{% trans %}search the documentation{% endtrans %}</span></p>{%- endif %}
</td>
</tr>
<tr>
<td>
<p class="biglink"><a class="biglink" href="{{ pathto("guide/index") }}">{% trans %}
Guides{% endtrans %}</a><br/>
<span class="linkdescr">{% trans %}for a complete overview{% endtrans %}</span>
</p>
</td>
<td>
<p class="biglink"><a class="biglink" href="{{ pathto("reference/index") }}">{% trans %}References{% endtrans %}</a>
<br/>
<span class="linkdescr">{% trans %}all functions, classes, terms{% endtrans %}</span>
</p>
</td>
</tr>
<tr>
<td>
<p class="biglink"><a class="biglink" href="{{ pathto("changes") }}">{% trans %}
Cookbook{% endtrans %}</a><br/>
<span class="linkdescr">{% trans %}examples and recipes{% endtrans %}</span></p>
</td>
<td>
<p class="biglink"><a class="biglink" href="{{ pathto("changes") }}">{% trans %}
Contribute{% endtrans %}</a><br/>
<span class="linkdescr">{% trans %}contributor guide{% endtrans %}</span></p>
</td>
</tr>
</table>
<h2>Features</h2>
<ul>
<li>
{% trans %}
@ -65,52 +105,6 @@
</li>
</ul>
<h2 style="margin-bottom: 0">{% trans %}Documentation{% endtrans %}</h2>
<table class="contentstable">
<tr>
<td>
<p class="biglink"><a class="biglink" href="{{ pathto("tutorial") }}">{% trans %}First steps with
Bonobo{% endtrans %}</a><br/>
<span class="linkdescr">{% trans %}overview of basic features{% endtrans %}</span></p>
</td>
<td>
{%- if hasdoc('search') %}
<p class="biglink"><a class="biglink" href="{{ pathto("search") }}">{% trans %}
Search{% endtrans %}</a><br/>
<span class="linkdescr">{% trans %}search the documentation{% endtrans %}</span></p>{%- endif %}
</td>
</tr>
<tr>
<td>
<p class="biglink"><a class="biglink" href="{{ pathto("contents") }}">{% trans %}
Guides{% endtrans %}</a><br/>
<span class="linkdescr">{% trans %}for a complete overview{% endtrans %}</span>
</p>
</td>
<td>
{%- if hasdoc('genindex') %}
<p class="biglink"><a class="biglink" href="{{ pathto("genindex") }}">{% trans %}References{% endtrans %}</a>
<br/>
<span class="linkdescr">{% trans %}all functions, classes, terms{% endtrans %}</span>
</p>{%- endif %}
</td>
</tr>
<tr>
<td>
<p class="biglink"><a class="biglink" href="{{ pathto("changes") }}">{% trans %}
Cookbook{% endtrans %}</a><br/>
<span class="linkdescr">{% trans %}examples and recipes{% endtrans %}</span></p>
</td>
<td>
<p class="biglink"><a class="biglink" href="{{ pathto("changes") }}">{% trans %}
Contribute{% endtrans %}</a><br/>
<span class="linkdescr">{% trans %}contributor guide{% endtrans %}</span></p>
</td>
</tr>
</table>
<p>{% trans %}
You can also download PDF/EPUB versions of the Bonobo documentation:
<a href="http://readthedocs.org/projects/bonobo/downloads/pdf/stable/">PDF version</a>,

22
docs/_templates/sidebarintro.html vendored Normal file
View File

@ -0,0 +1,22 @@
<h3>About Bonobo</h3>
<p>
Bonobo is a data-processing toolkit for python 3.5+, with emphasis on simplicity, atomicity and testability. Oh,
and performance, too!
</p>
<h3>Other Formats</h3>
<p>
You can download the documentation in other formats as well:
</p>
<ul>
<li><a href="http://readthedocs.org/projects/bonobo/downloads/pdf/stable/">as PDF</a></li>
<li><a href="http://readthedocs.org/projects/bonobo/downloads/htmlzip/stable/">as zipped HTML</a></li>
<li><a href="http://readthedocs.org/projects/bonobo/downloads/epub/stable/">as EPUB</a></li>
</ul>
<h3>Useful Links</h3>
<ul>
<li><a href="https://bonobo-project.org/">Bonobo project's Website</a></li>
<li><a href="http://pypi.python.org/pypi/bonobo">Bonobo @ PyPI</a></li>
<li><a href="http://github.com/python-bonobo/bonobo">Bonobo @ github</a></li>
</ul>

View File

@ -12,8 +12,14 @@ import bonobo
# -- General configuration ------------------------------------------------
extensions = [
'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage',
'sphinx.ext.ifconfig', 'sphinx.ext.viewcode'
'sphinx.ext.autodoc',
'sphinx.ext.doctest',
'sphinx.ext.intersphinx',
'sphinx.ext.todo',
'sphinx.ext.coverage',
'sphinx.ext.ifconfig',
'sphinx.ext.viewcode',
'sphinx.ext.graphviz',
]
# Add any paths that contain templates here, relative to this directory.
@ -76,13 +82,23 @@ html_theme_options = {
}
html_sidebars = {
'**': [
'index': [
'sidebarlogo.html',
'navigation.html',
'localtoc.html',
'relations.html',
'sidebarintro.html',
'sourcelink.html',
'searchbox.html',
'sidebarinfos.html',
],
'**': [
'sidebarlogo.html',
'navigation.html',
'localtoc.html',
'relations.html',
'sourcelink.html',
'searchbox.html',
'sidebarinfos.html',
]
}
@ -95,6 +111,8 @@ html_additional_pages = {'index': 'index.html'}
html_static_path = ['_static']
html_show_sphinx = False
graphviz_output_format = 'svg'
# -- Options for HTMLHelp output ------------------------------------------
# Output file base name for HTML help builder.

4
docs/guide/index.rst Normal file
View File

@ -0,0 +1,4 @@
Guides
======
.. todo:: write the fucking doc!

22
docs/history.rst Normal file
View File

@ -0,0 +1,22 @@
History
=======
**Bonobo** is a full rewrite of **rdc.etl**.
**rdc.etl** is a full python 2.7+ ETL library whose development started in 2012, and which was open-sourced in 2013 (see
`first commit <https://github.com/rdcli/rdc.etl/commit/fdbc11c0ee7f6b97322693bd0051d63677b06a93>`_).
Although the first commit in **Bonobo** happened late 2016, it's based on a lot of code, learnings and experience that
happened because of **rdc.etl**.
It would have been counterproductive to migrate the same codebase:
* a lot of mistakes were impossible to fix in a backward compatible way (for example, transformations were stateful,
making them more complicated to write and impossible to reuse, a lot of effort was used to make the components have
multi-inputs and multi-outputs, although in 99% of the cases it's useless, etc.).
* we also wanted to develop something that took advantage of modern python versions, hence the choice of 3.5+.
**rdc.etl** still runs data transformation jobs, in both python 2.7 and 3, and we reuse whatever is possible to
build Bonobo.
You can read

View File

@ -3,9 +3,11 @@ Bonobo
.. toctree::
:maxdepth: 2
:caption: Contents:
install
tutorial/index
guide/index
reference/index
genindex
modindex
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

34
docs/install.rst Normal file
View File

@ -0,0 +1,34 @@
Installation
============
.. todo::
better install docs, especially on how to use different fork, etc.
Install with pip
::::::::::::::::
.. code-block:: shell-session
$ pip install bonobo
Install from source
:::::::::::::::::::
.. code-block:: shell-session
$ pip install git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo
Editable install
::::::::::::::::
If you plan on making patches to Bonobo, you should install it as an "editable" package.
.. code-block:: shell-session
$ pip install --editable git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo
Note: `-e` is the shorthand version of `--editable`.

View File

@ -0,0 +1,22 @@
bonobo.compat package
=====================
Submodules
----------
bonobo.compat.pandas module
---------------------------
.. automodule:: bonobo.compat.pandas
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: bonobo.compat
:members:
:undoc-members:
:show-inheritance:

View File

@ -0,0 +1,85 @@
bonobo.core package
===================
Subpackages
-----------
.. toctree::
bonobo.core.strategies
Submodules
----------
bonobo.core.bags module
-----------------------
.. automodule:: bonobo.core.bags
:members:
:undoc-members:
:show-inheritance:
bonobo.core.contexts module
---------------------------
.. automodule:: bonobo.core.contexts
:members:
:undoc-members:
:show-inheritance:
bonobo.core.errors module
-------------------------
.. automodule:: bonobo.core.errors
:members:
:undoc-members:
:show-inheritance:
bonobo.core.graphs module
-------------------------
.. automodule:: bonobo.core.graphs
:members:
:undoc-members:
:show-inheritance:
bonobo.core.inputs module
-------------------------
.. automodule:: bonobo.core.inputs
:members:
:undoc-members:
:show-inheritance:
bonobo.core.plugins module
--------------------------
.. automodule:: bonobo.core.plugins
:members:
:undoc-members:
:show-inheritance:
bonobo.core.services module
---------------------------
.. automodule:: bonobo.core.services
:members:
:undoc-members:
:show-inheritance:
bonobo.core.stats module
------------------------
.. automodule:: bonobo.core.stats
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: bonobo.core
:members:
:undoc-members:
:show-inheritance:

View File

@ -0,0 +1,38 @@
bonobo.core.strategies package
==============================
Submodules
----------
bonobo.core.strategies.base module
----------------------------------
.. automodule:: bonobo.core.strategies.base
:members:
:undoc-members:
:show-inheritance:
bonobo.core.strategies.executor module
--------------------------------------
.. automodule:: bonobo.core.strategies.executor
:members:
:undoc-members:
:show-inheritance:
bonobo.core.strategies.naive module
-----------------------------------
.. automodule:: bonobo.core.strategies.naive
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: bonobo.core.strategies
:members:
:undoc-members:
:show-inheritance:

View File

@ -0,0 +1,22 @@
bonobo.ext.console package
==========================
Submodules
----------
bonobo.ext.console.plugin module
--------------------------------
.. automodule:: bonobo.ext.console.plugin
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: bonobo.ext.console
:members:
:undoc-members:
:show-inheritance:

View File

@ -0,0 +1,30 @@
bonobo.ext.jupyter package
==========================
Submodules
----------
bonobo.ext.jupyter.plugin module
--------------------------------
.. automodule:: bonobo.ext.jupyter.plugin
:members:
:undoc-members:
:show-inheritance:
bonobo.ext.jupyter.widget module
--------------------------------
.. automodule:: bonobo.ext.jupyter.widget
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: bonobo.ext.jupyter
:members:
:undoc-members:
:show-inheritance:

View File

@ -0,0 +1,46 @@
bonobo.ext package
==================
Subpackages
-----------
.. toctree::
bonobo.ext.console
bonobo.ext.jupyter
Submodules
----------
bonobo.ext.couchdb_ module
--------------------------
.. automodule:: bonobo.ext.couchdb_
:members:
:undoc-members:
:show-inheritance:
bonobo.ext.opendatasoft module
------------------------------
.. automodule:: bonobo.ext.opendatasoft
:members:
:undoc-members:
:show-inheritance:
bonobo.ext.selenium module
--------------------------
.. automodule:: bonobo.ext.selenium
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: bonobo.ext
:members:
:undoc-members:
:show-inheritance:

View File

@ -0,0 +1,30 @@
bonobo.io package
=================
Submodules
----------
bonobo.io.file module
---------------------
.. automodule:: bonobo.io.file
:members:
:undoc-members:
:show-inheritance:
bonobo.io.json module
---------------------
.. automodule:: bonobo.io.json
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: bonobo.io
:members:
:undoc-members:
:show-inheritance:

21
docs/reference/bonobo.rst Normal file
View File

@ -0,0 +1,21 @@
bonobo package
==============
Subpackages
-----------
.. toctree::
bonobo.compat
bonobo.core
bonobo.ext
bonobo.io
bonobo.util
Module contents
---------------
.. automodule:: bonobo
:members:
:undoc-members:
:show-inheritance:

View File

@ -0,0 +1,62 @@
bonobo.util package
===================
Submodules
----------
bonobo.util.compat module
-------------------------
.. automodule:: bonobo.util.compat
:members:
:undoc-members:
:show-inheritance:
bonobo.util.helpers module
--------------------------
.. automodule:: bonobo.util.helpers
:members:
:undoc-members:
:show-inheritance:
bonobo.util.iterators module
----------------------------
.. automodule:: bonobo.util.iterators
:members:
:undoc-members:
:show-inheritance:
bonobo.util.lifecycle module
----------------------------
.. automodule:: bonobo.util.lifecycle
:members:
:undoc-members:
:show-inheritance:
bonobo.util.time module
-----------------------
.. automodule:: bonobo.util.time
:members:
:undoc-members:
:show-inheritance:
bonobo.util.tokens module
-------------------------
.. automodule:: bonobo.util.tokens
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: bonobo.util
:members:
:undoc-members:
:show-inheritance:

13
docs/reference/index.rst Normal file
View File

@ -0,0 +1,13 @@
References
==========
.. todo:: write the fucking doc!
.. toctree::
:maxdepth: 4
bonobo
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@ -0,0 +1,3 @@
bonobo
======

158
docs/tutorial/basics.rst Normal file
View File

@ -0,0 +1,158 @@
Basic concepts
==============
To begin with Bonobo, you need to install it in a working python 3.5+ environment:
.. code-block:: shell-session
$ pip install bonobo
See :doc:`/install` for more options.
Let's write a first data transformation
:::::::::::::::::::::::::::::::::::::::
We'll start with the most simple components we can.
In **Bonobo**, a component is a plain old python callable, no more, no less. Let's write one that takes a string and
uppercases it.
.. code-block:: python
def uppercase(x: str):
return x.upper()
Pretty straightforward.
You could even use :func:`str.upper` directly instead of writing a wrapper, as a type's method (unbound) will take an
instance of this type as its first parameter (what you'd call `self` in your method).
The type annotations written here are not used, but can make your code much more readable, and may very well be used as
validators in the future.
Let's write two more components: a generator to produce the data to be transformed, and something that outputs it,
because, yeah, feedback is cool.
.. code-block:: python
def generate_data():
yield 'foo'
yield 'bar'
yield 'baz'
def output(x: str):
print(x)
Once again, you could have skipped the pain of writing this and simply use an iterable to generate the data and the
builtin :func:`print` for the output, but we'll stick to writing our own components for now.
Let's chain the three components together and run the transformation:
.. code-block:: python
from bonobo import run
run(generate_data, uppercase, output)
.. graphviz::
digraph {
rankdir = LR;
"generate_data" -> "uppercase" -> "output";
}
We use the :func:`bonobo.run` helper that hides the underlying object composition necessary to actually run the
components in parallel, because it's simpler.
Depending on what you're doing, you may use the shorthand helper method, or the verbose one. Always favor the shorter,
if you don't need to tune the graph or the execution strategy (see below).
Diving in
:::::::::
Let's rewrite it using the builtin functions :func:`str.upper` and :func:`print` instead of our own wrappers, and expand
the :func:`bonobo.run()` helper so you see what's inside...
.. code-block:: python
from bonobo import Graph, ThreadPoolExecutorStrategy
# Represent our data processor as a simple directed graph of callables.
graph = Graph()
graph.add_chain(
('foo', 'bar', 'baz'),
str.upper,
print,
)
# Use a thread pool.
executor = ThreadPoolExecutorStrategy()
# Run the thing.
executor.execute(graph)
We also switched our generator for a tuple, **Bonobo** will wrap it as a generator itself if it's not callable but
iterable.
The shorthand version with builtins would look like this:
.. code-block:: python
from bonobo import run
run(
('foo', 'bar', 'baz'),
str.upper,
print,
)
Both methods are strictly equivalent (see :func:`bonobo.run`). When in doubt, prefer the shorter version.
Takeaways
:::::::::
① The :class:`bonobo.Graph` class is used to represent a data-processing pipeline.
It can represent simple list-like linear graphs, like here, but it can also represent much more complex graphs, with
branches and cycles.
This is what the graph we defined looks like:
.. graphviz::
digraph {
rankdir = LR;
"iter(['foo', 'bar', 'baz'])" -> "str.upper" -> "print";
}
② `Components` are simple python callables. Whatever can be called can be used as a `component`. Callables can
either `return` or `yield` data to send it to the next step. Regular functions (using `return`) should be preferred if
each call is guaranteed to return exactly one result, while generators (using `yield`) should be preferred if the
number of output lines for a given input varies.
③ The `graph` is then executed using an `ExecutionStrategy`. In this tutorial, we'll only use
:class:`bonobo.ThreadPoolExecutorStrategy`, which uses an underlying `concurrent.futures.ThreadPoolExecutor` to
schedule calls in a pool of threads, but basically this strategy is what determines the actual behaviour of execution.
④ Before actually executing the `components`, the `ExecutorStrategy` instance will wrap each component in a `context`,
whose responsibility is to hold the state, to keep the `components` stateless. We'll expand on this later.
Concepts and definitions
::::::::::::::::::::::::
* Component
* Graph
* Executor
.. todo:: Definitions, and substitute vague terms in the page by the exact term defined here
Next
::::
You now know all the basic concepts necessary to build (batch-like) data processors.
If you're confident with this part, let's get to a more real world example, using files and nice console output:
:doc:`basics2`

46
docs/tutorial/basics2.rst Normal file
View File

@ -0,0 +1,46 @@
Working with files
==================
Bonobo would not be of any use if the aim was to uppercase small lists of strings. In fact, Bonobo should not be used
if you don't expect any gain from parallelization of tasks.
Let's take the following graph as an example:
.. graphviz::
digraph {
rankdir = LR;
"A" -> "B" -> "C";
}
The execution strategy does a bit of behind-the-scenes work, wrapping every component in a thread (assuming you're using
the :class:`bonobo.ThreadPoolExecutorStrategy`), which allows `B` to start running as soon as `A` has yielded the first line
of data, and `C` as soon as `B` has yielded the first line of data, even if `A` or `B` still have data to yield.
The great thing is that you generally don't have to think about it. Just be aware that your components will be run in
parallel, and don't worry too much about blocking components, as they won't block their siblings.
That being said, let's try to write a more real-world like transformation.
Reading a file
::::::::::::::
There are a few component builders available in **Bonobo** that let you read files. You should at least know about the following:
* :class:`bonobo.FileReader` (aliased as :func:`bonobo.from_file`)
* :class:`bonobo.JsonFileReader` (aliased as :func:`bonobo.from_json`)
* :class:`bonobo.CsvFileReader` (aliased as :func:`bonobo.from_csv`)
Reading a file is as simple as using one of those, and for the example, we'll use a text file that was generated using
Bonobo from the "liste-des-cafes-a-un-euro" dataset made available by Mairie de Paris under the Open Database
License (ODbL). You can `explore the original dataset <https://opendata.paris.fr/explore/dataset/liste-des-cafes-a-un-euro/information/>`_.
You'll need the example dataset, available in **Bonobo**'s repository.
.. code-block:: python
from bonobo import FileReader, run
run(
FileReader('examples/datasets/cheap_coffeeshops_in_paris.txt'),
print,
)

14
docs/tutorial/index.rst Normal file
View File

@ -0,0 +1,14 @@
First steps
===========
We tried hard to make **Bonobo** simple. We use simple python, and we believe it should be simple to learn.
We strongly advise that even if you're an advanced python developer, you go through the whole tutorial for two
reasons: that should be sufficient to do anything possible with **Bonobo** and that's a good moment to learn the few
concepts you'll see everywhere in the software.
.. toctree::
:maxdepth: 2
basics
basics2

View File

@ -0,0 +1,182 @@
Extérieur Quai, 5, rue d'Alsace, 75010 Paris, France
Le Sully, 6 Bd henri IV, 75004 Paris, France
O q de poule, 53 rue du ruisseau, 75018 Paris, France
Le Pas Sage, 1 Passage du Grand Cerf, 75002 Paris, France
Le Dunois, 77 rue Dunois, 75013 Paris, France
La Renaissance, 112 Rue Championnet, 75018 Paris, France
Le chantereine, 51 Rue Victoire, 75009 Paris, France
Le Müller, 11 rue Feutrier, 75018 Paris, France
Le drapeau de la fidelité, 21 rue Copreaux, 75015 Paris, France
Le café des amis, 125 rue Blomet, 75015 Paris, France
Le Café Livres, 10 rue Saint Martin, 75004 Paris, France
Le Bosquet, 46 avenue Bosquet, 75007 Paris, France
Le Brio, 216, rue Marcadet, 75018 Paris, France
Le Kleemend's, 34 avenue Pierre Mendès-France, 75013 Paris, France
Café Pierre, 202 rue du faubourg st antoine, 75012 Paris, France
Les Arcades, 61 rue de Ponthieu, 75008 Paris, France
Le Square, 31 rue Saint-Dominique, 75007 Paris, France
Assaporare Dix sur Dix, 75, avenue Ledru-Rollin, 75012 Paris, France
Au cerceau d'or, 129 boulevard sebastopol, 75002 Paris, France
Café antoine, 17 rue Jean de la Fontaine, 75016 Paris, France
Café Lea, 5 rue Claude Bernard, 75005 Paris, France
Cardinal Saint-Germain, 11 boulevard Saint-Germain, 75005 Paris, France
Dédé la frite, 52 rue Notre-Dame des Victoires, 75002 Paris, France
La Bauloise, 36 rue du hameau, 75015 Paris, France
Le Bellerive, 71 quai de Seine, 75019 Paris, France
Le bistrot de Maëlle et Augustin, 42 rue coquillère, 75001 Paris, France
Le Dellac, 14 rue Rougemont, 75009 Paris, France
Le Felteu, 1 rue Pecquay, 75004 Paris, France
Le Saint Jean, 23 rue des abbesses, 75018 Paris, France
les montparnos, 65 boulevard Pasteur, 75015 Paris, France
Drole d'endroit pour une rencontre, 58 rue de Montorgueil, 75002 Paris, France
Le pari's café, 104 rue caulaincourt, 75018 Paris, France
Le Poulailler, 60 rue saint-sabin, 75011 Paris, France
L'Assassin, 99 rue Jean-Pierre Timbaud, 75011 Paris, France
l'Usine, 1 rue d'Avron, 75020 Paris, France
La Bricole, 52 rue Liebniz, 75018 Paris, France
le ronsard, place maubert, 75005 Paris, France
Face Bar, 82 rue des archives, 75003 Paris, France
American Kitchen, 49 rue bichat, 75010 Paris, France
La Marine, 55 bis quai de valmy, 75010 Paris, France
Le Bloc, 21 avenue Brochant, 75017 Paris, France
La Recoleta au Manoir, 229 avenue Gambetta, 75020 Paris, France
Le Pareloup, 80 Rue Saint-Charles, 75015 Paris, France
La Brasserie Gaité, 3 rue de la Gaité, 75014 Paris, France
Café Zen, 46 rue Victoire, 75009 Paris, France
O'Breizh, 27 rue de Penthièvre, 75008 Paris, France
Le Petit Choiseul, 23 rue saint augustin, 75002 Paris, France
Invitez vous chez nous, 7 rue Epée de Bois, 75005 Paris, France
La Cordonnerie, 142 Rue Saint-Denis 75002 Paris, 75002 Paris, France
Le Supercoin, 3, rue Baudelique, 75018 Paris, France
Populettes, 86 bis rue Riquet, 75018 Paris, France
Au bon coin, 49 rue des Cloys, 75018 Paris, France
Le Couvent, 69 rue Broca, 75013 Paris, France
La Brûlerie des Ternes, 111 rue mouffetard, 75005 Paris, France
L'Écir, 59 Boulevard Saint-Jacques, 75014 Paris, France
Le Chat bossu, 126, rue du Faubourg Saint Antoine, 75012 Paris, France
Denfert café, 58 boulvevard Saint Jacques, 75014 Paris, France
Le Café frappé, 95 rue Montmartre, 75002 Paris, France
La Perle, 78 rue vieille du temple, 75003 Paris, France
Le Descartes, 1 rue Thouin, 75005 Paris, France
Bagels & Coffee Corner, Place de Clichy, 75017 Paris, France
Le petit club, 55 rue de la tombe Issoire, 75014 Paris, France
Le Plein soleil, 90 avenue Parmentier, 75011 Paris, France
Le Relais Haussmann, 146, boulevard Haussmann, 75008 Paris, France
Le Malar, 88 rue Saint-Dominique, 75007 Paris, France
Au panini de la place, 47 rue Belgrand, 75020 Paris, France
Le Village, 182 rue de Courcelles, 75017 Paris, France
Pause Café, 41 rue de Charonne, 75011 Paris, France
Le Pure café, 14 rue Jean Macé, 75011 Paris, France
Extra old café, 307 fg saint Antoine, 75011 Paris, France
Chez Fafa, 44 rue Vinaigriers, 75010 Paris, France
En attendant l'or, 3 rue Faidherbe, 75011 Paris, France
Aux cadrans, 21 ter boulevard Diderot, 75012 Paris, France
Brûlerie San José, 30 rue des Petits-Champs, 75002 Paris, France
Etienne, 14 rue Turbigo, Paris, 75001 Paris, France
L'ingénu, 184 bd Voltaire, 75011 Paris, France
L'Olive, 8 rue L'Olive, 75018 Paris, France
Le Biz, 18 rue Favart, 75002 Paris, France
Le Cap Bourbon, 1 rue Louis le Grand, 75002 Paris, France
Le General Beuret, 9 Place du General Beuret, 75015 Paris, France
Le Germinal, 95 avenue Emile Zola, 75015 Paris, France
Le Ragueneau, 202 rue Saint-Honoré, 75001 Paris, France
Le refuge, 72 rue lamarck, 75018 Paris, France
Le sully, 13 rue du Faubourg Saint Denis, 75010 Paris, France
L'antre d'eux, 16 rue DE MEZIERES, 75006 Paris, France
Le bal du pirate, 60 rue des bergers, 75015 Paris, France
zic zinc, 95 rue claude decaen, 75012 Paris, France
l'orillon bar, 35 rue de l'orillon, 75011 Paris, France
Le Zazabar, 116 Rue de Ménilmontant, 75020 Paris, France
L'Inévitable, 22 rue Linné, 75005 Paris, France
Ragueneau, 202 rue Saint Honoré, 75001 Paris, France
Le Caminito, 48 rue du Dessous des Berges, 75013 Paris, France
Epicerie Musicale, 55bis quai de Valmy, 75010 Paris, France
Le petit Bretonneau, Le petit Bretonneau - à l'intérieur de l'Hôpital, 75018 Paris, France
Le Centenaire, 104 rue amelot, 75011 Paris, France
La Montagne Sans Geneviève, 13 Rue du Pot de Fer, 75005 Paris, France
Les Pères Populaires, 46 rue de Buzenval, 75020 Paris, France
Cafe de grenelle, 188 rue de Grenelle, 75007 Paris, France
Le relais de la victoire, 73 rue de la Victoire, 75009 Paris, France
La chaumière gourmande, Route de la Muette à Neuilly
Club hippique du Jardin dAcclimatation, 75016 Paris, France
Caves populaires, 22 rue des Dames, 75017 Paris, France
Caprice café, 12 avenue Jean Moulin, 75014 Paris, France
Tamm Bara, 7 rue Clisson, 75013 Paris, France
L'anjou, 1 rue de Montholon, 75009 Paris, France
Café dans l'aerogare Air France Invalides, 2 rue Robert Esnault Pelterie, 75007 Paris, France
Waikiki, 10 rue d"Ulm, 75005 Paris, France
Chez Prune, 36 rue Beaurepaire, 75010 Paris, France
Au Vin Des Rues, 21 rue Boulard, 75014 Paris, France
bistrot les timbrés, 14 rue d'alleray, 75015 Paris, France
Café beauveau, 9 rue de Miromesnil, 75008 Paris, France
Café de la Mairie (du VIII), rue de Lisbonne, 75008 Paris, France
Café Pistache, 9 rue des petits champs, 75001 Paris, France
La Cagnotte, 13 Rue Jean-Baptiste Dumay, 75020 Paris, France
le 1 cinq, 172 rue de vaugirard, 75015 Paris, France
Le Killy Jen, 28 bis boulevard Diderot, 75012 Paris, France
Les Artisans, 106 rue Lecourbe, 75015 Paris, France
Peperoni, 83 avenue de Wagram, 75001 Paris, France
le lutece, 380 rue de vaugirard, 75015 Paris, France
Brasiloja, 16 rue Ganneron, 75018 Paris, France
Rivolux, 16 rue de Rivoli, 75004 Paris, France
Chai 33, 33 Cour Saint Emilion, 75012 Paris, France
L'européen, 21 Bis Boulevard Diderot, 75012 Paris, France
NoMa, 39 rue Notre Dame de Nazareth, 75003 Paris, France
O'Paris, 1 Rue des Envierges, 75020 Paris, France
Café Clochette, 16 avenue Richerand, 75010 Paris, France
La cantoche de Paname, 40 Boulevard Beaumarchais, 75011 Paris, France
Le Saint René, 148 Boulevard de Charonne, 75020 Paris, France
La Liberté, 196 rue du faubourg saint-antoine, 75012 Paris, France
Chez Rutabaga, 16 rue des Petits Champs, 75002 Paris, France
Le BB (Bouchon des Batignolles), 2 rue Lemercier, 75017 Paris, France
La Brocante, 10 rue Rossini, 75009 Paris, France
Le Plomb du cantal, 3 rue Gaîté, 75014 Paris, France
Les caves populaires, 22 rue des Dames, 75017 Paris, France
Chez Luna, 108 rue de Ménilmontant, 75020 Paris, France
Le bar Fleuri, 1 rue du Plateau, 75019 Paris, France
Le Chaumontois, 12 rue Armand Carrel, 75018 Paris, France
Trois pièces cuisine, 101 rue des dames, 75017 Paris, France
Le Zinc, 61 avenue de la Motte Picquet, 75015 Paris, France
La cantine de Zoé, 136 rue du Faubourg poissonnière, 75010 Paris, France
Les Vendangeurs, 6/8 rue Stanislas, 75006 Paris, France
L'avant comptoir, 3 carrefour de l'Odéon, 75006 Paris, France
Botak cafe, 1 rue Paul albert, 75018 Paris, France
le chateau d'eau, 67 rue du Château d'eau, 75010 Paris, France
Bistrot Saint-Antoine, 58 rue du Fbg Saint-Antoine, 75012 Paris, France
Chez Oscar, 11/13 boulevard Beaumarchais, 75004 Paris, France
Le Fronton, 63 rue de Ponthieu, 75008 Paris, France
Le Piquet, 48 avenue de la Motte Picquet, 75015 Paris, France
Le Tournebride, 104 rue Mouffetard, 75005 Paris, France
maison du vin, 52 rue des plantes, 75014 Paris, France
Coffee Chope, 344Vrue Vaugirard, 75015 Paris, France
L'entrepôt, 157 rue Bercy 75012 Paris, 75012 Paris, France
Le café Monde et Médias, Place de la République, 75003 Paris, France
Café rallye tournelles, 11 Quai de la Tournelle, 75005 Paris, France
Brasserie le Morvan, 61 rue du château d'eau, 75010 Paris, France
Chez Miamophile, 6 rue Mélingue, 75019 Paris, France
La Caravane, Rue de la Fontaine au Roi, 75011 Paris, France
Panem, 18 rue de Crussol, 75011 Paris, France
Petits Freres des Pauvres, 47 rue de Batignolles, 75017 Paris, France
Café Dupont, 198 rue de la Convention, 75015 Paris, France
L'Angle, 28 rue de Ponthieu, 75008 Paris, France
Institut des Cultures d'Islam, 19-23 rue Léon, 75018 Paris, France
Canopy Café associatif, 19 rue Pajol, 75018 Paris, France
L'Entracte, place de l'opera, 75002 Paris, France
Le Sévigné, 15 rue du Parc Royal, 75003 Paris, France
Le Café d'avant, 35 rue Claude Bernard, 75005 Paris, France
Le Lucernaire, 53 rue Notre-Dame des Champs, 75006 Paris, France
Le Brigadier, 12 rue Blanche, 75009 Paris, France
L'âge d'or, 26 rue du Docteur Magnan, 75013 Paris, France
Café Victor, 10 boulevard Victor, 75015 Paris, France
L'empreinte, 54, avenue Daumesnil, 75012 Paris, France
L'horizon, 93, rue de la Roquette, 75011 Paris, France
Au pays de Vannes, 34 bis rue de Wattignies, 75012 Paris, France
Café Martin, 2 place Martin Nadaud, 75001 Paris, France
Café Varenne, 36 rue de Varenne, 75007 Paris, France
l'Eléphant du nil, 125 Rue Saint-Antoine, 75004 Paris, France
Le Comptoir, 354 bis rue Vaugirard, 75015 Paris, France
Le Parc Vaugirard, 358 rue de Vaugirard, 75015 Paris, France
Le Reynou, 2 bis quai de la mégisserie, 75001 Paris, France
le Zango, 58 rue Daguerre, 75014 Paris, France
Melting Pot, 3 rue de Lagny, 75020 Paris, France
Pari's Café, 174 avenue de Clichy, 75017 Paris, France

View File

@ -1,16 +1,17 @@
import json
from blessings import Terminal
from pycountry import countries
from bonobo.ext.console import console_run
from bonobo.ext.ods import extract_ods
from bonobo.util import tee
from bonobo.io.json import to_json
from bonobo import console_run, tee, JsonFileWriter
from bonobo.ext.opendatasoft import from_opendatasoft_api
DATASET = 'fablabs-in-the-world'
SEARCH_URL = 'https://datanova.laposte.fr/api/records/1.0/search/'
URL = SEARCH_URL + '?dataset=' + DATASET
try:
import pycountry
except ImportError as exc:
raise ImportError('You must install package "pycountry" to run this example.') from exc
API_DATASET = 'fablabs-in-the-world'
API_NETLOC = 'datanova.laposte.fr'
ROWS = 100
t = Terminal()
@ -25,7 +26,7 @@ def normalize(row):
**
row,
'links': list(filter(None, map(_getlink, json.loads(row.get('links'))))),
'country': countries.get(alpha_2=row.get('country_code', '').upper()).name,
'country': pycountry.countries.get(alpha_2=row.get('country_code', '').upper()).name,
}
return result
@ -47,15 +48,15 @@ def display(row):
print(' - {}: {address}'.format(t.blue('address'), address=', '.join(address)))
print(' - {}: {links}'.format(t.blue('links'), links=', '.join(row['links'])))
print(' - {}: {geometry}'.format(t.blue('geometry'), **row))
print(' - {}: {source}'.format(t.blue('source'), source='datanova/' + DATASET))
print(' - {}: {source}'.format(t.blue('source'), source='datanova/' + API_DATASET))
if __name__ == '__main__':
console_run(
extract_ods(
SEARCH_URL, DATASET, timezone='Europe/Paris'),
from_opendatasoft_api(
API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris'),
normalize,
filter_france,
tee(display),
to_json('fablabs.json'),
JsonFileWriter('fablabs.json'),
output=True, )

View File

@ -0,0 +1,15 @@
from os.path import dirname, realpath, join
from bonobo import console_run
from bonobo.ext.opendatasoft import from_opendatasoft_api
from bonobo.io.file import FileWriter
# Absolute path of the generated text file, resolved relative to this script's own directory.
OUTPUT_FILENAME = realpath(join(dirname(__file__), 'datasets/cheap_coffeeshops_in_paris.txt'))
# Pipeline, in order:
#   1. read the 'liste-des-cafes-a-un-euro' dataset served from opendata.paris.fr
#      (presumably one mapping per record -- the next step unpacks it with **row);
#   2. format each row's nom_du_cafe / adresse / arrondissement fields into one text line;
#   3. pass the resulting lines to a FileWriter targeting OUTPUT_FILENAME.
console_run(
from_opendatasoft_api(
'liste-des-cafes-a-un-euro', netloc='opendata.paris.fr'),
lambda row: '{nom_du_cafe}, {adresse}, {arrondissement} Paris, France'.format(**row),
FileWriter(OUTPUT_FILENAME), )
# Tell the user where to find the output.
print('Import done, read {} for results.'.format(OUTPUT_FILENAME))

View File

@ -0,0 +1,18 @@
from bonobo import run
def generate_data():
    """Yield the three sample strings that feed the example pipeline."""
    yield 'foo'
    yield 'bar'
    yield 'baz'
def uppercase(x: str) -> str:
    """Return *x* converted to upper case."""
    return x.upper()
def output(x: str) -> None:
    """Print *x* to standard output."""
    print(x)
run(generate_data, uppercase, output)

View File

@ -0,0 +1,18 @@
from bonobo import Graph, ThreadPoolExecutorStrategy
def yield_from(*args):
    """Yield each positional argument in turn, in the order given."""
    # NOTE(review): nothing else in this example appears to call this helper -- confirm it is still needed.
    yield from args
# Represent our data processor as a simple directed graph of callables.
# The three callables are passed positionally to Graph as a single chain:
# a source returning a generator of strings, an upper-casing step, and print.
graph = Graph(
lambda: (x for x in ('foo', 'bar', 'baz')),
str.upper,
print, )
# Use a thread pool.
executor = ThreadPoolExecutorStrategy()
# Run the thing.
executor.execute(graph)

View File

@ -1,6 +1,6 @@
from mock import patch
from bonobo.ext.ods import extract_ods
from bonobo.ext.opendatasoft import from_opendatasoft_api
class ResponseMock:
@ -17,7 +17,7 @@ class ResponseMock:
def test_read_from_opendatasoft_api():
extract = extract_ods('http://example.com/', 'test-a-set')
extract = from_opendatasoft_api('http://example.com/', 'test-a-set')
with patch(
'requests.get', return_value=ResponseMock([
{

View File

@ -1,13 +1,13 @@
import pytest
from bonobo import to_json, Bag
from bonobo import Bag, JsonFileWriter
from bonobo.core.contexts import ComponentExecutionContext
from bonobo.util.tokens import BEGIN, END
def test_write_json_to_file(tmpdir):
file = tmpdir.join('output.json')
json_writer = to_json(str(file))
json_writer = JsonFileWriter(str(file))
context = ComponentExecutionContext(json_writer, None)
context.initialize()
@ -28,7 +28,7 @@ def test_write_json_to_file(tmpdir):
def test_write_json_without_initializer_should_not_work(tmpdir):
file = tmpdir.join('output.json')
json_writer = to_json(str(file))
json_writer = JsonFileWriter(str(file))
context = ComponentExecutionContext(json_writer, None)
with pytest.raises(AttributeError):