Merge pull request #135 from hartym/master
[doc] Documentation, my dear. Half of the work, looks you are a littl…
This commit is contained in:
@ -5,7 +5,10 @@ graph = bonobo.Graph(
|
|||||||
print,
|
print,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_services():
|
||||||
|
return {'fs': bonobo.open_examples_fs('datasets')}
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
bonobo.run(
|
bonobo.run(graph, services=get_services())
|
||||||
graph, services={'fs': bonobo.open_examples_fs('datasets')}
|
|
||||||
)
|
|
||||||
|
|||||||
@ -8,10 +8,18 @@ def split_one(line):
|
|||||||
graph = bonobo.Graph(
|
graph = bonobo.Graph(
|
||||||
bonobo.FileReader('coffeeshops.txt'),
|
bonobo.FileReader('coffeeshops.txt'),
|
||||||
split_one,
|
split_one,
|
||||||
bonobo.JsonWriter('coffeeshops.json'),
|
bonobo.JsonWriter(
|
||||||
|
'coffeeshops.json', fs='fs.output', ioformat='arg0'
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_services():
|
||||||
|
return {
|
||||||
|
'fs': bonobo.open_examples_fs('datasets'),
|
||||||
|
'fs.output': bonobo.open_fs(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
bonobo.run(
|
bonobo.run(graph, services=get_services())
|
||||||
graph, services={'fs': bonobo.open_examples_fs('datasets')}
|
|
||||||
)
|
|
||||||
|
|||||||
@ -1,4 +1,6 @@
|
|||||||
import bonobo, json
|
import json
|
||||||
|
|
||||||
|
import bonobo
|
||||||
|
|
||||||
|
|
||||||
def split_one_to_map(line):
|
def split_one_to_map(line):
|
||||||
@ -18,10 +20,16 @@ class MyJsonWriter(bonobo.JsonWriter):
|
|||||||
graph = bonobo.Graph(
|
graph = bonobo.Graph(
|
||||||
bonobo.FileReader('coffeeshops.txt'),
|
bonobo.FileReader('coffeeshops.txt'),
|
||||||
split_one_to_map,
|
split_one_to_map,
|
||||||
MyJsonWriter('coffeeshops.json'),
|
MyJsonWriter('coffeeshops.json', fs='fs.output', ioformat='arg0'),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_services():
|
||||||
|
return {
|
||||||
|
'fs': bonobo.open_examples_fs('datasets'),
|
||||||
|
'fs.output': bonobo.open_fs(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
bonobo.run(
|
bonobo.run(graph, services=get_services())
|
||||||
graph, services={'fs': bonobo.open_examples_fs('datasets')}
|
|
||||||
)
|
|
||||||
|
|||||||
59
docs/_templates/index.html
vendored
59
docs/_templates/index.html
vendored
@ -2,46 +2,49 @@
|
|||||||
{% set title = _('Bonobo — Data processing for humans') %}
|
{% set title = _('Bonobo — Data processing for humans') %}
|
||||||
{% block body %}
|
{% block body %}
|
||||||
|
|
||||||
<div style="border: 2px solid red; font-weight: bold; margin: 1em; padding: 1em">
|
<h1 style="text-align: center">
|
||||||
Bonobo is <strong>ALPHA</strong> software. Some APIs will change.
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<h1 style="text-align: center">
|
|
||||||
<img class="logo" src="{{ pathto('_static/bonobo.png', 1) }}" title="Bonobo" alt="Bonobo"
|
<img class="logo" src="{{ pathto('_static/bonobo.png', 1) }}" title="Bonobo" alt="Bonobo"
|
||||||
style=" width: 128px; height: 128px;"/>
|
style=" width: 128px; height: 128px;"/>
|
||||||
</h1>
|
</h1>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
{% trans %}
|
{% trans %}
|
||||||
<strong>Bonobo</strong> is a line-by-line data-processing toolkit for python 3.5+ (extract-transform-load
|
<strong>Bonobo</strong> is a line-by-line data-processing toolkit for python 3.5+ (extract-transform-load
|
||||||
framework) emphasizing simple and atomic data transformations defined using a directed graph of plain old
|
framework, or ETL) emphasizing simple and atomic data transformations defined using a directed graph of plain old
|
||||||
python objects (functions, iterables, generators, ...).
|
python objects (functions, iterables, generators, ...).
|
||||||
{% endtrans %}
|
{% endtrans %}
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<h2 style="margin-bottom: 0">{% trans %}Documentation{% endtrans %}</h2>
|
<div style="border: 2px solid red; font-weight: bold; margin: 1em; padding: 1em">
|
||||||
|
Bonobo is <strong>ALPHA</strong> software. Some APIs will change.
|
||||||
|
</div>
|
||||||
|
|
||||||
<table class="contentstable">
|
|
||||||
|
<h2 style="margin-bottom: 0">{% trans %}Documentation{% endtrans %}</h2>
|
||||||
|
|
||||||
|
<table class="contentstable">
|
||||||
<tr>
|
<tr>
|
||||||
<td>
|
<td>
|
||||||
<p class="biglink"><a class="biglink" href="{{ pathto("tutorial/index") }}">{% trans %}First steps{% endtrans %}</a><br/>
|
<p class="biglink"><a class="biglink" href="{{ pathto("tutorial/index") }}">{% trans %}First steps{%
|
||||||
|
endtrans %}</a><br/>
|
||||||
<span class="linkdescr">{% trans %}quick overview of basic features{% endtrans %}</span></p>
|
<span class="linkdescr">{% trans %}quick overview of basic features{% endtrans %}</span></p>
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
<p class="biglink"><a class="biglink" href="{{ pathto("search") }}">{% trans %}
|
<p class="biglink"><a class="biglink" href="{{ pathto("search") }}">{% trans %}
|
||||||
Search{% endtrans %}</a><br/>
|
Search{% endtrans %}</a><br/>
|
||||||
<span class="linkdescr">{% trans %}search the documentation{% endtrans %}</span></p>
|
<span class="linkdescr">{% trans %}search the documentation{% endtrans %}</span></p>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td>
|
<td>
|
||||||
<p class="biglink"><a class="biglink" href="{{ pathto("guide/index") }}">{% trans %}
|
<p class="biglink"><a class="biglink" href="{{ pathto("guide/index") }}">{% trans %}
|
||||||
Guides{% endtrans %}</a><br/>
|
Guides{% endtrans %}</a><br/>
|
||||||
<span class="linkdescr">{% trans %}for a complete overview{% endtrans %}</span>
|
<span class="linkdescr">{% trans %}for a complete overview{% endtrans %}</span>
|
||||||
</p>
|
</p>
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
<p class="biglink"><a class="biglink" href="{{ pathto("reference/index") }}">{% trans %}References{% endtrans %}</a>
|
<p class="biglink"><a class="biglink" href="{{ pathto("reference/index") }}">{% trans %}References{%
|
||||||
|
endtrans %}</a>
|
||||||
<br/>
|
<br/>
|
||||||
<span class="linkdescr">{% trans %}all functions, classes, terms{% endtrans %}</span>
|
<span class="linkdescr">{% trans %}all functions, classes, terms{% endtrans %}</span>
|
||||||
</p>
|
</p>
|
||||||
@ -49,21 +52,22 @@
|
|||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td>
|
<td>
|
||||||
<p class="biglink"><a class="biglink" target="_blank" href="https://github.com/python-bonobo/bonobo/tree/master/bonobo/examples">{% trans %}
|
<p class="biglink"><a class="biglink" target="_blank"
|
||||||
|
href="https://github.com/python-bonobo/bonobo/tree/master/bonobo/examples">{% trans %}
|
||||||
Cookbook{% endtrans %}</a><br/>
|
Cookbook{% endtrans %}</a><br/>
|
||||||
<span class="linkdescr">{% trans %}examples and recipes{% endtrans %}</span></p>
|
<span class="linkdescr">{% trans %}examples and recipes{% endtrans %}</span></p>
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
<p class="biglink"><a class="biglink" href="{{ pathto("contribute/index") }}">{% trans %}
|
<p class="biglink"><a class="biglink" href="{{ pathto("contribute/index") }}">{% trans %}
|
||||||
Contribute{% endtrans %}</a><br/>
|
Contribute{% endtrans %}</a><br/>
|
||||||
<span class="linkdescr">{% trans %}contributor guide{% endtrans %}</span></p>
|
<span class="linkdescr">{% trans %}contributor guide{% endtrans %}</span></p>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
<h2>Features</h2>
|
<h2>Features</h2>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
<li>
|
<li>
|
||||||
{% trans %}
|
{% trans %}
|
||||||
<b>10 minutes to get started:</b> Know some python? Writing your first data processor is an affair
|
<b>10 minutes to get started:</b> Know some python? Writing your first data processor is an affair
|
||||||
@ -91,16 +95,23 @@
|
|||||||
</li>
|
</li>
|
||||||
<li>
|
<li>
|
||||||
{% trans %}
|
{% trans %}
|
||||||
Bonobo is young, and the todo-list is huge. Read the <a href="https://www.bonobo-project.org/roadmap">roadmap</a>.
|
Bonobo is young, and the todo-list is huge. Read the <a
|
||||||
|
href="https://www.bonobo-project.org/roadmap">roadmap</a>.
|
||||||
{% endtrans %}
|
{% endtrans %}
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<p>{% trans %}
|
<p>{% trans %}
|
||||||
You can also download PDF/EPUB versions of the Bonobo documentation:
|
You can also download PDF/EPUB versions of the Bonobo documentation:
|
||||||
<a href="http://readthedocs.org/projects/bonobo/downloads/pdf/stable/">PDF version</a>,
|
<a href="http://readthedocs.org/projects/bonobo/downloads/pdf/stable/">PDF version</a>,
|
||||||
<a href="http://readthedocs.org/projects/bonobo/downloads/epub/stable/">EPUB version</a>.
|
<a href="http://readthedocs.org/projects/bonobo/downloads/epub/stable/">EPUB version</a>.
|
||||||
{% endtrans %}
|
{% endtrans %}
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
<h2>Table of contents</h2>
|
||||||
|
|
||||||
|
|
||||||
|
<div>
|
||||||
|
{{ toctree(maxdepth=2, collapse=False)}}
|
||||||
|
</div>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
@ -4,6 +4,7 @@ Bonobo with SQLAlchemy
|
|||||||
.. todo:: The `bonobo-sqlalchemy` package is at a very alpha stage, and things will change. This section is here to
|
.. todo:: The `bonobo-sqlalchemy` package is at a very alpha stage, and things will change. This section is here to
|
||||||
give a brief overview but is neither complete nor definitive.
|
give a brief overview but is neither complete nor definitive.
|
||||||
|
|
||||||
|
Read the introduction: https://www.bonobo-project.org/with/sqlalchemy
|
||||||
|
|
||||||
Installation
|
Installation
|
||||||
::::::::::::
|
::::::::::::
|
||||||
|
|||||||
@ -8,8 +8,8 @@ Bonobo
|
|||||||
tutorial/index
|
tutorial/index
|
||||||
guide/index
|
guide/index
|
||||||
reference/index
|
reference/index
|
||||||
contribute/index
|
|
||||||
faq
|
faq
|
||||||
|
contribute/index
|
||||||
genindex
|
genindex
|
||||||
modindex
|
modindex
|
||||||
|
|
||||||
|
|||||||
@ -4,36 +4,47 @@ Installation
|
|||||||
Create an ETL project
|
Create an ETL project
|
||||||
:::::::::::::::::::::
|
:::::::::::::::::::::
|
||||||
|
|
||||||
If you only want to use Bonobo to code ETLs, your easiest option to get started is to use our
|
Creating a project and starting to write code should take less than a minute:
|
||||||
`cookiecutter template <https://github.com/python-bonobo/cookiecutter-bonobo>`_.
|
|
||||||
|
.. code-block:: shell-session
|
||||||
|
|
||||||
|
$ pip install --upgrade bonobo cookiecutter
|
||||||
|
$ bonobo init my-etl-project
|
||||||
|
$ bonobo run my-etl-project
|
||||||
|
|
||||||
|
Once you bootstrapped a project, you can start editing the default example transformation by editing
|
||||||
|
`my-etl-project/main.py`.
|
||||||
|
|
||||||
|
Other installation options
|
||||||
|
::::::::::::::::::::::::::
|
||||||
|
|
||||||
Install from PyPI
|
Install from PyPI
|
||||||
:::::::::::::::::
|
-----------------
|
||||||
|
|
||||||
You can also install it directly from the `Python Package Index <https://pypi.python.org/pypi/bonobo>`_.
|
You can install it directly from the `Python Package Index <https://pypi.python.org/pypi/bonobo>`_ (like we did above).
|
||||||
|
|
||||||
.. code-block:: shell-session
|
.. code-block:: shell-session
|
||||||
|
|
||||||
$ pip install bonobo
|
$ pip install bonobo
|
||||||
|
|
||||||
Install from source
|
Install from source
|
||||||
:::::::::::::::::::
|
-------------------
|
||||||
|
|
||||||
If you want to install an unreleased version, you can use git urls with pip. This is useful when using bonobo as a
|
If you want to install an unreleased version, you can use git urls with pip. This is useful when using bonobo as a
|
||||||
dependency of your code and you want to try a forked version of bonobo with your software. You can use the git+http
|
dependency of your code and you want to try a forked version of bonobo with your software. You can use a `git+http`
|
||||||
string in your `requirements.txt` file. However, the best option for development on bonobo directly is not this one,
|
string in your `requirements.txt` file. However, the best option for development on bonobo is an editable install (see
|
||||||
but editable installs (see below).
|
below).
|
||||||
|
|
||||||
.. code-block:: shell-session
|
.. code-block:: shell-session
|
||||||
|
|
||||||
$ pip install git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo
|
$ pip install git+https://github.com/python-bonobo/bonobo.git@develop#egg=bonobo
|
||||||
|
|
||||||
Editable install
|
Editable install
|
||||||
::::::::::::::::
|
----------------
|
||||||
|
|
||||||
If you plan on making patches to Bonobo, you should install it as an "editable" package, which is a really great pip feature.
|
If you plan on making patches to Bonobo, you should install it as an "editable" package, which is a really great pip
|
||||||
Pip will clone your repository in a source directory and create a symlink for it in the site-package directory of your
|
feature. Pip will clone your repository in a source directory and create a symlink for it in the site-package directory
|
||||||
python interpreter.
|
of your python interpreter.
|
||||||
|
|
||||||
.. code-block:: shell-session
|
.. code-block:: shell-session
|
||||||
|
|
||||||
@ -63,20 +74,17 @@ I usually name the git remote for the main bonobo repository "upstream", and my
|
|||||||
|
|
||||||
$ git remote rename origin upstream
|
$ git remote rename origin upstream
|
||||||
$ git remote add origin git@github.com:hartym/bonobo.git
|
$ git remote add origin git@github.com:hartym/bonobo.git
|
||||||
|
$ git fetch --all
|
||||||
|
|
||||||
Of course, replace my github username by the one you used to fork bonobo. You should be good to go!
|
Of course, replace my github username by the one you used to fork bonobo. You should be good to go!
|
||||||
|
|
||||||
Windows support
|
Windows support
|
||||||
:::::::::::::::
|
:::::::::::::::
|
||||||
|
|
||||||
There are problems on the windows platform, mostly due to the fact bonobo was not developed by experienced windows users.
|
There are minor issues on the windows platform, mostly due to the fact bonobo was not developed by experienced windows
|
||||||
|
users.
|
||||||
|
|
||||||
We're trying to look into that but energy available to provide serious support on windows is very limited.
|
We're trying to look into that but energy available to provide serious support on windows is very limited.
|
||||||
|
|
||||||
If you have experience in this domain and you're willing to help, you're more than welcome!
|
If you have experience in this domain and you're willing to help, you're more than welcome!
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
.. todo::
|
|
||||||
|
|
||||||
Better install docs, especially on how to use different forks or branches, etc.
|
|
||||||
|
|
||||||
|
|||||||
@ -9,17 +9,26 @@ python code in charge of handling similar shaped independant lines of data.
|
|||||||
|
|
||||||
Bonobo *is not* a statistical or data-science tool. If you're looking for a data-analysis tool in python, use Pandas.
|
Bonobo *is not* a statistical or data-science tool. If you're looking for a data-analysis tool in python, use Pandas.
|
||||||
|
|
||||||
Bonobo is a lean manufacturing assembly line for data that let you focus on the actual work instead of the plumbery.
|
Bonobo is a lean manufacturing assembly line for data that lets you focus on the actual work instead of the plumbing
|
||||||
|
(execution contexts, parallelism, error handling, console output, logging, ...).
|
||||||
|
|
||||||
Bonobo uses simple python and should be quick and easy to learn.
|
Bonobo uses simple python and should be quick and easy to learn.
|
||||||
|
|
||||||
Tutorial
|
Tutorial
|
||||||
::::::::
|
::::::::
|
||||||
|
|
||||||
Warning: the documentation is still in progress. Although all content here should be accurate, you may feel a lack of
|
.. note::
|
||||||
completeness, for which we plaid guilty and apologize. If there is something blocking, please come on our
|
|
||||||
`slack channel <https://bonobo-slack.herokuapp.com/>`_ and complain, we'll figure something out. If there is something
|
Good documentation is not easy to write. We do our best to make it better and better.
|
||||||
that did not block you but can be a no-go for others, please consider contributing to the docs.
|
|
||||||
|
Although all content here should be accurate, you may feel a lack of completeness, for which we plead guilty and
|
||||||
|
apologize.
|
||||||
|
|
||||||
|
If you're stuck, please come and ask on our `slack channel <https://bonobo-slack.herokuapp.com/>`_, we'll figure
|
||||||
|
something out.
|
||||||
|
|
||||||
|
If you're not stuck but had trouble understanding something, please consider contributing to the docs (via github
|
||||||
|
pull requests).
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|||||||
@ -19,7 +19,7 @@ can run.
|
|||||||
|
|
||||||
.. code-block:: shell-session
|
.. code-block:: shell-session
|
||||||
|
|
||||||
bonobo init tutorial
|
$ bonobo init tutorial
|
||||||
|
|
||||||
This will create a `tutorial` directory (`content description here <https://www.bonobo-project.org/with/cookiecutter>`_).
|
This will create a `tutorial` directory (`content description here <https://www.bonobo-project.org/with/cookiecutter>`_).
|
||||||
|
|
||||||
@ -27,15 +27,15 @@ To run this project, use:
|
|||||||
|
|
||||||
.. code-block:: shell-session
|
.. code-block:: shell-session
|
||||||
|
|
||||||
bonobo run tutorial
|
$ bonobo run tutorial
|
||||||
|
|
||||||
|
|
||||||
Write a first transformation
|
Write a first transformation
|
||||||
::::::::::::::::::::::::::::
|
::::::::::::::::::::::::::::
|
||||||
|
|
||||||
Open `tutorial/__main__.py`, and delete all the code here.
|
Open `tutorial/main.py`, and delete all the code here.
|
||||||
|
|
||||||
A transformation can be whatever python can call, having inputs and outputs. Simplest transformations are functions.
|
A transformation can be whatever python can call. Simplest transformations are functions and generators.
|
||||||
|
|
||||||
Let's write one:
|
Let's write one:
|
||||||
|
|
||||||
@ -48,10 +48,10 @@ Easy.
|
|||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
This is about the same as :func:`str.upper`, and in the real world, you'd use it directly.
|
This function is very similar to :func:`str.upper`, which you can use directly.
|
||||||
|
|
||||||
Let's write two more transformations for the "extract" and "load" steps. In this example, we'll generate the data from
|
Let's write two more transformations for the "extract" and "load" steps. In this example, we'll generate the data from
|
||||||
scratch, and we'll use stdout to simulate data-persistence.
|
scratch, and we'll use stdout to "simulate" data-persistence.
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
@ -68,16 +68,16 @@ on things returned, and a normal function will just be seen as a generator that
|
|||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
Once again, :func:`print` would be used directly in a real-world transformation.
|
Once again, you should use the builtin :func:`print` directly instead of this `load()` function.
|
||||||
|
|
||||||
|
|
||||||
Create a transformation graph
|
Create a transformation graph
|
||||||
:::::::::::::::::::::::::::::
|
:::::::::::::::::::::::::::::
|
||||||
|
|
||||||
Bonobo main roles are two things:
|
Amongst other features, Bonobo will mostly help you there with the following:
|
||||||
|
|
||||||
* Execute the transformations in independent threads
|
* Execute the transformations in independent threads
|
||||||
* Pass the outputs of one thread to other(s) thread(s).
|
* Pass the outputs of one thread to other(s) thread(s) inputs.
|
||||||
|
|
||||||
To do this, it needs to know what data-flow you want to achieve, and you'll use a :class:`bonobo.Graph` to describe it.
|
To do this, it needs to know what data-flow you want to achieve, and you'll use a :class:`bonobo.Graph` to describe it.
|
||||||
|
|
||||||
@ -109,17 +109,17 @@ To do this, it needs to know what data-flow you want to achieve, and you'll use
|
|||||||
Execute the job
|
Execute the job
|
||||||
:::::::::::::::
|
:::::::::::::::
|
||||||
|
|
||||||
Save `tutorial/__main__.py` and execute your transformation:
|
Save `tutorial/main.py` and execute your transformation again:
|
||||||
|
|
||||||
.. code-block:: shell-session
|
.. code-block:: shell-session
|
||||||
|
|
||||||
bonobo run tutorial
|
$ bonobo run tutorial
|
||||||
|
|
||||||
This example is available in :mod:`bonobo.examples.tutorials.tut01e01`, and you can also run it as a module:
|
This example is available in :mod:`bonobo.examples.tutorials.tut01e01`, and you can also run it as a module:
|
||||||
|
|
||||||
.. code-block:: shell-session
|
.. code-block:: shell-session
|
||||||
|
|
||||||
bonobo run -m bonobo.examples.tutorials.tut01e01
|
$ bonobo run -m bonobo.examples.tutorials.tut01e01
|
||||||
|
|
||||||
|
|
||||||
Rewrite it using builtins
|
Rewrite it using builtins
|
||||||
@ -127,27 +127,17 @@ Rewrite it using builtins
|
|||||||
|
|
||||||
There is a much simpler way to describe an equivalent graph:
|
There is a much simpler way to describe an equivalent graph:
|
||||||
|
|
||||||
.. code-block:: python
|
.. literalinclude:: ../../bonobo/examples/tutorials/tut01e02.py
|
||||||
|
:language: python
|
||||||
|
|
||||||
import bonobo
|
The `extract()` generator has been replaced by a list, as Bonobo will interpret non-callable iterables as a no-input
|
||||||
|
generator.
|
||||||
|
|
||||||
graph = bonobo.Graph(
|
This example is also available in :mod:`bonobo.examples.tutorials.tut01e02`, and you can also run it as a module:
|
||||||
['foo', 'bar', 'baz',],
|
|
||||||
str.upper,
|
|
||||||
print,
|
|
||||||
)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
bonobo.run(graph)
|
|
||||||
|
|
||||||
We use a shortcut notation for the generator, with a list. Bonobo will wrap an iterable as a generator by itself if it
|
|
||||||
is added in a graph.
|
|
||||||
|
|
||||||
This example is available in :mod:`bonobo.examples.tutorials.tut01e02`, and you can also run it as a module:
|
|
||||||
|
|
||||||
.. code-block:: shell-session
|
.. code-block:: shell-session
|
||||||
|
|
||||||
bonobo run -m bonobo.examples.tutorials.tut01e02
|
$ bonobo run -m bonobo.examples.tutorials.tut01e02
|
||||||
|
|
||||||
You can now jump to the next part (:doc:`tut02`), or read a small summary of concepts and definitions introduced here
|
You can now jump to the next part (:doc:`tut02`), or read a small summary of concepts and definitions introduced here
|
||||||
below.
|
below.
|
||||||
@ -188,19 +178,19 @@ cases.
|
|||||||
Concepts and definitions
|
Concepts and definitions
|
||||||
::::::::::::::::::::::::
|
::::::::::::::::::::::::
|
||||||
|
|
||||||
* Transformation: a callable that takes input (as call parameters) and returns output(s), either as its return value or
|
* **Transformation**: a callable that takes input (as call parameters) and returns output(s), either as its return value or
|
||||||
by yielding values (a.k.a returning a generator).
|
by yielding values (a.k.a returning a generator).
|
||||||
|
|
||||||
* Transformation graph (or Graph): a set of transformations tied together in a :class:`bonobo.Graph` instance, which is
|
* **Transformation graph (or Graph)**: a set of transformations tied together in a :class:`bonobo.Graph` instance, which is
|
||||||
a directed acyclic graph (or DAG).
|
a directed acyclic graph (or DAG).
|
||||||
|
|
||||||
* Node: a graph element, most probably a transformation in a graph.
|
* **Node**: a graph element, most probably a transformation in a graph.
|
||||||
|
|
||||||
* Execution strategy (or strategy): a way to run a transformation graph. It's responsibility is mainly to parallelize
|
* **Execution strategy (or strategy)**: a way to run a transformation graph. Its responsibility is mainly to parallelize
|
||||||
(or not) the transformations, on one or more process and/or computer, and to setup the right queuing mechanism for
|
(or not) the transformations, on one or more process and/or computer, and to setup the right queuing mechanism for
|
||||||
transformations' inputs and outputs.
|
transformations' inputs and outputs.
|
||||||
|
|
||||||
* Execution context (or context): a wrapper around a node that holds the state for it. If the node needs state, there
|
* **Execution context (or context)**: a wrapper around a node that holds the state for it. If the node needs state, there
|
||||||
are tools available in bonobo to feed it to the transformation using additional call parameters, keeping
|
are tools available in bonobo to feed it to the transformation using additional call parameters, keeping
|
||||||
transformations stateless.
|
transformations stateless.
|
||||||
|
|
||||||
|
|||||||
@ -23,16 +23,18 @@ When run, the execution strategy wraps every component in a thread (assuming you
|
|||||||
:class:`bonobo.strategies.ThreadPoolExecutorStrategy`).
|
:class:`bonobo.strategies.ThreadPoolExecutorStrategy`).
|
||||||
|
|
||||||
Bonobo will send each line of data in the input node's thread (here, `A`). Now, each time `A` *yields* or *returns*
|
Bonobo will send each line of data in the input node's thread (here, `A`). Now, each time `A` *yields* or *returns*
|
||||||
something, it will be pushed on `B` input :class:`queue.Queue`, and will be consumed by `B`'s thread.
|
something, it will be pushed on `B` input :class:`queue.Queue`, and will be consumed by `B`'s thread. Meanwhile, `A`
|
||||||
|
will continue to run, if it's not done.
|
||||||
|
|
||||||
When there is more than one node linked as the output of a node (for example, with `B`, `C`, and `D`) , the same thing
|
When there is more than one node linked as the output of a node (for example, with `B`, `C`, and `D`), the same thing
|
||||||
happens except that each result coming out of `B` will be sent to both `C`'s and `D`'s input :class:`queue.Queue`.
|
happens except that each result coming out of `B` will be sent to both `C`'s and `D`'s input :class:`queue.Queue`.
|
||||||
|
|
||||||
One thing to keep in mind here is that as the objects are passed from thread to thread, you need to write "pure"
|
One thing to keep in mind here is that as the objects are passed from thread to thread, you need to write "pure"
|
||||||
transformations (see :doc:`/guide/purity`).
|
transformations (see :doc:`/guide/purity`).
|
||||||
|
|
||||||
You generally don't have to think about it. Just be aware that your nodes will run in parallel, and don't worry
|
You generally don't have to think about it. Just be aware that your nodes will run in parallel, and don't worry
|
||||||
too much about blocking nodes, as they won't block other nodes.
|
too much about nodes running blocking operations, as they will run in parallel. As soon as a line of output is ready,
|
||||||
|
the next nodes will start consuming it.
|
||||||
|
|
||||||
That being said, let's manipulate some files.
|
That being said, let's manipulate some files.
|
||||||
|
|
||||||
@ -52,18 +54,33 @@ We'll use a text file that was generated using Bonobo from the "liste-des-cafes-
|
|||||||
Mairie de Paris under the Open Database License (ODbL). You can `explore the original dataset
|
Mairie de Paris under the Open Database License (ODbL). You can `explore the original dataset
|
||||||
<https://opendata.paris.fr/explore/dataset/liste-des-cafes-a-un-euro/information/>`_.
|
<https://opendata.paris.fr/explore/dataset/liste-des-cafes-a-un-euro/information/>`_.
|
||||||
|
|
||||||
You'll need the `example dataset <https://github.com/python-bonobo/bonobo/blob/master/bonobo/examples/datasets/coffeeshops.txt>`_,
|
You'll need the `"coffeeshops.txt" example dataset <https://github.com/python-bonobo/bonobo/blob/master/bonobo/examples/datasets/coffeeshops.txt>`_,
|
||||||
available in **Bonobo**'s repository.
|
available in **Bonobo**'s repository:
|
||||||
|
|
||||||
|
.. code-block:: shell-session
|
||||||
|
|
||||||
|
$ curl https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/datasets/coffeeshops.txt > `python -c 'import bonobo; print(bonobo.get_examples_path("datasets/coffeeshops.txt"))'`
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
The "example dataset download" step will be easier in the future.
|
||||||
|
|
||||||
|
https://github.com/python-bonobo/bonobo/issues/134
|
||||||
|
|
||||||
.. literalinclude:: ../../bonobo/examples/tutorials/tut02e01_read.py
|
.. literalinclude:: ../../bonobo/examples/tutorials/tut02e01_read.py
|
||||||
:language: python
|
:language: python
|
||||||
|
|
||||||
You can run this example as a module:
|
You can also run this example as a module (but you'll still need the dataset...):
|
||||||
|
|
||||||
.. code-block:: shell-session
|
.. code-block:: shell-session
|
||||||
|
|
||||||
$ bonobo run -m bonobo.examples.tutorials.tut02e01_read
|
$ bonobo run -m bonobo.examples.tutorials.tut02e01_read
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
Don't focus too much on the `get_services()` function for now. It is required, with this exact name, but we'll get
|
||||||
|
into that in a few minutes.
|
||||||
|
|
||||||
Writing to files
|
Writing to files
|
||||||
::::::::::::::::
|
::::::::::::::::
|
||||||
|
|
||||||
|
|||||||
@ -1,9 +1,195 @@
|
|||||||
Configurables and Services
|
Configurables and Services
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
This document does not exist yet, but will be available soon.
|
.. note::
|
||||||
|
|
||||||
Meanwhile, you can read the matching references:
|
This section lacks completeness, sorry for that (but you can still read it!).
|
||||||
|
|
||||||
|
In the last section, we used a few new tools.
|
||||||
|
|
||||||
|
Class-based transformations and configurables
|
||||||
|
:::::::::::::::::::::::::::::::::::::::::::::
|
||||||
|
|
||||||
|
Bonobo is a bit dumb. If something is callable, it considers it can be used as a transformation, and it's up to the
|
||||||
|
user to provide callables that logically fit in a graph.
|
||||||
|
|
||||||
|
You can use plain python objects with a `__call__()` method, and it will just work.
|
||||||
|
|
||||||
|
As a lot of transformations need common machinery, there are a few tools to quickly build transformations, most of
|
||||||
|
them requiring your class to subclass :class:`bonobo.config.Configurable`.
|
||||||
|
|
||||||
|
Configurables allow you to use the following features:
|
||||||
|
|
||||||
|
* You can add **Options** (using the :class:`bonobo.config.Option` descriptor). Options can be positional, or keyword
|
||||||
|
based, can have a default value and will be consumed from the constructor arguments.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo.config import Configurable, Option
|
||||||
|
|
||||||
|
class PrefixIt(Configurable):
|
||||||
|
prefix = Option(str, positional=True, default='>>>')
|
||||||
|
|
||||||
|
def call(self, row):
|
||||||
|
return self.prefix + ' ' + row
|
||||||
|
|
||||||
|
prefixer = PrefixIt('$')
|
||||||
|
|
||||||
|
* You can add **Services** (using the :class:`bonobo.config.Service` descriptor). Services are a subclass of
|
||||||
|
:class:`bonobo.config.Option`, sharing the same basics, but specialized in the definition of "named services" that
|
||||||
|
will be resolved at runtime (a.k.a for which we will provide an implementation at runtime). We'll dive more into that
|
||||||
|
in the next section.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo.config import Configurable, Option, Service
|
||||||
|
|
||||||
|
class HttpGet(Configurable):
|
||||||
|
url = Option(default='https://jsonplaceholder.typicode.com/users')
|
||||||
|
http = Service('http.client')
|
||||||
|
|
||||||
|
def call(self, http):
|
||||||
|
resp = http.get(self.url)
|
||||||
|
|
||||||
|
for row in resp.json():
|
||||||
|
yield row
|
||||||
|
|
||||||
|
http_get = HttpGet()
|
||||||
|
|
||||||
|
|
||||||
|
* You can add **Methods** (using the :class:`bonobo.config.Method` descriptor). :class:`bonobo.config.Method` is a
|
||||||
|
subclass of :class:`bonobo.config.Option` that allows you to pass callable parameters, either to the class constructor,
|
||||||
|
or using the class as a decorator.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo.config import Configurable, Method
|
||||||
|
|
||||||
|
class Applier(Configurable):
|
||||||
|
apply = Method()
|
||||||
|
|
||||||
|
def call(self, row):
|
||||||
|
return self.apply(row)
|
||||||
|
|
||||||
|
@Applier
|
||||||
|
def Prefixer(self, row):
|
||||||
|
return 'Hello, ' + row
|
||||||
|
|
||||||
|
prefixer = Prefixer()
|
||||||
|
|
||||||
|
* You can add **ContextProcessors**, which are an advanced feature we won't introduce here. If you're familiar with
|
||||||
|
pytest, you can think of them as pytest fixtures, execution wise.
|
||||||
|
|
||||||
|
Services
|
||||||
|
::::::::
|
||||||
|
|
||||||
|
The motivation behind services is mostly separation of concerns, testability and deployability.
|
||||||
|
|
||||||
|
Usually, your transformations will depend on services (like a filesystem, an http client, a database, a rest api, ...).
|
||||||
|
Those services can very well be hardcoded in the transformations, but there are two main drawbacks:
|
||||||
|
|
||||||
|
* You won't be able to change the implementation depending on the current environment (development laptop versus
|
||||||
|
production servers, bug-hunting session versus execution, etc.)
|
||||||
|
* You won't be able to test your transformations without testing the associated services.
|
||||||
|
|
||||||
|
To overcome those caveats of hardcoding things, we define Services in the configurable, which are basically
|
||||||
|
string-options of the service names, and we provide an implementation at the last moment possible.
|
||||||
|
|
||||||
|
There are two ways of providing implementations:
|
||||||
|
|
||||||
|
* Either file-wide, by providing a `get_services()` function that returns a dict of named implementations (we did so
|
||||||
|
with filesystems in the previous step, :doc:`tut02`)
|
||||||
|
* Or directory-wide, by providing a `get_services()` function in a specially named `_services.py` file.
|
||||||
|
|
||||||
|
The first is simpler if you only have one transformation graph in one file, the second allows you to group coherent
|
||||||
|
transformations together in a directory and share the implementations.
|
||||||
|
|
||||||
|
Let's see how to use it, starting from the previous service example:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo.config import Configurable, Option, Service
|
||||||
|
|
||||||
|
class HttpGet(Configurable):
|
||||||
|
url = Option(default='https://jsonplaceholder.typicode.com/users')
|
||||||
|
http = Service('http.client')
|
||||||
|
|
||||||
|
def call(self, http):
|
||||||
|
resp = http.get(self.url)
|
||||||
|
|
||||||
|
for row in resp.json():
|
||||||
|
yield row
|
||||||
|
|
||||||
|
We defined an "http.client" service, that obviously should have a `get()` method, returning responses that have a
|
||||||
|
`json()` method.
|
||||||
|
|
||||||
|
Let's provide two implementations for that. The first one will be using `requests <http://docs.python-requests.org/>`_,
|
||||||
|
that coincidentally satisfies the described interface:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
import bonobo
|
||||||
|
import requests
|
||||||
|
|
||||||
|
def get_services():
|
||||||
|
return {
|
||||||
|
'http.client': requests
|
||||||
|
}
|
||||||
|
|
||||||
|
graph = bonobo.Graph(
|
||||||
|
HttpGet(),
|
||||||
|
print,
|
||||||
|
)
|
||||||
|
|
||||||
|
If you run this code, you should see some mock data returned by the webservice we called (assuming it's up and you can
|
||||||
|
reach it).
|
||||||
|
|
||||||
|
Now, the second implementation will replace that with a mock, used for testing purposes:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
class HttpResponseStub:
|
||||||
|
def json(self):
|
||||||
|
return [
|
||||||
|
{'id': 1, 'name': 'Leanne Graham', 'username': 'Bret', 'email': 'Sincere@april.biz', 'address': {'street': 'Kulas Light', 'suite': 'Apt. 556', 'city': 'Gwenborough', 'zipcode': '92998-3874', 'geo': {'lat': '-37.3159', 'lng': '81.1496'}}, 'phone': '1-770-736-8031 x56442', 'website': 'hildegard.org', 'company': {'name': 'Romaguera-Crona', 'catchPhrase': 'Multi-layered client-server neural-net', 'bs': 'harness real-time e-markets'}},
|
||||||
|
{'id': 2, 'name': 'Ervin Howell', 'username': 'Antonette', 'email': 'Shanna@melissa.tv', 'address': {'street': 'Victor Plains', 'suite': 'Suite 879', 'city': 'Wisokyburgh', 'zipcode': '90566-7771', 'geo': {'lat': '-43.9509', 'lng': '-34.4618'}}, 'phone': '010-692-6593 x09125', 'website': 'anastasia.net', 'company': {'name': 'Deckow-Crist', 'catchPhrase': 'Proactive didactic contingency', 'bs': 'synergize scalable supply-chains'}},
|
||||||
|
]
|
||||||
|
|
||||||
|
class HttpStub:
|
||||||
|
def get(self, url):
|
||||||
|
return HttpResponseStub()
|
||||||
|
|
||||||
|
def get_services():
|
||||||
|
return {
|
||||||
|
'http.client': HttpStub()
|
||||||
|
}
|
||||||
|
|
||||||
|
graph = bonobo.Graph(
|
||||||
|
HttpGet(),
|
||||||
|
print,
|
||||||
|
)
|
||||||
|
|
||||||
|
The `Graph` definition staying exactly the same, you can easily substitute the `_services.py` file depending on your
|
||||||
|
environment (the way you're doing this is out of bonobo's scope and heavily depends on your usual way of managing
|
||||||
|
configuration files on different platforms).
|
||||||
|
|
||||||
|
Starting with bonobo 0.5 (not yet released), you will be able to use service injections with function-based
|
||||||
|
transformations too, using the `bonobo.config.requires` decorator to mark a dependency.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo.config import requires
|
||||||
|
|
||||||
|
@requires('http.client')
|
||||||
|
def http_get(http):
|
||||||
|
resp = http.get('https://jsonplaceholder.typicode.com/users')
|
||||||
|
|
||||||
|
for row in resp.json():
|
||||||
|
yield row
|
||||||
|
|
||||||
|
|
||||||
|
Read more
|
||||||
|
:::::::::
|
||||||
|
|
||||||
* :doc:`/guide/services`
|
* :doc:`/guide/services`
|
||||||
* :doc:`/reference/api_config`
|
* :doc:`/reference/api_config`
|
||||||
|
|||||||
Reference in New Issue
Block a user