Merge pull request #135 from hartym/master
[doc] Documentation, my dear. Half of the work, looks you are a littl…
This commit is contained in:
@ -5,7 +5,10 @@ graph = bonobo.Graph(
|
||||
print,
|
||||
)
|
||||
|
||||
|
||||
def get_services():
|
||||
return {'fs': bonobo.open_examples_fs('datasets')}
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
bonobo.run(
|
||||
graph, services={'fs': bonobo.open_examples_fs('datasets')}
|
||||
)
|
||||
bonobo.run(graph, services=get_services())
|
||||
|
||||
@ -8,10 +8,18 @@ def split_one(line):
|
||||
graph = bonobo.Graph(
|
||||
bonobo.FileReader('coffeeshops.txt'),
|
||||
split_one,
|
||||
bonobo.JsonWriter('coffeeshops.json'),
|
||||
bonobo.JsonWriter(
|
||||
'coffeeshops.json', fs='fs.output', ioformat='arg0'
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def get_services():
|
||||
return {
|
||||
'fs': bonobo.open_examples_fs('datasets'),
|
||||
'fs.output': bonobo.open_fs(),
|
||||
}
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
bonobo.run(
|
||||
graph, services={'fs': bonobo.open_examples_fs('datasets')}
|
||||
)
|
||||
bonobo.run(graph, services=get_services())
|
||||
|
||||
@ -1,4 +1,6 @@
|
||||
import bonobo, json
|
||||
import json
|
||||
|
||||
import bonobo
|
||||
|
||||
|
||||
def split_one_to_map(line):
|
||||
@ -18,10 +20,16 @@ class MyJsonWriter(bonobo.JsonWriter):
|
||||
graph = bonobo.Graph(
|
||||
bonobo.FileReader('coffeeshops.txt'),
|
||||
split_one_to_map,
|
||||
MyJsonWriter('coffeeshops.json'),
|
||||
MyJsonWriter('coffeeshops.json', fs='fs.output', ioformat='arg0'),
|
||||
)
|
||||
|
||||
|
||||
def get_services():
|
||||
return {
|
||||
'fs': bonobo.open_examples_fs('datasets'),
|
||||
'fs.output': bonobo.open_fs(),
|
||||
}
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
bonobo.run(
|
||||
graph, services={'fs': bonobo.open_examples_fs('datasets')}
|
||||
)
|
||||
bonobo.run(graph, services=get_services())
|
||||
|
||||
207
docs/_templates/index.html
vendored
207
docs/_templates/index.html
vendored
@ -2,105 +2,116 @@
|
||||
{% set title = _('Bonobo — Data processing for humans') %}
|
||||
{% block body %}
|
||||
|
||||
<div style="border: 2px solid red; font-weight: bold; margin: 1em; padding: 1em">
|
||||
Bonobo is <strong>ALPHA</strong> software. Some APIs will change.
|
||||
</div>
|
||||
<h1 style="text-align: center">
|
||||
<img class="logo" src="{{ pathto('_static/bonobo.png', 1) }}" title="Bonobo" alt="Bonobo"
|
||||
style=" width: 128px; height: 128px;"/>
|
||||
</h1>
|
||||
|
||||
<h1 style="text-align: center">
|
||||
<img class="logo" src="{{ pathto('_static/bonobo.png', 1) }}" title="Bonobo" alt="Bonobo"
|
||||
style=" width: 128px; height: 128px;"/>
|
||||
</h1>
|
||||
|
||||
<p>
|
||||
{% trans %}
|
||||
<strong>Bonobo</strong> is a line-by-line data-processing toolkit for python 3.5+ (extract-transform-load
|
||||
framework) emphasizing simple and atomic data transformations defined using a directed graph of plain old
|
||||
python objects (functions, iterables, generators, ...).
|
||||
{% endtrans %}
|
||||
</p>
|
||||
|
||||
<h2 style="margin-bottom: 0">{% trans %}Documentation{% endtrans %}</h2>
|
||||
|
||||
<table class="contentstable">
|
||||
<tr>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("tutorial/index") }}">{% trans %}First steps{% endtrans %}</a><br/>
|
||||
<span class="linkdescr">{% trans %}quick overview of basic features{% endtrans %}</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("search") }}">{% trans %}
|
||||
Search{% endtrans %}</a><br/>
|
||||
<span class="linkdescr">{% trans %}search the documentation{% endtrans %}</span></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("guide/index") }}">{% trans %}
|
||||
Guides{% endtrans %}</a><br/>
|
||||
<span class="linkdescr">{% trans %}for a complete overview{% endtrans %}</span>
|
||||
</p>
|
||||
</td>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("reference/index") }}">{% trans %}References{% endtrans %}</a>
|
||||
<br/>
|
||||
<span class="linkdescr">{% trans %}all functions, classes, terms{% endtrans %}</span>
|
||||
</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" target="_blank" href="https://github.com/python-bonobo/bonobo/tree/master/bonobo/examples">{% trans %}
|
||||
Cookbook{% endtrans %}</a><br/>
|
||||
<span class="linkdescr">{% trans %}examples and recipes{% endtrans %}</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("contribute/index") }}">{% trans %}
|
||||
Contribute{% endtrans %}</a><br/>
|
||||
<span class="linkdescr">{% trans %}contributor guide{% endtrans %}</span></p>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<h2>Features</h2>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
{% trans %}
|
||||
<b>10 minutes to get started:</b> Know some python? Writing your first data processor is an affair
|
||||
of minutes.
|
||||
{% endtrans %}
|
||||
</li>
|
||||
<li>
|
||||
{% trans %}
|
||||
<b>Data sources and targets:</b> HTML, JSON, XML, SQL databases, NoSQL databases, HTTP/REST APIs,
|
||||
streaming APIs, python objects...
|
||||
{% endtrans %}
|
||||
</li>
|
||||
<li>
|
||||
{% trans %}
|
||||
<b>Service injection:</b> Abstract the transformation dependencies to easily switch data sources and
|
||||
dependant libraries. You'll be able to specify the concrete implementations or configurations at
|
||||
runtime, for example to switch a database connection string or an API endpoint.
|
||||
{% endtrans %}
|
||||
</li>
|
||||
<li>
|
||||
{% trans %}
|
||||
<b>Plugins:</b> Easily add features to all your transformations by using builtin plugins (Jupyter,
|
||||
Console, ...) or write your own.
|
||||
{% endtrans %}
|
||||
</li>
|
||||
<li>
|
||||
{% trans %}
|
||||
Bonobo is young, and the todo-list is huge. Read the <a href="https://www.bonobo-project.org/roadmap">roadmap</a>.
|
||||
{% endtrans %}
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p>{% trans %}
|
||||
You can also download PDF/EPUB versions of the Bonobo documentation:
|
||||
<a href="http://readthedocs.org/projects/bonobo/downloads/pdf/stable/">PDF version</a>,
|
||||
<a href="http://readthedocs.org/projects/bonobo/downloads/epub/stable/">EPUB version</a>.
|
||||
<p>
|
||||
{% trans %}
|
||||
<strong>Bonobo</strong> is a line-by-line data-processing toolkit for python 3.5+ (extract-transform-load
|
||||
framework, or ETL) emphasizing simple and atomic data transformations defined using a directed graph of plain old
|
||||
python objects (functions, iterables, generators, ...).
|
||||
{% endtrans %}
|
||||
</p>
|
||||
</p>
|
||||
|
||||
<div style="border: 2px solid red; font-weight: bold; margin: 1em; padding: 1em">
|
||||
Bonobo is <strong>ALPHA</strong> software. Some APIs will change.
|
||||
</div>
|
||||
|
||||
|
||||
<h2 style="margin-bottom: 0">{% trans %}Documentation{% endtrans %}</h2>
|
||||
|
||||
<table class="contentstable">
|
||||
<tr>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("tutorial/index") }}">{% trans %}First steps{%
|
||||
endtrans %}</a><br/>
|
||||
<span class="linkdescr">{% trans %}quick overview of basic features{% endtrans %}</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("search") }}">{% trans %}
|
||||
Search{% endtrans %}</a><br/>
|
||||
<span class="linkdescr">{% trans %}search the documentation{% endtrans %}</span></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("guide/index") }}">{% trans %}
|
||||
Guides{% endtrans %}</a><br/>
|
||||
<span class="linkdescr">{% trans %}for a complete overview{% endtrans %}</span>
|
||||
</p>
|
||||
</td>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("reference/index") }}">{% trans %}References{%
|
||||
endtrans %}</a>
|
||||
<br/>
|
||||
<span class="linkdescr">{% trans %}all functions, classes, terms{% endtrans %}</span>
|
||||
</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" target="_blank"
|
||||
href="https://github.com/python-bonobo/bonobo/tree/master/bonobo/examples">{% trans %}
|
||||
Cookbook{% endtrans %}</a><br/>
|
||||
<span class="linkdescr">{% trans %}examples and recipes{% endtrans %}</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p class="biglink"><a class="biglink" href="{{ pathto("contribute/index") }}">{% trans %}
|
||||
Contribute{% endtrans %}</a><br/>
|
||||
<span class="linkdescr">{% trans %}contributor guide{% endtrans %}</span></p>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<h2>Features</h2>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
{% trans %}
|
||||
<b>10 minutes to get started:</b> Know some python? Writing your first data processor is an affair
|
||||
of minutes.
|
||||
{% endtrans %}
|
||||
</li>
|
||||
<li>
|
||||
{% trans %}
|
||||
<b>Data sources and targets:</b> HTML, JSON, XML, SQL databases, NoSQL databases, HTTP/REST APIs,
|
||||
streaming APIs, python objects...
|
||||
{% endtrans %}
|
||||
</li>
|
||||
<li>
|
||||
{% trans %}
|
||||
<b>Service injection:</b> Abstract the transformation dependencies to easily switch data sources and
|
||||
dependent libraries. You'll be able to specify the concrete implementations or configurations at
|
||||
runtime, for example to switch a database connection string or an API endpoint.
|
||||
{% endtrans %}
|
||||
</li>
|
||||
<li>
|
||||
{% trans %}
|
||||
<b>Plugins:</b> Easily add features to all your transformations by using builtin plugins (Jupyter,
|
||||
Console, ...) or write your own.
|
||||
{% endtrans %}
|
||||
</li>
|
||||
<li>
|
||||
{% trans %}
|
||||
Bonobo is young, and the todo-list is huge. Read the <a
|
||||
href="https://www.bonobo-project.org/roadmap">roadmap</a>.
|
||||
{% endtrans %}
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p>{% trans %}
|
||||
You can also download PDF/EPUB versions of the Bonobo documentation:
|
||||
<a href="http://readthedocs.org/projects/bonobo/downloads/pdf/stable/">PDF version</a>,
|
||||
<a href="http://readthedocs.org/projects/bonobo/downloads/epub/stable/">EPUB version</a>.
|
||||
{% endtrans %}
|
||||
</p>
|
||||
|
||||
<h2>Table of contents</h2>
|
||||
|
||||
|
||||
<div>
|
||||
{{ toctree(maxdepth=2, collapse=False)}}
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
@ -4,6 +4,7 @@ Bonobo with SQLAlchemy
|
||||
.. todo:: The `bonobo-sqlalchemy` package is at a very alpha stage, and things will change. This section is here to
|
||||
give a brief overview but is neither complete nor definitive.
|
||||
|
||||
Read the introduction: https://www.bonobo-project.org/with/sqlalchemy
|
||||
|
||||
Installation
|
||||
::::::::::::
|
||||
|
||||
@ -8,8 +8,8 @@ Bonobo
|
||||
tutorial/index
|
||||
guide/index
|
||||
reference/index
|
||||
contribute/index
|
||||
faq
|
||||
contribute/index
|
||||
genindex
|
||||
modindex
|
||||
|
||||
|
||||
@ -4,36 +4,47 @@ Installation
|
||||
Create an ETL project
|
||||
:::::::::::::::::::::
|
||||
|
||||
If you only want to use Bonobo to code ETLs, your easiest option to get started is to use our
|
||||
`cookiecutter template <https://github.com/python-bonobo/cookiecutter-bonobo>`_.
|
||||
Creating a project and starting to write code should take less than a minute:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
$ pip install --upgrade bonobo cookiecutter
|
||||
$ bonobo init my-etl-project
|
||||
$ bonobo run my-etl-project
|
||||
|
||||
Once you bootstrapped a project, you can start editing the default example transformation by editing
|
||||
`my-etl-project/main.py`.
|
||||
|
||||
Other installation options
|
||||
::::::::::::::::::::::::::
|
||||
|
||||
Install from PyPI
|
||||
:::::::::::::::::
|
||||
-----------------
|
||||
|
||||
You can also install it directly from the `Python Package Index <https://pypi.python.org/pypi/bonobo>`_.
|
||||
You can install it directly from the `Python Package Index <https://pypi.python.org/pypi/bonobo>`_ (like we did above).
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
$ pip install bonobo
|
||||
|
||||
Install from source
|
||||
:::::::::::::::::::
|
||||
-------------------
|
||||
|
||||
If you want to install an unreleased version, you can use git urls with pip. This is useful when using bonobo as a
|
||||
dependency of your code and you want to try a forked version of bonobo with your software. You can use the git+http
|
||||
string in your `requirements.txt` file. However, the best option for development on bonobo directly is not this one,
|
||||
but editable installs (see below).
|
||||
dependency of your code and you want to try a forked version of bonobo with your software. You can use a `git+http`
|
||||
string in your `requirements.txt` file. However, the best option for development on bonobo is an editable install (see
|
||||
below).
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
$ pip install git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo
|
||||
$ pip install git+https://github.com/python-bonobo/bonobo.git@develop#egg=bonobo
|
||||
|
||||
Editable install
|
||||
::::::::::::::::
|
||||
----------------
|
||||
|
||||
If you plan on making patches to Bonobo, you should install it as an "editable" package, which is a really great pip feature.
|
||||
Pip will clone your repository in a source directory and create a symlink for it in the site-package directory of your
|
||||
python interpreter.
|
||||
If you plan on making patches to Bonobo, you should install it as an "editable" package, which is a really great pip
|
||||
feature. Pip will clone your repository in a source directory and create a symlink for it in the site-package directory
|
||||
of your python interpreter.
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
@ -63,20 +74,17 @@ I usually name the git remote for the main bonobo repository "upstream", and my
|
||||
|
||||
$ git remote rename origin upstream
|
||||
$ git remote add origin git@github.com:hartym/bonobo.git
|
||||
$ git fetch --all
|
||||
|
||||
Of course, replace my github username by the one you used to fork bonobo. You should be good to go!
|
||||
|
||||
Windows support
|
||||
:::::::::::::::
|
||||
|
||||
There are problems on the windows platform, mostly due to the fact bonobo was not developed by experienced windows users.
|
||||
There are minor issues on the windows platform, mostly due to the fact bonobo was not developed by experienced windows
|
||||
users.
|
||||
|
||||
We're trying to look into that but energy available to provide serious support on windows is very limited.
|
||||
|
||||
If you have experience in this domain and you're willing to help, you're more than welcome!
|
||||
|
||||
|
||||
|
||||
.. todo::
|
||||
|
||||
Better install docs, especially on how to use different forks or branches, etc.
|
||||
|
||||
|
||||
@ -9,17 +9,26 @@ python code in charge of handling similar shaped independant lines of data.
|
||||
|
||||
Bonobo *is not* a statistical or data-science tool. If you're looking for a data-analysis tool in python, use Pandas.
|
||||
|
||||
Bonobo is a lean manufacturing assembly line for data that let you focus on the actual work instead of the plumbery.
|
||||
Bonobo is a lean manufacturing assembly line for data that lets you focus on the actual work instead of the plumbing
|
||||
(execution contexts, parallelism, error handling, console output, logging, ...).
|
||||
|
||||
Bonobo uses simple python and should be quick and easy to learn.
|
||||
|
||||
Tutorial
|
||||
::::::::
|
||||
|
||||
Warning: the documentation is still in progress. Although all content here should be accurate, you may feel a lack of
|
||||
completeness, for which we plaid guilty and apologize. If there is something blocking, please come on our
|
||||
`slack channel <https://bonobo-slack.herokuapp.com/>`_ and complain, we'll figure something out. If there is something
|
||||
that did not block you but can be a no-go for others, please consider contributing to the docs.
|
||||
.. note::
|
||||
|
||||
Good documentation is not easy to write. We do our best to make it better and better.
|
||||
|
||||
Although all content here should be accurate, you may feel a lack of completeness, for which we plead guilty and
|
||||
apologize.
|
||||
|
||||
If you're stuck, please come and ask on our `slack channel <https://bonobo-slack.herokuapp.com/>`_, we'll figure
|
||||
something out.
|
||||
|
||||
If you're not stuck but had trouble understanding something, please consider contributing to the docs (via github
|
||||
pull requests).
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
@ -19,7 +19,7 @@ can run.
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
bonobo init tutorial
|
||||
$ bonobo init tutorial
|
||||
|
||||
This will create a `tutorial` directory (`content description here <https://www.bonobo-project.org/with/cookiecutter>`_).
|
||||
|
||||
@ -27,15 +27,15 @@ To run this project, use:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
bonobo run tutorial
|
||||
$ bonobo run tutorial
|
||||
|
||||
|
||||
Write a first transformation
|
||||
::::::::::::::::::::::::::::
|
||||
|
||||
Open `tutorial/__main__.py`, and delete all the code here.
|
||||
Open `tutorial/main.py`, and delete all the code here.
|
||||
|
||||
A transformation can be whatever python can call, having inputs and outputs. Simplest transformations are functions.
|
||||
A transformation can be whatever python can call. Simplest transformations are functions and generators.
|
||||
|
||||
Let's write one:
|
||||
|
||||
@ -48,10 +48,10 @@ Easy.
|
||||
|
||||
.. note::
|
||||
|
||||
This is about the same as :func:`str.upper`, and in the real world, you'd use it directly.
|
||||
This function is very similar to :func:`str.upper`, which you can use directly.
|
||||
|
||||
Let's write two more transformations for the "extract" and "load" steps. In this example, we'll generate the data from
|
||||
scratch, and we'll use stdout to simulate data-persistence.
|
||||
scratch, and we'll use stdout to "simulate" data-persistence.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@ -68,16 +68,16 @@ on things returned, and a normal function will just be seen as a generator that
|
||||
|
||||
.. note::
|
||||
|
||||
Once again, :func:`print` would be used directly in a real-world transformation.
|
||||
Once again, you should use the builtin :func:`print` directly instead of this `load()` function.
|
||||
|
||||
|
||||
Create a transformation graph
|
||||
:::::::::::::::::::::::::::::
|
||||
|
||||
Bonobo main roles are two things:
|
||||
Amongst other features, Bonobo will mostly help you there with the following:
|
||||
|
||||
* Execute the transformations in independent threads
|
||||
* Pass the outputs of one thread to other(s) thread(s).
|
||||
* Pass the outputs of one thread to the inputs of the other thread(s).
|
||||
|
||||
To do this, it needs to know what data-flow you want to achieve, and you'll use a :class:`bonobo.Graph` to describe it.
|
||||
|
||||
@ -109,17 +109,17 @@ To do this, it needs to know what data-flow you want to achieve, and you'll use
|
||||
Execute the job
|
||||
:::::::::::::::
|
||||
|
||||
Save `tutorial/__main__.py` and execute your transformation:
|
||||
Save `tutorial/main.py` and execute your transformation again:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
bonobo run tutorial
|
||||
$ bonobo run tutorial
|
||||
|
||||
This example is available in :mod:`bonobo.examples.tutorials.tut01e01`, and you can also run it as a module:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
bonobo run -m bonobo.examples.tutorials.tut01e01
|
||||
$ bonobo run -m bonobo.examples.tutorials.tut01e01
|
||||
|
||||
|
||||
Rewrite it using builtins
|
||||
@ -127,27 +127,17 @@ Rewrite it using builtins
|
||||
|
||||
There is a much simpler way to describe an equivalent graph:
|
||||
|
||||
.. code-block:: python
|
||||
.. literalinclude:: ../../bonobo/examples/tutorials/tut01e02.py
|
||||
:language: python
|
||||
|
||||
import bonobo
|
||||
The `extract()` generator has been replaced by a list, as Bonobo will interpret non-callable iterables as a no-input
|
||||
generator.
|
||||
|
||||
graph = bonobo.Graph(
|
||||
['foo', 'bar', 'baz',],
|
||||
str.upper,
|
||||
print,
|
||||
)
|
||||
|
||||
if __name__ == '__main__':
|
||||
bonobo.run(graph)
|
||||
|
||||
We use a shortcut notation for the generator, with a list. Bonobo will wrap an iterable as a generator by itself if it
|
||||
is added in a graph.
|
||||
|
||||
This example is available in :mod:`bonobo.examples.tutorials.tut01e02`, and you can also run it as a module:
|
||||
This example is also available in :mod:`bonobo.examples.tutorials.tut01e02`, and you can run it as a module:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
bonobo run -m bonobo.examples.tutorials.tut01e02
|
||||
$ bonobo run -m bonobo.examples.tutorials.tut01e02
|
||||
|
||||
You can now jump to the next part (:doc:`tut02`), or read a small summary of concepts and definitions introduced here
|
||||
below.
|
||||
@ -188,19 +178,19 @@ cases.
|
||||
Concepts and definitions
|
||||
::::::::::::::::::::::::
|
||||
|
||||
* Transformation: a callable that takes input (as call parameters) and returns output(s), either as its return value or
|
||||
* **Transformation**: a callable that takes input (as call parameters) and returns output(s), either as its return value or
|
||||
by yielding values (a.k.a returning a generator).
|
||||
|
||||
* Transformation graph (or Graph): a set of transformations tied together in a :class:`bonobo.Graph` instance, which is
|
||||
* **Transformation graph (or Graph)**: a set of transformations tied together in a :class:`bonobo.Graph` instance, which is
|
||||
a directed acyclic graph (or DAG).
|
||||
|
||||
* Node: a graph element, most probably a transformation in a graph.
|
||||
* **Node**: a graph element, most probably a transformation in a graph.
|
||||
|
||||
* Execution strategy (or strategy): a way to run a transformation graph. It's responsibility is mainly to parallelize
|
||||
* **Execution strategy (or strategy)**: a way to run a transformation graph. Its responsibility is mainly to parallelize
|
||||
(or not) the transformations, on one or more process and/or computer, and to setup the right queuing mechanism for
|
||||
transformations' inputs and outputs.
|
||||
|
||||
* Execution context (or context): a wrapper around a node that holds the state for it. If the node needs state, there
|
||||
* **Execution context (or context)**: a wrapper around a node that holds the state for it. If the node needs state, there
|
||||
are tools available in bonobo to feed it to the transformation using additional call parameters, keeping
|
||||
transformations stateless.
|
||||
|
||||
|
||||
@ -23,16 +23,18 @@ When run, the execution strategy wraps every component in a thread (assuming you
|
||||
:class:`bonobo.strategies.ThreadPoolExecutorStrategy`).
|
||||
|
||||
Bonobo will send each line of data in the input node's thread (here, `A`). Now, each time `A` *yields* or *returns*
|
||||
something, it will be pushed on `B` input :class:`queue.Queue`, and will be consumed by `B`'s thread.
|
||||
something, it will be pushed on `B` input :class:`queue.Queue`, and will be consumed by `B`'s thread. Meanwhile, `A`
|
||||
will continue to run, if it's not done.
|
||||
|
||||
When there is more than one node linked as the output of a node (for example, with `B`, `C`, and `D`) , the same thing
|
||||
When there is more than one node linked as the output of a node (for example, with `B`, `C`, and `D`), the same thing
|
||||
happens except that each result coming out of `B` will be sent to both the `C` and `D` input :class:`queue.Queue`.
|
||||
|
||||
One thing to keep in mind here is that as the objects are passed from thread to thread, you need to write "pure"
|
||||
transformations (see :doc:`/guide/purity`).
|
||||
|
||||
You generally don't have to think about it. Just be aware that your nodes will run in parallel, and don't worry
|
||||
too much about blocking nodes, as they won't block other nodes.
|
||||
too much about nodes running blocking operations, as they will run in parallel. As soon as a line of output is ready,
|
||||
the next nodes will start consuming it.
|
||||
|
||||
That being said, let's manipulate some files.
|
||||
|
||||
@ -52,18 +54,33 @@ We'll use a text file that was generated using Bonobo from the "liste-des-cafes-
|
||||
Mairie de Paris under the Open Database License (ODbL). You can `explore the original dataset
|
||||
<https://opendata.paris.fr/explore/dataset/liste-des-cafes-a-un-euro/information/>`_.
|
||||
|
||||
You'll need the `example dataset <https://github.com/python-bonobo/bonobo/blob/master/bonobo/examples/datasets/coffeeshops.txt>`_,
|
||||
available in **Bonobo**'s repository.
|
||||
You'll need the `"coffeeshops.txt" example dataset <https://github.com/python-bonobo/bonobo/blob/master/bonobo/examples/datasets/coffeeshops.txt>`_,
|
||||
available in **Bonobo**'s repository:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
$ curl https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/datasets/coffeeshops.txt > `python -c 'import bonobo; print(bonobo.get_examples_path("datasets/coffeeshops.txt"))'`
|
||||
|
||||
.. note::
|
||||
|
||||
The "example dataset download" step will be easier in the future.
|
||||
|
||||
https://github.com/python-bonobo/bonobo/issues/134
|
||||
|
||||
.. literalinclude:: ../../bonobo/examples/tutorials/tut02e01_read.py
|
||||
:language: python
|
||||
|
||||
You can run this example as a module:
|
||||
You can also run this example as a module (but you'll still need the dataset...):
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
$ bonobo run -m bonobo.examples.tutorials.tut02e01_read
|
||||
|
||||
.. note::
|
||||
|
||||
Don't focus too much on the `get_services()` function for now. It is required, with this exact name, but we'll get
|
||||
into that in a few minutes.
|
||||
|
||||
Writing to files
|
||||
::::::::::::::::
|
||||
|
||||
|
||||
@ -1,9 +1,195 @@
|
||||
Configurables and Services
|
||||
==========================
|
||||
|
||||
This document does not exist yet, but will be available soon.
|
||||
.. note::
|
||||
|
||||
Meanwhile, you can read the matching references:
|
||||
This section lacks completeness, sorry for that (but you can still read it!).
|
||||
|
||||
In the last section, we used a few new tools.
|
||||
|
||||
Class-based transformations and configurables
|
||||
:::::::::::::::::::::::::::::::::::::::::::::
|
||||
|
||||
Bonobo is a bit dumb. If something is callable, it considers it can be used as a transformation, and it's up to the
|
||||
user to provide callables that logically fit in a graph.
|
||||
|
||||
You can use plain python objects with a `__call__()` method, and it will just work.
|
||||
|
||||
As a lot of transformations need common machinery, there are a few tools to quickly build transformations, most of
|
||||
them requiring your class to subclass :class:`bonobo.config.Configurable`.
|
||||
|
||||
Configurables allow you to use the following features:
|
||||
|
||||
* You can add **Options** (using the :class:`bonobo.config.Option` descriptor). Options can be positional, or keyword
|
||||
based, can have a default value and will be consumed from the constructor arguments.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from bonobo.config import Configurable, Option
|
||||
|
||||
class PrefixIt(Configurable):
|
||||
prefix = Option(str, positional=True, default='>>>')
|
||||
|
||||
def call(self, row):
|
||||
return self.prefix + ' ' + row
|
||||
|
||||
prefixer = PrefixIt('$')
|
||||
|
||||
* You can add **Services** (using the :class:`bonobo.config.Service` descriptor). Services are a subclass of
|
||||
:class:`bonobo.config.Option`, sharing the same basics, but specialized in the definition of "named services" that
|
||||
will be resolved at runtime (a.k.a for which we will provide an implementation at runtime). We'll dive more into that
|
||||
in the next section.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from bonobo.config import Configurable, Option, Service
|
||||
|
||||
class HttpGet(Configurable):
|
||||
url = Option(default='https://jsonplaceholder.typicode.com/users')
|
||||
http = Service('http.client')
|
||||
|
||||
def call(self, http):
|
||||
resp = http.get(self.url)
|
||||
|
||||
for row in resp.json():
|
||||
yield row
|
||||
|
||||
http_get = HttpGet()
|
||||
|
||||
|
||||
* You can add **Methods** (using the :class:`bonobo.config.Method` descriptor). :class:`bonobo.config.Method` is a
|
||||
subclass of :class:`bonobo.config.Option` that allows passing callable parameters, either to the class constructor,
|
||||
or using the class as a decorator.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from bonobo.config import Configurable, Method
|
||||
|
||||
class Applier(Configurable):
|
||||
apply = Method()
|
||||
|
||||
def call(self, row):
|
||||
return self.apply(row)
|
||||
|
||||
@Applier
|
||||
def Prefixer(self, row):
|
||||
return 'Hello, ' + row
|
||||
|
||||
prefixer = Prefixer()
|
||||
|
||||
* You can add **ContextProcessors**, which are an advanced feature we won't introduce here. If you're familiar with
|
||||
pytest, you can think of them as pytest fixtures, execution wise.
|
||||
|
||||
Services
|
||||
::::::::
|
||||
|
||||
The motivation behind services is mostly separation of concerns, testability and deployability.
|
||||
|
||||
Usually, your transformations will depend on services (like a filesystem, an http client, a database, a rest api, ...).
|
||||
Those services can very well be hardcoded in the transformations, but there are two main drawbacks:
|
||||
|
||||
* You won't be able to change the implementation depending on the current environment (development laptop versus
|
||||
production servers, bug-hunting session versus execution, etc.)
|
||||
* You won't be able to test your transformations without testing the associated services.
|
||||
|
||||
To overcome those caveats of hardcoding things, we define Services in the configurable, which are basically
|
||||
string-options of the service names, and we provide an implementation at the last moment possible.
|
||||
|
||||
There are two ways of providing implementations:
|
||||
|
||||
* Either file-wide, by providing a `get_services()` function that returns a dict of named implementations (we did so
|
||||
with filesystems in the previous step, :doc:`tut02`)
|
||||
* Or directory-wide, by providing a `get_services()` function in a specially named `_services.py` file.
|
||||
|
||||
The first is simpler if you only have one transformation graph in one file, the second allows you to group coherent
|
||||
transformations together in a directory and share the implementations.
|
||||
|
||||
Let's see how to use it, starting from the previous service example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from bonobo.config import Configurable, Option, Service
|
||||
|
||||
class HttpGet(Configurable):
|
||||
url = Option(default='https://jsonplaceholder.typicode.com/users')
|
||||
http = Service('http.client')
|
||||
|
||||
def call(self, http):
|
||||
resp = http.get(self.url)
|
||||
|
||||
for row in resp.json():
|
||||
yield row
|
||||
|
||||
We defined an "http.client" service, that obviously should have a `get()` method, returning responses that have a
|
||||
`json()` method.
|
||||
|
||||
Let's provide two implementations for that. The first one will be using `requests <http://docs.python-requests.org/>`_,
|
||||
that coincidentally satisfies the described interface:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import bonobo
|
||||
import requests
|
||||
|
||||
def get_services():
|
||||
return {
|
||||
'http.client': requests
|
||||
}
|
||||
|
||||
graph = bonobo.Graph(
|
||||
HttpGet(),
|
||||
print,
|
||||
)
|
||||
|
||||
If you run this code, you should see some mock data returned by the webservice we called (assuming it's up and you can
|
||||
reach it).
|
||||
|
||||
Now, the second implementation will replace that with a mock, used for testing purposes:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
class HttpResponseStub:
|
||||
def json(self):
|
||||
return [
|
||||
{'id': 1, 'name': 'Leanne Graham', 'username': 'Bret', 'email': 'Sincere@april.biz', 'address': {'street': 'Kulas Light', 'suite': 'Apt. 556', 'city': 'Gwenborough', 'zipcode': '92998-3874', 'geo': {'lat': '-37.3159', 'lng': '81.1496'}}, 'phone': '1-770-736-8031 x56442', 'website': 'hildegard.org', 'company': {'name': 'Romaguera-Crona', 'catchPhrase': 'Multi-layered client-server neural-net', 'bs': 'harness real-time e-markets'}},
|
||||
{'id': 2, 'name': 'Ervin Howell', 'username': 'Antonette', 'email': 'Shanna@melissa.tv', 'address': {'street': 'Victor Plains', 'suite': 'Suite 879', 'city': 'Wisokyburgh', 'zipcode': '90566-7771', 'geo': {'lat': '-43.9509', 'lng': '-34.4618'}}, 'phone': '010-692-6593 x09125', 'website': 'anastasia.net', 'company': {'name': 'Deckow-Crist', 'catchPhrase': 'Proactive didactic contingency', 'bs': 'synergize scalable supply-chains'}},
|
||||
]
|
||||
|
||||
class HttpStub:
|
||||
def get(self, url):
|
||||
return HttpResponseStub()
|
||||
|
||||
def get_services():
|
||||
return {
|
||||
'http.client': HttpStub()
|
||||
}
|
||||
|
||||
graph = bonobo.Graph(
|
||||
HttpGet(),
|
||||
print,
|
||||
)
|
||||
|
||||
Since the `Graph` definition stays exactly the same, you can easily substitute the `_services.py` file depending on your
|
||||
environment (the way you're doing this is out of bonobo scope and heavily depends on your usual way of managing
|
||||
configuration files on different platforms).
|
||||
|
||||
Starting with bonobo 0.5 (not yet released), you will be able to use service injections with function-based
|
||||
transformations too, using the `bonobo.config.requires` decorator to mark a dependency.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from bonobo.config import requires
|
||||
|
||||
@requires('http.client')
|
||||
def http_get(http):
|
||||
resp = http.get('https://jsonplaceholder.typicode.com/users')
|
||||
|
||||
for row in resp.json():
|
||||
yield row
|
||||
|
||||
|
||||
Read more
|
||||
:::::::::
|
||||
|
||||
* :doc:`/guide/services`
|
||||
* :doc:`/reference/api_config`
|
||||
|
||||
Reference in New Issue
Block a user