Merge branch 'master' into develop

This commit is contained in:
Romain Dorgueil
2018-01-10 08:39:37 +01:00
39 changed files with 588 additions and 164 deletions

View File

@ -1,4 +1,4 @@
# Generated by Medikit 0.4.5 on 2018-01-01.
# Generated by Medikit 0.4.3 on 2018-01-10.
# All changes will be overridden.
PACKAGE ?= bonobo
@ -19,8 +19,9 @@ SPHINX_SOURCEDIR ?= docs
SPHINX_BUILDDIR ?= $(SPHINX_SOURCEDIR)/_build
YAPF ?= $(PYTHON) -m yapf
YAPF_OPTIONS ?= -rip
SPHINX_AUTOBUILD ?= $(PYTHON_DIRNAME)/sphinx-autobuild
.PHONY: $(SPHINX_SOURCEDIR) clean format install install-dev test update update-requirements
.PHONY: $(SPHINX_SOURCEDIR) clean format install install-dev test update update-requirements watch-$(SPHINX_SOURCEDIR)
# Installs the local project dependencies.
install:
@ -57,3 +58,6 @@ $(SPHINX_SOURCEDIR): install-dev
format: install-dev
$(YAPF) $(YAPF_OPTIONS) .
$(YAPF) $(YAPF_OPTIONS) Projectfile
watch-$(SPHINX_SOURCEDIR):
$(SPHINX_AUTOBUILD) $(SPHINX_SOURCEDIR) $(shell mktemp -d)

View File

@ -2,13 +2,12 @@
from medikit import require
make = require('make')
pytest = require('pytest')
python = require('python')
sphinx = require('sphinx')
yapf = require('yapf')
# python.set_versions('3.5', '3.6', '3.7') --> not yet implemented in medikit
python.setup(
name='bonobo',
description='Bonobo, a simple, modern and atomic extract-transform-load toolkit for python 3.5+.',
@ -54,8 +53,10 @@ python.add_requirements(
'stevedore ~=1.27',
'whistle ~=1.0',
dev=[
'pytest-sugar >=0.9,<0.10',
'pytest-timeout ~=1.0',
'cookiecutter >=1.5,<1.6',
'pytest-sugar >=0.8,<0.9',
'pytest-timeout >=1,<2',
'sphinx-sitemap >=0.2,<0.3',
],
docker=[
'bonobo-docker ~=0.6.0a1',
@ -69,4 +70,11 @@ python.add_requirements(
],
)
@listen(make.on_generate)
def on_make_generate(event):
event.makefile['SPHINX_AUTOBUILD'] = '$(PYTHON_DIRNAME)/sphinx-autobuild'
event.makefile.add_target('watch-$(SPHINX_SOURCEDIR)', '''
$(SPHINX_AUTOBUILD) $(SPHINX_SOURCEDIR) $(shell mktemp -d)
''', phony=True)
# vim: ft=python:

View File

@ -1,5 +1,5 @@
from bonobo.errors import AbstractError
from bonobo.util import isoption, iscontextprocessor, sortedlist
from bonobo.util import isoption, iscontextprocessor, sortedlist, get_name
__all__ = [
'Configurable',
@ -37,6 +37,26 @@ class ConfigurableMeta(type):
cls.__names.add(name)
cls.__options.insort((not value.positional, value._creation_counter, name, value))
# Docstring formatting
_options_doc = []
for _positional, _counter, _name, _value in cls.__options:
_param = _name
if _value.type:
_param = get_name(_value.type) + ' ' + _param
prefix = ':param {}: '.format(_param)
for lineno, line in enumerate((_value.__doc__ or '').split('\n')):
_options_doc.append((' ' * len(prefix) if lineno else prefix) + line)
cls.__doc__ = '\n\n'.join(
map(
str.strip,
filter(None, (
cls.__doc__,
'\n'.join(_options_doc)
))
)
)
@property
def __options__(cls):
return ((name, option) for _, _, name, option in cls.__options)

View File

@ -1,3 +1,4 @@
import textwrap
import types
from bonobo.util.inspect import istype
@ -62,7 +63,12 @@ class Option:
self.positional = positional
self.default = default
self.__doc__ = __doc__ or self.__doc__
# Docstring formatting
self.__doc__ = __doc__ or None
if self.__doc__:
self.__doc__ = textwrap.dedent(self.__doc__.strip('\n')).strip()
if default:
self.__doc__ += '\nDefault: {!r}'.format(default)
# This hack is necessary for python3.5
self._creation_counter = Option._creation_counter

View File

@ -12,12 +12,21 @@ class FileHandler(Configurable):
encoding (str): which encoding to use when opening the file.
"""
path = Option(str, required=True, positional=True) # type: str
eol = Option(str, default='\n') # type: str
mode = Option(str) # type: str
encoding = Option(str, default='utf-8') # type: str
fs = Service('fs') # type: str
path = Option(str, required=True, positional=True, __doc__='''
Path to use within the provided filesystem.
''') # type: str
eol = Option(str, default='\n', __doc__='''
Character to use as line separator.
''') # type: str
mode = Option(str, __doc__='''
What mode to use for open() call.
''') # type: str
encoding = Option(str, default='utf-8', __doc__='''
Encoding.
''') # type: str
fs = Service('fs', __doc__='''
The filesystem instance to use.
''') # type: str
@ContextProcessor
def file(self, context, *, fs):

View File

@ -55,14 +55,11 @@ class CsvHandler(FileHandler):
class CsvReader(FileReader, CsvHandler):
"""
Reads a CSV and yield the values as dicts.
.. attribute:: skip
The amount of lines to skip before it actually yield output.
"""
skip = Option(int, default=0)
skip = Option(int, default=0, __doc__='''
If set and greater than zero, the reader will skip this amount of lines.
''')
@Method(
positional=False,

View File

@ -12,7 +12,9 @@ class FileReader(Reader, FileHandler):
present. Extending it is usually the right way to create more specific file readers (like json, csv, etc.)
"""
mode = Option(str, default='r')
mode = Option(str, default='r', __doc__='''
What mode to use for open() call.
''') # type: str
output_fields = Option(
ensure_tuple,
@ -70,7 +72,9 @@ class FileWriter(Writer, FileHandler):
usually the right way to create more specific file writers (like json, csv, etc.)
"""
mode = Option(str, default='w+')
mode = Option(str, default='w+', __doc__='''
What mode to use for open() call.
''') # type: str
def write(self, file, context, line, *, fs):
"""

View File

@ -64,7 +64,7 @@ class Graph:
if _name in self.named:
raise KeyError('Duplicate name {!r} in graph.'.format(_name))
self.named[_name] = _last
if not _first:
if _first is None:
_first = _last
self.outputs_of(_input, create=True).add(_last)
_input = _last

View File

@ -21,8 +21,11 @@ extensions = [
'sphinx.ext.ifconfig',
'sphinx.ext.viewcode',
'sphinx.ext.graphviz',
'sphinx_sitemap',
]
site_url = 'http://docs.bonobo-project.org/en/master/'
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

11
docs/guide/_toc.rst Normal file
View File

@ -0,0 +1,11 @@
.. toctree::
:maxdepth: 2
introduction
transformations
graphs
services
environment
purity
debugging
plugins

0
docs/guide/debugging.rst Normal file
View File

View File

@ -5,6 +5,92 @@ Graphs are the glue that ties transformations together. They are the only data-s
must be acyclic, and can contain as many nodes as your system can handle. However, although in theory the number of nodes can be rather high, practical use cases usually do not exceed more than a few hundred nodes and only then in extreme cases.
Within a graph, each node is isolated and can only communicate using its
input and output queues. For each input row, a given node will be called with
the row passed as arguments. Each *return* or *yield* value will be put on the
node's output queue, and the nodes connected in the graph will then be able to
process it.
|bonobo| is a line-by-line data stream processing solution.
Handling the data-flow this way brings the following properties:
- **First in, first out**: unless stated otherwise, each node will receive the
rows from FIFO queues, and so, the order of rows will be preserved. That is
true for each single node, but please note that if you define "graph bubbles"
(where a graph diverge in different branches then converge again), the
convergence node will receive rows FIFO from each input queue, meaning that
the order existing at the divergence point won't stay true at the convergence
point.
- **Parallelism**: each node runs in parallel (by default, using independent
threads). This is useful as you don't have to worry about blocking calls.
If a thread waits for, let's say, a database, or a network service, the other
nodes will continue handling data, as long as they have input rows available.
- **Independence**: the rows are independent of each other, making this way
of working with data flows good for line-by-line data processing, but
also not ideal for "grouped" computations (where an output depends on more
than one line of input data). You can overcome this with rolling windows if
the input required are adjacent rows, but if you need to work on the whole
dataset at once, you should consider other software.
Graphs are defined using :class:`bonobo.Graph` instances, as seen in the
previous tutorial step.
What can be a node?
:::::::::::::::::::
**TL;DR**: … anything, as long as it's callable().
Functions
---------
.. code-block:: python
def get_item(id):
return id, items.get(id)
Each node of a graph will be executed in isolation from the other nodes, and the data is passed from one node to the
next using FIFO queues, managed by the framework. It's transparent to the end-user, though, and you'll only use
function arguments (for inputs) and return/yield values (for outputs).
Each input row of a node will cause one call to this node's callable. Each output is cast internally as a tuple-like
data structure (or more precisely, a namedtuple-like data structure), and for one given node, each output row must
have the same structure.
If you return/yield something which is not a tuple, bonobo will create a tuple of one element.
Properties
----------
|bonobo| assists you with defining the data-flow of your data engineering process, and then streams data through your
callable graphs.
* Each node call will process one row of data.
* Queues that flow the data between nodes are first-in, first-out (FIFO) standard python :class:`queue.Queue`.
* Each node will run in parallel
* Default execution strategy use threading, and each node will run in a separate thread.
Fault tolerance
---------------
Node execution is fault tolerant.
If an exception is raised from a node call, then this node call will be aborted but bonobo will continue the execution
with the next row (after outputting the stack trace and incrementing the "err" counter for the node context).
It allows you to have ETL jobs that ignore faulty data and try their best to process the valid rows of a dataset.
Some errors are fatal, though.
If you pass a 2-element tuple to a node that takes 3 args, |bonobo| will raise an :class:`bonobo.errors.UnrecoverableTypeError`, and exit the
current graph execution as fast as it can (finishing the other node executions that are in progress first, but not
starting new ones if there are remaining input rows).
Definitions
:::::::::::

View File

@ -3,13 +3,8 @@ Guides
This section will guide you through your journey with Bonobo ETL.
.. toctree::
:maxdepth: 2
introduction
transformations
graphs
services
environment
purity
.. include:: _toc.rst

View File

@ -4,9 +4,6 @@ Bonobo API
The Bonobo API, available directly under the :mod:`bonobo` package, contains all the tools you need to get started with
bonobo.
The :mod:`bonobo` package
:::::::::::::::::::::::::
.. automodule:: bonobo
:members:
:undoc-members:

View File

@ -0,0 +1,9 @@
.. warning::
This tutorial was written for |bonobo| 0.5, while the current stable version is |bonobo| 0.6.
Please be aware that some things changed.
A summary of changes is available in the `migration guide from 0.5 to 0.6 <https://news.bonobo-project.org/migration-guide-for-bonobo-0-6-alpha-c1d36b0a9d35>`_.

View File

@ -0,0 +1,65 @@
First steps
===========
.. include:: _outdated_note.rst
What is Bonobo?
:::::::::::::::
Bonobo is an ETL (Extract-Transform-Load) framework for python 3.5. The goal is to define data-transformations, with
python code in charge of handling similar shaped independent lines of data.
Bonobo *is not* a statistical or data-science tool. If you're looking for a data-analysis tool in python, use Pandas.
Bonobo is a lean manufacturing assembly line for data that lets you focus on the actual work instead of the plumbing
(execution contexts, parallelism, error handling, console output, logging, ...).
Bonobo uses simple python and should be quick and easy to learn.
Tutorial
::::::::
.. note::
Good documentation is not easy to write. We do our best to make it better and better.
Although all content here should be accurate, you may feel a lack of completeness, for which we plead guilty and
apologize.
If you're stuck, please come and ask on our `slack channel <https://bonobo-slack.herokuapp.com/>`_, we'll figure
something out.
If you're not stuck but had trouble understanding something, please consider contributing to the docs (via GitHub
pull requests).
.. toctree::
:maxdepth: 2
tut01
tut02
tut03
tut04
What's next?
::::::::::::
Read a few examples
-------------------
* :doc:`/reference/examples`
Read about best development practices
-------------------------------------
* :doc:`/guide/index`
* :doc:`/guide/purity`
Read about integrating external tools with bonobo
-------------------------------------------------
* :doc:`/extension/docker`: run transformation graphs in isolated containers.
* :doc:`/extension/jupyter`: run transformations within jupyter notebooks.
* :doc:`/extension/selenium`: crawl the web using a real browser and work with the gathered data.
* :doc:`/extension/sqlalchemy`: everything you need to interact with SQL databases.

View File

@ -0,0 +1,13 @@
Just enough Python for Bonobo
=============================
.. include:: _outdated_note.rst
.. todo::
This is a work in progress and it is not yet available. Please come back later or even better, help us write this
guide!
This guide is intended to help programmers or enthusiasts to grasp the python basics necessary to use Bonobo. It
should definitely not be considered a general python introduction, nor a deep dive into details.

View File

@ -1,7 +1,10 @@
Let's get started!
==================
To get started with Bonobo, you need to install it in a working python 3.5+ environment:
.. include:: _outdated_note.rst
To begin with Bonobo, you need to install it in a working python 3.5+ environment, and you'll also need cookiecutter
to bootstrap your project.
.. code-block:: shell-session
@ -13,24 +16,21 @@ See :doc:`/install` for more options.
Create an empty project
:::::::::::::::::::::::
Your ETL code will live in standard python files and packages.
Your ETL code will live in ETL projects, which are basically a bunch of files, including python code, that bonobo
can run.
.. code-block:: shell-session
$ bonobo create tutorial.py
$ bonobo init tutorial
This will create a simple example job in a `tutorial.py` file.
This will create a `tutorial` directory (`content description here <https://www.bonobo-project.org/with/cookiecutter>`_).
Now, try to execute it:
To run this project, use:
.. code-block:: shell-session
$ python tutorial.py
$ bonobo run tutorial
Congratulations, you just ran your first ETL job!
.. todo:: XXX **CHANGES NEEDED BELOW THIS POINTS BEFORE 0.6** XXX
Write a first transformation
::::::::::::::::::::::::::::
@ -107,9 +107,6 @@ To do this, it needs to know what data-flow you want to achieve, and you'll use
The `if __name__ == '__main__':` section is not required, unless you want to run it directly using the python
interpreter.
The name of the `graph` variable is arbitrary, but this variable must be global and available unconditionally.
Do not put it in its own function or in the `if __name__ == '__main__':` section.
Execute the job
:::::::::::::::
@ -177,8 +174,8 @@ strategy). Actual behavior of an execution will depend on the strategy chosen, b
cases.
④ Before actually executing the `transformations`, the `ExecutorStrategy` instance will wrap each component in an
`execution context`, whose responsibility is to hold the state of the transformation. It enables to keep the
`transformations` stateless, while allowing to add an external state if required. We'll expand on this later.
`execution context`, whose responsibility is to hold the state of the transformation. It enables you to keep the
`transformations` stateless, while allowing you to add an external state if required. We'll expand on this later.
Concepts and definitions
::::::::::::::::::::::::

View File

@ -1,6 +1,8 @@
Working with files
==================
.. include:: _outdated_note.rst
Bonobo would be pointless if the aim was just to uppercase small lists of strings.
In fact, Bonobo should not be used if you don't expect any gain from parallelization/distribution of tasks.
@ -59,7 +61,13 @@ available in **Bonobo**'s repository:
.. code-block:: shell-session
$ bonobo download examples/datasets/coffeeshops.txt
$ curl https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/datasets/coffeeshops.txt > `python3 -c 'import bonobo; print(bonobo.get_examples_path("datasets/coffeeshops.txt"))'`
.. note::
The "example dataset download" step will be easier in the future.
https://github.com/python-bonobo/bonobo/issues/134
.. literalinclude:: ../../bonobo/examples/tutorials/tut02e01_read.py
:language: python

View File

@ -1,6 +1,8 @@
Configurables and Services
==========================
.. include:: _outdated_note.rst
.. note::
This section lacks completeness, sorry for that (but you can still read it!).
@ -13,7 +15,7 @@ Class-based transformations and configurables
Bonobo is a bit dumb. If something is callable, it considers it can be used as a transformation, and it's up to the
user to provide callables that logically fits in a graph.
You can use plain python objects with a `__call__()` method, and it ill just work.
You can use plain python objects with a `__call__()` method, and it will just work.
As a lot of transformations need common machinery, there are a few tools to quickly build transformations, most of
them requiring your class to subclass :class:`bonobo.config.Configurable`.
@ -30,7 +32,7 @@ Configurables allows to use the following features:
class PrefixIt(Configurable):
prefix = Option(str, positional=True, default='>>>')
def __call__(self, row):
def call(self, row):
return self.prefix + ' ' + row
prefixer = PrefixIt('$')
@ -48,7 +50,7 @@ Configurables allows to use the following features:
url = Option(default='https://jsonplaceholder.typicode.com/users')
http = Service('http.client')
def __call__(self, http):
def call(self, http):
resp = http.get(self.url)
for row in resp.json():
@ -68,7 +70,7 @@ Configurables allows to use the following features:
class Applier(Configurable):
apply = Method()
def __call__(self, row):
def call(self, row):
return self.apply(row)
@Applier
@ -114,7 +116,7 @@ Let's see how to use it, starting from the previous service example:
url = Option(default='https://jsonplaceholder.typicode.com/users')
http = Service('http.client')
def __call__(self, http):
def call(self, http):
resp = http.get(self.url)
for row in resp.json():

View File

@ -1,6 +1,8 @@
Working with databases
======================
.. include:: _outdated_note.rst
Databases (and especially SQL databases here) are not the focus of Bonobo, thus support for it is not (and will never
be) included in the main package. Instead, working with databases is done using third party, well maintained and
specialized packages, like SQLAlchemy, or other database access libraries from the python cheese shop.

View File

@ -1,113 +1,149 @@
Part 2: Writing ETL Jobs
========================
What's an ETL job ?
:::::::::::::::::::
In |bonobo|, an ETL job is a graph with some logic to execute it, like the file we created in the previous section.
In |bonobo|, an ETL job is a single graph that can be executed on its own.
You can learn more about the :class:`bonobo.Graph` data-structure and its properties in the
:doc:`graphs guide </guide/graphs>`.
Within a graph, each node is isolated and can only communicate using its
input and output queues. For each input row, a given node will be called with
the row passed as arguments. Each *return* or *yield* value will be put on the
node's output queue, and the nodes connected in the graph will then be able to
process it.
|bonobo| is a line-by-line data stream processing solution.
Scenario
::::::::
Handling the data-flow this way brings the following properties:
Let's create a sample application, whose goal will be to integrate some data in various systems.
- **First in, first out**: unless stated otherwise, each node will receive the
rows from FIFO queues, and so, the order of rows will be preserved. That is
true for each single node, but please note that if you define "graph bubbles"
(where a graph diverge in different branches then converge again), the
convergence node will receive rows FIFO from each input queue, meaning that
the order existing at the divergence point won't stay true at the convergence
point.
We'll use an open-data dataset, containing all the fablabs in the world.
- **Parallelism**: each node runs in parallel (by default, using independent
threads). This is useful as you don't have to worry about blocking calls.
If a thread waits for, let's say, a database, or a network service, the other
nodes will continue handling data, as long as they have input rows available.
We will normalize this data using a few different rules, then write it somewhere.
- **Independence**: the rows are independent of each other, making this way
of working with data flows good for line-by-line data processing, but
also not ideal for "grouped" computations (where an output depends on more
than one line of input data). You can overcome this with rolling windows if
the input required are adjacent rows, but if you need to work on the whole
dataset at once, you should consider other software.
In this step, we'll focus on getting this data normalized and output to the console. In the next steps, we'll extend it
to other targets, like files, and databases.
Graphs are defined using :class:`bonobo.Graph` instances, as seen in the
previous tutorial step.
What can be a node?
:::::::::::::::::::
Setup
:::::
**TL;DR**: … anything, as long as it's callable().
We'll change the `tutorial.py` file created in the last step to handle this new scenario.
Functions
---------
First, let's remove all boilerplate code, so it looks like this:
.. code-block:: python
def get_item(id):
return id, items.get(id)
import bonobo
Each node of a graph will be executed in isolation from the other nodes, and the data is passed from one node to the
next using FIFO queues, managed by the framework. It's transparent to the end-user, though, and you'll only use
function arguments (for inputs) and return/yield values (for outputs).
Each input row of a node will cause one call to this node's callable. Each output is cast internally as a tuple-like
data structure (or more precisely, a namedtuple-like data structure), and for one given node, each output row must
have the same structure.
If you return/yield something which is not a tuple, bonobo will create a tuple of one element.
Properties
----------
|bonobo| assists you with defining the data-flow of your data engineering process, and then streams data through your
callable graphs.
* Each node call will process one row of data.
* Queues that flow the data between nodes are first-in, first-out (FIFO) standard python :class:`queue.Queue`.
* Each node will run in parallel
* Default execution strategy use threading, and each node will run in a separate thread.
Fault tolerance
---------------
Node execution is fault tolerant.
If an exception is raised from a node call, then this node call will be aborted but bonobo will continue the execution
with the next row (after outputting the stack trace and incrementing the "err" counter for the node context).
It allows you to have ETL jobs that ignore faulty data and try their best to process the valid rows of a dataset.
Some errors are fatal, though.
If you pass a 2-element tuple to a node that takes 3 args, |bonobo| will raise an :class:`bonobo.errors.UnrecoverableTypeError`, and exit the
current graph execution as fast as it can (finishing the other node executions that are in progress first, but not
starting new ones if there are remaining input rows).
def get_graph(**options):
graph = bonobo.Graph()
return graph
Let's write a sample data integration job
:::::::::::::::::::::::::::::::::::::::::
Let's create a sample application.
The goal of this application will be to extract all the fablabs in the world using an open-data API, normalize this
data and, for now, display it. We'll then build on this foundation in the next steps to write to files, databases, etc.
def get_services(**options):
return {}
if __name__ == '__main__':
parser = bonobo.get_argument_parser()
with bonobo.parse_args(parser) as options:
bonobo.run(get_graph(**options), services=get_services(**options))
Your job now contains the logic for executing an empty graph, and we'll complete this with our application logic.
Reading the source data
:::::::::::::::::::::::
Let's add a simple chain to our `get_graph(...)` function, so that it reads from the fablabs open-data api.
The source dataset we'll use can be found on `this site <https://public-us.opendatasoft.com/explore/dataset/fablabs/>`_.
It's licensed under `Public Domain`, which makes it just perfect for our example.
.. note::
There is a :mod:`bonobo.contrib.opendatasoft` module that makes reading from OpenDataSoft APIs easier, including
pagination and limits, but for our tutorial, we'll avoid that and build it manually.
Let's write our extractor:
.. code-block:: python
import requests
FABLABS_API_URL = 'https://public-us.opendatasoft.com/api/records/1.0/search/?dataset=fablabs&rows=1000'
def extract_fablabs():
yield from requests.get(FABLABS_API_URL).json().get('records')
This extractor will get called once, query the API url, parse it as JSON, and yield the items from the "records" list,
one by one.
.. note::
You'll probably want to make it a bit more verbose in a real application, to handle all kind of errors that can
happen here. What if the server is down? What if it returns a response which is not JSON? What if the data is not
in the expected format?
For simplicity's sake, we'll ignore that here, but that's the kind of questions you should have in mind when writing
pipelines.
To test our pipeline, let's use a :class:`bonobo.Limit` and a :class:`bonobo.PrettyPrinter`, and change our
`get_graph(...)` function accordingly:
.. code-block:: python
import bonobo
def get_graph(**options):
graph = bonobo.Graph()
graph.add_chain(
extract_fablabs,
bonobo.Limit(10),
bonobo.PrettyPrinter(),
)
return graph
Running this job should output a bit of data, along with some statistics.
First, let's look at the statistics:
.. code-block:: shell-session
- extract_fablabs in=1 out=995 [done]
- Limit in=995 out=10 [done]
- PrettyPrinter in=10 out=10 [done]
It is important to understand that we extracted everything (995 rows), before dropping 99% of the dataset.
This is OK for debugging, but not efficient.
.. note::
You should always try to limit the amount of data as early as possible, which often means not generating the data
you won't need in the first place. Here, we could have used the `rows=` query parameter in the API URL to not
request the data we would anyway drop.
Normalize
:::::::::
.. include:: _todo.rst
Output
::::::
We used :class:`bonobo.PrettyPrinter` to output the data.
It's a flexible transformation provided that helps you display the content of a stream, and you'll probably use it a
lot for various reasons.
Moving forward
::::::::::::::
You now know:
* How to ...
* How to use a reader node.
* How to use the console output.
* How to limit the number of elements in a stream.
* How to pass data from one node to another.
* How to structure a graph using chains.
**Next: :doc:`3-files`**
It's now time to jump to :doc:`3-files`.

View File

@ -1,6 +1,51 @@
Part 3: Working with Files
==========================
.. include:: _wip_note.rst
Writing to the console is nice, but using files is probably more realistic.
Let's see how to use a few builtin writers and both local and remote filesystems.
Filesystems
:::::::::::
In |bonobo|, files are accessed within a **filesystem** service which must be something with the same interface as
`fs' FileSystem objects <https://docs.pyfilesystem.org/en/latest/builtin.html>`_. As a default, you'll get an instance
of a local filesystem mapped to the current working directory as the `fs` service. You'll learn more about services in
the next step, but for now, let's just use it.
Writing using the service
:::::::::::::::::::::::::
Although |bonobo| contains helpers to write to common file formats, let's start by writing it manually.
.. code-block:: python
from bonobo.config import use
from bonobo.constants import NOT_MODIFIED
@use('fs')
def write_repr_to_file(*row, fs):
with fs.open('output.txt', 'a+') as f:
print(row, file=f)
return NOT_MODIFIED
Then, update the `get_graph(...)` function, by adding `write_repr_to_file` just before your `PrettyPrinter()` node.
Let's try to run that and think about what happens.
Each time a row comes to this node, the output file is opened in "append or create" mode, a line is written, and the file
is closed.
This is **NOT** how you want to do things. Let's rewrite it so our `open(...)` call becomes execution-wide.
* Filesystems
* Reading files
@ -19,4 +64,4 @@ You now know:
* How to ...
**Next: :doc:`4-services`**
It's now time to jump to :doc:`4-services`.

View File

@ -1,6 +1,7 @@
Part 4: Services and Configurables
==================================
.. include:: _wip_note.rst
In the last section, we used a few new tools.
@ -204,4 +205,4 @@ You now know:
* How to ...
**Next: :doc:`5-packaging`**
It's now time to jump to :doc:`5-packaging`.

View File

@ -1,6 +1,8 @@
Part 5: Projects and Packaging
==============================
.. include:: _wip_note.rst
Until then, we worked with one file managing a job.
Real life often involves more complicated setups, with relations and imports between different files.
@ -13,7 +15,6 @@ kind of project structure, as the target structure will be dictated by the hosti
sub-package would perfectly fit a django or flask project, or even a regular package, but it's up to you to chose the
structure of your project.
about using |bonobo| in a pyt
is about set of jobs working together within a project.
Let's see how to move from the current status to a package.
@ -26,3 +27,19 @@ You now know:
* How to ...
That's the end of the tutorial, you should now be familiar with all the basics.
A few appendixes to the tutorial can explain how to integrate with other systems (we'll use the "fablabs" application
created in this tutorial and extend it):
* :doc:`notebooks`
* :doc:`sqlalchemy`
* :doc:`django`
* :doc:`docker`
Then, you can either jump head-first into your code, or get a better grasp of all concepts by
:doc:`reading the full bonobo guide </guide/index>`.
Happy data flows!

3
docs/tutorial/_todo.rst Normal file
View File

@ -0,0 +1,3 @@
.. warning::
This section is missing. Sorry, but stay tuned! It'll be added soon.

View File

@ -0,0 +1,12 @@
.. warning::
This section is being rewritten for |bonobo| 0.6, and it's now in a "work in progress" state.
You can read :doc:`the tutorial for the previous version (0.5) <0.5/index>`. Please note that things changed a bit
since then and you'll have quirks here and there.
You can also read the `migration guide from 0.5 to 0.6 <https://news.bonobo-project.org/migration-guide-for-bonobo-0-6-alpha-c1d36b0a9d35>`_
that will give you a good overview of the changes.
Hopefully, this document will be updated soon, and please accept our apologies about this doc status until then.

View File

@ -1,3 +1,24 @@
Working with Django
===================
.. warning::
This section does not exist yet, but it's in the plans to write it quite soon.
Meanwhile, you can check the source code and other links provided below.
Source code
:::::::::::
https://github.com/python-bonobo/bonobo/tree/master/bonobo/contrib/django
bonobo.contrib.django
:::::::::::::::::::::
.. automodule:: bonobo.contrib.django
:members:
:undoc-members:
:show-inheritance:

16
docs/tutorial/docker.rst Normal file
View File

@ -0,0 +1,16 @@
Working with Docker
===================
.. warning::
This section does not exist yet, but it's in the plans to write it quite soon.
Meanwhile, you can check the source code and other links provided below.
Source code
:::::::::::
https://github.com/python-bonobo/bonobo-docker

View File

@ -53,3 +53,4 @@ out.
If you're not stuck but had trouble understanding something, please consider contributing to the docs (using GitHub
pull requests).
.. include:: _wip_note.rst

View File

@ -1,4 +1,13 @@
Working with Jupyter Notebooks
==============================
.. warning::
This section does not exist yet, but it's in the plans to write it quite soon.
Meanwhile, you can check the source code and other links provided below.
Source code
:::::::::::
https://github.com/python-bonobo/bonobo/tree/master/bonobo/contrib/jupyter

View File

@ -1,4 +1,15 @@
Working with SQL Databases
==========================
.. warning::
This section does not exist yet, but it's in the plans to write it quite soon.
Meanwhile, you can check the source code and other links provided below.
Source code
:::::::::::
https://github.com/python-bonobo/bonobo-sqlalchemy

View File

@ -6,23 +6,32 @@ dependencies:
- wheel=0.29.0
- pip:
- appdirs==1.4.3
- certifi==2017.7.27.1
- certifi==2017.11.5
- chardet==3.0.4
- colorama==0.3.9
- fs==2.0.12
- fs==2.0.17
- graphviz==0.8.2
- idna==2.6
- jinja2==2.9.6
- jinja2==2.10
- markupsafe==1.0
- mondrian==0.4.0
- mondrian==0.6.1
- packaging==16.8
- pbr==3.1.1
- psutil==5.4.0
- psutil==5.4.3
- pyparsing==2.2.0
- python-slugify==1.2.4
- pytz==2017.3
- requests==2.18.4
- six==1.11.0
- stevedore==1.27.1
- stevedore==1.28.0
- unidecode==1.0.22
- urllib3==1.22
- whistle==1.0.0
# for docs
- alabaster==0.7.10
- sphinx-sitemap==0.2
- sphinx==1.6.5
- sphinxcontrib-websupport==1.0.1
# for examples
- pycountry ==17.9.23

View File

@ -1,28 +1,38 @@
-e .[dev]
alabaster==0.7.10
arrow==0.12.0
attrs==17.4.0
babel==2.5.1
binaryornot==0.4.4
certifi==2017.11.5
chardet==3.0.4
click==6.7
cookiecutter==1.5.1
coverage==4.4.2
docutils==0.14
future==0.16.0
idna==2.6
imagesize==0.7.1
jinja2-time==0.2.0
jinja2==2.10
markupsafe==1.0
pluggy==0.6.0
poyo==0.4.1
py==1.5.2
pygments==2.2.0
pytest-cov==2.5.1
pytest-sugar==0.9.0
pytest-sugar==0.8.0
pytest-timeout==1.2.1
pytest==3.3.1
pytest==3.3.2
python-dateutil==2.6.1
pytz==2017.3
requests==2.18.4
six==1.11.0
snowballstemmer==1.2.1
sphinx==1.6.5
sphinx-sitemap==0.2
sphinx==1.6.6
sphinxcontrib-websupport==1.0.1
termcolor==1.1.0
urllib3==1.22
whichcraft==0.4.1
yapf==0.20.0

View File

@ -1,6 +1,6 @@
-e .[docker]
appdirs==1.4.3
bonobo-docker==0.6.0a1
bonobo-docker==0.6.0
certifi==2017.11.5
chardet==3.0.4
colorama==0.3.9
@ -22,7 +22,7 @@ requests==2.18.4
semantic-version==2.6.0
six==1.11.0
stevedore==1.28.0
unidecode==0.4.21
unidecode==1.0.22
urllib3==1.22
websocket-client==0.46.0
whistle==1.0.0

View File

@ -1,6 +1,5 @@
-e .[jupyter]
appnope==0.1.0
attrs==17.4.0
bleach==2.1.2
decorator==4.1.2
entrypoints==0.2.3
@ -12,7 +11,7 @@ ipywidgets==6.0.1
jedi==0.11.1
jinja2==2.10
jsonschema==2.6.0
jupyter-client==5.2.0
jupyter-client==5.2.1
jupyter-console==5.2.0
jupyter-core==4.4.0
jupyter==1.0.0
@ -25,20 +24,17 @@ pandocfilters==1.4.2
parso==0.1.1
pexpect==4.3.1
pickleshare==0.7.4
pluggy==0.6.0
prompt-toolkit==1.0.15
ptyprocess==0.5.2
py==1.5.2
pygments==2.2.0
pytest==3.3.1
python-dateutil==2.6.1
pyzmq==17.0.0b3
pyzmq==16.0.3
qtconsole==4.3.1
simplegeneric==0.8.1
six==1.11.0
terminado==0.8.1
testpath==0.3.1
tornado==5.0a1
tornado==4.5.3
traitlets==4.3.2
wcwidth==0.1.7
webencodings==0.5.1

View File

@ -1,6 +1,6 @@
-e .[sqlalchemy]
appdirs==1.4.3
bonobo-sqlalchemy==0.6.0a1
bonobo-sqlalchemy==0.6.0
certifi==2017.11.5
chardet==3.0.4
colorama==0.3.9
@ -20,6 +20,6 @@ requests==2.18.4
six==1.11.0
sqlalchemy==1.2.0
stevedore==1.28.0
unidecode==0.4.21
unidecode==1.0.22
urllib3==1.22
whistle==1.0.0

View File

@ -18,6 +18,6 @@ pytz==2017.3
requests==2.18.4
six==1.11.0
stevedore==1.28.0
unidecode==0.4.21
unidecode==1.0.22
urllib3==1.22
whistle==1.0.0

View File

@ -64,8 +64,9 @@ setup(
],
extras_require={
'dev': [
'coverage (>= 4.4, < 5.0)', 'pytest (>= 3.1, < 4.0)', 'pytest-cov (>= 2.5, < 3.0)',
'pytest-sugar (>= 0.9, < 0.10)', 'pytest-timeout (~= 1.0)', 'sphinx (>= 1.6, < 2.0)', 'yapf'
'cookiecutter (>= 1.5, < 1.6)', 'coverage (>= 4.4, < 5.0)', 'pytest (>= 3.1, < 4.0)',
'pytest-cov (>= 2.5, < 3.0)', 'pytest-sugar (>= 0.8, < 0.9)', 'pytest-timeout (>= 1, < 2)',
'sphinx (>= 1.6, < 2.0)', 'sphinx-sitemap (>= 0.2, < 0.3)', 'yapf'
],
'docker': ['bonobo-docker (~= 0.6.0a1)'],
'jupyter': ['ipywidgets (~= 6.0)', 'jupyter (~= 1.0)'],