Merge branch 'master' into develop

This commit is contained in:
Romain Dorgueil
2018-01-10 08:39:37 +01:00
39 changed files with 588 additions and 164 deletions

View File

@ -1,4 +1,4 @@
# Generated by Medikit 0.4.5 on 2018-01-01.
# Generated by Medikit 0.4.3 on 2018-01-10.
# All changes will be overridden.
PACKAGE ?= bonobo
@ -19,8 +19,9 @@ SPHINX_SOURCEDIR ?= docs
SPHINX_BUILDDIR ?= $(SPHINX_SOURCEDIR)/_build
YAPF ?= $(PYTHON) -m yapf
YAPF_OPTIONS ?= -rip
SPHINX_AUTOBUILD ?= $(PYTHON_DIRNAME)/sphinx-autobuild
.PHONY: $(SPHINX_SOURCEDIR) clean format install install-dev test update update-requirements
.PHONY: $(SPHINX_SOURCEDIR) clean format install install-dev test update update-requirements watch-$(SPHINX_SOURCEDIR)
# Installs the local project dependencies.
install:
@ -57,3 +58,6 @@ $(SPHINX_SOURCEDIR): install-dev
format: install-dev
$(YAPF) $(YAPF_OPTIONS) .
$(YAPF) $(YAPF_OPTIONS) Projectfile
watch-$(SPHINX_SOURCEDIR):
$(SPHINX_AUTOBUILD) $(SPHINX_SOURCEDIR) $(shell mktemp -d)

View File

@ -2,13 +2,12 @@
from medikit import require
make = require('make')
pytest = require('pytest')
python = require('python')
sphinx = require('sphinx')
yapf = require('yapf')
# python.set_versions('3.5', '3.6', '3.7') --> not yet implemented in medikit
python.setup(
name='bonobo',
description='Bonobo, a simple, modern and atomic extract-transform-load toolkit for python 3.5+.',
@ -54,8 +53,10 @@ python.add_requirements(
'stevedore ~=1.27',
'whistle ~=1.0',
dev=[
'pytest-sugar >=0.9,<0.10',
'pytest-timeout ~=1.0',
'cookiecutter >=1.5,<1.6',
'pytest-sugar >=0.8,<0.9',
'pytest-timeout >=1,<2',
'sphinx-sitemap >=0.2,<0.3',
],
docker=[
'bonobo-docker ~=0.6.0a1',
@ -69,4 +70,11 @@ python.add_requirements(
],
)
@listen(make.on_generate)
def on_make_generate(event):
event.makefile['SPHINX_AUTOBUILD'] = '$(PYTHON_DIRNAME)/sphinx-autobuild'
event.makefile.add_target('watch-$(SPHINX_SOURCEDIR)', '''
$(SPHINX_AUTOBUILD) $(SPHINX_SOURCEDIR) $(shell mktemp -d)
''', phony=True)
# vim: ft=python:

View File

@ -1,5 +1,5 @@
from bonobo.errors import AbstractError
from bonobo.util import isoption, iscontextprocessor, sortedlist
from bonobo.util import isoption, iscontextprocessor, sortedlist, get_name
__all__ = [
'Configurable',
@ -37,6 +37,26 @@ class ConfigurableMeta(type):
cls.__names.add(name)
cls.__options.insort((not value.positional, value._creation_counter, name, value))
# Docstring formatting
_options_doc = []
for _positional, _counter, _name, _value in cls.__options:
_param = _name
if _value.type:
_param = get_name(_value.type) + ' ' + _param
prefix = ':param {}: '.format(_param)
for lineno, line in enumerate((_value.__doc__ or '').split('\n')):
_options_doc.append((' ' * len(prefix) if lineno else prefix) + line)
cls.__doc__ = '\n\n'.join(
map(
str.strip,
filter(None, (
cls.__doc__,
'\n'.join(_options_doc)
))
)
)
@property
def __options__(cls):
return ((name, option) for _, _, name, option in cls.__options)

View File

@ -1,3 +1,4 @@
import textwrap
import types
from bonobo.util.inspect import istype
@ -62,7 +63,12 @@ class Option:
self.positional = positional
self.default = default
self.__doc__ = __doc__ or self.__doc__
# Docstring formatting
self.__doc__ = __doc__ or None
if self.__doc__:
self.__doc__ = textwrap.dedent(self.__doc__.strip('\n')).strip()
if default:
self.__doc__ += '\nDefault: {!r}'.format(default)
# This hack is necessary for python3.5
self._creation_counter = Option._creation_counter

View File

@ -12,12 +12,21 @@ class FileHandler(Configurable):
encoding (str): which encoding to use when opening the file.
"""
path = Option(str, required=True, positional=True) # type: str
eol = Option(str, default='\n') # type: str
mode = Option(str) # type: str
encoding = Option(str, default='utf-8') # type: str
fs = Service('fs') # type: str
path = Option(str, required=True, positional=True, __doc__='''
Path to use within the provided filesystem.
''') # type: str
eol = Option(str, default='\n', __doc__='''
Character to use as line separator.
''') # type: str
mode = Option(str, __doc__='''
What mode to use for open() call.
''') # type: str
encoding = Option(str, default='utf-8', __doc__='''
Encoding.
''') # type: str
fs = Service('fs', __doc__='''
The filesystem instance to use.
''') # type: str
@ContextProcessor
def file(self, context, *, fs):

View File

@ -55,14 +55,11 @@ class CsvHandler(FileHandler):
class CsvReader(FileReader, CsvHandler):
"""
Reads a CSV and yield the values as dicts.
.. attribute:: skip
The amount of lines to skip before it actually yield output.
"""
skip = Option(int, default=0)
skip = Option(int, default=0, __doc__='''
If set and greater than zero, the reader will skip this amount of lines.
''')
@Method(
positional=False,

View File

@ -12,7 +12,9 @@ class FileReader(Reader, FileHandler):
present. Extending it is usually the right way to create more specific file readers (like json, csv, etc.)
"""
mode = Option(str, default='r')
mode = Option(str, default='r', __doc__='''
What mode to use for open() call.
''') # type: str
output_fields = Option(
ensure_tuple,
@ -70,7 +72,9 @@ class FileWriter(Writer, FileHandler):
usually the right way to create more specific file writers (like json, csv, etc.)
"""
mode = Option(str, default='w+')
mode = Option(str, default='w+', __doc__='''
What mode to use for open() call.
''') # type: str
def write(self, file, context, line, *, fs):
"""

View File

@ -64,7 +64,7 @@ class Graph:
if _name in self.named:
raise KeyError('Duplicate name {!r} in graph.'.format(_name))
self.named[_name] = _last
if not _first:
if _first is None:
_first = _last
self.outputs_of(_input, create=True).add(_last)
_input = _last

View File

@ -21,8 +21,11 @@ extensions = [
'sphinx.ext.ifconfig',
'sphinx.ext.viewcode',
'sphinx.ext.graphviz',
'sphinx_sitemap',
]
site_url = 'http://docs.bonobo-project.org/en/master/'
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

11
docs/guide/_toc.rst Normal file
View File

@ -0,0 +1,11 @@
.. toctree::
:maxdepth: 2
introduction
transformations
graphs
services
environment
purity
debugging
plugins

0
docs/guide/debugging.rst Normal file
View File

View File

@ -5,6 +5,92 @@ Graphs are the glue that ties transformations together. They are the only data-s
must be acyclic, and can contain as many nodes as your system can handle. However, although in theory the number of nodes can be rather high, practical use cases usually do not exceed more than a few hundred nodes and only then in extreme cases.
Within a graph, each node is isolated and can only communicate using its
input and output queues. For each input row, a given node will be called with
the row passed as arguments. Each *return* or *yield* value will be put on the
node's output queue, and the nodes connected in the graph will then be able to
process it.
|bonobo| is a line-by-line data stream processing solution.
Handling the data-flow this way brings the following properties:
- **First in, first out**: unless stated otherwise, each node will receive the
rows from FIFO queues, and so, the order of rows will be preserved. That is
true for each single node, but please note that if you define "graph bubbles"
(where a graph diverge in different branches then converge again), the
convergence node will receive rows FIFO from each input queue, meaning that
the order existing at the divergence point won't stay true at the convergence
point.
- **Parallelism**: each node runs in parallel (by default, using independent
threads). This is useful as you don't have to worry about blocking calls.
If a thread waits for, let's say, a database, or a network service, the other
nodes will continue handling data, as long as they have input rows available.
- **Independence**: the rows are independent of each other, making this way
of working with data flows good for line-by-line data processing, but
also not ideal for "grouped" computations (where an output depends on more
than one line of input data). You can overcome this with rolling windows if
the input required are adjacent rows, but if you need to work on the whole
dataset at once, you should consider other software.
Graphs are defined using :class:`bonobo.Graph` instances, as seen in the
previous tutorial step.
What can be a node?
:::::::::::::::::::
**TL;DR**: … anything, as long as it's callable().
Functions
---------
.. code-block:: python
def get_item(id):
return id, items.get(id)
Each node of a graph will be executed in isolation from the other nodes, and the data is passed from one node to the
next using FIFO queues, managed by the framework. It's transparent to the end-user, though, and you'll only use
function arguments (for inputs) and return/yield values (for outputs).
Each input row of a node will cause one call to this node's callable. Each output is cast internally as a tuple-like
data structure (or more precisely, a namedtuple-like data structure), and for one given node, each output row must
have the same structure.
If you return/yield something which is not a tuple, bonobo will create a tuple of one element.
Properties
----------
|bonobo| assists you with defining the data-flow of your data engineering process, and then streams data through your
callable graphs.
* Each node call will process one row of data.
* Queues that flow the data between nodes are first-in, first-out (FIFO) standard python :class:`queue.Queue`.
* Each node will run in parallel
* Default execution strategy use threading, and each node will run in a separate thread.
Fault tolerance
---------------
Node execution is fault tolerant.
If an exception is raised from a node call, then this node call will be aborted but bonobo will continue the execution
with the next row (after outputting the stack trace and incrementing the "err" counter for the node context).
It allows you to have ETL jobs that ignore faulty data and try their best to process the valid rows of a dataset.
Some errors are fatal, though.
If you pass a 2-element tuple to a node that takes 3 args, |bonobo| will raise an :class:`bonobo.errors.UnrecoverableTypeError`, and exit the
current graph execution as fast as it can (finishing the other node executions that are in progress first, but not
starting new ones if there are remaining input rows).
Definitions
:::::::::::

View File

@ -3,13 +3,8 @@ Guides
This section will guide you through your journey with Bonobo ETL.
.. toctree::
:maxdepth: 2
introduction
transformations
graphs
services
environment
purity
.. include:: _toc.rst

View File

@ -4,9 +4,6 @@ Bonobo API
The Bonobo API, available directly under the :mod:`bonobo` package, contains all the tools you need to get started with
bonobo.
The :mod:`bonobo` package
:::::::::::::::::::::::::
.. automodule:: bonobo
:members:
:undoc-members:

View File

@ -0,0 +1,9 @@
.. warning::
This tutorial was written for |bonobo| 0.5, while the current stable version is |bonobo| 0.6.
Please be aware that some things changed.
A summary of changes is available in the `migration guide from 0.5 to 0.6 <https://news.bonobo-project.org/migration-guide-for-bonobo-0-6-alpha-c1d36b0a9d35>`_.

View File

@ -0,0 +1,65 @@
First steps
===========
.. include:: _outdated_note.rst
What is Bonobo?
:::::::::::::::
Bonobo is an ETL (Extract-Transform-Load) framework for python 3.5. The goal is to define data-transformations, with
python code in charge of handling similar shaped independent lines of data.
Bonobo *is not* a statistical or data-science tool. If you're looking for a data-analysis tool in python, use Pandas.
Bonobo is a lean manufacturing assembly line for data that lets you focus on the actual work instead of the plumbing
(execution contexts, parallelism, error handling, console output, logging, ...).
Bonobo uses simple python and should be quick and easy to learn.
Tutorial
::::::::
.. note::
Good documentation is not easy to write. We do our best to make it better and better.
Although all content here should be accurate, you may feel a lack of completeness, for which we plead guilty and
apologize.
If you're stuck, please come and ask on our `slack channel <https://bonobo-slack.herokuapp.com/>`_, we'll figure
something out.
If you're not stuck but had trouble understanding something, please consider contributing to the docs (via GitHub
pull requests).
.. toctree::
:maxdepth: 2
tut01
tut02
tut03
tut04
What's next?
::::::::::::
Read a few examples
-------------------
* :doc:`/reference/examples`
Read about best development practices
-------------------------------------
* :doc:`/guide/index`
* :doc:`/guide/purity`
Read about integrating external tools with bonobo
-------------------------------------------------
* :doc:`/extension/docker`: run transformation graphs in isolated containers.
* :doc:`/extension/jupyter`: run transformations within jupyter notebooks.
* :doc:`/extension/selenium`: crawl the web using a real browser and work with the gathered data.
* :doc:`/extension/sqlalchemy`: everything you need to interact with SQL databases.

View File

@ -0,0 +1,13 @@
Just enough Python for Bonobo
=============================
.. include:: _outdated_note.rst
.. todo::
This is a work in progress and it is not yet available. Please come back later or even better, help us write this
guide!
This guide is intended to help programmers or enthusiasts to grasp the python basics necessary to use Bonobo. It
should definitely not be considered a general python introduction, nor a deep dive into details.

View File

@ -1,7 +1,10 @@
Let's get started!
==================
To get started with Bonobo, you need to install it in a working python 3.5+ environment:
.. include:: _outdated_note.rst
To begin with Bonobo, you need to install it in a working python 3.5+ environment, and you'll also need cookiecutter
to bootstrap your project.
.. code-block:: shell-session
@ -13,24 +16,21 @@ See :doc:`/install` for more options.
Create an empty project
:::::::::::::::::::::::
Your ETL code will live in standard python files and packages.
Your ETL code will live in ETL projects, which are basically a bunch of files, including python code, that bonobo
can run.
.. code-block:: shell-session
$ bonobo create tutorial.py
$ bonobo init tutorial
This will create a simple example job in a `tutorial.py` file.
This will create a `tutorial` directory (`content description here <https://www.bonobo-project.org/with/cookiecutter>`_).
Now, try to execute it:
To run this project, use:
.. code-block:: shell-session
$ python tutorial.py
$ bonobo run tutorial
Congratulations, you just ran your first ETL job!
.. todo:: XXX **CHANGES NEEDED BELOW THIS POINTS BEFORE 0.6** XXX
Write a first transformation
::::::::::::::::::::::::::::
@ -107,9 +107,6 @@ To do this, it needs to know what data-flow you want to achieve, and you'll use
The `if __name__ == '__main__':` section is not required, unless you want to run it directly using the python
interpreter.
The name of the `graph` variable is arbitrary, but this variable must be global and available unconditionally.
Do not put it in its own function or in the `if __name__ == '__main__':` section.
Execute the job
:::::::::::::::
@ -177,8 +174,8 @@ strategy). Actual behavior of an execution will depend on the strategy chosen, b
cases.
④ Before actually executing the `transformations`, the `ExecutorStrategy` instance will wrap each component in an
`execution context`, whose responsibility is to hold the state of the transformation. It enables to keep the
`transformations` stateless, while allowing to add an external state if required. We'll expand on this later.
`execution context`, whose responsibility is to hold the state of the transformation. It enables you to keep the
`transformations` stateless, while allowing you to add an external state if required. We'll expand on this later.
Concepts and definitions
::::::::::::::::::::::::

View File

@ -1,6 +1,8 @@
Working with files
==================
.. include:: _outdated_note.rst
Bonobo would be pointless if the aim was just to uppercase small lists of strings.
In fact, Bonobo should not be used if you don't expect any gain from parallelization/distribution of tasks.
@ -59,7 +61,13 @@ available in **Bonobo**'s repository:
.. code-block:: shell-session
$ bonobo download examples/datasets/coffeeshops.txt
$ curl https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/datasets/coffeeshops.txt > `python3 -c 'import bonobo; print(bonobo.get_examples_path("datasets/coffeeshops.txt"))'`
.. note::
The "example dataset download" step will be easier in the future.
https://github.com/python-bonobo/bonobo/issues/134
.. literalinclude:: ../../bonobo/examples/tutorials/tut02e01_read.py
:language: python

View File

@ -1,6 +1,8 @@
Configurables and Services
==========================
.. include:: _outdated_note.rst
.. note::
This section lacks completeness, sorry for that (but you can still read it!).
@ -13,7 +15,7 @@ Class-based transformations and configurables
Bonobo is a bit dumb. If something is callable, it considers it can be used as a transformation, and it's up to the
user to provide callables that logically fits in a graph.
You can use plain python objects with a `__call__()` method, and it ill just work.
You can use plain python objects with a `__call__()` method, and it will just work.
As a lot of transformations need common machinery, there are a few tools to quickly build transformations, most of
them requiring your class to subclass :class:`bonobo.config.Configurable`.
@ -30,7 +32,7 @@ Configurables allows to use the following features:
class PrefixIt(Configurable):
prefix = Option(str, positional=True, default='>>>')
def __call__(self, row):
def call(self, row):
return self.prefix + ' ' + row
prefixer = PrefixIt('$')
@ -48,7 +50,7 @@ Configurables allows to use the following features:
url = Option(default='https://jsonplaceholder.typicode.com/users')
http = Service('http.client')
def __call__(self, http):
def call(self, http):
resp = http.get(self.url)
for row in resp.json():
@ -68,7 +70,7 @@ Configurables allows to use the following features:
class Applier(Configurable):
apply = Method()
def __call__(self, row):
def call(self, row):
return self.apply(row)
@Applier
@ -114,7 +116,7 @@ Let's see how to use it, starting from the previous service example:
url = Option(default='https://jsonplaceholder.typicode.com/users')
http = Service('http.client')
def __call__(self, http):
def call(self, http):
resp = http.get(self.url)
for row in resp.json():

View File

@ -1,6 +1,8 @@
Working with databases
======================
.. include:: _outdated_note.rst
Databases (and especially SQL databases here) are not the focus of Bonobo, thus support for it is not (and will never
be) included in the main package. Instead, working with databases is done using third party, well maintained and
specialized packages, like SQLAlchemy, or other database access libraries from the python cheese shop.

View File

@ -1,113 +1,149 @@
Part 2: Writing ETL Jobs
========================
What's an ETL job ?
:::::::::::::::::::
In |bonobo|, an ETL job is a graph with some logic to execute it, like the file we created in the previous section.
In |bonobo|, an ETL job is a single graph that can be executed on its own.
You can learn more about the :class:`bonobo.Graph` data-structure and its properties in the
:doc:`graphs guide </guide/graphs>`.
Within a graph, each node is isolated and can only communicate using its
input and output queues. For each input row, a given node will be called with
the row passed as arguments. Each *return* or *yield* value will be put on the
node's output queue, and the nodes connected in the graph will then be able to
process it.
|bonobo| is a line-by-line data stream processing solution.
Scenario
::::::::
Handling the data-flow this way brings the following properties:
Let's create a sample application, whose goal will be to integrate some data in various systems.
- **First in, first out**: unless stated otherwise, each node will receive the
rows from FIFO queues, and so, the order of rows will be preserved. That is
true for each single node, but please note that if you define "graph bubbles"
(where a graph diverge in different branches then converge again), the
convergence node will receive rows FIFO from each input queue, meaning that
the order existing at the divergence point won't stay true at the convergence
point.
We'll use an open-data dataset, containing all the fablabs in the world.
- **Parallelism**: each node runs in parallel (by default, using independent
threads). This is useful as you don't have to worry about blocking calls.
If a thread waits for, let's say, a database, or a network service, the other
nodes will continue handling data, as long as they have input rows available.
We will normalize this data using a few different rules, then write it somewhere.
- **Independence**: the rows are independent of each other, making this way
of working with data flows good for line-by-line data processing, but
also not ideal for "grouped" computations (where an output depends on more
than one line of input data). You can overcome this with rolling windows if
the input required are adjacent rows, but if you need to work on the whole
dataset at once, you should consider other software.
In this step, we'll focus on getting this data normalized and output to the console. In the next steps, we'll extend it
to other targets, like files, and databases.
Graphs are defined using :class:`bonobo.Graph` instances, as seen in the
previous tutorial step.
What can be a node?
:::::::::::::::::::
Setup
:::::
**TL;DR**: … anything, as long as it's callable().
We'll change the `tutorial.py` file created in the last step to handle this new scenario.
Functions
---------
First, let's remove all boilerplate code, so it looks like this:
.. code-block:: python
def get_item(id):
return id, items.get(id)
import bonobo
Each node of a graph will be executed in isolation from the other nodes, and the data is passed from one node to the
next using FIFO queues, managed by the framework. It's transparent to the end-user, though, and you'll only use
function arguments (for inputs) and return/yield values (for outputs).
Each input row of a node will cause one call to this node's callable. Each output is cast internally as a tuple-like
data structure (or more precisely, a namedtuple-like data structure), and for one given node, each output row must
have the same structure.
If you return/yield something which is not a tuple, bonobo will create a tuple of one element.
Properties
----------
|bonobo| assists you with defining the data-flow of your data engineering process, and then streams data through your
callable graphs.
* Each node call will process one row of data.
* Queues that flow the data between nodes are first-in, first-out (FIFO) standard python :class:`queue.Queue`.
* Each node will run in parallel
* Default execution strategy use threading, and each node will run in a separate thread.
Fault tolerance
---------------
Node execution is fault tolerant.
If an exception is raised from a node call, then this node call will be aborted but bonobo will continue the execution
with the next row (after outputting the stack trace and incrementing the "err" counter for the node context).
It allows you to have ETL jobs that ignore faulty data and try their best to process the valid rows of a dataset.
Some errors are fatal, though.
If you pass a 2-element tuple to a node that takes 3 args, |bonobo| will raise an :class:`bonobo.errors.UnrecoverableTypeError`, and exit the
current graph execution as fast as it can (finishing the other node executions that are in progress first, but not
starting new ones if there are remaining input rows).
def get_graph(**options):
graph = bonobo.Graph()
return graph
Let's write a sample data integration job
:::::::::::::::::::::::::::::::::::::::::
Let's create a sample application.
The goal of this application will be to extract all the fablabs in the world using an open-data API, normalize this
data and, for now, display it. We'll then build on this foundation in the next steps to write to files, databases, etc.
def get_services(**options):
return {}
if __name__ == '__main__':
parser = bonobo.get_argument_parser()
with bonobo.parse_args(parser) as options:
bonobo.run(get_graph(**options), services=get_services(**options))
Your job now contains the logic for executing an empty graph, and we'll complete this with our application logic.
Reading the source data
:::::::::::::::::::::::
Let's add a simple chain to our `get_graph(...)` function, so that it reads from the fablabs open-data api.
The source dataset we'll use can be found on `this site <https://public-us.opendatasoft.com/explore/dataset/fablabs/>`_.
It's licensed under `Public Domain`, which makes it just perfect for our example.
.. note::
There is a :mod:`bonobo.contrib.opendatasoft` module that makes reading from OpenDataSoft APIs easier, including
pagination and limits, but for our tutorial, we'll avoid that and build it manually.
Let's write our extractor:
.. code-block:: python
import requests
FABLABS_API_URL = 'https://public-us.opendatasoft.com/api/records/1.0/search/?dataset=fablabs&rows=1000'
def extract_fablabs():
yield from requests.get(FABLABS_API_URL).json().get('records')
This extractor will get called once, query the API url, parse it as JSON, and yield the items from the "records" list,
one by one.
.. note::
You'll probably want to make it a bit more verbose in a real application, to handle all kind of errors that can
happen here. What if the server is down? What if it returns a response which is not JSON? What if the data is not
in the expected format?
For simplicity's sake, we'll ignore that here, but that's the kind of questions you should have in mind when writing
pipelines.
To test our pipeline, let's use a :class:`bonobo.Limit` and a :class:`bonobo.PrettyPrinter`, and change our
`get_graph(...)` function accordingly:
.. code-block:: python
import bonobo
def get_graph(**options):
graph = bonobo.Graph()
graph.add_chain(
extract_fablabs,
bonobo.Limit(10),
bonobo.PrettyPrinter(),
)
return graph
Running this job should output a bit of data, along with some statistics.
First, let's look at the statistics:
.. code-block:: shell-session
- extract_fablabs in=1 out=995 [done]
- Limit in=995 out=10 [done]
- PrettyPrinter in=10 out=10 [done]
It is important to understand that we extracted everything (995 rows), before dropping 99% of the dataset.
This is OK for debugging, but not efficient.
.. note::
You should always try to limit the amount of data as early as possible, which often means not generating the data
you won't need in the first place. Here, we could have used the `rows=` query parameter in the API URL to not
request the data we would anyway drop.
Normalize
:::::::::
.. include:: _todo.rst
Output
::::::
We used :class:`bonobo.PrettyPrinter` to output the data.
It's a flexible transformation provided that helps you display the content of a stream, and you'll probably use it a
lot for various reasons.
Moving forward
::::::::::::::
You now know:
* How to ...
* How to use a reader node.
* How to use the console output.
* How to limit the number of elements in a stream.
* How to pass data from one node to another.
* How to structure a graph using chains.
**Next: :doc:`3-files`**
It's now time to jump to :doc:`3-files`.

View File

@ -1,6 +1,51 @@
Part 3: Working with Files
==========================
.. include:: _wip_note.rst
Writing to the console is nice, but using files is probably more realistic.
Let's see how to use a few builtin writers and both local and remote filesystems.
Filesystems
:::::::::::
In |bonobo|, files are accessed within a **filesystem** service which must be something with the same interface as
`fs' FileSystem objects <https://docs.pyfilesystem.org/en/latest/builtin.html>`_. As a default, you'll get an instance
of a local filesystem mapped to the current working directory as the `fs` service. You'll learn more about services in
the next step, but for now, let's just use it.
Writing using the service
:::::::::::::::::::::::::
Although |bonobo| contains helpers to write to common file formats, let's start by writing it manually.
.. code-block:: python
from bonobo.config import use
from bonobo.constants import NOT_MODIFIED
@use('fs')
def write_repr_to_file(*row, fs):
with fs.open('output.txt', 'a+') as f:
print(row, file=f)
return NOT_MODIFIED
Then, update the `get_graph(...)` function, by adding `write_repr_to_file` just before your `PrettyPrinter()` node.
Let's try to run that and think about what happens.
Each time a row comes to this node, the output file is opened in "append or create" mode, a line is written, and the file
is closed.
This is **NOT** how you want to do things. Let's rewrite it so our `open(...)` call becomes execution-wide.
* Filesystems
* Reading files
@ -19,4 +64,4 @@ You now know:
* How to ...
**Next: :doc:`4-services`**
It's now time to jump to :doc:`4-services`.

View File

@ -1,6 +1,7 @@
Part 4: Services and Configurables
==================================
.. include:: _wip_note.rst
In the last section, we used a few new tools.
@ -204,4 +205,4 @@ You now know:
* How to ...
**Next: :doc:`5-packaging`**
It's now time to jump to :doc:`5-packaging`.

View File

@ -1,6 +1,8 @@
Part 5: Projects and Packaging
==============================
.. include:: _wip_note.rst
Until then, we worked with one file managing a job.
Real life often involves more complicated setups, with relations and imports between different files.
@ -13,7 +15,6 @@ kind of project structure, as the target structure will be dictated by the hosti
sub-package would perfectly fit a django or flask project, or even a regular package, but it's up to you to chose the
structure of your project.
about using |bonobo| in a pyt
is about set of jobs working together within a project.
Let's see how to move from the current status to a package.
@ -26,3 +27,19 @@ You now know:
* How to ...
That's the end of the tutorial, you should now be familiar with all the basics.
A few appendixes to the tutorial can explain how to integrate with other systems (we'll use the "fablabs" application
created in this tutorial and extend it):
* :doc:`notebooks`
* :doc:`sqlalchemy`
* :doc:`django`
* :doc:`docker`
Then, you can either jump head-first into your code, or get a better grasp of all concepts by
:doc:`reading the full bonobo guide </guide/index>`.
Happy data flows!

3
docs/tutorial/_todo.rst Normal file
View File

@ -0,0 +1,3 @@
.. warning::
This section is missing. Sorry, but stay tuned! It'll be added soon.

View File

@ -0,0 +1,12 @@
.. warning::
This section is being rewritten for |bonobo| 0.6, and it's now in a "work in progress" state.
You can read :doc:`the tutorial for the previous version (0.5) <0.5/index>`. Please note that things changed a bit
since then and you'll have quirks here and there.
You can also read the `migration guide from 0.5 to 0.6 <https://news.bonobo-project.org/migration-guide-for-bonobo-0-6-alpha-c1d36b0a9d35>`_
that will give you a good overview of the changes.
Hopefully, this document will be updated soon, and please accept our apologies about this doc status until then.

View File

@ -1,3 +1,24 @@
Working with Django
===================
.. warning::
This section does not exist yet, but it's in the plans to write it quite soon.
Meanwhile, you can check the source code and other links provided below.
Source code
:::::::::::
https://github.com/python-bonobo/bonobo/tree/master/bonobo/contrib/django
bonobo.contrib.django
:::::::::::::::::::::
.. automodule:: bonobo.contrib.django
:members:
:undoc-members:
:show-inheritance:

16
docs/tutorial/docker.rst Normal file
View File

@ -0,0 +1,16 @@
Working with Docker
===================
.. warning::
This section does not exist yet, but it's in the plans to write it quite soon.
Meanwhile, you can check the source code and other links provided below.
Source code
:::::::::::
https://github.com/python-bonobo/bonobo-docker

View File

@ -53,3 +53,4 @@ out.
If you're not stuck but had trouble understanding something, please consider contributing to the docs (using GitHub
pull requests).
.. include:: _wip_note.rst

View File

@ -1,4 +1,13 @@
Working with Jupyter Notebooks
==============================
.. warning::
This section does not exist yet, but it's in the plans to write it quite soon.
Meanwhile, you can check the source code and other links provided below.
Source code
:::::::::::
https://github.com/python-bonobo/bonobo/tree/master/bonobo/contrib/jupyter

View File

@ -1,4 +1,15 @@
Working with SQL Databases
==========================
.. warning::
This section does not exist yet, but it's in the plans to write it quite soon.
Meanwhile, you can check the source code and other links provided below.
Source code
:::::::::::
https://github.com/python-bonobo/bonobo-sqlalchemy

View File

@ -6,23 +6,32 @@ dependencies:
- wheel=0.29.0
- pip:
- appdirs==1.4.3
- certifi==2017.7.27.1
- certifi==2017.11.5
- chardet==3.0.4
- colorama==0.3.9
- fs==2.0.12
- fs==2.0.17
- graphviz==0.8.2
- idna==2.6
- jinja2==2.9.6
- jinja2==2.10
- markupsafe==1.0
- mondrian==0.4.0
- mondrian==0.6.1
- packaging==16.8
- pbr==3.1.1
- psutil==5.4.0
- psutil==5.4.3
- pyparsing==2.2.0
- python-slugify==1.2.4
- pytz==2017.3
- requests==2.18.4
- six==1.11.0
- stevedore==1.27.1
- stevedore==1.28.0
- unidecode==1.0.22
- urllib3==1.22
- whistle==1.0.0
# for docs
- alabaster==0.7.10
- sphinx-sitemap==0.2
- sphinx==1.6.5
- sphinxcontrib-websupport==1.0.1
# for examples
- pycountry ==17.9.23

View File

@ -1,28 +1,38 @@
-e .[dev]
alabaster==0.7.10
arrow==0.12.0
attrs==17.4.0
babel==2.5.1
binaryornot==0.4.4
certifi==2017.11.5
chardet==3.0.4
click==6.7
cookiecutter==1.5.1
coverage==4.4.2
docutils==0.14
future==0.16.0
idna==2.6
imagesize==0.7.1
jinja2-time==0.2.0
jinja2==2.10
markupsafe==1.0
pluggy==0.6.0
poyo==0.4.1
py==1.5.2
pygments==2.2.0
pytest-cov==2.5.1
pytest-sugar==0.9.0
pytest-sugar==0.8.0
pytest-timeout==1.2.1
pytest==3.3.1
pytest==3.3.2
python-dateutil==2.6.1
pytz==2017.3
requests==2.18.4
six==1.11.0
snowballstemmer==1.2.1
sphinx==1.6.5
sphinx-sitemap==0.2
sphinx==1.6.6
sphinxcontrib-websupport==1.0.1
termcolor==1.1.0
urllib3==1.22
whichcraft==0.4.1
yapf==0.20.0

View File

@ -1,6 +1,6 @@
-e .[docker]
appdirs==1.4.3
bonobo-docker==0.6.0a1
bonobo-docker==0.6.0
certifi==2017.11.5
chardet==3.0.4
colorama==0.3.9
@ -22,7 +22,7 @@ requests==2.18.4
semantic-version==2.6.0
six==1.11.0
stevedore==1.28.0
unidecode==0.4.21
unidecode==1.0.22
urllib3==1.22
websocket-client==0.46.0
whistle==1.0.0

View File

@ -1,6 +1,5 @@
-e .[jupyter]
appnope==0.1.0
attrs==17.4.0
bleach==2.1.2
decorator==4.1.2
entrypoints==0.2.3
@ -12,7 +11,7 @@ ipywidgets==6.0.1
jedi==0.11.1
jinja2==2.10
jsonschema==2.6.0
jupyter-client==5.2.0
jupyter-client==5.2.1
jupyter-console==5.2.0
jupyter-core==4.4.0
jupyter==1.0.0
@ -25,20 +24,17 @@ pandocfilters==1.4.2
parso==0.1.1
pexpect==4.3.1
pickleshare==0.7.4
pluggy==0.6.0
prompt-toolkit==1.0.15
ptyprocess==0.5.2
py==1.5.2
pygments==2.2.0
pytest==3.3.1
python-dateutil==2.6.1
pyzmq==17.0.0b3
pyzmq==16.0.3
qtconsole==4.3.1
simplegeneric==0.8.1
six==1.11.0
terminado==0.8.1
testpath==0.3.1
tornado==5.0a1
tornado==4.5.3
traitlets==4.3.2
wcwidth==0.1.7
webencodings==0.5.1

View File

@ -1,6 +1,6 @@
-e .[sqlalchemy]
appdirs==1.4.3
bonobo-sqlalchemy==0.6.0a1
bonobo-sqlalchemy==0.6.0
certifi==2017.11.5
chardet==3.0.4
colorama==0.3.9
@ -20,6 +20,6 @@ requests==2.18.4
six==1.11.0
sqlalchemy==1.2.0
stevedore==1.28.0
unidecode==0.4.21
unidecode==1.0.22
urllib3==1.22
whistle==1.0.0

View File

@ -18,6 +18,6 @@ pytz==2017.3
requests==2.18.4
six==1.11.0
stevedore==1.28.0
unidecode==0.4.21
unidecode==1.0.22
urllib3==1.22
whistle==1.0.0

View File

@ -64,8 +64,9 @@ setup(
],
extras_require={
'dev': [
'coverage (>= 4.4, < 5.0)', 'pytest (>= 3.1, < 4.0)', 'pytest-cov (>= 2.5, < 3.0)',
'pytest-sugar (>= 0.9, < 0.10)', 'pytest-timeout (~= 1.0)', 'sphinx (>= 1.6, < 2.0)', 'yapf'
'cookiecutter (>= 1.5, < 1.6)', 'coverage (>= 4.4, < 5.0)', 'pytest (>= 3.1, < 4.0)',
'pytest-cov (>= 2.5, < 3.0)', 'pytest-sugar (>= 0.8, < 0.9)', 'pytest-timeout (>= 1, < 2)',
'sphinx (>= 1.6, < 2.0)', 'sphinx-sitemap (>= 0.2, < 0.3)', 'yapf'
],
'docker': ['bonobo-docker (~= 0.6.0a1)'],
'jupyter': ['ipywidgets (~= 6.0)', 'jupyter (~= 1.0)'],