Documenting transformations and configurables.
This commit is contained in:
@ -1,8 +1,51 @@
|
|||||||
class Option:
|
class Option:
|
||||||
"""
|
"""
|
||||||
An Option is a descriptor for a required or optional parameter of a Configurable.
|
An Option is a descriptor for Configurable's parameters.
|
||||||
|
|
||||||
|
.. attribute:: type
|
||||||
|
|
||||||
|
The option type is a callable used to cast, clean or validate the option value. If not provided, or
|
||||||
|
None, the option's value will be the exact value the user provided.
|
||||||
|
|
||||||
|
(default: None)
|
||||||
|
|
||||||
|
.. attribute:: required
|
||||||
|
|
||||||
|
If an option is required, an error will be raised if no value is provided (at runtime). If it is not, the option
|
||||||
|
will have the default value if the user does not override it at runtime.
|
||||||
|
|
||||||
|
(default: False)
|
||||||
|
|
||||||
|
.. attribute:: positional
|
||||||
|
|
||||||
|
If this is true, it'll be possible to provide the option value as a positional argument. Otherwise, it must
|
||||||
|
be provided as a keyword argument.
|
||||||
|
|
||||||
|
(default: False)
|
||||||
|
|
||||||
|
.. attribute:: default
|
||||||
|
|
||||||
|
Default value for non-required options.
|
||||||
|
|
||||||
|
(default: None)
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo.config import Configurable, Option
|
||||||
|
|
||||||
|
class Example(Configurable):
|
||||||
|
title = Option(str, required=True, positional=True)
|
||||||
|
keyword = Option(str, default='foo')
|
||||||
|
|
||||||
|
def call(self, s):
|
||||||
|
return self.title + ': ' + s + ' (' + self.keyword + ')'
|
||||||
|
|
||||||
|
example = Example('hello', keyword='bar')
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_creation_counter = 0
|
_creation_counter = 0
|
||||||
|
|
||||||
def __init__(self, type=None, *, required=False, positional=False, default=None):
|
def __init__(self, type=None, *, required=False, positional=False, default=None):
|
||||||
@ -32,6 +75,36 @@ class Option:
|
|||||||
|
|
||||||
|
|
||||||
class Method(Option):
|
class Method(Option):
|
||||||
|
"""
|
||||||
|
A Method is a special callable-valued option that can be used in three different ways (but for the same purpose).
|
||||||
|
|
||||||
|
* Like a normal option, the value can be provided to the Configurable constructor.
|
||||||
|
|
||||||
|
>>> from bonobo.config import Configurable, Method
|
||||||
|
|
||||||
|
>>> class MethodExample(Configurable):
|
||||||
|
... handler = Method()
|
||||||
|
|
||||||
|
>>> example1 = MethodExample(handler=str.upper)
|
||||||
|
|
||||||
|
* It can be used by a child class that overrides the Method with a normal method.
|
||||||
|
|
||||||
|
>>> class ChildMethodExample(MethodExample):
|
||||||
|
... def handler(self, s: str):
|
||||||
|
... return s.upper()
|
||||||
|
|
||||||
|
>>> example2 = ChildMethodExample()
|
||||||
|
|
||||||
|
* Finally, it also enables the class to be used as a decorator, to generate a subclass providing the Method a value.
|
||||||
|
|
||||||
|
>>> @MethodExample
|
||||||
|
... def OtherChildMethodExample(s):
|
||||||
|
... return s.upper()
|
||||||
|
|
||||||
|
>>> example3 = OtherChildMethodExample()
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__(None, required=False, positional=True)
|
super().__init__(None, required=False, positional=True)
|
||||||
|
|
||||||
|
|||||||
@ -39,6 +39,10 @@ class Service(Option):
|
|||||||
|
|
||||||
The main goal is not to tie transformations to actual dependencies, so the same can be run in different contexts
|
The main goal is not to tie transformations to actual dependencies, so the same can be run in different contexts
|
||||||
(stages like preprod, prod, or tenants like client1, client2, or anything you want).
|
(stages like preprod, prod, or tenants like client1, client2, or anything you want).
|
||||||
|
|
||||||
|
.. attribute:: name
|
||||||
|
|
||||||
|
Service name will be used to retrieve the implementation at runtime.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|||||||
@ -1,41 +1,13 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright 2012-2017 Romain Dorgueil
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import functools
|
import functools
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from colorama import Fore, Style
|
from colorama import Style, Fore
|
||||||
|
|
||||||
from bonobo import settings
|
from bonobo import settings
|
||||||
from bonobo.plugins import Plugin
|
from bonobo.plugins import Plugin
|
||||||
from bonobo.util.term import CLEAR_EOL, MOVE_CURSOR_UP
|
from bonobo.util.term import CLEAR_EOL, MOVE_CURSOR_UP
|
||||||
|
|
||||||
|
|
||||||
@functools.lru_cache(1)
|
|
||||||
def memory_usage():
|
|
||||||
import os, psutil
|
|
||||||
process = psutil.Process(os.getpid())
|
|
||||||
return process.memory_info()[0] / float(2**20)
|
|
||||||
|
|
||||||
|
|
||||||
# @lru_cache(64)
|
|
||||||
# def execution_time(harness):
|
|
||||||
# return datetime.datetime.now() - harness._started_at
|
|
||||||
|
|
||||||
|
|
||||||
class ConsoleOutputPlugin(Plugin):
|
class ConsoleOutputPlugin(Plugin):
|
||||||
"""
|
"""
|
||||||
Outputs status information to the connected stdout. Can be a TTY, with or without support for colors/cursor
|
Outputs status information to the connected stdout. Can be a TTY, with or without support for colors/cursor
|
||||||
@ -107,3 +79,10 @@ class ConsoleOutputPlugin(Plugin):
|
|||||||
if rewind:
|
if rewind:
|
||||||
print(CLEAR_EOL)
|
print(CLEAR_EOL)
|
||||||
print(MOVE_CURSOR_UP(t_cnt + 2))
|
print(MOVE_CURSOR_UP(t_cnt + 2))
|
||||||
|
|
||||||
|
|
||||||
|
@functools.lru_cache(1)
|
||||||
|
def memory_usage():
|
||||||
|
import os, psutil
|
||||||
|
process = psutil.Process(os.getpid())
|
||||||
|
return process.memory_info()[0] / float(2**20)
|
||||||
@ -1,5 +0,0 @@
|
|||||||
from .plugin import ConsoleOutputPlugin
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
'ConsoleOutputPlugin',
|
|
||||||
]
|
|
||||||
@ -5,7 +5,6 @@ import requests # todo: make this a service so we can substitute it ?
|
|||||||
from bonobo.config import Option
|
from bonobo.config import Option
|
||||||
from bonobo.config.processors import ContextProcessor
|
from bonobo.config.processors import ContextProcessor
|
||||||
from bonobo.config.configurables import Configurable
|
from bonobo.config.configurables import Configurable
|
||||||
from bonobo.util.compat import deprecated
|
|
||||||
from bonobo.util.objects import ValueHolder
|
from bonobo.util.objects import ValueHolder
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
32
docs/_templates/index.html
vendored
32
docs/_templates/index.html
vendored
@ -3,10 +3,7 @@
|
|||||||
{% block body %}
|
{% block body %}
|
||||||
|
|
||||||
<div style="border: 2px solid red; font-weight: bold; margin: 1em; padding: 1em">
|
<div style="border: 2px solid red; font-weight: bold; margin: 1em; padding: 1em">
|
||||||
Bonobo is currently <strong>ALPHA</strong> software. That means that the doc is not finished, and that
|
Bonobo is <strong>ALPHA</strong> software. Some APIs will change.
|
||||||
some APIs will change.<br>
|
|
||||||
There are a lot of missing sections, including comparison with other tools. But if you're looking for a
|
|
||||||
replacement for X, unless X is an ETL, bonobo is probably not what you want.
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<h1 style="text-align: center">
|
<h1 style="text-align: center">
|
||||||
@ -16,26 +13,12 @@
|
|||||||
|
|
||||||
<p>
|
<p>
|
||||||
{% trans %}
|
{% trans %}
|
||||||
<strong>Bonobo</strong> is a line-by-line data-processing toolkit for python 3.5+ emphasizing simple and
|
<strong>Bonobo</strong> is a line-by-line data-processing toolkit for python 3.5+ (extract-transform-load
|
||||||
atomic data transformations defined using a directed graph of plain old python callables (functions and
|
framework) emphasizing simple and atomic data transformations defined using a directed graph of plain old
|
||||||
generators).
|
python objects (functions, iterables, generators, ...).
|
||||||
{% endtrans %}
|
{% endtrans %}
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p>
|
|
||||||
{% trans %}
|
|
||||||
<strong>Bonobo</strong> is a extract-transform-load framework that uses python code to define transformations.
|
|
||||||
{% endtrans %}
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
{% trans %}
|
|
||||||
<strong>Bonobo</strong> is your own data-monkey army. Tedious and repetitive data-processing incoming? Give
|
|
||||||
it a try!
|
|
||||||
{% endtrans %}
|
|
||||||
</p>
|
|
||||||
|
|
||||||
|
|
||||||
<h2 style="margin-bottom: 0">{% trans %}Documentation{% endtrans %}</h2>
|
<h2 style="margin-bottom: 0">{% trans %}Documentation{% endtrans %}</h2>
|
||||||
|
|
||||||
<table class="contentstable">
|
<table class="contentstable">
|
||||||
@ -95,8 +78,9 @@
|
|||||||
</li>
|
</li>
|
||||||
<li>
|
<li>
|
||||||
{% trans %}
|
{% trans %}
|
||||||
<b>Dependency injection:</b> Abstract the transformation dependencies to easily switch data sources and
|
<b>Service injection:</b> Abstract the transformation dependencies to easily switch data sources and
|
||||||
used libraries, allowing to easily test your transformations.
|
dependent libraries. You'll be able to specify the concrete implementations or configurations at
|
||||||
|
runtime, for example to switch a database connection string or an API endpoint.
|
||||||
{% endtrans %}
|
{% endtrans %}
|
||||||
</li>
|
</li>
|
||||||
<li>
|
<li>
|
||||||
@ -107,7 +91,7 @@
|
|||||||
</li>
|
</li>
|
||||||
<li>
|
<li>
|
||||||
{% trans %}
|
{% trans %}
|
||||||
Work in progress: read the <a href="https://www.bonobo-project.org/roadmap">roadmap</a>.
|
Bonobo is young, and the todo-list is huge. Read the <a href="https://www.bonobo-project.org/roadmap">roadmap</a>.
|
||||||
{% endtrans %}
|
{% endtrans %}
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|||||||
@ -10,6 +10,7 @@ There are a few things that you should know while writing transformations graphs
|
|||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
purity
|
purity
|
||||||
|
transformations
|
||||||
services
|
services
|
||||||
|
|
||||||
Third party integrations
|
Third party integrations
|
||||||
|
|||||||
@ -1,20 +1,18 @@
|
|||||||
Services and dependencies (draft implementation)
|
Services and dependencies
|
||||||
================================================
|
=========================
|
||||||
|
|
||||||
:Status: Draft implementation
|
:Last-Modified: 20 may 2017
|
||||||
:Stability: Alpha
|
|
||||||
:Last-Modified: 28 apr 2017
|
|
||||||
|
|
||||||
Most probably, you'll want to use external systems within your transformations. Those systems may include databases,
|
You'll probably want to use external systems within your transformations. Those systems may include databases, apis
|
||||||
apis (using http, for example), filesystems, etc.
|
(using http, for example), filesystems, etc.
|
||||||
|
|
||||||
You can start by hardcoding those services. That does the job, at first.
|
You can start by hardcoding those services. That does the job, at first.
|
||||||
|
|
||||||
If you're going a little further than that, you'll feel limited, for a few reasons:
|
If you're going a little further than that, you'll feel limited, for a few reasons:
|
||||||
|
|
||||||
* Hardcoded and tightly linked dependencies make your transformations hard to test, and hard to reuse.
|
* Hardcoded and tightly linked dependencies make your transformations hard to test, and hard to reuse.
|
||||||
* Processing data on your laptop is great, but being able to do it on different systems (or stages), in different
|
* Processing data on your laptop is great, but being able to do it on different target systems (or stages), in different
|
||||||
environments, is more realistic? You probably want to configure a different database on a staging environment,
|
environments, is more realistic. You'll want to configure a different database on a staging environment,
|
||||||
preprod environment or production system. Maybe you have similar systems for different clients and want to select
|
preprod environment or production system. Maybe you have similar systems for different clients and want to select
|
||||||
the system at runtime. Etc.
|
the system at runtime. Etc.
|
||||||
|
|
||||||
@ -52,10 +50,11 @@ injected to your calls under the parameter name "database".
|
|||||||
Function-based transformations
|
Function-based transformations
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
||||||
No implementation yet, but expect something similar to CBT API, maybe using a `@Service(...)` decorator.
|
No implementation yet, but expect something similar to CBT API, maybe using a `@Service(...)` decorator. See
|
||||||
|
`issue #70 <https://github.com/python-bonobo/bonobo/issues/70>`_.
|
||||||
|
|
||||||
Execution
|
Provide implementation at run time
|
||||||
---------
|
----------------------------------
|
||||||
|
|
||||||
Let's see how to execute it:
|
Let's see how to execute it:
|
||||||
|
|
||||||
|
|||||||
89
docs/guide/transformations.rst
Normal file
89
docs/guide/transformations.rst
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
Transformations
|
||||||
|
===============
|
||||||
|
|
||||||
|
Here are some guidelines on how to write transformations, to avoid the convention-jungle that could happen without
|
||||||
|
a few rules.
|
||||||
|
|
||||||
|
|
||||||
|
Naming conventions
|
||||||
|
::::::::::::::::::
|
||||||
|
|
||||||
|
The naming convention used is the following.
|
||||||
|
|
||||||
|
If you're naming something which is an actual transformation, that can be used directly as a graph node, then use
|
||||||
|
underscores and lowercase names:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
# instance of a class based transformation
|
||||||
|
filter = Filter(...)
|
||||||
|
|
||||||
|
# function based transformation
|
||||||
|
def uppercase(s: str) -> str:
|
||||||
|
return s.upper()
|
||||||
|
|
||||||
|
If you're naming something which is configurable, that will need to be instantiated or called to obtain something that
|
||||||
|
can be used as a graph node, then use camelcase names:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
# configurable
|
||||||
|
class ChangeCase(Configurable):
|
||||||
|
modifier = Option(default='upper')
|
||||||
|
def call(self, s: str) -> str:
|
||||||
|
return getattr(s, self.modifier)()
|
||||||
|
|
||||||
|
# transformation factory
|
||||||
|
def Apply(method):
|
||||||
|
@functools.wraps(method)
|
||||||
|
def apply(s: str) -> str:
|
||||||
|
return method(s)
|
||||||
|
return apply
|
||||||
|
|
||||||
|
# result is a graph node candidate
|
||||||
|
upper = Apply(str.upper)
|
||||||
|
|
||||||
|
|
||||||
|
Function based transformations
|
||||||
|
::::::::::::::::::::::::::::::
|
||||||
|
|
||||||
|
The most basic transformations are function-based, which means that you define a function and it will be used directly
|
||||||
|
in a graph.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
def get_representation(row):
|
||||||
|
return repr(row)
|
||||||
|
|
||||||
|
graph = bonobo.Graph(
|
||||||
|
[...],
|
||||||
|
get_representation,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
It does not allow any configuration, but if it's an option, prefer it as it's simpler to write.
|
||||||
|
|
||||||
|
|
||||||
|
Class based transformations
|
||||||
|
:::::::::::::::::::::::::::
|
||||||
|
|
||||||
|
A lot of logic is a bit more complex, and you'll want to use classes to define some of your transformations.
|
||||||
|
|
||||||
|
The :class:`bonobo.config.Configurable` class gives you a few toys to write configurable transformations.
|
||||||
|
|
||||||
|
Options
|
||||||
|
-------
|
||||||
|
|
||||||
|
.. autoclass:: bonobo.config.Option
|
||||||
|
|
||||||
|
Services
|
||||||
|
--------
|
||||||
|
|
||||||
|
.. autoclass:: bonobo.config.Service
|
||||||
|
|
||||||
|
Method
|
||||||
|
------
|
||||||
|
|
||||||
|
.. autoclass:: bonobo.config.Method
|
||||||
|
|
||||||
|
|
||||||
@ -1,23 +1,29 @@
|
|||||||
First steps
|
First steps
|
||||||
===========
|
===========
|
||||||
|
|
||||||
We tried hard to make **Bonobo** simple. We use simple python, and we believe it should be simple to learn.
|
Bonobo uses simple python and should be quick and easy to learn.
|
||||||
|
|
||||||
|
What is Bonobo?
|
||||||
|
:::::::::::::::
|
||||||
|
|
||||||
|
Bonobo is an ETL (Extract-Transform-Load) framework for python 3.5+. The goal is to define data-transformations, with
|
||||||
|
python code in charge of handling similarly shaped independent lines of data.
|
||||||
|
|
||||||
|
Bonobo *is not* a statistical or data-science tool. If you're looking for a data-analysis tool in python, use Pandas.
|
||||||
|
|
||||||
|
Bonobo is a lean manufacturing assembly line for data that lets you focus on the actual work instead of the plumbing.
|
||||||
|
|
||||||
|
|
||||||
Tutorial
|
Tutorial
|
||||||
::::::::
|
::::::::
|
||||||
|
|
||||||
We strongly advice that even if you're an advanced python developper, you go through the whole tutorial for two
|
|
||||||
reasons: that should be sufficient to do anything possible with **Bonobo** and that's a good moment to learn the few
|
|
||||||
concepts you'll see everywhere in the software.
|
|
||||||
|
|
||||||
If you're not familiar with python, you should first read :doc:`python`.
|
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
tut01
|
tut01
|
||||||
tut02
|
tut02
|
||||||
|
|
||||||
|
|
||||||
What's next?
|
What's next?
|
||||||
::::::::::::
|
::::::::::::
|
||||||
|
|
||||||
@ -39,3 +45,4 @@ Read about integrating external tools with bonobo
|
|||||||
* :doc:`../guide/ext/jupyter`: run transformations within jupyter notebooks.
|
* :doc:`../guide/ext/jupyter`: run transformations within jupyter notebooks.
|
||||||
* :doc:`../guide/ext/selenium`: run
|
* :doc:`../guide/ext/selenium`: run
|
||||||
* :doc:`../guide/ext/sqlalchemy`: everything you need to interact with SQL databases.
|
* :doc:`../guide/ext/sqlalchemy`: everything you need to interact with SQL databases.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user