Documenting transformations and configurables.
This commit is contained in:
@ -1,8 +1,51 @@
|
||||
class Option:
|
||||
"""
|
||||
An Option is a descriptor for a required or optional parameter of a Configurable.
|
||||
|
||||
An Option is a descriptor for Configurable's parameters.
|
||||
|
||||
.. attribute:: type
|
||||
|
||||
The option type is a callable used to cast, clean or validate the option value. If not provided, or
|
||||
None, the option's value will be the exact value the user provided.
|
||||
|
||||
(default: None)
|
||||
|
||||
.. attribute:: required
|
||||
|
||||
If an option is required, an error will be raised if no value is provided (at runtime). If it is not, the option
|
||||
will have the default value if the user does not override it at runtime.
|
||||
|
||||
(default: False)
|
||||
|
||||
.. attribute:: positional
|
||||
|
||||
If this is true, it'll be possible to provide the option value as a positional argument. Otherwise, it must
|
||||
be provided as a keyword argument.
|
||||
|
||||
(default: False)
|
||||
|
||||
.. attribute:: default
|
||||
|
||||
Default value for non-required options.
|
||||
|
||||
(default: None)
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from bonobo.config import Configurable, Option
|
||||
|
||||
class Example(Configurable):
|
||||
title = Option(str, required=True, positional=True)
|
||||
keyword = Option(str, default='foo')
|
||||
|
||||
def call(self, s):
|
||||
return self.title + ': ' + s + ' (' + self.keyword + ')'
|
||||
|
||||
example = Example('hello', keyword='bar')
|
||||
|
||||
"""
|
||||
|
||||
_creation_counter = 0
|
||||
|
||||
def __init__(self, type=None, *, required=False, positional=False, default=None):
|
||||
@ -32,6 +75,36 @@ class Option:
|
||||
|
||||
|
||||
class Method(Option):
|
||||
"""
|
||||
A Method is a special callable-valued option that can be used in three different ways (but for the same purpose).
|
||||
|
||||
* Like a normal option, the value can be provided to the Configurable constructor.
|
||||
|
||||
>>> from bonobo.config import Configurable, Method
|
||||
|
||||
>>> class MethodExample(Configurable):
|
||||
... handler = Method()
|
||||
|
||||
>>> example1 = MethodExample(handler=str.upper)
|
||||
|
||||
* It can be used by a child class that overrides the Method with a normal method.
|
||||
|
||||
>>> class ChildMethodExample(MethodExample):
|
||||
... def handler(self, s: str):
|
||||
... return s.upper()
|
||||
|
||||
>>> example2 = ChildMethodExample()
|
||||
|
||||
* Finally, it also enables the class to be used as a decorator, to generate a subclass providing the Method a value.
|
||||
|
||||
>>> @MethodExample
|
||||
... def OtherChildMethodExample(s):
|
||||
... return s.upper()
|
||||
|
||||
>>> example3 = OtherChildMethodExample()
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(None, required=False, positional=True)
|
||||
|
||||
|
||||
@ -39,6 +39,10 @@ class Service(Option):
|
||||
|
||||
The main goal is not to tie transformations to actual dependencies, so the same can be run in different contexts
|
||||
(stages like preprod, prod, or tenants like client1, client2, or anything you want).
|
||||
|
||||
.. attribute:: name
|
||||
|
||||
Service name will be used to retrieve the implementation at runtime.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@ -1,41 +1,13 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2012-2017 Romain Dorgueil
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import functools
|
||||
import sys
|
||||
|
||||
from colorama import Fore, Style
|
||||
from colorama import Style, Fore
|
||||
|
||||
from bonobo import settings
|
||||
from bonobo.plugins import Plugin
|
||||
from bonobo.util.term import CLEAR_EOL, MOVE_CURSOR_UP
|
||||
|
||||
|
||||
@functools.lru_cache(1)
|
||||
def memory_usage():
|
||||
import os, psutil
|
||||
process = psutil.Process(os.getpid())
|
||||
return process.memory_info()[0] / float(2**20)
|
||||
|
||||
|
||||
# @lru_cache(64)
|
||||
# def execution_time(harness):
|
||||
# return datetime.datetime.now() - harness._started_at
|
||||
|
||||
|
||||
class ConsoleOutputPlugin(Plugin):
|
||||
"""
|
||||
Outputs status information to the connected stdout. Can be a TTY, with or without support for colors/cursor
|
||||
@ -107,3 +79,10 @@ class ConsoleOutputPlugin(Plugin):
|
||||
if rewind:
|
||||
print(CLEAR_EOL)
|
||||
print(MOVE_CURSOR_UP(t_cnt + 2))
|
||||
|
||||
|
||||
@functools.lru_cache(1)
|
||||
def memory_usage():
|
||||
import os, psutil
|
||||
process = psutil.Process(os.getpid())
|
||||
return process.memory_info()[0] / float(2**20)
|
||||
@ -1,5 +0,0 @@
|
||||
from .plugin import ConsoleOutputPlugin
|
||||
|
||||
__all__ = [
|
||||
'ConsoleOutputPlugin',
|
||||
]
|
||||
@ -5,7 +5,6 @@ import requests # todo: make this a service so we can substitute it ?
|
||||
from bonobo.config import Option
|
||||
from bonobo.config.processors import ContextProcessor
|
||||
from bonobo.config.configurables import Configurable
|
||||
from bonobo.util.compat import deprecated
|
||||
from bonobo.util.objects import ValueHolder
|
||||
|
||||
|
||||
|
||||
32
docs/_templates/index.html
vendored
32
docs/_templates/index.html
vendored
@ -3,10 +3,7 @@
|
||||
{% block body %}
|
||||
|
||||
<div style="border: 2px solid red; font-weight: bold; margin: 1em; padding: 1em">
|
||||
Bonobo is currently <strong>ALPHA</strong> software. That means that the doc is not finished, and that
|
||||
some APIs will change.<br>
|
||||
There are a lot of missing sections, including comparison with other tools. But if you're looking for a
|
||||
replacement for X, unless X is an ETL, bonobo is probably not what you want.
|
||||
Bonobo is <strong>ALPHA</strong> software. Some APIs will change.
|
||||
</div>
|
||||
|
||||
<h1 style="text-align: center">
|
||||
@ -16,26 +13,12 @@
|
||||
|
||||
<p>
|
||||
{% trans %}
|
||||
<strong>Bonobo</strong> is a line-by-line data-processing toolkit for python 3.5+ emphasizing simple and
|
||||
atomic data transformations defined using a directed graph of plain old python callables (functions and
|
||||
generators).
|
||||
<strong>Bonobo</strong> is a line-by-line data-processing toolkit for python 3.5+ (extract-transform-load
|
||||
framework) emphasizing simple and atomic data transformations defined using a directed graph of plain old
|
||||
python objects (functions, iterables, generators, ...).
|
||||
{% endtrans %}
|
||||
</p>
|
||||
|
||||
<p>
|
||||
{% trans %}
|
||||
<strong>Bonobo</strong> is a extract-transform-load framework that uses python code to define transformations.
|
||||
{% endtrans %}
|
||||
</p>
|
||||
|
||||
<p>
|
||||
{% trans %}
|
||||
<strong>Bonobo</strong> is your own data-monkey army. Tedious and repetitive data-processing incoming? Give
|
||||
it a try!
|
||||
{% endtrans %}
|
||||
</p>
|
||||
|
||||
|
||||
<h2 style="margin-bottom: 0">{% trans %}Documentation{% endtrans %}</h2>
|
||||
|
||||
<table class="contentstable">
|
||||
@ -95,8 +78,9 @@
|
||||
</li>
|
||||
<li>
|
||||
{% trans %}
|
||||
<b>Dependency injection:</b> Abstract the transformation dependencies to easily switch data sources and
|
||||
used libraries, allowing to easily test your transformations.
|
||||
<b>Service injection:</b> Abstract the transformation dependencies to easily switch data sources and
|
||||
dependent libraries. You'll be able to specify the concrete implementations or configurations at
|
||||
runtime, for example to switch a database connection string or an API endpoint.
|
||||
{% endtrans %}
|
||||
</li>
|
||||
<li>
|
||||
@ -107,7 +91,7 @@
|
||||
</li>
|
||||
<li>
|
||||
{% trans %}
|
||||
Work in progress: read the <a href="https://www.bonobo-project.org/roadmap">roadmap</a>.
|
||||
Bonobo is young, and the todo-list is huge. Read the <a href="https://www.bonobo-project.org/roadmap">roadmap</a>.
|
||||
{% endtrans %}
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
@ -10,6 +10,7 @@ There are a few things that you should know while writing transformations graphs
|
||||
:maxdepth: 2
|
||||
|
||||
purity
|
||||
transformations
|
||||
services
|
||||
|
||||
Third party integrations
|
||||
|
||||
@ -1,20 +1,18 @@
|
||||
Services and dependencies (draft implementation)
|
||||
================================================
|
||||
Services and dependencies
|
||||
=========================
|
||||
|
||||
:Status: Draft implementation
|
||||
:Stability: Alpha
|
||||
:Last-Modified: 28 apr 2017
|
||||
:Last-Modified: 20 may 2017
|
||||
|
||||
Most probably, you'll want to use external systems within your transformations. Those systems may include databases,
|
||||
apis (using http, for example), filesystems, etc.
|
||||
You'll probably want to use external systems within your transformations. Those systems may include databases, apis
|
||||
(using http, for example), filesystems, etc.
|
||||
|
||||
You can start by hardcoding those services. That does the job, at first.
|
||||
|
||||
If you're going a little further than that, you'll feel limited, for a few reasons:
|
||||
|
||||
* Hardcoded and tightly linked dependencies make your transformations hard to test, and hard to reuse.
|
||||
* Processing data on your laptop is great, but being able to do it on different systems (or stages), in different
|
||||
environments, is more realistic? You probably want to contigure a different database on a staging environment,
|
||||
* Processing data on your laptop is great, but being able to do it on different target systems (or stages), in different
|
||||
environments, is more realistic. You'll want to configure a different database on a staging environment,
|
||||
preprod environment or production system. Maybe you have similar systems for different clients and want to select
|
||||
the system at runtime. Etc.
|
||||
|
||||
@ -52,10 +50,11 @@ injected to your calls under the parameter name "database".
|
||||
Function-based transformations
|
||||
------------------------------
|
||||
|
||||
No implementation yet, but expect something similar to CBT API, maybe using a `@Service(...)` decorator.
|
||||
No implementation yet, but expect something similar to CBT API, maybe using a `@Service(...)` decorator. See
|
||||
`issue #70 <https://github.com/python-bonobo/bonobo/issues/70>`_.
|
||||
|
||||
Execution
|
||||
---------
|
||||
Provide implementation at run time
|
||||
----------------------------------
|
||||
|
||||
Let's see how to execute it:
|
||||
|
||||
|
||||
89
docs/guide/transformations.rst
Normal file
89
docs/guide/transformations.rst
Normal file
@ -0,0 +1,89 @@
|
||||
Transformations
|
||||
===============
|
||||
|
||||
Here are some guidelines on how to write transformations, to avoid the convention-jungle that could happen without
|
||||
a few rules.
|
||||
|
||||
|
||||
Naming conventions
|
||||
::::::::::::::::::
|
||||
|
||||
The naming convention used is the following.
|
||||
|
||||
If you're naming something which is an actual transformation, that can be used directly as a graph node, then use
|
||||
underscores and lowercase names:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
# instance of a class based transformation
|
||||
filter = Filter(...)
|
||||
|
||||
# function based transformation
|
||||
def uppercase(s: str) -> str:
|
||||
return s.upper()
|
||||
|
||||
If you're naming something which is configurable, that will need to be instantiated or called to obtain something that
|
||||
can be used as a graph node, then use camelcase names:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
# configurable
|
||||
class ChangeCase(Configurable):
|
||||
modifier = Option(default='upper')
|
||||
def call(self, s: str) -> str:
|
||||
return getattr(s, self.modifier)()
|
||||
|
||||
# transformation factory
|
||||
def Apply(method):
|
||||
@functools.wraps(method)
|
||||
def apply(s: str) -> str:
|
||||
return method(s)
|
||||
return apply
|
||||
|
||||
# result is a graph node candidate
|
||||
upper = Apply(str.upper)
|
||||
|
||||
|
||||
Function based transformations
|
||||
::::::::::::::::::::::::::::::
|
||||
|
||||
The most basic transformations are function-based, which means that you define a function, and it will be used directly
|
||||
in a graph.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
def get_representation(row):
|
||||
return repr(row)
|
||||
|
||||
graph = bonobo.Graph(
|
||||
[...],
|
||||
get_representation,
|
||||
)
|
||||
|
||||
|
||||
It does not allow any configuration, but if it's an option, prefer it as it's simpler to write.
|
||||
|
||||
|
||||
Class based transformations
|
||||
:::::::::::::::::::::::::::
|
||||
|
||||
A lot of logic is a bit more complex, and you'll want to use classes to define some of your transformations.
|
||||
|
||||
The :class:`bonobo.config.Configurable` class gives you a few toys to write configurable transformations.
|
||||
|
||||
Options
|
||||
-------
|
||||
|
||||
.. autoclass:: bonobo.config.Option
|
||||
|
||||
Services
|
||||
--------
|
||||
|
||||
.. autoclass:: bonobo.config.Service
|
||||
|
||||
Method
|
||||
------
|
||||
|
||||
.. autoclass:: bonobo.config.Method
|
||||
|
||||
|
||||
@ -1,23 +1,29 @@
|
||||
First steps
|
||||
===========
|
||||
|
||||
We tried hard to make **Bonobo** simple. We use simple python, and we believe it should be simple to learn.
|
||||
Bonobo uses simple python and should be quick and easy to learn.
|
||||
|
||||
What is Bonobo?
|
||||
:::::::::::::::
|
||||
|
||||
Bonobo is an ETL (Extract-Transform-Load) framework for python 3.5. The goal is to define data-transformations, with
|
||||
python code in charge of handling similarly shaped, independent lines of data.
|
||||
|
||||
Bonobo *is not* a statistical or data-science tool. If you're looking for a data-analysis tool in python, use Pandas.
|
||||
|
||||
Bonobo is a lean manufacturing assembly line for data that lets you focus on the actual work instead of the plumbing.
|
||||
|
||||
|
||||
Tutorial
|
||||
::::::::
|
||||
|
||||
We strongly advise that even if you're an advanced python developer, you go through the whole tutorial for two
|
||||
reasons: that should be sufficient to do anything possible with **Bonobo** and that's a good moment to learn the few
|
||||
concepts you'll see everywhere in the software.
|
||||
|
||||
If you're not familiar with python, you should first read :doc:`python`.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
tut01
|
||||
tut02
|
||||
|
||||
|
||||
What's next?
|
||||
::::::::::::
|
||||
|
||||
@ -39,3 +45,4 @@ Read about integrating external tools with bonobo
|
||||
* :doc:`../guide/ext/jupyter`: run transformations within jupyter notebooks.
|
||||
* :doc:`../guide/ext/selenium`: run
|
||||
* :doc:`../guide/ext/sqlalchemy`: everything you need to interact with SQL databases.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user