Documenting transformations and configurables.

2017-05-20 13:05:07 +02:00
parent 577a781de3
commit a018cca20e
11 changed files with 210 additions and 80 deletions
--- a/bonobo/config/options.py
+++ b/bonobo/config/options.py
@ -1,8 +1,51 @@
 class Option:
    """
-    An Option is a descriptor for a required or optional parameter of a Configurable.
-    
+    An Option is a descriptor for Configurable's parameters.
+
+    .. attribute:: type
+
+        Option type lets you provide a callable used to cast, clean or validate the option value. If not provided, or
+        None, the option's value will be the exact value the user provided.
+
+        (default: None)
+
+    .. attribute:: required
+
+        If an option is required, an error will be raised if no value is provided (at runtime). If it is not, option
+        will have the default value if user does not override it at runtime.
+
+        (default: False)
+
+    .. attribute:: positional
+
+        If this is true, it'll be possible to provide the option value as a positional argument. Otherwise, it must
+        be provided as a keyword argument.
+
+        (default: False)
+
+    .. attribute:: default
+
+        Default value for non-required options.
+
+        (default: None)
+
+    Example:
+
+        .. code-block:: python
+
+            from bonobo.config import Configurable, Option
+
+            class Example(Configurable):
+                title = Option(str, required=True, positional=True)
+                keyword = Option(str, default='foo')
+
+                def call(self, s):
+                    return self.title + ': ' + s + ' (' + self.keyword + ')'
+
+            example = Example('hello', keyword='bar')
+
    """
+
    _creation_counter = 0

    def __init__(self, type=None, *, required=False, positional=False, default=None):
@ -32,6 +75,36 @@ class Option:


 class Method(Option):
+    """
+    A Method is a special callable-valued option that can be used in three different ways (but for the same purpose).
+
+    * Like a normal option, the value can be provided to the Configurable constructor.
+
+        >>> from bonobo.config import Configurable, Method
+
+        >>> class MethodExample(Configurable):
+        ...     handler = Method()
+
+        >>> example1 = MethodExample(handler=str.upper)
+
+    * It can be used by a child class that overrides the Method with a normal method.
+
+        >>> class ChildMethodExample(MethodExample):
+        ...     def handler(self, s: str):
+        ...         return s.upper()
+
+        >>> example2 = ChildMethodExample()
+
+    * Finally, it also enables the class to be used as a decorator, to generate a subclass providing the Method a value.
+
+        >>> @MethodExample
+        ... def OtherChildMethodExample(s):
+        ...     return s.upper()
+
+        >>> example3 = OtherChildMethodExample()
+
+    """
+
    def __init__(self):
        super().__init__(None, required=False, positional=True)

--- a/bonobo/config/services.py
+++ b/bonobo/config/services.py
@ -39,6 +39,10 @@ class Service(Option):
            
    The main goal is not to tie transformations to actual dependencies, so the same can be run in different contexts
    (stages like preprod, prod, or tenants like client1, client2, or anything you want).
+
+    .. attribute:: name
+
+        Service name will be used to retrieve the implementation at runtime.
    
    """

--- a/bonobo/ext/console/plugin.py
+++ b/bonobo/ext/console/plugin.py
@ -1,41 +1,13 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright 2012-2017 Romain Dorgueil
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 import functools
 import sys

-from colorama import Fore, Style
+from colorama import Style, Fore

 from bonobo import settings
 from bonobo.plugins import Plugin
 from bonobo.util.term import CLEAR_EOL, MOVE_CURSOR_UP


-@functools.lru_cache(1)
-def memory_usage():
-    import os, psutil
-    process = psutil.Process(os.getpid())
-    return process.memory_info()[0] / float(2**20)
-
-
-# @lru_cache(64)
-# def execution_time(harness):
-#    return datetime.datetime.now() - harness._started_at
-
-
 class ConsoleOutputPlugin(Plugin):
    """
    Outputs status information to the connected stdout. Can be a TTY, with or without support for colors/cursor
@ -107,3 +79,10 @@ class ConsoleOutputPlugin(Plugin):
        if rewind:
            print(CLEAR_EOL)
            print(MOVE_CURSOR_UP(t_cnt + 2))
+
+
+@functools.lru_cache(1)
+def memory_usage():
+    import os, psutil
+    process = psutil.Process(os.getpid())
+    return process.memory_info()[0] / float(2**20)
--- a/bonobo/ext/console/init.py
+++ b/bonobo/ext/console/init.py
@ -1,5 +0,0 @@
-from .plugin import ConsoleOutputPlugin
-
-__all__ = [
-    'ConsoleOutputPlugin',
-]
--- a/bonobo/ext/opendatasoft.py
+++ b/bonobo/ext/opendatasoft.py
@ -5,7 +5,6 @@ import requests  # todo: make this a service so we can substitute it ?
 from bonobo.config import Option
 from bonobo.config.processors import ContextProcessor
 from bonobo.config.configurables import Configurable
-from bonobo.util.compat import deprecated
 from bonobo.util.objects import ValueHolder


--- a/bonobo/ext/pandas.py
+++ b/bonobo/ext/pandas.py
--- a/docs/_templates/index.html
+++ b/docs/_templates/index.html
@ -3,10 +3,7 @@
 {% block body %}

    <div style="border: 2px solid red; font-weight: bold; margin: 1em; padding: 1em">
-        Bonobo is currently <strong>ALPHA</strong> software. That means that the doc is not finished, and that
-        some APIs will change.<br>
-        There are a lot of missing sections, including comparison with other tools. But if you're looking for a
-        replacement for X, unless X is an ETL, bonobo is probably not what you want.
+        Bonobo is <strong>ALPHA</strong> software. Some APIs will change.
    </div>

    <h1 style="text-align: center">
@ -16,26 +13,12 @@

    <p>
        {% trans %}
-            <strong>Bonobo</strong> is a line-by-line data-processing toolkit for python 3.5+ emphasizing simple and
-            atomic data transformations defined using a directed graph of plain old python callables (functions and
-            generators).
+            <strong>Bonobo</strong> is a line-by-line data-processing toolkit for python 3.5+ (extract-transform-load
+            framework) emphasizing simple and atomic data transformations defined using a directed graph of plain old
+            python objects (functions, iterables, generators, ...).
        {% endtrans %}
    </p>

-    <p>
-        {% trans %}
-            <strong>Bonobo</strong> is a extract-transform-load framework that uses python code to define transformations.
-        {% endtrans %}
-    </p>
-
-    <p>
-        {% trans %}
-            <strong>Bonobo</strong> is your own data-monkey army. Tedious and repetitive data-processing incoming? Give
-            it a try!
-        {% endtrans %}
-    </p>
-
-
    <h2 style="margin-bottom: 0">{% trans %}Documentation{% endtrans %}</h2>

    <table class="contentstable">
@ -95,8 +78,9 @@
        </li>
        <li>
            {% trans %}
-                <b>Dependency injection:</b> Abstract the transformation dependencies to easily switch data sources and
-                used libraries, allowing to easily test your transformations.
+                <b>Service injection:</b> Abstract the transformation dependencies to easily switch data sources and
+                dependent libraries. You'll be able to specify the concrete implementations or configurations at
+                runtime, for example to switch a database connection string or an API endpoint.
            {% endtrans %}
        </li>
        <li>
@ -107,7 +91,7 @@
        </li>
        <li>
            {% trans %}
-            Work in progress: read the <a href="https://www.bonobo-project.org/roadmap">roadmap</a>.
+            Bonobo is young, and the todo-list is huge. Read the <a href="https://www.bonobo-project.org/roadmap">roadmap</a>.
            {% endtrans %}
        </li>
    </ul>
--- a/docs/guide/index.rst
+++ b/docs/guide/index.rst
@ -10,6 +10,7 @@ There are a few things that you should know while writing transformations graphs
    :maxdepth: 2

    purity
+    transformations
    services

 Third party integrations
--- a/docs/guide/services.rst
+++ b/docs/guide/services.rst
@ -1,20 +1,18 @@
-Services and dependencies (draft implementation)
-================================================
+Services and dependencies
+=========================

-:Status: Draft implementation
-:Stability: Alpha
-:Last-Modified: 28 apr 2017
+:Last-Modified: 20 may 2017

-Most probably, you'll want to use external systems within your transformations. Those systems may include databases,
-apis (using http, for example), filesystems, etc.
+You'll probably want to use external systems within your transformations. Those systems may include databases, apis
+(using http, for example), filesystems, etc.

 You can start by hardcoding those services. That does the job, at first.

 If you're going a little further than that, you'll feel limited, for a few reasons:

 * Hardcoded and tightly linked dependencies make your transformations hard to test, and hard to reuse.
-* Processing data on your laptop is great, but being able to do it on different systems (or stages), in different
-  environments, is more realistic? You probably want to contigure a different database on a staging environment,
+* Processing data on your laptop is great, but being able to do it on different target systems (or stages), in different
+  environments, is more realistic. You'll want to configure a different database on a staging environment,
  preprod environment or production system. Maybe you have silimar systems for different clients and want to select
  the system at runtime. Etc.

@ -52,10 +50,11 @@ injected to your calls under the parameter name "database".
 Function-based transformations
 ------------------------------

-No implementation yet, but expect something similar to CBT API, maybe using a `@Service(...)` decorator.
+No implementation yet, but expect something similar to CBT API, maybe using a `@Service(...)` decorator. See
+`issue #70 <https://github.com/python-bonobo/bonobo/issues/70>`_.

-Execution
---------
+Provide implementation at run time
+----------------------------------

 Let's see how to execute it:

--- a/docs/guide/transformations.rst
+++ b/docs/guide/transformations.rst
@ -0,0 +1,89 @@
+Transformations
+===============
+
+Here are some guidelines on how to write transformations, to avoid the convention-jungle that could happen without
+a few rules.
+
+
+Naming conventions
+::::::::::::::::::
+
+The naming convention used is the following.
+
+If you're naming something which is an actual transformation, that can be used directly as a graph node, then use
+underscores and lowercase names:
+
+.. code-block:: python
+
+    # instance of a class based transformation
+    filter = Filter(...)
+
+    # function based transformation
+    def uppercase(s: str) -> str:
+        return s.upper()
+
+If you're naming something which is configurable, that will need to be instantiated or called to obtain something that
+can be used as a graph node, then use camelcase names:
+
+.. code-block:: python
+
+    # configurable
+    class ChangeCase(Configurable):
+        modifier = Option(default='upper')
+        def call(self, s: str) -> str:
+            return getattr(s, self.modifier)()
+
+    # transformation factory
+    def Apply(method):
+        @functools.wraps(method)
+        def apply(s: str) -> str:
+            return method(s)
+        return apply
+
+    # result is a graph node candidate
+    upper = Apply(str.upper)
+
+
+Function based transformations
+::::::::::::::::::::::::::::::
+
+The most basic transformations are function-based, which means that you define a function, and it will be used directly
+in a graph.
+
+.. code-block:: python
+
+    def get_representation(row):
+        return repr(row)
+
+    graph = bonobo.Graph(
+        [...],
+        get_representation,
+    )
+
+
+A function does not allow any configuration, but when none is needed, prefer it, as it's simpler to write.
+
+
+Class based transformations
+:::::::::::::::::::::::::::
+
+A lot of logic is a bit more complex, and you'll want to use classes to define some of your transformations.
+
+The :class:`bonobo.config.Configurable` class gives you a few toys to write configurable transformations.
+
+Options
+-------
+
+.. autoclass:: bonobo.config.Option
+
+Services
+--------
+
+.. autoclass:: bonobo.config.Service
+
+Method
+------
+
+.. autoclass:: bonobo.config.Method
+
+
--- a/docs/tutorial/index.rst
+++ b/docs/tutorial/index.rst
@ -1,23 +1,29 @@
 First steps
 ===========

-We tried hard to make **Bonobo** simple. We use simple python, and we believe it should be simple to learn.
+Bonobo uses simple python and should be quick and easy to learn.
+
+What is Bonobo?
+:::::::::::::::
+
+Bonobo is an ETL (Extract-Transform-Load) framework for python 3.5+. The goal is to define data-transformations, with
+python code in charge of handling similarly shaped independent lines of data.
+
+Bonobo *is not* a statistical or data-science tool. If you're looking for a data-analysis tool in python, use Pandas.
+
+Bonobo is a lean manufacturing assembly line for data that lets you focus on the actual work instead of the plumbing.
+

 Tutorial
 ::::::::

-We strongly advice that even if you're an advanced python developper, you go through the whole tutorial for two
-reasons: that should be sufficient to do anything possible with **Bonobo** and that's a good moment to learn the few
-concepts you'll see everywhere in the software.
-
-If you're not familiar with python, you should first read :doc:`python`.
-
 .. toctree::
    :maxdepth: 2

    tut01
    tut02

+
 What's next?
 ::::::::::::

@ -39,3 +45,4 @@ Read about integrating external tools with bonobo
 * :doc:`../guide/ext/jupyter`: run transformations within jupyter notebooks.
 * :doc:`../guide/ext/selenium`: run
 * :doc:`../guide/ext/sqlalchemy`: everything you need to interract with SQL databases.
+