From aa6e426768118b0be362e7404a1802e531008fab Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 16 Jan 2018 06:27:25 +0100 Subject: [PATCH] [doc] proofreading the guides, refactoring the reference. --- Makefile | 2 +- bin/update_apidoc.py | 63 ++++ bonobo/__init__.py | 3 +- bonobo/_api.py | 17 +- bonobo/config/__init__.py | 6 + bonobo/config/options.py | 2 +- bonobo/constants.py | 33 +- bonobo/contrib/django/__init__.py | 8 + bonobo/contrib/django/commands.py | 6 + bonobo/execution/__init__.py | 7 + bonobo/execution/contexts/__init__.py | 7 + bonobo/execution/events.py | 27 ++ bonobo/execution/strategies/__init__.py | 8 + bonobo/util/__init__.py | 5 + docs/_static/custom.css | 22 +- docs/_templates/base.html | 2 +- docs/conf.py | 8 +- docs/extension/django.rst | 70 +++- docs/extension/sqlalchemy.rst | 21 +- docs/guide/debugging.rst | 40 ++ docs/guide/future/services.rst | 56 +++ docs/guide/future/transformations.rst | 83 +++++ docs/guide/graphs.rst | 6 +- docs/guide/purity.rst | 9 + docs/guide/services.rst | 53 +-- docs/guide/transformations.rst | 343 ++++++++++-------- docs/reference/api.rst | 12 - docs/reference/api/bonobo.rst | 10 + docs/reference/api/bonobo/config.rst | 10 + docs/reference/api/bonobo/constants.rst | 11 + docs/reference/api/bonobo/execution.rst | 19 + .../api/bonobo/execution/contexts.rst | 10 + .../reference/api/bonobo/execution/events.rst | 10 + .../api/bonobo/execution/strategies.rst | 10 + docs/reference/api/bonobo/util.rst | 10 + docs/reference/api_config.rst | 10 - docs/reference/api_util.rst | 10 - docs/reference/index.rst | 15 +- requirements-dev.txt | 2 +- requirements-jupyter.txt | 7 +- requirements-sqlalchemy.txt | 2 +- 41 files changed, 767 insertions(+), 288 deletions(-) create mode 100644 bin/update_apidoc.py create mode 100644 docs/guide/future/services.rst create mode 100644 docs/guide/future/transformations.rst delete mode 100644 docs/reference/api.rst create mode 100644 docs/reference/api/bonobo.rst create mode 100644 
docs/reference/api/bonobo/config.rst create mode 100644 docs/reference/api/bonobo/constants.rst create mode 100644 docs/reference/api/bonobo/execution.rst create mode 100644 docs/reference/api/bonobo/execution/contexts.rst create mode 100644 docs/reference/api/bonobo/execution/events.rst create mode 100644 docs/reference/api/bonobo/execution/strategies.rst create mode 100644 docs/reference/api/bonobo/util.rst delete mode 100644 docs/reference/api_config.rst delete mode 100644 docs/reference/api_util.rst diff --git a/Makefile b/Makefile index 9751306..f9a4758 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Generated by Medikit 0.4.6 on 2018-01-14. +# Generated by Medikit 0.4.3 on 2018-01-16. # All changes will be overriden. PACKAGE ?= bonobo diff --git a/bin/update_apidoc.py b/bin/update_apidoc.py new file mode 100644 index 0000000..adf5e41 --- /dev/null +++ b/bin/update_apidoc.py @@ -0,0 +1,63 @@ +import os + +from jinja2 import Environment, DictLoader + +__path__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__), '..')) + +apidoc_root = 'docs/reference/api' + + +class Module: + def __init__(self, name, title=None, *, automodule_options=None): + + self.name = name + self.title = title or ' '.join(map(str.title, self.name.split('.')[1:])) + self.automodule_options = automodule_options or list() + + def __repr__(self): + return '<{} ({})>'.format(self.title, self.name) + + def asdict(self): + return { + 'name': self.name, + 'title': self.title, + 'automodule_options': self.automodule_options, + } + + def get_path(self): + return os.path.join(__path__, apidoc_root, *self.name.split('.')) + '.rst' + + +modules = [ + Module('bonobo', title='Bonobo'), + Module('bonobo.config'), + Module('bonobo.constants', automodule_options=['no-members']), + Module('bonobo.execution'), + Module('bonobo.execution.contexts'), + Module('bonobo.execution.events'), + Module('bonobo.execution.strategies'), + Module('bonobo.util'), +] + + + +def 
underlined_filter(txt, chr): + return txt + '\n' + chr * len(txt) + + +env = Environment(loader=DictLoader({ + 'module': ''' +{{ (':mod:`'~title~' <'~name~'>`') | underlined('=') }} + +.. currentmodule:: {{ name }} + +:Module: :mod:`{{ name }}` + +.. automodule:: {{ name }} +{% for opt in automodule_options %} :{{ opt }}:{{ "\n" }}{% endfor %} + '''[1:-1] + '\n'})) +env.filters['underlined'] = underlined_filter + +for module in modules: + with open(module.get_path(), 'w+') as f: + f.write(env.get_template('module').render(module.asdict())) diff --git a/bonobo/__init__.py b/bonobo/__init__.py index 32baf98..704c9ef 100644 --- a/bonobo/__init__.py +++ b/bonobo/__init__.py @@ -4,7 +4,6 @@ # transformations using a simple directed graph of python callables. # # Licensed under Apache License 2.0, read the LICENSE file in the root of the source tree. -"""Bonobo data-processing toolkit main module.""" import sys @@ -12,10 +11,12 @@ assert (sys.version_info >= (3, 5)), 'Python 3.5+ is required to use Bonobo.' from bonobo._api import * from bonobo._api import __all__ +from bonobo._api import __doc__ from bonobo._version import __version__ __all__ = ['__version__'] + __all__ __logo__ = '' +__doc__ = __doc__ __version__ = __version__ diff --git a/bonobo/_api.py b/bonobo/_api.py index df38004..5d38224 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,8 +1,16 @@ +""" +Contains all the tools you need to get started with the framework, including (but not limited to) generic +transformations, readers, writers, and tools for writing and executing graphs and jobs. + +All objects in this module are considered very safe to use, and backward compatibility when moving up from one version +to another is maximal. 
+ +""" + from bonobo.execution.strategies import create_strategy from bonobo.nodes import __all__ as _all_nodes from bonobo.nodes import * from bonobo.structs import Graph -from bonobo.util import get_name from bonobo.util.api import ApiHelper from bonobo.util.environ import parse_args, get_argument_parser @@ -96,7 +104,8 @@ api.register_group(create_strategy) @api.register def open_fs(fs_url=None, *args, **kwargs): """ - Wraps :func:`fs.open_fs` function with a few candies. + Wraps :obj:`fs.opener.registry.Registry.open_fs`, with default to local current working directory and expanding ~ in + path. :param str fs_url: A filesystem URL :param parse_result: A parsed filesystem URL. @@ -105,9 +114,9 @@ def open_fs(fs_url=None, *args, **kwargs): :param bool create: True if the filesystem should be created if it does not exist. :param str cwd: The current working directory (generally only relevant for OS filesystems). :param str default_protocol: The protocol to use if one is not supplied in the FS URL (defaults to ``"osfs"``). - :returns: :class:`~fs.base.FS` object + :returns: :class:`fs.base.FS` object """ - from fs import open_fs as _open_fs + from fs.opener import open_fs as _open_fs from os.path import expanduser from os import getcwd diff --git a/bonobo/config/__init__.py b/bonobo/config/__init__.py index 6ba99e8..809fc05 100644 --- a/bonobo/config/__init__.py +++ b/bonobo/config/__init__.py @@ -1,3 +1,9 @@ +""" +The Config API, located under the :mod:`bonobo.config` namespace, contains all the tools you need to create +configurable transformations, either class-based or function-based. 
+ +""" + from bonobo.config.configurables import Configurable from bonobo.config.functools import transformation_factory from bonobo.config.options import Method, Option diff --git a/bonobo/config/options.py b/bonobo/config/options.py index e470277..cdf9411 100644 --- a/bonobo/config/options.py +++ b/bonobo/config/options.py @@ -68,7 +68,7 @@ class Option: if self.__doc__: self.__doc__ = textwrap.dedent(self.__doc__.strip('\n')).strip() if default: - self.__doc__ += '\nDefault: {!r}'.format(default) + self.__doc__ += '\n\nDefault: {!r}'.format(default) # This hack is necessary for python3.5 self._creation_counter = Option._creation_counter diff --git a/bonobo/constants.py b/bonobo/constants.py index b1a199c..fde06de 100644 --- a/bonobo/constants.py +++ b/bonobo/constants.py @@ -1,6 +1,28 @@ -class Token: - """Factory for signal oriented queue messages or other token types.""" +""" +.. data:: BEGIN + **BEGIN** token marks the entrypoint of graphs, and all extractors will be connected to this node. + + Without this, it would be impossible for an execution to actually start anything, as it's the marker that tells + |bonobo| which node to actually call when the execution starts. + +.. data:: NOT_MODIFIED + + **NOT_MODIFIED** is a special value you can return or yield from a transformation to tell bonobo to reuse + the input data as output. + + As a convention, all loaders should return this, so loaders can be chained. + +.. data:: EMPTY + + Shortcut for "empty tuple". It's often much more clear to write (especially in a test) `write(EMPTY)` than + `write(())`, although strictly equivalent. 
+ + +""" + + +class Token: def __init__(self, name): self.__name__ = name @@ -8,16 +30,15 @@ class Token: return '<{}>'.format(self.__name__) -BEGIN = Token('Begin') -END = Token('End') - - class Flag(Token): must_be_first = False must_be_last = False allows_data = True +BEGIN = Token('Begin') +END = Token('End') + INHERIT = Flag('Inherit') NOT_MODIFIED = Flag('NotModified') NOT_MODIFIED.must_be_first = True diff --git a/bonobo/contrib/django/__init__.py b/bonobo/contrib/django/__init__.py index d8bd00a..d159eea 100644 --- a/bonobo/contrib/django/__init__.py +++ b/bonobo/contrib/django/__init__.py @@ -1,3 +1,11 @@ +""" +This module contains all tools for Bonobo and Django to interact nicely. + +* :class:`ETLCommand` +* :func:`create_or_update` + +""" + from .utils import create_or_update from .commands import ETLCommand diff --git a/bonobo/contrib/django/commands.py b/bonobo/contrib/django/commands.py index 8729b12..c0d744c 100644 --- a/bonobo/contrib/django/commands.py +++ b/bonobo/contrib/django/commands.py @@ -26,6 +26,12 @@ class ETLCommand(BaseCommand): def create_parser(self, prog_name, subcommand): return bonobo.get_argument_parser(super().create_parser(prog_name, subcommand)) + def add_arguments(self, parser): + """ + Entry point for subclassed commands to add custom arguments. + """ + pass + def get_graph(self, *args, **options): def not_implemented(): raise NotImplementedError('You must implement {}.get_graph() method.'.format(self)) diff --git a/bonobo/execution/__init__.py b/bonobo/execution/__init__.py index 43ffbf3..88ceba1 100644 --- a/bonobo/execution/__init__.py +++ b/bonobo/execution/__init__.py @@ -1,3 +1,10 @@ +""" +Execution logic, surrounding contexts for nodes and graphs and events. + +This module is considered **internal**.
+ +""" + import logging logger = logging.getLogger(__name__) diff --git a/bonobo/execution/contexts/__init__.py b/bonobo/execution/contexts/__init__.py index 4c462c5..41e811e 100644 --- a/bonobo/execution/contexts/__init__.py +++ b/bonobo/execution/contexts/__init__.py @@ -1,3 +1,10 @@ +""" +Execution Contexts are objects that wrap the stateless data-structures (graphs and nodes) during a job execution to +keep an eye on their context/state (from the simplest things like i/o statistics to lifecycle and custom userland +state). + +""" + from bonobo.execution.contexts.graph import GraphExecutionContext from bonobo.execution.contexts.node import NodeExecutionContext from bonobo.execution.contexts.plugin import PluginExecutionContext diff --git a/bonobo/execution/events.py b/bonobo/execution/events.py index 3bf3986..3269bfc 100644 --- a/bonobo/execution/events.py +++ b/bonobo/execution/events.py @@ -1,3 +1,30 @@ +""" +.. data:: START + + Event dispatched before execution starts. + +.. data:: STARTED + + Event dispatched after execution starts. + +.. data:: TICK + + Event dispatched while execution runs, on a regular basis (on each "tick"). + +.. data:: STOP + + Event dispatched before execution stops. + +.. data:: STOPPED + + Event dispatched after execution stops. + +.. data:: KILL + + Event dispatched when execution is killed. + +""" + from whistle import Event START = 'execution.start' diff --git a/bonobo/execution/strategies/__init__.py b/bonobo/execution/strategies/__init__.py index 1c5d50a..7eacbcf 100644 --- a/bonobo/execution/strategies/__init__.py +++ b/bonobo/execution/strategies/__init__.py @@ -1,3 +1,11 @@ +""" +Execution strategies define how an actual job execution will happen. Default and recommended strategy is "threadpool", +for now, which leverages a :obj:`concurrent.futures.ThreadPoolExecutor` to run each node in a separate thread. + +In the future, the two strategies that would really benefit bonobo are subprocess and dask/dask.distributed.
Please feel +at home if you want to give it a shot. + +""" from bonobo.execution.strategies.executor import ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy from bonobo.execution.strategies.naive import NaiveStrategy diff --git a/bonobo/util/__init__.py b/bonobo/util/__init__.py index b03c0a9..894d053 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -1,3 +1,8 @@ +""" +The Util API, located under the :mod:`bonobo.util` namespace, contains helper functions and decorators to work with +and inspect transformations, graphs, and nodes. + +""" from bonobo.util.collections import cast, ensure_tuple, sortedlist, tuplize from bonobo.util.compat import deprecated, deprecated_alias from bonobo.util.inspect import ( diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 3de53da..9f49310 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -35,13 +35,23 @@ div.note { border: 0; } -div.admonition { - padding: 20px; -} - .last { margin-bottom: 0 !important; } -pre { - padding: 6px 20px; + +div.admonition { + padding: 16px; } + +pre { + padding: 16px; + border: 1px solid #ddd; + background-color: #fafafa; +} + +.section > dl { + border: 1px solid #ddd; + background-color: #fafafa; + margin: 16px 0; + padding: 16px; +} \ No newline at end of file diff --git a/docs/_templates/base.html b/docs/_templates/base.html index 27ca438..9ebdb07 100644 --- a/docs/_templates/base.html +++ b/docs/_templates/base.html @@ -12,7 +12,7 @@ {{ relbar() }}