Merge pull request #212 from hartym/develop

Refactoring towards __main__ usage and a more pythonic way to hook into job files.
This commit is contained in:
Romain Dorgueil
2017-11-05 19:42:50 +01:00
committed by GitHub
110 changed files with 2462 additions and 1581 deletions

View File

@ -4,6 +4,7 @@ python:
- 3.5-dev
- 3.6
- 3.6-dev
- 3.7-dev
- nightly
install:
- make install-dev

View File

@ -1 +1,2 @@
include *.txt
include *.py-tpl

View File

@ -1,4 +1,4 @@
# Generated by Medikit 0.4a5 on 2017-10-28.
# Generated by Medikit 0.4.1 on 2017-11-04.
# All changes will be overridden.
PACKAGE ?= bonobo

View File

@ -29,36 +29,43 @@ python.setup(
'bonobo = bonobo.commands:entrypoint',
],
'bonobo.commands': [
'convert = bonobo.commands.convert:register',
'init = bonobo.commands.init:register',
'inspect = bonobo.commands.inspect:register',
'run = bonobo.commands.run:register',
'version = bonobo.commands.version:register',
'download = bonobo.commands.download:register',
'convert = bonobo.commands.convert:ConvertCommand',
'init = bonobo.commands.init:InitCommand',
'inspect = bonobo.commands.inspect:InspectCommand',
'run = bonobo.commands.run:RunCommand',
'version = bonobo.commands.version:VersionCommand',
'download = bonobo.commands.download:DownloadCommand',
],
}
)
python.add_requirements(
'colorama >=0.3,<1.0',
'fs >=2.0,<3.0',
'fs >=2.0,<2.1',
'jinja2 >=2.9,<2.10',
'mondrian >=0.4,<0.5',
'packaging >=16,<17',
'psutil >=5.2,<6.0',
'psutil >=5.4,<6.0',
'requests >=2.0,<3.0',
'stevedore >=1.21,<2.0',
'python-dotenv >=0.7.1,<1.0',
'stevedore >=1.27,<1.28',
'whistle >=1.0,<1.1',
dev=[
'cookiecutter >=1.5,<1.6',
'pytest-sugar >=0.8,<0.9',
'pytest-timeout >=1,<2',
],
docker=[
'bonobo-docker',
'bonobo-docker >=0.5.0',
],
jupyter=[
'jupyter >=1.0,<1.1',
'ipywidgets >=6.0.0,<7',
]
],
sqlalchemy=[
'bonobo-sqlalchemy >=0.5.1',
],
)
# Following requirements are not enforced, because some dependencies enforce them so we don't want to break
# the packaging in case it changes in dep.
python.add_requirements('colorama >=0.3', )
# vim: ft=python:

View File

@ -9,6 +9,7 @@
import sys
assert (sys.version_info >= (3, 5)), 'Python 3.5+ is required to use Bonobo.'
from bonobo._api import *
from bonobo._api import __all__
from bonobo._version import __version__

View File

@ -1,27 +1,43 @@
import logging
from bonobo.execution.strategies import create_strategy
from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \
PickleReader, PickleWriter, PrettyPrinter, RateLimited, Tee, arg0_to_kwargs, count, identity, kwargs_to_arg0, noop
from bonobo.nodes import LdjsonReader, LdjsonWriter
from bonobo.strategies import create_strategy
from bonobo.structs import Bag, ErrorBag, Graph, Token
from bonobo.util import get_name
from bonobo.util.environ import parse_args, get_argument_parser
__all__ = []
def register_api(x, __all__=__all__):
    """
    Record the name of ``x`` in the public API listing and hand ``x`` back, so it is usable as a decorator.

    :param x: object to expose; its name is resolved with ``get_name``.
    :param list __all__: list receiving the name (defaults to this module's ``__all__``).
    :return: ``x``, unchanged.
    """
    public_name = get_name(x)
    __all__.append(public_name)
    return x
def register_graph_api(x, __all__=__all__):
    """
    Register a function as being part of Bonobo's API, after checking that its signature contains the right
    parameters to work correctly, then returns the original function.

    The signature must start with a ``graph`` parameter and must define ``plugins``, ``services`` and ``strategy``.

    :param x: callable to check and register.
    :param list __all__: list receiving the name, forwarded to ``register_api``.
    :return: ``x``, as returned by ``register_api``.
    :raises AssertionError: if the signature does not satisfy the constraints above.
    """
    from inspect import signature

    parameters = list(signature(x).parameters)
    required_parameters = {'plugins', 'services', 'strategy'}

    # Compare a slice instead of indexing, so a callable without any parameter fails with the explicit message
    # below rather than with an unrelated IndexError.
    assert parameters[:1] == ['graph'], 'First parameter of a graph api function must be "graph".'
    assert required_parameters.issubset(parameters), (
        'Graph api functions must define the following parameters: ' + ', '.join(sorted(required_parameters))
    )

    return register_api(x, __all__=__all__)
def register_api_group(*args):
    """Register each positional argument, in the order given, through ``register_api``."""
    for member in args:
        register_api(member)
@register_api
def run(graph, strategy=None, plugins=None, services=None):
@register_graph_api
def run(graph, *, plugins=None, services=None, strategy=None):
"""
Main entry point of bonobo. It takes a graph and creates all the necessary plumbery around to execute it.
@ -36,12 +52,11 @@ def run(graph, strategy=None, plugins=None, services=None):
You'll probably want to provide a services dictionary mapping service names to service instances.
:param Graph graph: The :class:`Graph` to execute.
:param str strategy: The :class:`bonobo.strategies.base.Strategy` to use.
:param str strategy: The :class:`bonobo.execution.strategies.base.Strategy` to use.
:param list plugins: The list of plugins to enhance execution.
:param dict services: The implementations of services this graph will use.
:return bonobo.execution.graph.GraphExecutionContext:
"""
strategy = create_strategy(strategy)
plugins = plugins or []
@ -50,7 +65,10 @@ def run(graph, strategy=None, plugins=None, services=None):
if not settings.QUIET.get(): # pragma: no cover
if _is_interactive_console():
from bonobo.ext.console import ConsoleOutputPlugin
import mondrian
mondrian.setup(excepthook=True)
from bonobo.plugins.console import ConsoleOutputPlugin
if ConsoleOutputPlugin not in plugins:
plugins.append(ConsoleOutputPlugin)
@ -58,6 +76,7 @@ def run(graph, strategy=None, plugins=None, services=None):
try:
from bonobo.ext.jupyter import JupyterOutputPlugin
except ImportError:
import logging
logging.warning(
'Failed to load jupyter widget. Easiest way is to install the optional "jupyter" '
'dependencies with «pip install bonobo[jupyter]», but you can also install a specific '
@ -67,13 +86,34 @@ def run(graph, strategy=None, plugins=None, services=None):
if JupyterOutputPlugin not in plugins:
plugins.append(JupyterOutputPlugin)
import logging
logging.getLogger().setLevel(settings.LOGGING_LEVEL.get())
strategy = create_strategy(strategy)
return strategy.execute(graph, plugins=plugins, services=services)
# bonobo.structs
def _inspect_as_graph(graph):
    """Return what the graph's own ``_repr_dot_()`` method produces."""
    rendered = graph._repr_dot_()
    return rendered
_inspect_formats = {'graph': _inspect_as_graph}
@register_graph_api
def inspect(graph, *, plugins=None, services=None, strategy=None, format):
    """
    Print a representation of ``graph`` rendered by the formatter registered under ``format``.

    ``plugins``, ``services`` and ``strategy`` are not used here; they are part of the signature because
    ``register_graph_api`` requires them on every graph api function.

    :param graph: the graph to inspect.
    :param str format: key of ``_inspect_formats`` selecting the output format (keyword-only, mandatory).
    :raises NotImplementedError: if ``format`` is not one of the known output formats.
    """
    if format not in _inspect_formats:
        raise NotImplementedError(
            'Output format {} not implemented. Choices are: {}.'.format(
                format, ', '.join(sorted(_inspect_formats.keys()))
            )
        )
    print(_inspect_formats[format](graph))
# data structures
register_api_group(Bag, ErrorBag, Graph, Token)
# bonobo.strategies
# execution strategies
register_api(create_strategy)
@ -102,7 +142,7 @@ def open_fs(fs_url=None, *args, **kwargs):
return _open_fs(expanduser(str(fs_url)), *args, **kwargs)
# bonobo.nodes
# standard transformations
register_api_group(
CsvReader,
CsvWriter,
@ -149,3 +189,6 @@ def get_examples_path(*pathsegments):
@register_api
def open_examples_fs(*pathsegments):
return open_fs(get_examples_path(*pathsegments))
register_api_group(get_argument_parser, parse_args)

View File

@ -1 +1 @@
__version__ = '0.5.1'
__version__ = '0.6.dev0'

View File

@ -1,11 +1,23 @@
import argparse
import logging
from bonobo import logging, settings
logger = logging.get_logger()
import mondrian
from bonobo import settings
from bonobo.commands.base import BaseCommand, BaseGraphCommand
def entrypoint(args=None):
"""
Main callable for "bonobo" entrypoint.
Will load commands from "bonobo.commands" entrypoints, using stevedore.
"""
mondrian.setup(excepthook=True)
logger = logging.getLogger()
logger.setLevel(settings.LOGGING_LEVEL.get())
parser = argparse.ArgumentParser()
parser.add_argument('--debug', '-D', action='store_true')
@ -17,7 +29,15 @@ def entrypoint(args=None):
def register_extension(ext, commands=commands):
try:
parser = subparsers.add_parser(ext.name)
commands[ext.name] = ext.plugin(parser)
if isinstance(ext.plugin, type) and issubclass(ext.plugin, BaseCommand):
# current way, class based.
cmd = ext.plugin()
cmd.add_arguments(parser)
cmd.__name__ = ext.name
commands[ext.name] = cmd.handle
else:
# old school, function based.
commands[ext.name] = ext.plugin(parser)
except Exception:
logger.exception('Error while loading command {}.'.format(ext.name))
@ -25,11 +45,17 @@ def entrypoint(args=None):
mgr = ExtensionManager(namespace='bonobo.commands')
mgr.map(register_extension)
args = parser.parse_args(args).__dict__
if args.pop('debug', False):
parsed_args = parser.parse_args(args).__dict__
if parsed_args.pop('debug', False):
settings.DEBUG.set(True)
settings.LOGGING_LEVEL.set(logging.DEBUG)
logging.set_level(settings.LOGGING_LEVEL.get())
logger.setLevel(settings.LOGGING_LEVEL.get())
logger.debug('Command: ' + args['command'] + ' Arguments: ' + repr(args))
commands[args.pop('command')](**args)
logger.debug('Command: ' + parsed_args['command'] + ' Arguments: ' + repr(parsed_args))
# Get command handler, execute, rinse.
command = commands[parsed_args.pop('command')]
command(**parsed_args)
return 0

129
bonobo/commands/base.py Normal file
View File

@ -0,0 +1,129 @@
import argparse
import logging
import runpy
import sys
from contextlib import contextmanager
import bonobo.util.environ
from bonobo.util import get_name
from bonobo.util.environ import get_argument_parser, parse_args
class BaseCommand:
    """
    Common ancestor of the class based CLI commands.

    Subclasses declare their arguments in :meth:`add_arguments` and implement their behaviour in :meth:`handle`.
    """

    @property
    def logger(self):
        """Logger named after this command (using ``get_name``), created on first access then cached."""
        if not hasattr(self, '_logger'):
            self._logger = logging.getLogger(get_name(self))
        return self._logger

    def add_arguments(self, parser):
        """
        Hook for subclasses that need custom command line arguments. Does nothing by default.

        :param parser: the argument parser dedicated to this command.
        """

    def handle(self, *args, **options):
        """
        Run the command. There is no default behaviour, subclasses have to override this method.

        :raises NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError('Subclasses of BaseCommand must provide a handle() method')
class BaseGraphCommand(BaseCommand):
    """
    Base class for CLI commands working on a graph defined either in a python file or in a module.

    The target is executed as ``__main__`` while ``bonobo.run`` is temporarily replaced by a recorder. The graph
    and the keyword arguments the target passed to ``run`` are captured this way, then given to :attr:`handler`.
    """

    # Passed as ``required=`` to the mutually exclusive "file or -m module" argument group.
    required = True

    # Callable used by do_handle(); do_handle() raises as long as this is falsy.
    handler = None

    def add_arguments(self, parser):
        """
        Declare the two (mutually exclusive) ways to provide a target, then let ``get_argument_parser`` extend the
        parser.

        :param parser: the argument parser dedicated to this command.
        :return: the value returned by ``get_argument_parser(parser)``.
        """
        target_group = parser.add_mutually_exclusive_group(required=self.required)
        target_group.add_argument('file', nargs='?', type=str)
        target_group.add_argument('-m', dest='mod', type=str)

        return get_argument_parser(parser)

    def parse_options(self, **options):
        """Hook to consume or validate command specific options; returns the options left for the handler."""
        return options

    def handle(self, file, mod, **options):
        """
        Read the graph from the target and pass it to :meth:`do_handle`.

        :param file: path of the target, or a falsy value.
        :param mod: name of the target module, or a falsy value.
        :return: the value returned by :meth:`do_handle`.
        """
        remaining = self.parse_options(**options)
        with self.read(file, mod, **remaining) as (graph, graph_execution_options, options):
            return self.do_handle(graph, **graph_execution_options, **options)

    def do_handle(self, graph, **options):
        """
        Call :attr:`handler` with the graph and the options.

        :raises RuntimeError: if no handler is defined.
        """
        if self.handler:
            return self.handler(graph, **options)
        raise RuntimeError('{} has no handler defined.'.format(get_name(self)))

    @contextmanager
    def read(self, file, mod, **options):
        """
        Context manager executing the target and yielding ``(graph, graph_execution_options, options)``.

        While the target runs, ``sys.argv`` only contains the target name; the previous value is restored
        afterwards, whatever happens.

        :raises RuntimeError: if there is neither a file nor a module, or if no graph was recorded.
        """
        recorded = {'graph': None, 'options': None}

        def _record(graph, **graph_execution_options):
            # Stands in for bonobo.run: keep the arguments instead of executing anything.
            recorded['graph'] = graph
            recorded['options'] = graph_execution_options

        with _override_runner(_record), parse_args(options) as options:
            saved_argv = sys.argv
            try:
                if file:
                    sys.argv = [file]
                    self._run_path(file)
                elif mod:
                    sys.argv = [mod]
                    self._run_module(mod)
                else:
                    raise RuntimeError('No target provided.')
            finally:
                sys.argv = saved_argv

            if recorded['graph'] is None:
                raise RuntimeError('Could not find graph.')

            yield recorded['graph'], recorded['options'], options

    def _run_path(self, file):
        """Execute the given path with ``runpy``, as ``__main__``."""
        return runpy.run_path(file, run_name='__main__')

    def _run_module(self, mod):
        """Execute the given module with ``runpy``, as ``__main__``."""
        return runpy.run_module(mod, run_name='__main__')
@contextmanager
def _override_runner(runner):
    """
    Context manager replacing ``bonobo.run`` by ``runner`` for the duration of the block.

    ``bonobo.util.environ.get_argument_parser`` is replaced too. The original comment explains that the
    environment options were already parsed at this point, so the replacement only returns the parser it is given
    or a new empty one. Both attributes get their previous value back on exit, even if the block raised.

    :param runner: the callable that will handle the `run()` logic.
    """
    import bonobo

    environ = bonobo.util.environ
    original_get_argument_parser = environ.get_argument_parser
    original_run = bonobo.run

    def get_argument_parser(parser=None):
        return parser or argparse.ArgumentParser()

    try:
        environ.get_argument_parser = get_argument_parser
        bonobo.run = runner

        yield runner
    finally:
        # Put back what was there before we patched.
        environ.get_argument_parser = original_get_argument_parser
        bonobo.run = original_run

View File

@ -1,83 +1,84 @@
import bonobo
from bonobo.commands import BaseCommand
from bonobo.registry import READER, WRITER, default_registry
from bonobo.util.resolvers import _resolve_transformations, _resolve_options
def execute(
input_filename,
output_filename,
reader=None,
reader_option=None,
writer=None,
writer_option=None,
option=None,
transformation=None,
):
reader_factory = default_registry.get_reader_factory_for(input_filename, format=reader)
reader_options = _resolve_options((option or []) + (reader_option or []))
class ConvertCommand(BaseCommand):
def add_arguments(self, parser):
parser.add_argument('input_filename', help='Input filename.')
parser.add_argument('output_filename', help='Output filename.')
parser.add_argument(
'--' + READER,
'-r',
help='Choose the reader factory if it cannot be detected from extension, or if detection is wrong.'
)
parser.add_argument(
'--' + WRITER,
'-w',
help=
'Choose the writer factory if it cannot be detected from extension, or if detection is wrong (use - for console pretty print).'
)
parser.add_argument(
'--transformation',
'-t',
dest='transformation',
action='append',
help='Add a transformation between input and output (can be used multiple times, order is preserved).',
)
parser.add_argument(
'--option',
'-O',
dest='option',
action='append',
help='Add a named option to both reader and writer factories (i.e. foo="bar").',
)
parser.add_argument(
'--' + READER + '-option',
'-' + READER[0].upper(),
dest=READER + '_option',
action='append',
help='Add a named option to the reader factory.',
)
parser.add_argument(
'--' + WRITER + '-option',
'-' + WRITER[0].upper(),
dest=WRITER + '_option',
action='append',
help='Add a named option to the writer factory.',
)
if output_filename == '-':
writer_factory = bonobo.PrettyPrinter
else:
writer_factory = default_registry.get_writer_factory_for(output_filename, format=writer)
writer_options = _resolve_options((option or []) + (writer_option or []))
def handle(
self,
input_filename,
output_filename,
reader=None,
reader_option=None,
writer=None,
writer_option=None,
option=None,
transformation=None
):
reader_factory = default_registry.get_reader_factory_for(input_filename, format=reader)
reader_options = _resolve_options((option or []) + (reader_option or []))
transformations = _resolve_transformations(transformation)
if output_filename == '-':
writer_factory = bonobo.PrettyPrinter
else:
writer_factory = default_registry.get_writer_factory_for(output_filename, format=writer)
writer_options = _resolve_options((option or []) + (writer_option or []))
graph = bonobo.Graph()
graph.add_chain(
reader_factory(input_filename, **reader_options),
*transformations,
writer_factory(output_filename, **writer_options),
)
transformations = _resolve_transformations(transformation)
return bonobo.run(
graph, services={
'fs': bonobo.open_fs(),
}
)
graph = bonobo.Graph()
graph.add_chain(
reader_factory(input_filename, **reader_options),
*transformations,
writer_factory(output_filename, **writer_options),
)
def register(parser):
parser.add_argument('input-filename', help='Input filename.')
parser.add_argument('output-filename', help='Output filename.')
parser.add_argument(
'--' + READER,
'-r',
help='Choose the reader factory if it cannot be detected from extension, or if detection is wrong.'
)
parser.add_argument(
'--' + WRITER,
'-w',
help=
'Choose the writer factory if it cannot be detected from extension, or if detection is wrong (use - for console pretty print).'
)
parser.add_argument(
'--transformation',
'-t',
dest='transformation',
action='append',
help='Add a transformation between input and output (can be used multiple times, order is preserved).',
)
parser.add_argument(
'--option',
'-O',
dest='option',
action='append',
help='Add a named option to both reader and writer factories (i.e. foo="bar").',
)
parser.add_argument(
'--' + READER + '-option',
'-' + READER[0].upper(),
dest=READER + '_option',
action='append',
help='Add a named option to the reader factory.',
)
parser.add_argument(
'--' + WRITER + '-option',
'-' + WRITER[0].upper(),
dest=WRITER + '_option',
action='append',
help='Add a named option to the writer factory.',
)
return execute
return bonobo.run(
graph, services={
'fs': bonobo.open_fs(),
}
)

View File

@ -4,36 +4,30 @@ import re
import requests
import bonobo
from bonobo.commands import BaseCommand
EXAMPLES_BASE_URL = 'https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/'
"""The URL to our git repository, in raw mode."""
def _write_response(response, fout):
"""Read the response and write it to the output stream in chunks."""
for chunk in response.iter_content(io.DEFAULT_BUFFER_SIZE):
fout.write(chunk)
class DownloadCommand(BaseCommand):
    """Command downloading one file of the bonobo examples, from ``EXAMPLES_BASE_URL``."""

    def add_arguments(self, parser):
        """Declare the single positional ``path`` argument."""
        parser.add_argument('path', help='The relative path of the thing to download.')

    def handle(self, *, path, **options):
        """
        Download ``path`` and write it under the path given by ``bonobo.get_examples_path``.

        :param str path: what to download; has to start with ``examples``.
        :raises ValueError: if ``path`` does not start with ``examples``.
        """
        if not path.startswith('examples'):
            raise ValueError('Download command currently supports examples only')

        examples_path = re.sub('^examples/', '', path)
        output_path = bonobo.get_examples_path(examples_path)
        url = EXAMPLES_BASE_URL + examples_path

        with _open_url(url) as response, open(output_path, 'wb') as fout:
            fout.writelines(response.iter_content(io.DEFAULT_BUFFER_SIZE))

        self.logger.info('Download saved to {}'.format(output_path))
def _open_url(url):
"""Open a HTTP connection to the URL and return a file-like object."""
response = requests.get(url, stream=True)
if response.status_code != 200:
raise IOError('unable to download {}, HTTP {}'.format(url, response.status_code))
raise IOError('Unable to download {}, HTTP {}'.format(url, response.status_code))
return response
def execute(path, *args, **kwargs):
path = path.lstrip('/')
if not path.startswith('examples'):
raise ValueError('download command currently supports examples only')
examples_path = re.sub('^examples/', '', path)
output_path = bonobo.get_examples_path(examples_path)
with _open_url(EXAMPLES_BASE_URL + examples_path) as response, open(output_path, 'wb') as fout:
_write_response(response, fout)
print('saved to {}'.format(output_path))
def register(parser):
parser.add_argument('path', help='The relative path of the thing to download.')
return execute

View File

@ -1,28 +1,74 @@
import os
def execute(name, branch):
try:
from cookiecutter.main import cookiecutter
except ImportError as exc:
raise ImportError(
'You must install "cookiecutter" to use this command.\n\n $ pip install cookiecutter\n'
) from exc
from jinja2 import Environment, FileSystemLoader
overwrite_if_exists = False
project_path = os.path.join(os.getcwd(), name)
if os.path.isdir(project_path) and not os.listdir(project_path):
overwrite_if_exists = True
return cookiecutter(
'https://github.com/python-bonobo/cookiecutter-bonobo.git',
extra_context={'name': name},
no_input=True,
checkout=branch,
overwrite_if_exists=overwrite_if_exists
)
from bonobo.commands import BaseCommand
def register(parser):
parser.add_argument('name')
parser.add_argument('--branch', '-b', default='master')
return execute
class InitCommand(BaseCommand):
    """Command generating either a job file from a template, or a whole package using medikit."""

    # Names accepted by --template; each one maps to "<name>.py-tpl" in TEMPLATES_PATH.
    TEMPLATES = {'bare', 'default'}

    # Directory containing the templates, next to this module.
    TEMPLATES_PATH = os.path.join(os.path.dirname(__file__), 'templates')

    def add_arguments(self, parser):
        """Declare ``filename``, ``--force`` and the mutually exclusive ``--template`` / ``--package`` options."""
        parser.add_argument('filename')
        parser.add_argument('--force', '-f', default=False, action='store_true')

        kind_group = parser.add_mutually_exclusive_group(required=False)
        kind_group.add_argument('--template', '-t', choices=self.TEMPLATES, default='default')
        kind_group.add_argument('--package', '-p', action='store_true', default=False)

    def create_file_from_template(self, *, template, filename):
        """
        Render a template into ``filename``. The filename without its extension is given to the template as
        ``name``.

        :param str template: name of the template, without the ``.py-tpl`` suffix.
        :param str filename: file to write; must end with ``.py``.
        :raises ValueError: if the extension of ``filename`` is not ``.py``.
        """
        name, ext = os.path.splitext(filename)
        if ext != '.py':
            raise ValueError('Filenames should end with ".py".')

        environment = Environment(loader=FileSystemLoader(self.TEMPLATES_PATH))
        compiled = environment.get_template(template + '.py-tpl')

        with open(filename, 'w+') as target:
            rendered = compiled.render(name=name)
            target.write(rendered)

        self.logger.info('Generated {} using template {!r}.'.format(filename, template))

    def create_package(self, *, filename):
        """
        Create a package in the ``filename`` directory (relative to the current directory) using medikit, add a
        ``__main__.py`` built from the default template, then print what to do next.

        :param str filename: path of the package; must not have an extension.
        :raises ValueError: if ``filename`` has an extension.
        :raises ImportError: if medikit is not installed.
        """
        _, ext = os.path.splitext(filename)
        if ext != '':
            raise ValueError('Package names should not have an extension.')

        try:
            import medikit.commands
        except ImportError as exc:
            raise ImportError(
                'To initialize a package, you need to install medikit (pip install --upgrade medikit).'
            ) from exc

        package_name = os.path.basename(filename)
        projectfile = os.path.join(os.getcwd(), filename, 'Projectfile')
        medikit.commands.handle_init(projectfile, name=package_name, requirements=['bonobo'])

        self.logger.info('Generated "{}" package with medikit.'.format(package_name))
        self.create_file_from_template(template='default', filename=os.path.join(filename, package_name, '__main__.py'))

        instructions = (
            'Your "{}" package has been created.'.format(package_name),
            '',
            'Install it...',
            '',
            '  pip install --editable {}'.format(filename),
            '',
            'Then maybe run the example...',
            '',
            '  python -m {}'.format(package_name),
            '',
            'Enjoy!',
        )
        for line in instructions:
            print(line)

    def handle(self, *, template, filename, package=False, force=False):
        """
        Create a package if ``package`` is true, a file rendered from ``template`` otherwise.

        :raises FileExistsError: if ``filename`` exists and ``force`` is false.
        """
        if not force and os.path.exists(filename):
            raise FileExistsError('Target filename already exists, use --force to override.')

        if not package:
            self.create_file_from_template(template=template, filename=filename)
            return

        self.create_package(filename=filename)

View File

@ -1,40 +1,15 @@
import json
from bonobo.commands.run import read, register_generic_run_arguments
from bonobo.constants import BEGIN
from bonobo.util.objects import get_name
OUTPUT_GRAPHVIZ = 'graphviz'
import bonobo
from bonobo.commands import BaseGraphCommand
def _ident(graph, i):
escaped_index = str(i)
escaped_name = json.dumps(get_name(graph[i]))
return '{{{} [label={}]}}'.format(escaped_index, escaped_name)
class InspectCommand(BaseGraphCommand):
handler = staticmethod(bonobo.inspect)
def add_arguments(self, parser):
    """Declare the parent class arguments, plus ``--graph`` / ``-g`` which stores ``'graph'`` as ``format``."""
    super().add_arguments(parser)
    parser.add_argument('--graph', '-g', dest='format', action='store_const', const='graph')
def execute(*, output, **kwargs):
graph, plugins, services = read(**kwargs)
if output == OUTPUT_GRAPHVIZ:
print('digraph {')
print(' rankdir = LR;')
print(' "BEGIN" [shape="point"];')
for i in graph.outputs_of(BEGIN):
print(' "BEGIN" -> ' + _ident(graph, i) + ';')
for ix in graph.topologically_sorted_indexes:
for iy in graph.outputs_of(ix):
print(' {} -> {};'.format(_ident(graph, ix), _ident(graph, iy)))
print('}')
else:
raise NotImplementedError('Output type not implemented.')
def register(parser):
register_generic_run_arguments(parser)
parser.add_argument('--graph', '-g', dest='output', action='store_const', const=OUTPUT_GRAPHVIZ)
parser.set_defaults(output=OUTPUT_GRAPHVIZ)
return execute
def parse_options(self, **options):
    """
    Make sure an output format was chosen.

    :return: the options, unchanged.
    :raises RuntimeError: if ``format`` is missing or falsy.
    """
    if options.get('format'):
        return options
    raise RuntimeError('You must provide a format (try --graph).')

View File

@ -1,38 +1,57 @@
import codecs
import os
import sys
from importlib.util import spec_from_file_location, module_from_spec
from pathlib import Path
from dotenv import load_dotenv
import bonobo
from bonobo.constants import DEFAULT_SERVICES_ATTR, DEFAULT_SERVICES_FILENAME
DEFAULT_GRAPH_FILENAMES = (
'__main__.py',
'main.py',
)
DEFAULT_GRAPH_ATTR = 'get_graph'
from bonobo.commands import BaseGraphCommand
def get_default_services(filename, services=None):
dirname = os.path.dirname(filename)
services_filename = os.path.join(dirname, DEFAULT_SERVICES_FILENAME)
if os.path.exists(services_filename):
with open(services_filename) as file:
code = compile(file.read(), services_filename, 'exec')
context = {
'__name__': '__bonobo__',
'__file__': services_filename,
}
exec(code, context)
class RunCommand(BaseGraphCommand):
install = False
handler = staticmethod(bonobo.run)
return {
**context[DEFAULT_SERVICES_ATTR](),
**(services or {}),
}
return services or {}
def add_arguments(self, parser):
    """Declare the parent class arguments, the exclusive ``--quiet`` / ``--verbose`` flags and ``--install``."""
    super().add_arguments(parser)

    verbosity = parser.add_mutually_exclusive_group()
    verbosity.add_argument('--quiet', '-q', action='store_true')
    verbosity.add_argument('--verbose', '-v', action='store_true')

    parser.add_argument('--install', '-I', action='store_true')
def parse_options(self, *, quiet=False, verbose=False, install=False, **options):
    """
    Consume the options specific to this command and return the remaining ones.

    ``quiet`` and ``verbose`` are given to ``set_if_true`` on the ``QUIET`` and ``DEBUG`` settings, ``install`` is
    kept on the instance for the ``_run_path`` / ``_run_module`` hooks.
    """
    from bonobo import settings

    for setting, flag in ((settings.QUIET, quiet), (settings.DEBUG, verbose)):
        setting.set_if_true(flag)

    self.install = install
    return options
def _run_path(self, file):
    """
    Run the given path, after installing requirements if ``--install`` was asked.

    The ``requirements.txt`` file is looked up in ``file`` itself if it is a directory, in its parent directory
    otherwise.
    """
    if self.install:
        base_dir = file if os.path.isdir(file) else os.path.dirname(file)
        _install_requirements(os.path.join(base_dir, 'requirements.txt'))

    return super()._run_path(file)
def _run_module(self, mod):
    """
    Run the given module.

    :raises RuntimeError: if ``--install`` was asked, as it is not implemented for modules (and it is not clear
        that it would even make sense).
    """
    if not self.install:
        return super()._run_module(mod)

    raise RuntimeError('--install behaviour when running a module is not defined.')
def register_generic_run_arguments(parser, required=True):
    """
    Only there for backward compatibility with third party extensions.

    Adds the arguments of a throwaway ``BaseGraphCommand`` to ``parser``.

    TODO: This should be deprecated (using the @deprecated decorator) in 0.7, and removed in 0.8 or 0.9.

    :param parser: the argument parser to populate.
    :param bool required: whether providing a target (file or module) is mandatory.
    :return: the same ``parser``.
    """
    compat_command = BaseGraphCommand()
    compat_command.required = required
    compat_command.add_arguments(parser)
    return parser
def _install_requirements(requirements):
@ -47,138 +66,3 @@ def _install_requirements(requirements):
pip.utils.pkg_resources = importlib.reload(pip.utils.pkg_resources)
import site
importlib.reload(site)
def read(
filename,
module,
install=False,
quiet=False,
verbose=False,
default_env_file=None,
default_env=None,
env_file=None,
env=None
):
import runpy
from bonobo import Graph, settings
if quiet:
settings.QUIET.set(True)
if verbose:
settings.DEBUG.set(True)
if filename:
if os.path.isdir(filename):
if install:
requirements = os.path.join(filename, 'requirements.txt')
_install_requirements(requirements)
pathname = filename
for filename in DEFAULT_GRAPH_FILENAMES:
filename = os.path.join(pathname, filename)
if os.path.exists(filename):
break
if not os.path.exists(filename):
raise IOError('Could not find entrypoint (candidates: {}).'.format(', '.join(DEFAULT_GRAPH_FILENAMES)))
elif install:
requirements = os.path.join(os.path.dirname(filename), 'requirements.txt')
_install_requirements(requirements)
spec = spec_from_file_location('__bonobo__', filename)
main = sys.modules['__bonobo__'] = module_from_spec(spec)
main.__path__ = [os.path.dirname(filename)]
main.__package__ = '__bonobo__'
spec.loader.exec_module(main)
context = main.__dict__
elif module:
context = runpy.run_module(module, run_name='__bonobo__')
filename = context['__file__']
else:
raise RuntimeError('UNEXPECTED: argparse should not allow this.')
env_dir = Path(filename).parent or Path(module).parent
if default_env_file:
for f in default_env_file:
env_file_path = str(env_dir.joinpath(f))
load_dotenv(env_file_path)
if default_env:
for e in default_env:
set_env_var(e)
if env_file:
for f in env_file:
env_file_path = str(env_dir.joinpath(f))
load_dotenv(env_file_path, override=True)
if env:
for e in env:
set_env_var(e, override=True)
graphs = dict((k, v) for k, v in context.items() if isinstance(v, Graph))
assert len(graphs) == 1, (
'Having zero or more than one graph definition in one file is unsupported for now, '
'but it is something that will be implemented in the future.\n\nExpected: 1, got: {}.'
).format(len(graphs))
graph = list(graphs.values())[0]
plugins = []
services = get_default_services(
filename, context.get(DEFAULT_SERVICES_ATTR)() if DEFAULT_SERVICES_ATTR in context else None
)
return graph, plugins, services
def set_env_var(e, override=False):
    """
    Set one environment variable from a ``NAME=value`` string.

    If the value is enclosed in matching single or double quotes, the quotes are removed and the escape sequences
    found in between are decoded (``unicode_escape``). Unquoted values are used as is.

    :param str e: the ``NAME=value`` definition; only the first ``=`` is used as a separator.
    :param bool override: replace an already defined variable (otherwise existing values are kept).
    :raises ValueError: if ``e`` does not contain ``=``.
    """
    ename, evalue = e.split('=', 1)

    # A quoted value needs at least two characters: a lone quote character is a literal value, it used to be
    # mistaken for an empty quoted string.
    is_quoted = len(evalue) >= 2 and evalue[0] == evalue[-1] and evalue[0] in ('"', "'")
    if is_quoted:
        evalue = codecs.getdecoder('unicode_escape')(evalue[1:-1])[0]

    if override:
        os.environ[ename] = evalue
    else:
        os.environ.setdefault(ename, evalue)
def execute(
filename,
module,
install=False,
quiet=False,
verbose=False,
default_env_file=None,
default_env=None,
env_file=None,
env=None
):
graph, plugins, services = read(
filename, module, install, quiet, verbose, default_env_file, default_env, env_file, env
)
return bonobo.run(graph, plugins=plugins, services=services)
def register_generic_run_arguments(parser, required=True):
source_group = parser.add_mutually_exclusive_group(required=required)
source_group.add_argument('filename', nargs='?', type=str)
source_group.add_argument('--module', '-m', type=str)
parser.add_argument('--default-env-file', action='append')
parser.add_argument('--default-env', action='append')
parser.add_argument('--env-file', action='append')
parser.add_argument('--env', '-e', action='append')
return parser
def register(parser):
parser = register_generic_run_arguments(parser)
verbosity_group = parser.add_mutually_exclusive_group()
verbosity_group.add_argument('--quiet', '-q', action='store_true')
verbosity_group.add_argument('--verbose', '-v', action='store_true')
parser.add_argument('--install', '-I', action='store_true')
return execute

View File

@ -0,0 +1,15 @@
import bonobo
def get_graph(**options):
    """Return the graph to execute, empty for now. The options are accepted but not used."""
    return bonobo.Graph()
def get_services(**options):
    """Return the services dictionary, empty for now. The options are accepted but not used."""
    return dict()
# Only executed when this file is run as a script.
if __name__ == '__main__':
    with bonobo.parse_args() as options:
        graph = get_graph(**options)
        services = get_services(**options)
        bonobo.run(graph, services=services)

View File

@ -0,0 +1,55 @@
import bonobo
def extract():
    """Placeholder yielding two words, one at a time. Change, rename, remove..."""
    yield from ('hello', 'world')
def transform(*args):
    """Placeholder yielding one tuple with each argument passed through ``str.title``. Change, rename, remove..."""
    titled = tuple(str.title(value) for value in args)
    yield titled
def load(*args):
    """Placeholder printing whatever it receives, on one line. Change, rename, remove..."""
    print(*args)
def get_graph(**options):
    """
    Build the graph that will be executed: a single chain made of extract, transform then load.

    The options are accepted but not used.

    :return: bonobo.Graph
    """
    pipeline = bonobo.Graph()
    pipeline.add_chain(extract, transform, load)
    return pipeline
def get_services(**options):
    """
    Build the services dictionary, a plain dict mapping names to implementations, used by bonobo for runtime
    injection.

    What is returned here is used on top of the defaults provided by bonobo (fs, http, ...): override them, add
    your own services under the names you like, or leave the dictionary empty to keep the defaults.

    :return: dict
    """
    return dict()
# The __main__ block actually executes the graph.
if __name__ == '__main__':
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        graph = get_graph(**options)
        services = get_services(**options)
        bonobo.run(graph, services=services)

View File

@ -1,4 +1,30 @@
def format_version(mod, *, name=None, quiet=False):
from bonobo.commands import BaseCommand
class VersionCommand(BaseCommand):
    """Command printing the version of bonobo and, on demand, of the other packages of ``bonobo_packages``."""

    def add_arguments(self, parser):
        """Declare ``--all`` / ``-a`` (flag) and ``--quiet`` / ``-q`` (counted)."""
        parser.add_argument('--all', '-a', action='store_true')
        parser.add_argument('--quiet', '-q', action='count')

    def handle(self, *, all=False, quiet=False):
        """
        Print the version of bonobo then, if ``all`` is true, one line for each other package, sorted by name.

        A package that cannot be imported, or whose version cannot be formatted, gets a line describing the
        problem instead of stopping the command.

        :param all: also print the other packages.
        :param quiet: forwarded to ``_format_version``.
        """
        import bonobo
        from bonobo.util.pkgs import bonobo_packages

        print(_format_version(bonobo, quiet=quiet))

        if not all:
            return

        for name in sorted(bonobo_packages):
            if name == 'bonobo':
                # Already printed above.
                continue

            try:
                mod = __import__(name.replace('-', '_'))
            except ImportError as exc:
                print('{} is not importable ({}).'.format(name, exc))
                continue

            try:
                print(_format_version(mod, name=name, quiet=quiet))
            except Exception as exc:
                print('{} ({})'.format(name, exc))
def _format_version(mod, *, name=None, quiet=False):
from bonobo.util.pkgs import bonobo_packages
args = {
'name': name or mod.__name__,
@ -14,27 +40,3 @@ def format_version(mod, *, name=None, quiet=False):
return '{version}'.format(**args)
raise RuntimeError('Hard to be so quiet...')
def execute(all=False, quiet=False):
import bonobo
from bonobo.util.pkgs import bonobo_packages
print(format_version(bonobo, quiet=quiet))
if all:
for name in sorted(bonobo_packages):
if name != 'bonobo':
try:
mod = __import__(name.replace('-', '_'))
try:
print(format_version(mod, name=name, quiet=quiet))
except Exception as exc:
print('{} ({})'.format(name, exc))
except ImportError as exc:
print('{} is not importable ({}).'.format(name, exc))
def register(parser):
    """
    Declare the command line arguments of the version command on ``parser``.

    :param parser: argparse-like parser exposing ``add_argument``
    :return: the callable implementing the command
    """
    options = (
        (('--all', '-a'), 'store_true'),
        (('--quiet', '-q'), 'count'),
    )
    for flags, action in options:
        parser.add_argument(*flags, action=action)
    return execute

View File

@ -1,3 +0,0 @@
ON_START = 'bonobo.on_start'
ON_TICK = 'bonobo.on_tick'
ON_STOP = 'bonobo.on_stop'

View File

@ -1,23 +0,0 @@
def require(package, requirement=None):
    """
    Import and return ``package``, printing installation instructions if it cannot be imported.

    :param package: name passed to ``__import__``
    :param requirement: name shown in the ``pip install`` hint (defaults to ``package``)
    :return: whatever ``__import__(package)`` returns
    :raises ImportError: re-raised after the hint has been printed
    """
    try:
        return __import__(package)
    except ImportError:
        from colorama import Fore, Style

        wanted = requirement or package
        hints = (
            'This example requires the {!r} package. Install it using:'.format(wanted),
            ' $ pip install {!s}'.format(wanted),
        )
        # Each hint is printed in yellow and followed by an empty line.
        for hint in hints:
            print(Fore.YELLOW, hint, Style.RESET_ALL, sep='')
            print()
        raise

27
bonobo/examples/clock.py Normal file
View File

@ -0,0 +1,27 @@
import bonobo
import datetime
import time
def extract():
    """Yield the current datetime sixty times, pausing one second before every value but the first."""
    yield datetime.datetime.now()
    for _ in range(59):
        time.sleep(1)
        yield datetime.datetime.now()
def get_graph():
    """Build and return a graph made of a single chain: ``extract`` followed by ``print``."""
    chain = (extract, print)
    pipeline = bonobo.Graph()
    pipeline.add_chain(*chain)
    return pipeline
if __name__ == '__main__':
parser = bonobo.get_argument_parser()
with bonobo.parse_args(parser):
bonobo.run(get_graph())

View File

@ -14,7 +14,7 @@ Extracts a list of parisian bars where you can buy a coffee for a reasonable pri
"""
import bonobo
from bonobo.commands.run import get_default_services
from bonobo.commands import get_default_services
from bonobo.ext.opendatasoft import OpenDataSoftAPI
filename = 'coffeeshops.txt'

View File

@ -19,7 +19,7 @@ import json
from colorama import Fore, Style
import bonobo
from bonobo.commands.run import get_default_services
from bonobo.commands import get_default_services
from bonobo.ext.opendatasoft import OpenDataSoftAPI
try:

View File

@ -0,0 +1,27 @@
"""
This transformation extracts the environment and prints it, sorted alphabetically, one item per line.
Used in the bonobo tests around environment management.
"""
import os
import bonobo
def extract_environ():
    """Yield every ``(name, value)`` pair of the process environment, in sorted order."""
    for entry in sorted(os.environ.items()):
        yield entry
def get_graph():
    """Build and return a graph chaining ``extract_environ`` into ``print``."""
    pipeline = bonobo.Graph()
    pipeline.add_chain(*(extract_environ, print))
    return pipeline
if __name__ == '__main__':
parser = bonobo.get_argument_parser()
with bonobo.parse_args(parser):
bonobo.run(get_graph())

View File

@ -1,3 +0,0 @@
MY_SECRET=321
TEST_USER_PASSWORD=sweetpassword
PATH=marzo

View File

@ -1,2 +0,0 @@
TEST_USER_PASSWORD=not_sweet_password
PATH='abril'

View File

@ -1,23 +0,0 @@
import os
import bonobo
def extract():
    """Yield the values of the MY_SECRET, TEST_USER_PASSWORD and PATH environment variables, in that order."""
    # All variables are read up front, before the first value is yielded.
    values = [os.getenv(variable) for variable in ('MY_SECRET', 'TEST_USER_PASSWORD', 'PATH')]
    yield from values
def load(s: str):
    """Print ``s`` on the standard output."""
    print(s)
    return None
graph = bonobo.Graph(extract, load)
if __name__ == '__main__':
bonobo.run(graph)

View File

@ -1,25 +0,0 @@
import os
import bonobo
def extract():
    """
    Yield the values of ENV_TEST_USER, ENV_TEST_NUMBER, ENV_TEST_STRING and USER, in that order.

    The three ENV_TEST_* variables fall back to 'user', 'number' and 'string'; USER has no fallback.
    """
    lookups = (
        ('ENV_TEST_USER', 'user'),
        ('ENV_TEST_NUMBER', 'number'),
        ('ENV_TEST_STRING', 'string'),
        ('USER', None),
    )
    # All variables are read up front, before the first value is yielded.
    values = [os.getenv(variable, fallback) for variable, fallback in lookups]
    yield from values
def load(s: str):
    """Print ``s`` on the standard output."""
    print(s)
    return None
graph = bonobo.Graph(extract, load)
if __name__ == '__main__':
bonobo.run(graph)

View File

@ -1,5 +1,5 @@
import bonobo
from bonobo.commands.run import get_default_services
from bonobo.commands import get_default_services
graph = bonobo.Graph(
bonobo.CsvReader('datasets/coffeeshops.txt', headers=('item', )),

View File

@ -1,6 +1,6 @@
import bonobo
from bonobo import Bag
from bonobo.commands.run import get_default_services
from bonobo.commands import get_default_services
def get_fields(**row):

View File

@ -28,7 +28,7 @@ messages categorized as spam, and (3) prints the output.
'''
import bonobo
from bonobo.commands.run import get_default_services
from bonobo.commands import get_default_services
from fs.tarfs import TarFS

View File

@ -1,5 +1,5 @@
import bonobo
from bonobo.commands.run import get_default_services
from bonobo.commands import get_default_services
def skip_comments(line):

View File

@ -8,9 +8,7 @@ def split_one(line):
graph = bonobo.Graph(
bonobo.FileReader('coffeeshops.txt'),
split_one,
bonobo.JsonWriter(
'coffeeshops.json', fs='fs.output'
),
bonobo.JsonWriter('coffeeshops.json', fs='fs.output'),
)

View File

@ -1,3 +1,7 @@
from bonobo.util.python import require
import bonobo
from bonobo.examples.types.strings import get_graph
graph = require('strings').graph
if __name__ == '__main__':
parser = bonobo.get_argument_parser()
with bonobo.parse_args(parser):
bonobo.run(get_graph())

View File

@ -14,7 +14,7 @@ Example on how to use symple python strings to communicate between transformatio
"""
from random import randint
from bonobo import Graph
import bonobo
def extract():
@ -31,9 +31,11 @@ def load(s: str):
print(s)
graph = Graph(extract, transform, load)
def get_graph():
return bonobo.Graph(extract, transform, load)
if __name__ == '__main__':
from bonobo import run
run(graph)
parser = bonobo.get_argument_parser()
with bonobo.parse_args(parser):
bonobo.run(get_graph())

View File

@ -1 +1,5 @@
from bonobo.execution.graph import GraphExecutionContext, NodeExecutionContext, PluginExecutionContext
import logging
logger = logging.getLogger(__name__)
__all__ = []

View File

@ -1,11 +1,13 @@
import traceback
import logging
import sys
from contextlib import contextmanager
from time import sleep
from logging import WARNING, ERROR
import mondrian
from bonobo.config import create_container
from bonobo.config.processors import ContextCurrifier
from bonobo.execution import logger
from bonobo.util import isconfigurabletype
from bonobo.util.errors import print_error
from bonobo.util.objects import Wrapper, get_name
@ -14,7 +16,7 @@ def recoverable(error_handler):
try:
yield
except Exception as exc: # pylint: disable=broad-except
error_handler(exc, traceback.format_exc())
error_handler(*sys.exc_info(), level=ERROR)
@contextmanager
@ -22,13 +24,12 @@ def unrecoverable(error_handler):
try:
yield
except Exception as exc: # pylint: disable=broad-except
error_handler(exc, traceback.format_exc())
error_handler(*sys.exc_info(), level=ERROR)
raise # raise unrecoverableerror from x ?
class LoopingExecutionContext(Wrapper):
alive = True
PERIOD = 0.25
PERIOD = 0.5
@property
def started(self):
@ -38,6 +39,25 @@ class LoopingExecutionContext(Wrapper):
def stopped(self):
return self._stopped
@property
def defunct(self):
return self._defunct
@property
def alive(self):
return self._started and not self._stopped
@property
def status(self):
"""One character status for this node. """
if self._defunct:
return '!'
if not self.started:
return ' '
if not self.stopped:
return '+'
return '-'
def __init__(self, wrapped, parent, services=None):
super().__init__(wrapped)
@ -52,7 +72,7 @@ class LoopingExecutionContext(Wrapper):
else:
self.services = None
self._started, self._stopped = False, False
self._started, self._stopped, self._defunct = False, False, False
self._stack = None
def __enter__(self):
@ -68,21 +88,22 @@ class LoopingExecutionContext(Wrapper):
self._started = True
self._stack = ContextCurrifier(self.wrapped, *self._get_initial_context())
if isconfigurabletype(self.wrapped):
# Not normal to have a partially configured object here, so let's warn the user instead of having get into
# the hard trouble of understanding that by himself.
raise TypeError(
'The Configurable should be fully instanciated by now, unfortunately I got a PartiallyConfigured object...'
)
self._stack.setup(self)
try:
self._stack = ContextCurrifier(self.wrapped, *self._get_initial_context())
if isconfigurabletype(self.wrapped):
# Not normal to have a partially configured object here, so let's warn the user instead of having get into
# the hard trouble of understanding that by himself.
raise TypeError(
'The Configurable should be fully instanciated by now, unfortunately I got a PartiallyConfigured object...'
)
self._stack.setup(self)
except Exception:
return self.fatal(sys.exc_info())
def loop(self):
"""Generic loop. A bit boring. """
while self.alive:
self.step()
sleep(self.PERIOD)
def step(self):
"""Left as an exercise for the children."""
@ -101,12 +122,17 @@ class LoopingExecutionContext(Wrapper):
finally:
self._stopped = True
def handle_error(self, exc, trace):
return print_error(exc, trace, context=self.wrapped)
def _get_initial_context(self):
if self.parent:
return self.parent.services.args_for(self.wrapped)
if self.services:
return self.services.args_for(self.wrapped)
return ()
def handle_error(self, exctype, exc, tb, *, level=logging.ERROR):
logging.getLogger(__name__).log(level, repr(self), exc_info=(exctype, exc, tb))
def fatal(self, exc_info):
self._defunct = True
self.input.shutdown()
self.handle_error(*exc_info, level=logging.CRITICAL)

View File

@ -1,16 +1,20 @@
import time
from functools import partial
from time import sleep
from bonobo.config import create_container
from bonobo.constants import BEGIN, END
from bonobo.execution.node import NodeExecutionContext
from bonobo.execution.plugin import PluginExecutionContext
from bonobo.execution import events
from bonobo.execution.contexts.node import NodeExecutionContext
from bonobo.execution.contexts.plugin import PluginExecutionContext
from whistle import EventDispatcher
class GraphExecutionContext:
NodeExecutionContextType = NodeExecutionContext
PluginExecutionContextType = PluginExecutionContext
TICK_PERIOD = 0.25
@property
def started(self):
return any(node.started for node in self.nodes)
@ -23,7 +27,8 @@ class GraphExecutionContext:
def alive(self):
return any(node.alive for node in self.nodes)
def __init__(self, graph, plugins=None, services=None):
def __init__(self, graph, plugins=None, services=None, dispatcher=None):
self.dispatcher = dispatcher or EventDispatcher()
self.graph = graph
self.nodes = [self.create_node_execution_context_for(node) for node in self.graph]
self.plugins = [self.create_plugin_execution_context_for(plugin) for plugin in plugins or ()]
@ -53,6 +58,8 @@ class GraphExecutionContext:
return self.NodeExecutionContextType(node, parent=self)
def create_plugin_execution_context_for(self, plugin):
if isinstance(plugin, type):
plugin = plugin()
return self.PluginExecutionContextType(plugin, parent=self)
def write(self, *messages):
@ -63,23 +70,46 @@ class GraphExecutionContext:
for message in messages:
self[i].write(message)
def dispatch(self, name):
self.dispatcher.dispatch(name, events.ExecutionEvent(self))
def start(self, starter=None):
self.register_plugins()
self.dispatch(events.START)
self.tick(pause=False)
for node in self.nodes:
if starter is None:
node.start()
else:
starter(node)
self.dispatch(events.STARTED)
def start_plugins(self, starter=None):
for plugin in self.plugins:
if starter is None:
plugin.start()
else:
starter(plugin)
def tick(self, pause=True):
self.dispatch(events.TICK)
if pause:
sleep(self.TICK_PERIOD)
def kill(self):
self.dispatch(events.KILL)
for node_context in self.nodes:
node_context.kill()
self.tick()
def stop(self, stopper=None):
for node in self.nodes:
self.dispatch(events.STOP)
for node_context in self.nodes:
if stopper is None:
node.stop()
node_context.stop()
else:
stopper(node)
stopper(node_context)
self.tick(pause=False)
self.dispatch(events.STOPPED)
self.unregister_plugins()
def register_plugins(self):
for plugin_context in self.plugins:
plugin_context.register()
def unregister_plugins(self):
for plugin_context in self.plugins:
plugin_context.unregister()

View File

@ -1,17 +1,19 @@
import traceback
import logging
import sys
from queue import Empty
from time import sleep
from types import GeneratorType
from bonobo.constants import NOT_MODIFIED, BEGIN, END
from bonobo.errors import InactiveReadableError, UnrecoverableError
from bonobo.execution.base import LoopingExecutionContext
from bonobo.execution.contexts.base import LoopingExecutionContext
from bonobo.structs.bags import Bag
from bonobo.structs.inputs import Input
from bonobo.structs.tokens import Token
from bonobo.util import get_name, iserrorbag, isloopbackbag, isbag
from bonobo.util import get_name, iserrorbag, isloopbackbag, isbag, istuple
from bonobo.util.compat import deprecated_alias
from bonobo.util.statistics import WithStatistics
from mondrian import term
class NodeExecutionContext(WithStatistics, LoopingExecutionContext):
@ -20,13 +22,8 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext):
"""
@property
def alive(self):
"""todo check if this is right, and where it is used"""
return self._started and not self._stopped
@property
def alive_str(self):
return '+' if self.alive else '-'
def killed(self):
return self._killed
def __init__(self, wrapped, parent=None, services=None, _input=None, _outputs=None):
LoopingExecutionContext.__init__(self, wrapped, parent=parent, services=services)
@ -34,13 +31,23 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext):
self.input = _input or Input()
self.outputs = _outputs or []
self._killed = False
def __str__(self):
return self.alive_str + ' ' + self.__name__ + self.get_statistics_as_string(prefix=' ')
return self.__name__ + self.get_statistics_as_string(prefix=' ')
def __repr__(self):
name, type_name = get_name(self), get_name(type(self))
return '<{}({}{}){}>'.format(type_name, self.alive_str, name, self.get_statistics_as_string(prefix=' '))
return '<{}({}{}){}>'.format(type_name, self.status, name, self.get_statistics_as_string(prefix=' '))
def get_flags_as_string(self):
if self._defunct:
return term.red('[defunct]')
if self.killed:
return term.lightred('[killed]')
if self.stopped:
return term.lightblack('[done]')
return ''
def write(self, *messages):
"""
@ -89,23 +96,27 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext):
self.increment('in')
return row
def should_loop(self):
return not any((self.defunct, self.killed))
def loop(self):
while True:
while self.should_loop():
try:
self.step()
except KeyboardInterrupt:
raise
except InactiveReadableError:
break
except Empty:
sleep(self.PERIOD)
continue
except UnrecoverableError as exc:
self.handle_error(exc, traceback.format_exc())
except UnrecoverableError:
self.handle_error(*sys.exc_info())
self.input.shutdown()
break
except Exception as exc: # pylint: disable=broad-except
self.handle_error(exc, traceback.format_exc())
except Exception: # pylint: disable=broad-except
self.handle_error(*sys.exc_info())
except BaseException:
self.handle_error(*sys.exc_info())
break
def step(self):
# Pull data from the first available input channel.
@ -117,6 +128,15 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext):
# todo add timer
self.handle_results(input_bag, input_bag.apply(self._stack))
def kill(self):
if not self.started:
raise RuntimeError('Cannot kill a node context that has not started yet.')
if self.stopped:
raise RuntimeError('Cannot kill a node context that has already stopped.')
self._killed = True
def handle_results(self, input_bag, results):
# self._exec_time += timer.duration
# Put data onto output channels
@ -124,6 +144,9 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext):
if isinstance(results, GeneratorType):
while True:
try:
# if kill flag was step, stop iterating.
if self._killed:
break
result = next(results)
except StopIteration:
break
@ -137,12 +160,47 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext):
pass
def _resolve(input_bag, output):
# NotModified means to send the input unmodified to output.
if output is NOT_MODIFIED:
return input_bag
def isflag(param):
    """Tell whether ``param`` is a Token that belongs to the known flags (currently only NOT_MODIFIED)."""
    if not isinstance(param, Token):
        return False
    return param in (NOT_MODIFIED, )
def split_tokens(output):
    """
    Split an output into token tuple, real output tuple.

    A lone Token is returned as the only flag, with no real output. Anything that is not a tuple is returned as the
    only real output, with no flag. For a tuple, the leading run of flags (as told by ``isflag``) is separated from
    the remaining items.

    :param output: a Token, a tuple, or any other value
    :return: tuple, tuple
    """
    if isinstance(output, Token):
        # just a flag
        return (output, ), ()

    if not istuple(output):
        # no flag
        return (), (output, )

    # Count the leading flags. The length check keeps an empty tuple, or a tuple made only of flags, from running
    # past the end and raising IndexError.
    i = 0
    while i < len(output) and isflag(output[i]):
        i += 1

    return output[:i], output[i:]
def _resolve(input_bag, output):
"""
This function is key to how bonobo works (and internal, too). It transforms a pair of input/output into what is the
real output.
:param input_bag: Bag
:param output: mixed
:return: Bag
"""
if isbag(output):
return output
return Bag(output)
tokens, output = split_tokens(output)
if len(tokens) == 1 and tokens[0] is NOT_MODIFIED:
return input_bag
return output if isbag(output) else Bag(output)

View File

@ -0,0 +1,13 @@
from bonobo.execution.contexts.base import LoopingExecutionContext
class PluginExecutionContext(LoopingExecutionContext):
    """Execution context wrapping a plugin, connecting it to the event dispatcher of the parent context."""

    @property
    def dispatcher(self):
        """Event dispatcher, borrowed from the parent context."""
        parent = self.parent
        return parent.dispatcher

    def register(self):
        """Ask the wrapped plugin to register itself on the dispatcher, and return its result."""
        plugin = self.wrapped
        return plugin.register(self.dispatcher)

    def unregister(self):
        """Ask the wrapped plugin to unregister itself from the dispatcher, and return its result."""
        plugin = self.wrapped
        return plugin.unregister(self.dispatcher)

View File

@ -0,0 +1,13 @@
from whistle import Event
START = 'execution.start'
STARTED = 'execution.started'
TICK = 'execution.tick'
STOP = 'execution.stop'
STOPPED = 'execution.stopped'
KILL = 'execution.kill'
class ExecutionEvent(Event):
    """Event dispatched during an execution, carrying the execution context that emitted it."""

    def __init__(self, context):
        """
        :param context: the execution context this event relates to
        """
        # Let the base event class initialize whatever state it needs before adding ours.
        super().__init__()
        self.context = context

View File

@ -1,26 +0,0 @@
from bonobo.execution.base import LoopingExecutionContext, recoverable
class PluginExecutionContext(LoopingExecutionContext):
PERIOD = 0.5
def __init__(self, wrapped, parent):
# Instanciate plugin. This is not yet considered stable, as at some point we may need a way to configure
# plugins, for example if it depends on an external service.
super().__init__(wrapped(self), parent)
def start(self):
super().start()
with recoverable(self.handle_error):
self.wrapped.on_start()
def shutdown(self):
if self.started:
with recoverable(self.handle_error):
self.wrapped.on_stop()
self.alive = False
def step(self):
with recoverable(self.handle_error):
self.wrapped.on_tick()

View File

@ -1,5 +1,5 @@
from bonobo.strategies.executor import ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy
from bonobo.strategies.naive import NaiveStrategy
from bonobo.execution.strategies.executor import ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy
from bonobo.execution.strategies.naive import NaiveStrategy
__all__ = [
'create_strategy',
@ -21,8 +21,8 @@ def create_strategy(name=None):
:param name:
:return: Strategy
"""
from bonobo.strategies.base import Strategy
import logging
from bonobo.execution.strategies.base import Strategy
if isinstance(name, Strategy):
return name
@ -39,4 +39,4 @@ def create_strategy(name=None):
'Invalid strategy {}. Available choices: {}.'.format(repr(name), ', '.join(sorted(STRATEGIES.keys())))
) from exc
return factory()
return factory()

View File

@ -1,4 +1,4 @@
from bonobo.execution.graph import GraphExecutionContext
from bonobo.execution.contexts.graph import GraphExecutionContext
class Strategy:

View File

@ -0,0 +1,77 @@
import functools
import logging
import sys
from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor
from bonobo.structs.bags import Bag
from bonobo.constants import BEGIN, END
from bonobo.execution.strategies.base import Strategy
from bonobo.util import get_name
class ExecutorStrategy(Strategy):
    """
    Strategy based on a concurrent.futures.Executor subclass (or similar interface).

    """

    executor_factory = Executor

    def create_executor(self):
        """Create the executor instance used to run the nodes."""
        return self.executor_factory()

    def execute(self, graph, **kwargs):
        """
        Run ``graph`` using one executor job per node, ticking the context until no node is alive anymore.

        :param graph: the graph to execute
        :param kwargs: forwarded to ``create_graph_execution_context``
        :return: the graph execution context, once stopped
        """
        logger = logging.getLogger(__name__)

        context = self.create_graph_execution_context(graph, **kwargs)
        context.write(BEGIN, Bag(), END)

        futures = []

        with self.create_executor() as executor:
            try:
                context.start(self.get_starter(executor, futures))
            except KeyboardInterrupt:
                logger.warning('KeyboardInterrupt received. Trying to terminate the nodes gracefully.')
            except Exception:
                # A failing start is still not fatal here (the loop below winds things down), but it must be
                # reported for what it is, with its traceback, instead of being disguised as a keyboard interrupt.
                logger.critical('Exception caught while starting execution context.', exc_info=True)

            while context.alive:
                try:
                    context.tick()
                except KeyboardInterrupt:
                    logger.warning('KeyboardInterrupt received. Trying to terminate the nodes gracefully.')
                    context.kill()
                    break

            context.stop()

        return context

    def get_starter(self, executor, futures):
        """
        Build the callable used by the graph context to start a node within ``executor``.

        :param executor: executor the node runners are submitted to
        :param futures: list collecting the future of each submitted runner
        :return: callable taking a node execution context
        """

        def starter(node):
            @functools.wraps(node)
            def _runner():
                try:
                    with node:
                        node.loop()
                except BaseException:
                    # Last line of defense within the worker: whatever happened, log it and stop the node.
                    logging.getLogger(__name__).critical(
                        'Uncaught exception in node execution for {}.'.format(node), exc_info=True
                    )
                    node.shutdown()
                    node.stop()

            try:
                futures.append(executor.submit(_runner))
            except BaseException:
                # Submission failures are logged, not raised, so that the other nodes can still be started.
                logging.getLogger(__name__).critical('futures.append', exc_info=sys.exc_info())

        return starter
class ThreadPoolExecutorStrategy(ExecutorStrategy):
    """Executor strategy whose executor is a ``concurrent.futures.ThreadPoolExecutor``."""

    executor_factory = ThreadPoolExecutor
class ProcessPoolExecutorStrategy(ExecutorStrategy):
    """Executor strategy whose executor is a ``concurrent.futures.ProcessPoolExecutor``."""

    executor_factory = ProcessPoolExecutor

View File

@ -1,5 +1,5 @@
from bonobo.constants import BEGIN, END
from bonobo.strategies.base import Strategy
from bonobo.execution.strategies.base import Strategy
from bonobo.structs.bags import Bag

View File

@ -5,8 +5,7 @@ from django.core.management.base import BaseCommand, OutputWrapper
import bonobo
import bonobo.util
from bonobo.commands.run import get_default_services
from bonobo.ext.console import ConsoleOutputPlugin
from bonobo.plugins.console import ConsoleOutputPlugin
from bonobo.util.term import CLEAR_EOL

View File

@ -1,86 +0,0 @@
import logging
import sys
import textwrap
from logging import CRITICAL, DEBUG, ERROR, INFO, WARNING
from colorama import Fore, Style
from bonobo import settings
from bonobo.util.term import CLEAR_EOL
iswindows = (sys.platform == 'win32')
def get_format():
    """Yield, in order, the fragments of the logging format string (not yet colorized)."""
    fragments = [
        '{b}[%(fg)s%(levelname)s{b}][{w}',
        '{b}][{w}'.join(('%(spent)04d', '%(name)s')),
        '{b}]',
        ' %(fg)s%(message)s{r}',
    ]
    if not iswindows:
        # Outside of windows, each line ends with the "clear end of line" sequence.
        fragments.append(CLEAR_EOL)
    yield from fragments
colors = {
'b': '' if iswindows else Fore.BLACK,
'w': '' if iswindows else Fore.LIGHTBLACK_EX,
'r': '' if iswindows else Style.RESET_ALL,
}
format = (''.join(get_format())).format(**colors)
class Filter(logging.Filter):
    """Logging filter that never rejects a record, but decorates it with ``spent`` and ``fg`` attributes."""

    def filter(self, record):
        """
        Set ``record.spent`` (``relativeCreated`` floor-divided by 1000) and ``record.fg`` (color for the level).

        :param record: logging.LogRecord
        :return: always True
        """
        record.spent = record.relativeCreated // 1000

        if iswindows:
            # No color codes on windows.
            record.fg = ''
        else:
            # Keys are the short level names; unknown levels fall back to the INFO color.
            palette = {
                'DEBG': Fore.LIGHTBLACK_EX,
                'INFO': Fore.LIGHTWHITE_EX,
                'WARN': Fore.LIGHTYELLOW_EX,
                'ERR ': Fore.LIGHTRED_EX,
                'CRIT': Fore.RED,
            }
            record.fg = palette.get(record.levelname, Fore.LIGHTWHITE_EX)

        return True
class Formatter(logging.Formatter):
    """Logging formatter that prefixes every line of a formatted traceback with a gutter."""

    def formatException(self, ei):
        """
        Format the exception information as the base class does, then indent each line with a gutter.

        :param ei: exception info tuple
        :return: str
        """
        gutter = ' | ' if iswindows else Fore.BLACK + ' | ' + Fore.WHITE
        formatted = super().formatException(ei)
        return textwrap.indent(formatted, gutter)
def setup(level):
    """
    Rename the standard levels to four-character names and attach a stderr handler to the root logger.

    :param level: level set on the root logger
    """
    short_names = (
        (DEBUG, 'DEBG'),
        (INFO, 'INFO'),
        (WARNING, 'WARN'),
        (ERROR, 'ERR '),
        (CRITICAL, 'CRIT'),
    )
    for value, short_name in short_names:
        logging.addLevelName(value, short_name)

    stderr_handler = logging.StreamHandler(sys.stderr)
    stderr_handler.setFormatter(Formatter(format))
    stderr_handler.addFilter(Filter())

    root_logger = logging.getLogger()
    root_logger.addHandler(stderr_handler)
    root_logger.setLevel(level)
def set_level(level):
    """Set the level of the root logger to ``level``."""
    root_logger = logging.getLogger()
    root_logger.setLevel(level)
def get_logger(name='bonobo'):
    """Return the standard library logger called ``name`` (``'bonobo'`` by default)."""
    named_logger = logging.getLogger(name)
    return named_logger
# Compatibility with python logging
getLogger = get_logger
# Setup formating and level.
setup(level=settings.LOGGING_LEVEL.get())

View File

@ -1,4 +1,7 @@
from fs.errors import ResourceNotFound
from bonobo.config import Configurable, ContextProcessor, Option, Service
from bonobo.errors import UnrecoverableError
class FileHandler(Configurable):

View File

@ -53,12 +53,12 @@ class LdjsonReader(FileReader):
def read(self, fs, file):
for line in file:
print(line)
yield self.loader(line)
class LdjsonWriter(FileWriter):
"""Write a stream of JSON objects, one object per line."""
def write(self, fs, file, lineno, **row):
lineno += 1 # class-level variable
file.write(json.dumps(row) + '\n')

View File

@ -10,5 +10,14 @@ class Plugin:
"""
def __init__(self, context):
self.context = context
def register(self, dispatcher):
"""
:param dispatcher: whistle.EventDispatcher
"""
pass
def unregister(self, dispatcher):
"""
:param dispatcher: whistle.EventDispatcher
"""
pass

View File

@ -2,14 +2,145 @@ import io
import sys
from contextlib import redirect_stdout, redirect_stderr
from colorama import Style, Fore, init
init(wrap=True)
from colorama import Style, Fore, init as initialize_colorama_output_wrappers
from bonobo import settings
from bonobo.execution import events
from bonobo.plugins import Plugin
from bonobo.util.term import CLEAR_EOL, MOVE_CURSOR_UP
initialize_colorama_output_wrappers(wrap=True)
class ConsoleOutputPlugin(Plugin):
"""
Outputs status information to the connected stdout. Can be a TTY, with or without support for colors/cursor
movements, or a non tty (pipe, file, ...). The features are adapted to terminal capabilities.
On Windows, we'll play a bit differently because we don't know how to manipulate cursor position. We'll only
display stats at the very end, and there won't be this "buffering" logic we need to display both stats and stdout.
.. attribute:: prefix
String prefix of output lines.
"""
# Standard outputs descriptors backup here, also used to override if needed.
_stdout = sys.stdout
_stderr = sys.stderr
# When the plugin is instanciated, we'll set the real value of this.
isatty = False
# Whether we're on windows, or a real operating system.
iswindows = (sys.platform == 'win32')
def __init__(self):
self.isatty = self._stdout.isatty()
def register(self, dispatcher):
dispatcher.add_listener(events.START, self.setup)
dispatcher.add_listener(events.TICK, self.tick)
dispatcher.add_listener(events.STOPPED, self.teardown)
def unregister(self, dispatcher):
dispatcher.remove_listener(events.STOPPED, self.teardown)
dispatcher.remove_listener(events.TICK, self.tick)
dispatcher.remove_listener(events.START, self.setup)
def setup(self, event):
# TODO this wont work if one instance is registered with more than one context.
# Two options:
# - move state to context
# - forbid registering more than once
self.prefix = ''
self.counter = 0
self._append_cache = ''
self.stdout = IOBuffer()
self.redirect_stdout = redirect_stdout(self._stdout if self.iswindows else self.stdout)
self.redirect_stdout.__enter__()
self.stderr = IOBuffer()
self.redirect_stderr = redirect_stderr(self._stderr if self.iswindows else self.stderr)
self.redirect_stderr.__enter__()
def tick(self, event):
if self.isatty and not self.iswindows:
self._write(event.context, rewind=True)
else:
pass # not a tty, or windows, so we'll ignore stats output
def teardown(self, event):
self._write(event.context, rewind=False)
self.redirect_stderr.__exit__(None, None, None)
self.redirect_stdout.__exit__(None, None, None)
def write(self, context, prefix='', rewind=True, append=None):
t_cnt = len(context)
if not self.iswindows:
for line in self.stdout.switch().split('\n')[:-1]:
print(line + CLEAR_EOL, file=self._stdout)
for line in self.stderr.switch().split('\n')[:-1]:
print(line + CLEAR_EOL, file=self._stderr)
alive_color = Style.BRIGHT
dead_color = Style.BRIGHT + Fore.BLACK
for i in context.graph.topologically_sorted_indexes:
node = context[i]
name_suffix = '({})'.format(i) if settings.DEBUG.get() else ''
liveliness_color = alive_color if node.alive else dead_color
liveliness_prefix = ' {}{}{} '.format(liveliness_color, node.status, Style.RESET_ALL)
_line = ''.join(
(
liveliness_prefix,
node.name,
name_suffix,
' ',
node.get_statistics_as_string(),
' ',
node.get_flags_as_string(),
Style.RESET_ALL,
' ',
)
)
print(prefix + _line + CLEAR_EOL, file=self._stderr)
if append:
# todo handle multiline
print(
''.join(
(
' `-> ', ' '.join('{}{}{}: {}'.format(Style.BRIGHT, k, Style.RESET_ALL, v) for k, v in append),
CLEAR_EOL
)
),
file=self._stderr
)
t_cnt += 1
if rewind:
print(CLEAR_EOL, file=self._stderr)
print(MOVE_CURSOR_UP(t_cnt + 2), file=self._stderr)
def _write(self, context, rewind):
if settings.PROFILE.get():
if self.counter % 10 and self._append_cache:
append = self._append_cache
else:
self._append_cache = append = (
('Memory', '{0:.2f} Mb'.format(memory_usage())),
# ('Total time', '{0} s'.format(execution_time(harness))),
)
else:
append = ()
self.write(context, prefix=self.prefix, append=append, rewind=rewind)
self.counter += 1
class IOBuffer():
"""
@ -36,136 +167,6 @@ class IOBuffer():
self.current.flush()
class ConsoleOutputPlugin(Plugin):
"""
Outputs status information to the connected stdout. Can be a TTY, with or without support for colors/cursor
movements, or a non tty (pipe, file, ...). The features are adapted to terminal capabilities.
On Windows, we'll play a bit differently because we don't know how to manipulate cursor position. We'll only
display stats at the very end, and there won't be this "buffering" logic we need to display both stats and stdout.
.. attribute:: prefix
String prefix of output lines.
"""
# Standard outputs descriptors backup here, also used to override if needed.
_stdout = sys.stdout
_stderr = sys.stderr
# When the plugin is started, we'll set the real value of this.
isatty = False
# Whether we're on windows, or a real operating system.
iswindows = (sys.platform == 'win32')
def on_start(self):
self.prefix = ''
self.counter = 0
self._append_cache = ''
self.isatty = self._stdout.isatty()
self.stdout = IOBuffer()
self.redirect_stdout = redirect_stdout(self._stdout if self.iswindows else self.stdout)
self.redirect_stdout.__enter__()
self.stderr = IOBuffer()
self.redirect_stderr = redirect_stderr(self._stderr if self.iswindows else self.stderr)
self.redirect_stderr.__enter__()
def on_tick(self):
if self.isatty and not self.iswindows:
self._write(self.context.parent, rewind=True)
else:
pass # not a tty, or windows, so we'll ignore stats output
def on_stop(self):
self._write(self.context.parent, rewind=False)
self.redirect_stderr.__exit__(None, None, None)
self.redirect_stdout.__exit__(None, None, None)
def write(self, context, prefix='', rewind=True, append=None):
t_cnt = len(context)
if not self.iswindows:
for line in self.stdout.switch().split('\n')[:-1]:
print(line + CLEAR_EOL, file=self._stdout)
for line in self.stderr.switch().split('\n')[:-1]:
print(line + CLEAR_EOL, file=self._stderr)
alive_color = Style.BRIGHT
dead_color = Style.BRIGHT + Fore.BLACK
for i in context.graph.topologically_sorted_indexes:
node = context[i]
name_suffix = '({})'.format(i) if settings.DEBUG.get() else ''
if node.alive:
_line = ''.join(
(
' ',
alive_color,
'+',
Style.RESET_ALL,
' ',
node.name,
name_suffix,
' ',
node.get_statistics_as_string(),
Style.RESET_ALL,
' ',
)
)
else:
_line = ''.join(
(
' ',
dead_color,
'-',
' ',
node.name,
name_suffix,
' ',
node.get_statistics_as_string(),
Style.RESET_ALL,
' ',
)
)
print(prefix + _line + CLEAR_EOL, file=self._stderr)
if append:
# todo handle multiline
print(
''.join(
(
' `-> ', ' '.join('{}{}{}: {}'.format(Style.BRIGHT, k, Style.RESET_ALL, v) for k, v in append),
CLEAR_EOL
)
),
file=self._stderr
)
t_cnt += 1
if rewind:
print(CLEAR_EOL, file=self._stderr)
print(MOVE_CURSOR_UP(t_cnt + 2), file=self._stderr)
def _write(self, graph_context, rewind):
if settings.PROFILE.get():
if self.counter % 10 and self._append_cache:
append = self._append_cache
else:
self._append_cache = append = (
('Memory', '{0:.2f} Mb'.format(memory_usage())),
# ('Total time', '{0} s'.format(execution_time(harness))),
)
else:
append = ()
self.write(graph_context, prefix=self.prefix, append=append, rewind=rewind)
self.counter += 1
def memory_usage():
import os, psutil
process = psutil.Process(os.getpid())

View File

@ -1,4 +1,5 @@
import logging
import os
from bonobo.errors import ValidationError
@ -51,6 +52,12 @@ class Setting:
raise ValidationError('Invalid value {!r} for setting {}.'.format(value, self.name))
self.value = value
def set_if_true(self, value):
    """Set this setting to True when ``value`` is truthy, and leave it untouched otherwise.

    It may sound strange, but the main usage is enforcing some settings from the command line."""
    if not value:
        return
    self.set(True)
def get(self):
try:
return self.value

View File

@ -1,84 +0,0 @@
import time
import traceback
from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor
from bonobo.constants import BEGIN, END
from bonobo.strategies.base import Strategy
from bonobo.structs.bags import Bag
from bonobo.util.errors import print_error
class ExecutorStrategy(Strategy):
"""
Strategy based on a concurrent.futures.Executor subclass (or similar interface).
"""
# Called with no arguments by create_executor(); subclasses override it with a concrete executor class.
executor_factory = Executor
def create_executor(self):
# Build the executor used for one execute() call.
return self.executor_factory()
def execute(self, graph, **kwargs):
# Run `graph` to completion and return its graph execution context.
# The context is fed a single (BEGIN, Bag(), END) write before anything is started.
context = self.create_graph_execution_context(graph, **kwargs)
context.write(BEGIN, Bag(), END)
executor = self.create_executor()
# Futures returned by executor.submit(); in the visible code they are only appended to, never read.
futures = []
# Plugins are started before nodes; both receive a callback that submits a runner to the executor.
context.start_plugins(self.get_plugin_starter(executor, futures))
context.start(self.get_starter(executor, futures))
# Poll every 0.1s until the context reports it is no longer alive.
while context.alive:
time.sleep(0.1)
for plugin_context in context.plugins:
plugin_context.shutdown()
context.stop()
executor.shutdown()
return context
def get_starter(self, executor, futures):
# Return a callback taking a node, which submits the node's runner to `executor` and records the future.
def starter(node):
def _runner():
# A failing start() is reported and the node's input is ended; loop() only runs when start() succeeded.
try:
node.start()
except Exception as exc:
print_error(exc, traceback.format_exc(), context=node, method='start')
node.input.on_end()
else:
node.loop()
# NOTE(review): indentation is lost in this view; stop() appears to run after the start/loop block
# regardless of its outcome -- confirm against the real file.
try:
node.stop()
except Exception as exc:
print_error(exc, traceback.format_exc(), context=node, method='stop')
futures.append(executor.submit(_runner))
return starter
def get_plugin_starter(self, executor, futures):
# Return a callback taking a plugin context, which submits the plugin's runner to `executor`.
def plugin_starter(plugin):
def _runner():
# The plugin is used as a context manager around its loop(); loop errors are reported, not raised.
with plugin:
try:
plugin.loop()
except Exception as exc:
print_error(exc, traceback.format_exc(), context=plugin)
futures.append(executor.submit(_runner))
return plugin_starter
class ThreadPoolExecutorStrategy(ExecutorStrategy):
    """ExecutorStrategy whose executor factory is ``concurrent.futures.ThreadPoolExecutor``."""

    executor_factory = ThreadPoolExecutor
class ProcessPoolExecutorStrategy(ExecutorStrategy):
    """ExecutorStrategy whose executor factory is ``concurrent.futures.ProcessPoolExecutor``."""

    executor_factory = ProcessPoolExecutor

View File

@ -1 +0,0 @@

View File

@ -1,6 +1,8 @@
import json
from copy import copy
from bonobo.constants import BEGIN
from bonobo.util import get_name
class Graph:
@ -110,6 +112,24 @@ class Graph:
self._topologcally_sorted_indexes_cache = tuple(filter(lambda i: type(i) is int, reversed(order)))
return self._topologcally_sorted_indexes_cache
def _repr_dot_(self):
    """
    Return the graphviz (dot language) source describing this graph.

    Edges leaving BEGIN are emitted first, followed by the outgoing edges of every node, visited in
    topologically sorted index order.
    """
    lines = [
        'digraph {',
        ' rankdir = LR;',
        ' "BEGIN" [shape="point"];',
    ]

    lines.extend(' "BEGIN" -> ' + _get_graphviz_node_id(self, target) + ';' for target in self.outputs_of(BEGIN))

    for source in self.topologically_sorted_indexes:
        source_targets = self.outputs_of(source)
        for target in source_targets:
            edge = ' {} -> {};'.format(_get_graphviz_node_id(self, source), _get_graphviz_node_id(self, target))
            lines.append(edge)

    lines.append('}')
    return '\n'.join(lines)
def _resolve_index(self, mixed):
""" Find the index based on various strategies for a node, probably an input or output of chain. Supported inputs are indexes, node values or names.
"""
@ -126,3 +146,9 @@ class Graph:
return self.nodes.index(mixed)
raise ValueError('Cannot find node matching {!r}.'.format(mixed))
def _get_graphviz_node_id(graph, i):
    """
    Build the graphviz node statement for node index ``i`` of ``graph``.

    The result looks like ``{0 [label="extract"]}``: the index is the node id and the JSON-encoded node name
    is the label.
    """
    node_id = str(i)
    label = json.dumps(get_name(graph[i]))
    return '{{{} [label={}]}}'.format(node_id, label)

View File

@ -15,7 +15,6 @@
# limitations under the License.
from abc import ABCMeta, abstractmethod
from queue import Queue
from bonobo.constants import BEGIN, END

View File

@ -1,4 +1,4 @@
from bonobo.util.collections import sortedlist, ensure_tuple
from bonobo.util.collections import ensure_tuple, sortedlist, tuplize
from bonobo.util.compat import deprecated, deprecated_alias
from bonobo.util.inspect import (
inspect_node,
@ -15,13 +15,13 @@ from bonobo.util.inspect import (
istype,
)
from bonobo.util.objects import (get_name, get_attribute_or_create, ValueHolder)
from bonobo.util.python import require
# Bonobo's util API
__all__ = [
'ValueHolder',
'deprecated',
'deprecated_alias',
'ensure_tuple',
'get_attribute_or_create',
'get_name',
'inspect_node',
@ -35,5 +35,6 @@ __all__ = [
'ismethod',
'isoption',
'istype',
'require',
'sortedlist',
'tuplize',
]

View File

@ -22,9 +22,9 @@ def ensure_tuple(tuple_or_mixed):
def tuplize(generator):
""" Takes a generator and make it a tuple-returning function. As a side
effect, it can also decorate any iterator-returning function to force
return value to be a tuple.
"""
Decorates a generator and makes it a tuple-returning function. As a side effect, it can also decorate any
iterator-returning function to force return value to be a tuple.
>>> tuplized_lambda = tuplize(lambda: [1, 2, 3])
>>> tuplized_lambda()

164
bonobo/util/environ.py Normal file
View File

@ -0,0 +1,164 @@
import argparse
import codecs
import os
import re
import warnings
from contextlib import contextmanager
__escape_decoder = codecs.getdecoder('unicode_escape')
__posix_variable = re.compile('\$\{[^\}]*\}')
def parse_var(var):
    """
    Split a ``NAME=VALUE`` definition into a ``(name, value)`` tuple.

    Only the first ``=`` separates the name from the value. When the value is wrapped in matching single or
    double quotes, the quotes are removed and backslash escapes (``\\n``, ``\\t``, ``\\u20ac``...) are decoded.
    Unquoted values are returned verbatim.

    :param var: string formatted as ``NAME=VALUE``
    :return: tuple of (name, value)
    :raises ValueError: if ``var`` does not contain a ``=`` sign.
    """
    name, separator, value = var.partition('=')
    if not separator:
        raise ValueError('Invalid variable definition {!r}, expected NAME=VALUE.'.format(var))

    if len(value) > 1 and value[0] in ('"', "'") and value[-1] == value[0]:
        # Feeding a str straight to the unicode_escape decoder treats its UTF-8 bytes as latin-1, which mangles
        # every non-ASCII character. Encoding to latin-1 first (with characters outside latin-1 turned into
        # \uXXXX escapes) makes the decoding lossless.
        value = value[1:-1].encode('latin-1', 'backslashreplace').decode('unicode_escape')

    return name, value
def load_env_from_file(filename):
    """
    Lazily read an env file, yielding one (name, value) tuple per variable definition.

    Blank lines and lines starting with ``#`` are ignored. As this is a generator, nothing (including the
    existence check) happens before the first item is requested.

    :raises FileNotFoundError: if ``filename`` does not exist.
    :raises SyntaxError: if a meaningful line contains no ``=`` sign.
    """
    if not os.path.exists(filename):
        raise FileNotFoundError('Environment file {} does not exist.'.format(filename))

    with open(filename) as env_file:
        for position, raw in enumerate(env_file, 1):
            entry = raw.strip()
            if entry == '' or entry.startswith('#'):
                continue
            if '=' not in entry:
                raise SyntaxError('Invalid environment file syntax in {} at line {}.'.format(filename, position))
            name, value = parse_var(entry)
            yield name, value
_parser = None
def get_argument_parser(parser=None):
    """
    Creates an argument parser with arguments to override the system environment.

    The following repeatable options are added: ``--default-env-file`` / ``-E``, ``--default-env``,
    ``--env-file`` and ``--env`` / ``-e``.

    :api: bonobo.get_argument_parser
    :param parser: optional existing ``argparse.ArgumentParser`` to add the options to; a new one is created
                   when omitted.
    :return: the parser (the one given, or the newly created one)
    """
    # Store globally to be able to warn the user about the fact he's probably wrong not to pass a parser to
    # parse_args(), later.
    global _parser

    if parser is None:
        parser = argparse.ArgumentParser()

    _parser = parser

    parser.add_argument('--default-env-file', '-E', action='append')
    parser.add_argument('--default-env', action='append')
    parser.add_argument('--env-file', action='append')
    parser.add_argument('--env', '-e', action='append')

    return parser
@contextmanager
def parse_args(mixed=None):
    """
    Context manager to extract and apply environment related options from the provided argparser result.

    A dictionary with the remaining (not environment related) options is yielded, so they can be used by the
    caller. Every environment variable changed here is put back in its previous state when the context exits,
    even if the managed block already removed it.

    Priority order: --env > --env-file > system > --default-env > --default-env-file

    :api: bonobo.parse_args
    :param mixed: ArgumentParser instance, Namespace, or dict. When omitted, a parser is created using
                  ``bonobo.get_argument_parser()`` and a warning is emitted if a parser was created before.
    :return: context manager yielding a dict of the options that were not consumed here.
    """
    if mixed is None:
        if _parser is not None:
            warnings.warn(
                'You are calling bonobo.parse_args() without a parser argument, but it looks like you created a parser before. You probably want to pass your parser to this call, or if creating a new parser here is really what you want to do, please create a new one explicitely to silence this warning.'
            )
        # use the api from bonobo namespace, in case a command patched it.
        import bonobo
        mixed = bonobo.get_argument_parser()

    if isinstance(mixed, argparse.ArgumentParser):
        options = mixed.parse_args()
    else:
        options = mixed

    if not isinstance(options, dict):
        options = options.__dict__

    # make a copy so we don't pollute our parent variables.
    options = dict(options)

    # storage for values before patch (None means the variable was not set).
    _backup = {}

    def _patch(name, value, override):
        """Set a variable in os.environ, remembering its previous state the first time it is touched."""
        if not override and name in os.environ:
            return
        if name not in _backup:
            _backup[name] = os.environ.get(name, None)
        os.environ[name] = value

    try:
        # Defaults never override what is already set. --default-env is applied before --default-env-file, so
        # the former wins over the latter.
        for var in options.pop('default_env', None) or ():
            name, value = parse_var(var)
            _patch(name, value, override=False)

        for filename in options.pop('default_env_file', None) or ():
            for name, value in load_env_from_file(filename):
                _patch(name, value, override=False)

        # Overrides always win. --env-file is applied before --env, so the latter has the last word.
        for filename in options.pop('env_file', None) or ():
            for name, value in load_env_from_file(filename):
                _patch(name, value, override=True)

        for var in options.pop('env', None) or ():
            name, value = parse_var(var)
            _patch(name, value, override=True)

        yield options
    finally:
        for name, value in _backup.items():
            if value is None:
                # The managed block may have removed the variable itself; that must not raise here.
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
@contextmanager
def change_working_directory(path):
    """Temporarily make ``path`` the current working directory, going back to the previous one on exit."""
    previous = os.getcwd()
    target = str(path)
    os.chdir(target)
    try:
        yield
    finally:
        os.chdir(previous)

View File

@ -1,39 +0,0 @@
import sys
from textwrap import indent
def _get_error_message(exc):
if hasattr(exc, '__str__'):
message = str(exc)
return message[0].upper() + message[1:]
return '\n'.join(exc.args),
def print_error(exc, trace, context=None, method=None):
    """
    Print a colorized report about an exception on ``sys.stderr``.

    The report starts with a bright red header made of the exception type name (followed by the type name of
    ``context`` and the optional method name, if a context is given), then the exception message, then the
    traceback with every line prefixed by a red bar.

    :param exc: the exception to report
    :param trace: the formatted traceback, as text
    :param context: optional object in which the error happened (only its type name is displayed)
    :param method: optional method name, displayed after the context type name
    """
    from colorama import Fore, Style

    prefix = '{}{} | {}'.format(Fore.RED, Style.BRIGHT, Style.RESET_ALL)

    location = ''
    if context:
        method_suffix = '.{}()'.format(method) if method else ''
        location = ' (in {}{})'.format(type(context).__name__, method_suffix)

    message = indent(_get_error_message(exc), prefix + Style.BRIGHT)

    print(
        Style.BRIGHT,
        Fore.RED,
        type(exc).__name__,
        location,
        Style.RESET_ALL,
        '\n',
        message,
        Style.RESET_ALL,
        sep='',
        file=sys.stderr,
    )
    print(prefix, file=sys.stderr)

    # The predicate forces the prefix on every line of the traceback, including blank ones.
    prefixed_trace = indent(trace, prefix, predicate=lambda line: True)
    print(prefixed_trace, file=sys.stderr)

View File

@ -1,31 +0,0 @@
import inspect
import os
import runpy
class _RequiredModule:
def __init__(self, dct):
self.__dict__ = dct
class _RequiredModulesRegistry(dict):
# dict subclass caching loaded modules, keyed by the dotted name given to require().
@property
def pathname(self):
# Directory of the file that owns stack frame #2, joined onto the current working directory.
# NOTE(review): the frame index depends on the exact call depth (presumably pathname <- require <- caller);
# confirm before adding or removing any call level here.
return os.path.join(os.getcwd(), os.path.dirname(inspect.getfile(inspect.stack()[2][0])))
def require(self, name):
# On first use of `name`, run "<pathname>/a/b.py" (for name "a.b") using runpy, wrap the resulting globals in
# a _RequiredModule and cache it under `name`; later calls return the cached object.
if name not in self:
bits = name.split('.')
filename = os.path.join(self.pathname, *bits[:-1], bits[-1] + '.py')
self[name] = _RequiredModule(runpy.run_path(filename, run_name=name))
return self[name]
class WorkingDirectoryModulesRegistry(_RequiredModulesRegistry):
    """Registry whose base path for required modules is the current working directory."""

    @property
    def pathname(self):
        working_directory = os.getcwd()
        return working_directory
registry = _RequiredModulesRegistry()
require = registry.require

View File

@ -4,10 +4,29 @@ This package is considered private, and should only be used within bonobo.
"""
import json
import os
import runpy
import bonobo
from bonobo.util.collections import tuplize
from bonobo.util.python import WorkingDirectoryModulesRegistry
class _RequiredModule:
def __init__(self, dct):
self.__dict__ = dct
class _ModulesRegistry(dict):
@property
def pathname(self):
return os.getcwd()
def require(self, name):
if name not in self:
bits = name.split('.')
filename = os.path.join(self.pathname, *bits[:-1], bits[-1] + '.py')
self[name] = _RequiredModule(runpy.run_path(filename, run_name=name))
return self[name]
def _parse_option(option):
@ -52,7 +71,8 @@ def _resolve_transformations(transformations):
:param transformations: tuple(str)
:return: tuple(object)
"""
registry = WorkingDirectoryModulesRegistry()
registry = _ModulesRegistry()
transformations = transformations or []
for t in transformations:
try:
mod, attr = t.split(':', 1)

View File

@ -13,6 +13,7 @@
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import time
class WithStatistics:
@ -29,3 +30,23 @@ class WithStatistics:
def increment(self, name):
self.statistics[name] += 1
class Timer:
    """
    Context manager used to time execution of stuff.

    >>> with Timer() as timer:
    ...     pass
    >>> timer.duration >= 0
    True

    ``duration`` (seconds, as a float) and ``str(timer)`` are only available once the block has exited.
    """

    def __enter__(self):
        self.__start = time.time()
        # Return the timer so "with Timer() as timer" binds it instead of None.
        return self

    def __exit__(self, type=None, value=None, traceback=None):
        # Nothing is returned, so an exception raised in the timed block still propagates.
        self.__finish = time.time()

    @property
    def duration(self):
        return self.__finish - self.__start

    def __str__(self):
        # Duration truncated (not rounded) to the millisecond.
        return str(int(self.duration * 1000) / 1000.0) + 's'

View File

@ -1,8 +1,17 @@
from contextlib import contextmanager
import functools
import io
import os
import runpy
import sys
from contextlib import contextmanager, redirect_stdout, redirect_stderr
from unittest.mock import patch
from bonobo import open_fs, Token
from bonobo.execution import GraphExecutionContext
from bonobo.execution.node import NodeExecutionContext
import pytest
from bonobo import open_fs, Token, __main__, get_examples_path
from bonobo.commands import entrypoint
from bonobo.execution.contexts.graph import GraphExecutionContext
from bonobo.execution.contexts.node import NodeExecutionContext
@contextmanager
@ -64,3 +73,68 @@ class BufferingGraphExecutionContext(BufferingContext, GraphExecutionContext):
def create_node_execution_context_for(self, node):
return self.NodeExecutionContextType(node, parent=self, buffer=self.buffer)
def runner(f):
    """
    Decorate ``f(args)`` into a test runner that captures what is written to stdout and stderr.

    The wrapper takes the command line arguments as positional arguments, passes them to ``f`` as a list and
    returns ``(stdout, stderr)``.

    The ``catch_errors`` keyword argument controls what happens when ``f`` raises:

    * falsy (default): the exception propagates.
    * an exception class or a tuple of classes: matching exceptions are caught, others propagate.
    * any other truthy value (``True``): every exception is caught.

    A caught exception makes the wrapper return ``(stdout, stderr, exception)``.
    """

    @functools.wraps(f)
    def wrapped_runner(*args, catch_errors=False):
        with redirect_stdout(io.StringIO()) as stdout, redirect_stderr(io.StringIO()) as stderr:
            try:
                f(list(args))
            except BaseException as exc:
                if not catch_errors:
                    raise
                # catch_errors holds exception *classes*, not instances, so it must be recognized as a
                # type (or tuple of types) for the filter to apply.
                if isinstance(catch_errors, (type, tuple)) and not isinstance(exc, catch_errors):
                    raise
                return stdout.getvalue(), stderr.getvalue(), exc
        return stdout.getvalue(), stderr.getvalue()

    return wrapped_runner
@runner
def runner_entrypoint(args):
    """ Run bonobo by calling the python command entrypoint (bonobo.commands.entrypoint) with the given args. """
    result = entrypoint(args)
    return result
@runner
def runner_module(args):
    """ Run bonobo by executing the bonobo.__main__ file, which is equivalent to doing "python -m bonobo ..."."""
    argv = ['bonobo', *args]
    with patch.object(sys, 'argv', argv):
        return runpy.run_path(__main__.__file__, run_name='__main__')
all_runners = pytest.mark.parametrize('runner', [runner_entrypoint, runner_module])
all_environ_targets = pytest.mark.parametrize(
'target', [
(get_examples_path('environ.py'), ),
(
'-m',
'bonobo.examples.environ',
),
]
)
@all_runners
@all_environ_targets
class EnvironmentTestCase():
# Helper base for environment related tests; the two decorators above parametrize `runner` and `target`.
def run_quiet(self, runner, *args):
# Invoke the "run" command with --quiet through the given runner, forwarding extra arguments.
return runner('run', '--quiet', *args)
def run_environ(self, runner, *args, environ=None):
# Run with os.environ replaced (clear=True) by {'PATH': '/usr/bin'} updated with `environ`, then parse the
# captured stdout into a dict.
_environ = {'PATH': '/usr/bin'}
if environ:
_environ.update(environ)
with patch.dict('os.environ', _environ, clear=True):
out, err = self.run_quiet(runner, *args)
# The run must not leave SECRET/PASSWORD behind, and must leave PATH as it was provided.
assert 'SECRET' not in os.environ
assert 'PASSWORD' not in os.environ
if 'PATH' in _environ:
assert 'PATH' in os.environ
assert os.environ['PATH'] == _environ['PATH']
assert err == ''
# Each non-empty stdout line is split at its first space into a (key, value) pair.
return dict(map(lambda line: line.split(' ', 1), filter(None, out.split('\n'))))

View File

@ -1,21 +0,0 @@
import time
class Timer:
    """
    Context manager used to time execution of stuff.

    >>> with Timer() as timer:
    ...     pass
    >>> timer.duration >= 0
    True

    ``duration`` (seconds, as a float) and ``str(timer)`` are only available once the block has exited.
    """

    def __enter__(self):
        self.__start = time.time()
        # Return the timer so "with Timer() as timer" binds it instead of None.
        return self

    def __exit__(self, type=None, value=None, traceback=None):
        # Nothing is returned, so an exception raised in the timed block still propagates.
        self.__finish = time.time()

    @property
    def duration(self):
        return self.__finish - self.__start

    def __str__(self):
        # Duration truncated (not rounded) to the millisecond.
        return str(int(self.duration * 1000) / 1000.0) + 's'

View File

@ -1,3 +1,19 @@
svg {
border: 2px solid green
}
}
div.related {
width: 940px;
margin: 30px auto 0 auto;
}
@media screen and (max-width: 875px) {
div.related {
visibility: hidden;
display: none;
}
}
.brand {
font-family: 'Ubuntu', 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif;
}

View File

@ -4,17 +4,8 @@
{%- block extrahead %}
{{ super() }}
<style>
div.related {
width: 940px;
margin: 30px auto 0 auto;
}
@media screen and (max-width: 875px) {
div.related {
visibility: hidden;
display: none;
}
}
</style>
<link href="https://fonts.googleapis.com/css?family=Ubuntu" rel="stylesheet">
{% endblock %}
{%- block footer %}

View File

@ -4,6 +4,9 @@ Changelog
Unreleased
::::::::::
* Cookiecutter usage is removed. Since bonobo now uses either a single file (up to you to get python
imports working as you want) or a regular fully fledged python package, we do not need it anymore.
New features
------------

View File

@ -186,3 +186,12 @@ epub_exclude_files = ['search.html']
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}
rst_epilog = """
.. |bonobo| replace:: **Bonobo**
.. |longversion| replace:: v.{version}
""".format(
version = version,
)

View File

@ -4,8 +4,6 @@ Jupyter Extension
There is a builtin plugin that integrates (somewhat minimalistically, for now) bonobo within jupyter notebooks, so
you can read the execution status of a graph within a nice (ok, not so nice) html/javascript widget.
See https://github.com/jupyter-widgets/widget-cookiecutter for the base template used.
Installation
::::::::::::

View File

@ -5,16 +5,18 @@ Installation
Create an ETL project
:::::::::::::::::::::
Creating a project and starting to write code should take less than a minute:
Let's create a job.
.. code-block:: shell-session
$ pip install --upgrade bonobo cookiecutter
$ bonobo init my-etl-project
$ bonobo run my-etl-project
$ pip install --upgrade bonobo
$ bonobo init my-etl.py
$ python my-etl.py
Once you bootstrapped a project, you can start editing the default example transformation by editing
`my-etl-project/main.py`. Now, you can head to :doc:`tutorial/index`.
This job only uses one python file, and you can run it using the python interpreter. For bigger jobs or jobs that
relate to multiple files, you should create a python package.
Now, you can head to :doc:`tutorial/index`.
Other installation options

View File

@ -16,16 +16,6 @@ Syntax: `bonobo convert [-r reader] input_filename [-w writer] output_filename`
to read from csv and write to csv too (or other format) but adding a geocoder filter that would add some fields.
Bonobo Init
:::::::::::
Create an empty project, ready to use bonobo.
Syntax: `bonobo init`
Requires `cookiecutter`.
Bonobo Inspect
::::::::::::::

View File

@ -1,54 +0,0 @@
Internal roadmap notes
======================
Things that should be thought about and/or implemented, but that I don't know where to store.
Graph and node level plugins
::::::::::::::::::::::::::::
* Enhancers or node-level plugins
* Graph level plugins
* Documentation
Command line interface and environment
::::::::::::::::::::::::::::::::::::::
* How do we manage environment ? .env ?
* How do we configure plugins ?
Services and Processors
:::::::::::::::::::::::
* ContextProcessors not clean (a bit better, but still not in love with the api)
Next...
:::::::
* Release process specialised for bonobo. With changelog production, etc.
* Document how to upgrade version, like, minor need change badges, etc.
* Windows console looks crappy.
* bonobo init --with sqlalchemy,docker; cookiecutter?
* logger, vebosity level
External libs that looks good
:::::::::::::::::::::::::::::
* dask.distributed
* mediator (event dispatcher)
Version 0.4
:::::::::::
* SQLAlchemy 101
Design decisions
::::::::::::::::
* initialize / finalize better than start / stop ?
Minor stuff
:::::::::::
* Should we include datasets in the repo or not? As they may change, grow, and even eventually have licenses we can't use,
it's probably best if we don't.

258
docs/tutorial/1-init.rst Normal file
View File

@ -0,0 +1,258 @@
Part 1: Let's get started!
==========================
To get started with |bonobo|, you need to install it in a working python 3.5+ environment (you should use a
`virtualenv <https://virtualenv.pypa.io/>`_).
.. code-block:: shell-session
$ pip install bonobo
Check that the installation worked, and that you're using a version that matches this tutorial (written for bonobo
|longversion|).
.. code-block:: shell-session
$ bonobo version
See :doc:`/install` for more options.
Create an ETL job
:::::::::::::::::
Since Bonobo 0.6, it's easy to bootstrap a simple ETL job using just one file.
We'll start here, and the later stages of the tutorial will guide you toward refactoring this to a python package.
.. code-block:: shell-session
$ bonobo init tutorial.py
This will create a simple job in a `tutorial.py` file. Let's run it:
.. code-block:: shell-session
$ python tutorial.py
Hello
World
- extract in=1 out=2 [done]
- transform in=2 out=2 [done]
- load in=2 [done]
If you have a similar result, then congratulations! You just ran your first |bonobo| ETL job.
Inspect your graph
::::::::::::::::::
The basic building blocks of |bonobo| are **transformations** and **graphs**.
**Transformations** are simple python callables (like functions) that handle a transformation step for a line of data.
**Graphs** are a set of transformations, with directional links between them to define the data-flow that will happen
at runtime.
To inspect the graph of your first transformation (you must install graphviz first to do so), run:
.. code-block:: shell-session
$ bonobo inspect --graph tutorial.py | dot -Tpng -o tutorial.png
Open the generated `tutorial.png` file to have a quick look at the graph.
.. graphviz::
digraph {
rankdir = LR;
"BEGIN" [shape="point"];
"BEGIN" -> {0 [label="extract"]};
{0 [label="extract"]} -> {1 [label="transform"]};
{1 [label="transform"]} -> {2 [label="load"]};
}
You can easily understand here the structure of your graph. For such a simple graph, it's pretty much useless, but as
you'll write more complex transformations, it will be helpful.
Read the Code
:::::::::::::
Before we write our own job, let's look at the code we have in `tutorial.py`.
Import
------
.. code-block:: python
import bonobo
The highest level APIs of |bonobo| are all contained within the top level **bonobo** namespace.
If you're a beginner with the library, stick to using only those APIs (they also are the most stable APIs).
If you're an advanced user (and you'll be one quite soon), you can safely use second level APIs.
The third level APIs are considered private, and you should not use them unless you're hacking on |bonobo| directly.
Extract
-------
.. code-block:: python
def extract():
yield 'hello'
yield 'world'
This is a first transformation, written as a python generator, that will send some strings, one after the other, to its
output.
Transformations that take no input and yield a variable number of outputs are usually called **extractors**. You'll
encounter a few different types, either purely generating the data (like here), using an external service (a
database, for example) or using some filesystem (which is considered an external service too).
Extractors do not need to have their input connected to anything, and will be called exactly once when the graph is
executed.
Transform
---------
.. code-block:: python
def transform(*args):
yield tuple(
map(str.title, args)
)
This is a second transformation. It will get called a bunch of times, once for each input row it gets, and apply some
logic on the input to generate the output.
This is the most **generic** case. For each input row, you can generate zero, one or many lines of output.
Load
----
.. code-block:: python
def load(*args):
print(*args)
This is the third and last transformation in our "hello world" example. It will apply some logic to each row, and have
absolutely no output.
Transformations that take input and yield nothing are also called **loaders**. Like extractors, you'll encounter
different types, to work with various external systems.
Please note that as a convenience, and because the cost is marginal, most builtin `loaders` will send their
inputs to their output, so you can easily chain more than one loader, or apply more transformations after a given
loader was applied.
Graph Factory
-------------
.. code-block:: python
def get_graph(**options):
graph = bonobo.Graph()
graph.add_chain(extract, transform, load)
return graph
All our transformations were defined above, but nothing ties them together, for now.
This "graph factory" function is in charge of the creation and configuration of a :class:`bonobo.Graph` instance, that
will be executed later.
By no means is |bonobo| limited to simple graphs like this one. You can add as many chains as you want, and each chain
can contain as many nodes as you want.
Services Factory
----------------
.. code-block:: python
def get_services(**options):
return {}
This is the "services factory", that we'll use later to connect to external systems. Let's skip this one, for now.
(we'll dive into this topic in :doc:`4-services`)
Main Block
----------
.. code-block:: python
if __name__ == '__main__':
parser = bonobo.get_argument_parser()
with bonobo.parse_args(parser) as options:
bonobo.run(
get_graph(**options),
services=get_services(**options)
)
Here, the real thing happens.
Without diving into too much detail for now, using the :func:`bonobo.parse_args` context manager will allow our job to
be configurable, later, and although we don't really need it right now, it does no harm either.
Reading the output
::::::::::::::::::
Let's run this job once again:
.. code-block:: shell-session
$ python tutorial.py
Hello
World
- extract in=1 out=2 [done]
- transform in=2 out=2 [done]
- load in=2 [done]
The console output contains two things.
* First, it contains the real output of your job (what was :func:`print`-ed to `sys.stdout`).
* Second, it displays the execution status (on `sys.stderr`). Each line contains a "status" character, the node name,
numbers and a human readable status. This status will evolve in real time, and allows to understand a job's progress
while it's running.
* Status character:
* “ ” means that the node was not yet started.
* “`-`” means that the node finished its execution.
* “`+`” means that the node is currently running.
* “`!`” means that the node had problems running.
* Numerical statistics:
* “`in=...`” shows the input lines count, also known as the amount of calls to your transformation.
* “`out=...`” shows the output lines count.
* “`read=...`” shows the count of reads applied to an external system, if the transformation supports it.
* “`write=...`” shows the count of writes applied to an external system, if the transformation supports it.
* “`err=...`” shows the count of exceptions that happened while running the transformation. Note that an exception
will abort a call, but the execution will move to the next row.
Moving forward
::::::::::::::
That's all for this first step.
You now know:
* How to create a new job file.
* How to inspect the content of a job file.
* What should go in a job file.
* How to execute a job file.
* How to read the console output.
**Next: :doc:`2-jobs`**

12
docs/tutorial/2-jobs.rst Normal file
View File

@ -0,0 +1,12 @@
Part 2: Writing ETL Jobs
========================
Moving forward
::::::::::::::
You now know:
* How to ...
**Next: :doc:`3-files`**

12
docs/tutorial/3-files.rst Normal file
View File

@ -0,0 +1,12 @@
Part 3: Working with Files
==========================
Moving forward
::::::::::::::
You now know:
* How to ...
**Next: :doc:`4-services`**

View File

@ -0,0 +1,210 @@
Part 4: Services and Configurables
==================================
.. note::
This section lacks completeness, sorry for that (but you can still read it!).
In the last section, we used a few new tools.
Class-based transformations and configurables
:::::::::::::::::::::::::::::::::::::::::::::
Bonobo is a bit dumb. If something is callable, it considers it can be used as a transformation, and it's up to the
user to provide callables that logically fits in a graph.
You can use plain python objects with a `__call__()` method, and it will just work.
As a lot of transformations need common machinery, there are a few tools to quickly build transformations, most of
them requiring your class to subclass :class:`bonobo.config.Configurable`.
Configurables allow you to use the following features:
* You can add **Options** (using the :class:`bonobo.config.Option` descriptor). Options can be positional, or keyword
based, can have a default value and will be consumed from the constructor arguments.
.. code-block:: python
from bonobo.config import Configurable, Option
class PrefixIt(Configurable):
prefix = Option(str, positional=True, default='>>>')
def call(self, row):
return self.prefix + ' ' + row
prefixer = PrefixIt('$')
* You can add **Services** (using the :class:`bonobo.config.Service` descriptor). Services are a subclass of
:class:`bonobo.config.Option`, sharing the same basics, but specialized in the definition of "named services" that
will be resolved at runtime (i.e. for which we will provide an implementation at runtime). We'll dive more into that
in the next section.
.. code-block:: python
from bonobo.config import Configurable, Option, Service
class HttpGet(Configurable):
url = Option(default='https://jsonplaceholder.typicode.com/users')
http = Service('http.client')
def call(self, http):
resp = http.get(self.url)
for row in resp.json():
yield row
http_get = HttpGet()
* You can add **Methods** (using the :class:`bonobo.config.Method` descriptor). :class:`bonobo.config.Method` is a
subclass of :class:`bonobo.config.Option` that allows to pass callable parameters, either to the class constructor,
or using the class as a decorator.
.. code-block:: python
from bonobo.config import Configurable, Method
class Applier(Configurable):
apply = Method()
def call(self, row):
return self.apply(row)
@Applier
def Prefixer(self, row):
return 'Hello, ' + row
prefixer = Prefixer()
* You can add **ContextProcessors**, which are an advanced feature we won't introduce here. If you're familiar with
pytest, you can think of them as pytest fixtures, execution wise.
Services
::::::::
The motivation behind services is mostly separation of concerns, testability and deployability.
Usually, your transformations will depend on services (like a filesystem, an http client, a database, a rest api, ...).
Those services can very well be hardcoded in the transformations, but there are two main drawbacks:
* You won't be able to change the implementation depending on the current environment (development laptop versus
production servers, bug-hunting session versus execution, etc.)
* You won't be able to test your transformations without testing the associated services.
To overcome those caveats of hardcoding things, we define Services in the configurable, which are basically
string-options of the service names, and we provide an implementation at the last moment possible.
There are two ways of providing implementations:
* Either file-wide, by providing a `get_services()` function that returns a dict of named implementations (we did so
with filesystems in the previous step, :doc:`tut02`)
* Either directory-wide, by providing a `get_services()` function in a specially named `_services.py` file.
The first is simpler if you only have one transformation graph in one file, the second allows to group coherent
transformations together in a directory and share the implementations.
Let's see how to use it, starting from the previous service example:
.. code-block:: python
from bonobo.config import Configurable, Option, Service
class HttpGet(Configurable):
url = Option(default='https://jsonplaceholder.typicode.com/users')
http = Service('http.client')
def call(self, http):
resp = http.get(self.url)
for row in resp.json():
yield row
We defined an "http.client" service, that obviously should have a `get()` method, returning responses that have a
`json()` method.
Let's provide two implementations for that. The first one will be using `requests <http://docs.python-requests.org/>`_,
that coincidentally satisfies the described interface:
.. code-block:: python
import bonobo
import requests
def get_services():
return {
'http.client': requests
}
graph = bonobo.Graph(
HttpGet(),
print,
)
If you run this code, you should see some mock data returned by the webservice we called (assuming it's up and you can
reach it).
Now, the second implementation will replace that with a mock, used for testing purposes:
.. code-block:: python
class HttpResponseStub:
def json(self):
return [
{'id': 1, 'name': 'Leanne Graham', 'username': 'Bret', 'email': 'Sincere@april.biz', 'address': {'street': 'Kulas Light', 'suite': 'Apt. 556', 'city': 'Gwenborough', 'zipcode': '92998-3874', 'geo': {'lat': '-37.3159', 'lng': '81.1496'}}, 'phone': '1-770-736-8031 x56442', 'website': 'hildegard.org', 'company': {'name': 'Romaguera-Crona', 'catchPhrase': 'Multi-layered client-server neural-net', 'bs': 'harness real-time e-markets'}},
{'id': 2, 'name': 'Ervin Howell', 'username': 'Antonette', 'email': 'Shanna@melissa.tv', 'address': {'street': 'Victor Plains', 'suite': 'Suite 879', 'city': 'Wisokyburgh', 'zipcode': '90566-7771', 'geo': {'lat': '-43.9509', 'lng': '-34.4618'}}, 'phone': '010-692-6593 x09125', 'website': 'anastasia.net', 'company': {'name': 'Deckow-Crist', 'catchPhrase': 'Proactive didactic contingency', 'bs': 'synergize scalable supply-chains'}},
]
class HttpStub:
def get(self, url):
return HttpResponseStub()
def get_services():
return {
'http.client': HttpStub()
}
graph = bonobo.Graph(
HttpGet(),
print,
)
Because the `Graph` definition stays exactly the same, you can easily substitute the `_services.py` file depending on your
environment (the way you're doing this is out of bonobo scope and heavily depends on your usual way of managing
configuration files on different platforms).
Starting with bonobo 0.5 (not yet released), you will be able to use service injections with function-based
transformations too, using the `bonobo.config.requires` decorator to mark a dependency.
.. code-block:: python
from bonobo.config import requires
@requires('http.client')
def http_get(http):
resp = http.get('https://jsonplaceholder.typicode.com/users')
for row in resp.json():
yield row
Read more
:::::::::
* :doc:`/guide/services`
* :doc:`/reference/api_config`
Next
::::
:doc:`tut04`.
Moving forward
::::::::::::::
You now know:
* How to ...
**Next: :doc:`5-packaging`**

View File

@ -0,0 +1,11 @@
Part 5: Projects and Packaging
==============================
Moving forward
::::::::::::::
You now know:
* How to ...

3
docs/tutorial/django.rst Normal file
View File

@ -0,0 +1,3 @@
Working with Django
===================

View File

@ -17,47 +17,43 @@ Bonobo uses simple python and should be quick and easy to learn.
Tutorial
::::::::
.. note::
.. toctree::
:maxdepth: 1
Good documentation is not easy to write. We do our best to make it better and better.
1-init
2-jobs
3-files
4-services
5-packaging
Although all content here should be accurate, you may feel a lack of completeness, for which we plead guilty and
apologize.
If you're stuck, please come and ask on our `slack channel <https://bonobo-slack.herokuapp.com/>`_, we'll figure
something out.
If you're not stuck but had trouble understanding something, please consider contributing to the docs (via GitHub
pull requests).
More
::::
.. toctree::
:maxdepth: 2
tut01
tut02
tut03
tut04
:maxdepth: 1
django
notebooks
sqlalchemy
What's next?
::::::::::::
Read a few examples
-------------------
* :doc:`The Bonobo Guide <../guide/index>`
* :doc:`Extensions <../extension/index>`
* :doc:`../reference/examples`
Read about best development practices
-------------------------------------
We're there!
::::::::::::
* :doc:`../guide/index`
* :doc:`../guide/purity`
Good documentation is not easy to write.
Read about integrating external tools with bonobo
-------------------------------------------------
Although all content here should be accurate, you may feel a lack of completeness, for which we plead guilty and
apologize.
* :doc:`../extension/docker`: run transformation graphs in isolated containers.
* :doc:`../extension/jupyter`: run transformations within jupyter notebooks.
* :doc:`../extension/selenium`: crawl the web using a real browser and work with the gathered data.
* :doc:`../extension/sqlalchemy`: everything you need to interact with SQL databases.
If you're stuck, please come to the `Bonobo Slack Channel <https://bonobo-slack.herokuapp.com/>`_ and we'll figure it
out.
If you're not stuck but had trouble understanding something, please consider contributing to the docs (using GitHub
pull requests).

View File

@ -0,0 +1,4 @@
Working with Jupyter Notebooks
==============================

View File

@ -0,0 +1,4 @@
Working with SQL Databases
==========================

View File

@ -1,8 +1,7 @@
Let's get started!
==================
To begin with Bonobo, you need to install it in a working python 3.5+ environment, and you'll also need cookiecutter
to bootstrap your project.
To get started with Bonobo, you need to install it in a working python 3.5+ environment:
.. code-block:: shell-session
@ -14,21 +13,24 @@ See :doc:`/install` for more options.
Create an empty project
:::::::::::::::::::::::
Your ETL code will live in ETL projects, which are basically a bunch of files, including python code, that bonobo
can run.
Your ETL code will live in standard python files and packages.
.. code-block:: shell-session
$ bonobo init tutorial
$ bonobo create tutorial.py
This will create a `tutorial` directory (`content description here <https://www.bonobo-project.org/with/cookiecutter>`_).
This will create a simple example job in a `tutorial.py` file.
To run this project, use:
Now, try to execute it:
.. code-block:: shell-session
$ bonobo run tutorial
$ python tutorial.py
Congratulations, you just ran your first ETL job!
.. todo:: XXX **CHANGES NEEDED BELOW THIS POINT BEFORE 0.6** XXX
Write a first transformation
::::::::::::::::::::::::::::
@ -131,9 +133,9 @@ Rewrite it using builtins
There is a much simpler way to describe an equivalent graph:
.. literalinclude:: ../../bonobo/examples/tutorials/tut01e02.py
:language: python
:language: python
The `extract()` generator has been replaced by a list, as Bonobo will interpret non-callable iterables as a no-input
The `extract()` generator has been replaced by a list, as Bonobo will interpret non-callable iterables as a no-input
generator.
This example is also available in :mod:`bonobo.examples.tutorials.tut01e02`, and you can also run it as a module:

View File

@ -1,34 +1,26 @@
-e .[dev]
alabaster==0.7.10
arrow==0.10.0
babel==2.5.1
binaryornot==0.4.4
certifi==2017.7.27.1
chardet==3.0.4
click==6.7
cookiecutter==1.5.1
coverage==4.4.1
docutils==0.14
future==0.16.0
idna==2.6
imagesize==0.7.1
jinja2-time==0.2.0
jinja2==2.9.6
markupsafe==1.0
poyo==0.4.1
py==1.4.34
pygments==2.2.0
pytest-cov==2.5.1
pytest-sugar==0.8.0
pytest-timeout==1.2.0
pytest==3.2.3
python-dateutil==2.6.1
pytz==2017.2
pytz==2017.3
requests==2.18.4
six==1.11.0
snowballstemmer==1.2.1
sphinx==1.6.4
sphinx==1.6.5
sphinxcontrib-websupport==1.0.1
termcolor==1.1.0
urllib3==1.22
whichcraft==0.4.1
yapf==0.19.0

View File

@ -12,7 +12,7 @@ packaging==16.8
pbr==3.1.1
psutil==5.4.0
pyparsing==2.2.0
pytz==2017.2
pytz==2017.3
requests==2.18.4
six==1.11.0
stevedore==1.27.1

View File

@ -13,13 +13,13 @@ jinja2==2.9.6
jsonschema==2.6.0
jupyter-client==5.1.0
jupyter-console==5.2.0
jupyter-core==4.3.0
jupyter-core==4.4.0
jupyter==1.0.0
markupsafe==1.0
mistune==0.7.4
mistune==0.8
nbconvert==5.3.1
nbformat==4.4.0
notebook==5.2.0
notebook==5.2.1
pandocfilters==1.4.2
parso==0.1.0
pexpect==4.2.1
@ -28,7 +28,7 @@ prompt-toolkit==1.0.15
ptyprocess==0.5.2
pygments==2.2.0
python-dateutil==2.6.1
pyzmq==16.0.2
pyzmq==16.0.3
qtconsole==4.3.1
simplegeneric==0.8.1
six==1.11.0

View File

@ -0,0 +1,18 @@
-e .[sqlalchemy]
appdirs==1.4.3
bonobo-sqlalchemy==0.5.1
certifi==2017.7.27.1
chardet==3.0.4
colorama==0.3.9
fs==2.0.12
idna==2.6
packaging==16.8
pbr==3.1.1
psutil==5.4.0
pyparsing==2.2.0
pytz==2017.3
requests==2.18.4
six==1.11.0
sqlalchemy==1.1.15
stevedore==1.27.1
urllib3==1.22

View File

@ -2,17 +2,19 @@
appdirs==1.4.3
certifi==2017.7.27.1
chardet==3.0.4
click==6.7
colorama==0.3.9
fs==2.0.12
idna==2.6
jinja2==2.9.6
markupsafe==1.0
mondrian==0.4.0
packaging==16.8
pbr==3.1.1
psutil==5.4.0
pyparsing==2.2.0
python-dotenv==0.7.1
pytz==2017.2
pytz==2017.3
requests==2.18.4
six==1.11.0
stevedore==1.27.1
urllib3==1.22
whistle==1.0.0

View File

@ -53,23 +53,24 @@ setup(
packages=find_packages(exclude=['ez_setup', 'example', 'test']),
include_package_data=True,
install_requires=[
'colorama (>= 0.3, < 1.0)', 'fs (>= 2.0, < 3.0)', 'packaging (>= 16, < 17)', 'psutil (>= 5.2, < 6.0)',
'python-dotenv (>= 0.7.1, < 1.0)', 'requests (>= 2.0, < 3.0)', 'stevedore (>= 1.21, < 2.0)'
'colorama (>= 0.3)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'mondrian (>= 0.4, < 0.5)',
'packaging (>= 16, < 17)', 'psutil (>= 5.4, < 6.0)', 'requests (>= 2.0, < 3.0)', 'stevedore (>= 1.27, < 1.28)',
'whistle (>= 1.0, < 1.1)'
],
extras_require={
'dev': [
'cookiecutter (>= 1.5, < 1.6)', 'coverage (>= 4.4, < 5.0)', 'pytest (>= 3.1, < 4.0)',
'pytest-cov (>= 2.5, < 3.0)', 'pytest-sugar (>= 0.8, < 0.9)', 'pytest-timeout (>= 1, < 2)',
'sphinx (>= 1.6, < 2.0)'
'coverage (>= 4.4, < 5.0)', 'pytest (>= 3.1, < 4.0)', 'pytest-cov (>= 2.5, < 3.0)',
'pytest-sugar (>= 0.8, < 0.9)', 'pytest-timeout (>= 1, < 2)', 'sphinx (>= 1.6, < 2.0)', 'yapf'
],
'docker': ['bonobo-docker'],
'jupyter': ['ipywidgets (>= 6.0.0, < 7)', 'jupyter (>= 1.0, < 1.1)']
'docker': ['bonobo-docker (>= 0.5.0)'],
'jupyter': ['ipywidgets (>= 6.0.0, < 7)', 'jupyter (>= 1.0, < 1.1)'],
'sqlalchemy': ['bonobo-sqlalchemy (>= 0.5.1)']
},
entry_points={
'bonobo.commands': [
'convert = bonobo.commands.convert:register', 'init = bonobo.commands.init:register',
'inspect = bonobo.commands.inspect:register', 'run = bonobo.commands.run:register',
'version = bonobo.commands.version:register', 'download = bonobo.commands.download:register'
'convert = bonobo.commands.convert:ConvertCommand', 'init = bonobo.commands.init:InitCommand',
'inspect = bonobo.commands.inspect:InspectCommand', 'run = bonobo.commands.run:RunCommand',
'version = bonobo.commands.version:VersionCommand', 'download = bonobo.commands.download:DownloadCommand'
],
'console_scripts': ['bonobo = bonobo.commands:entrypoint']
},

View File

@ -0,0 +1,25 @@
import pkg_resources
from bonobo.util.testing import all_runners
def test_entrypoint():
    """Check that the core sub-commands are declared in the ``bonobo.commands`` entry point group."""
    # Index the declared entry points by name.
    registered = {entry.name: entry for entry in pkg_resources.iter_entry_points('bonobo.commands')}

    expected = {
        'convert',
        'init',
        'inspect',
        'run',
        'version',
    }

    # Every expected command must be registered; additional commands are tolerated.
    missing = expected - set(registered)
    assert not missing
@all_runners
def test_no_command(runner):
    """Invoking the CLI without a sub-command must end in ``SystemExit`` and report the missing argument."""
    _, err, exc = runner(catch_errors=True)
    # isinstance() is the idiomatic type check (and also accepts SystemExit subclasses).
    assert isinstance(exc, SystemExit)
    assert 'error: the following arguments are required: command' in err

View File

@ -0,0 +1,19 @@
import sys
import pytest
from bonobo.util.environ import change_working_directory
from bonobo.util.testing import all_runners
@pytest.mark.skipif(
    sys.version_info < (3, 6), reason="python 3.5 does not preserve kwargs order and this cant pass for now"
)
@all_runners
def test_convert(runner, tmpdir):
    """Convert a small CSV file to CSV and expect the output content to match the input."""
    expected = 'id;name\n1;Romain'
    source = tmpdir.join('in.csv')
    source.write(expected)

    # The command is given relative filenames, so it is run from within the temporary directory.
    with change_working_directory(tmpdir):
        runner('convert', 'in.csv', 'out.csv')

    converted = tmpdir.join('out.csv').read()
    assert converted.strip() == expected

View File

@ -0,0 +1,44 @@
import io
from unittest.mock import patch
import pytest
from bonobo.commands.download import EXAMPLES_BASE_URL
from bonobo.util.testing import all_runners
@all_runners
def test_download_works_for_examples(runner):
    """Run ``download`` for an example path with URL opening and file opening patched out.

    Checks that the URL requested is built from ``EXAMPLES_BASE_URL`` and that the bytes served by the
    fake response end up in the (in-memory) output file.
    """
    payload = b'hello world'

    class FakeResponse(object):
        """Minimal response stand-in: context manager with a status code and an ``iter_content`` method."""

        def __init__(self):
            self.status_code = 200

        def __enter__(self):
            return self

        def __exit__(self, *args, **kwargs):
            pass

        def iter_content(self, *args, **kwargs):
            return [payload]

    # In-memory sink; close() is replaced by a no-op, presumably so the buffer is still readable afterwards.
    sink = io.BytesIO()
    sink.close = lambda: None

    with patch('bonobo.commands.download._open_url') as open_url_mock:
        with patch('bonobo.commands.download.open') as open_mock:
            open_url_mock.return_value = FakeResponse()
            open_mock.return_value = sink
            runner('download', 'examples/datasets/coffeeshops.txt')

    open_url_mock.assert_called_once_with(EXAMPLES_BASE_URL + 'datasets/coffeeshops.txt')
    assert sink.getvalue() == payload
@all_runners
def test_download_fails_non_example(runner):
    """``download`` must raise ``ValueError`` when given this non-example path."""
    unsupported_path = 'something/entirely/different.txt'
    with pytest.raises(ValueError):
        runner('download', unsupported_path)

View File

@ -0,0 +1,29 @@
import os
import pytest
from bonobo.commands.init import InitCommand
from bonobo.util.testing import all_runners
@all_runners
def test_init_file(runner, tmpdir):
    """Create a job file with ``init``, then ``run`` it and expect "Hello World" with no error output."""
    filename = str(tmpdir.join('foo.py'))

    runner('init', filename)
    assert os.path.exists(filename)

    stdout, stderr = runner('run', filename)
    # Newlines are flattened to spaces before comparing.
    flattened = stdout.replace('\n', ' ').strip()
    assert flattened == 'Hello World'
    assert not stderr
@all_runners
@pytest.mark.parametrize('template', InitCommand.TEMPLATES)
def test_init_file_templates(runner, template, tmpdir):
    """Create a job file from each template in ``InitCommand.TEMPLATES`` and check it runs without error output."""
    target = tmpdir.join('foo.py')
    target_filename = str(target)
    # The parametrized ``template`` was previously never forwarded, so every case exercised the same
    # (default) template. Forward it explicitly.
    # NOTE(review): assumes the init command exposes its template choice as ``--template`` -- confirm
    # against InitCommand's argument definitions.
    runner('init', '--template', template, target_filename)
    assert os.path.exists(target_filename)
    out, err = runner('run', target_filename)
    assert not err

View File

@ -0,0 +1,48 @@
import os
from unittest.mock import patch
from bonobo import get_examples_path
from bonobo.util.testing import all_runners
@all_runners
def test_run(runner):
    """Run the ``types/strings.py`` example by file path and check the first three output lines."""
    stdout, _ = runner('run', '--quiet', get_examples_path('types/strings.py'))
    lines = stdout.split('\n')
    for index, prefix in enumerate(('Foo ', 'Bar ', 'Baz ')):
        assert lines[index].startswith(prefix)
@all_runners
def test_run_module(runner):
    """Run the strings example by module name (``-m``) and check the first three output lines."""
    stdout, _ = runner('run', '--quiet', '-m', 'bonobo.examples.types.strings')
    lines = stdout.split('\n')
    for index, prefix in enumerate(('Foo ', 'Bar ', 'Baz ')):
        assert lines[index].startswith(prefix)
@all_runners
def test_run_path(runner):
    """Run the ``types`` examples directory and check the first three output lines."""
    stdout, _ = runner('run', '--quiet', get_examples_path('types'))
    lines = stdout.split('\n')
    for index, prefix in enumerate(('Foo ', 'Bar ', 'Baz ')):
        assert lines[index].startswith(prefix)
@all_runners
def test_install_requirements_for_dir(runner):
    """``run --install <dir>`` must call the requirements installer once with ``<dir>/requirements.txt``."""
    examples_dir = get_examples_path('types')
    expected_requirements = os.path.join(examples_dir, 'requirements.txt')

    # The installer is patched out, so nothing is actually installed.
    with patch('bonobo.commands.run._install_requirements') as installer:
        runner('run', '--install', examples_dir)

    installer.assert_called_once_with(expected_requirements)
@all_runners
def test_install_requirements_for_file(runner):
    """``run --install <file>`` must call the installer once with the ``requirements.txt`` next to the file."""
    examples_dir = get_examples_path('types')
    job_file = os.path.join(examples_dir, 'strings.py')
    expected_requirements = os.path.join(examples_dir, 'requirements.txt')

    # The installer is patched out, so nothing is actually installed.
    with patch('bonobo.commands.run._install_requirements') as installer:
        runner('run', '--install', job_file)

    installer.assert_called_once_with(expected_requirements)

View File

@ -0,0 +1,109 @@
import pytest
from bonobo.util.testing import EnvironmentTestCase
@pytest.fixture
def env1(tmpdir):
    """Write the ``.env_one`` file (SECRET, PASSWORD and PATH entries) in ``tmpdir`` and return its path as str."""
    entries = (
        'SECRET=unknown',
        'PASSWORD=sweet',
        'PATH=first',
    )
    path = tmpdir.join('.env_one')
    path.write('\n'.join(entries))
    return str(path)
@pytest.fixture
def env2(tmpdir):
    """Write the ``.env_two`` file (PASSWORD and a quoted PATH entry) in ``tmpdir`` and return its path as str."""
    entries = (
        'PASSWORD=bitter',
        "PATH='second'",
    )
    path = tmpdir.join('.env_two')
    path.write('\n'.join(entries))
    return str(path)
class TestDefaultEnvFile(EnvironmentTestCase):
    """Expectations for values loaded through ``--default-env-file``."""

    def test_run_with_default_env_file(self, runner, target, env1):
        """SECRET and PASSWORD come from the file; PATH is expected to remain '/usr/bin'."""
        args = list(target) + ['--default-env-file', env1]
        result = self.run_environ(runner, *args)
        assert result.get('SECRET') == 'unknown'
        assert result.get('PASSWORD') == 'sweet'
        assert result.get('PATH') == '/usr/bin'

    def test_run_with_multiple_default_env_files(self, runner, target, env1, env2):
        """With two default env files, the PASSWORD of the file listed first is the one expected."""
        # env1 listed first: its PASSWORD is expected.
        args = list(target) + ['--default-env-file', env1, '--default-env-file', env2]
        result = self.run_environ(runner, *args)
        assert result.get('SECRET') == 'unknown'
        assert result.get('PASSWORD') == 'sweet'
        assert result.get('PATH') == '/usr/bin'

        # env2 listed first: its PASSWORD is expected.
        args = list(target) + ['--default-env-file', env2, '--default-env-file', env1]
        result = self.run_environ(runner, *args)
        assert result.get('SECRET') == 'unknown'
        assert result.get('PASSWORD') == 'bitter'
        assert result.get('PATH') == '/usr/bin'
class TestEnvFile(EnvironmentTestCase):
    """Expectations for values loaded through ``--env-file``."""

    def test_run_with_file(self, runner, target, env1):
        """All three entries of the file, PATH included, are expected in the resulting environment."""
        args = list(target) + ['--env-file', env1]
        result = self.run_environ(runner, *args)
        assert result.get('SECRET') == 'unknown'
        assert result.get('PASSWORD') == 'sweet'
        assert result.get('PATH') == 'first'

    def test_run_with_multiple_files(self, runner, target, env1, env2):
        """With two env files, the values of the file listed last are the ones expected."""
        # env2 listed last: its PASSWORD and PATH are expected.
        args = list(target) + ['--env-file', env1, '--env-file', env2]
        result = self.run_environ(runner, *args)
        assert result.get('SECRET') == 'unknown'
        assert result.get('PASSWORD') == 'bitter'
        assert result.get('PATH') == 'second'

        # env1 listed last: its PASSWORD and PATH are expected.
        args = list(target) + ['--env-file', env2, '--env-file', env1]
        result = self.run_environ(runner, *args)
        assert result.get('SECRET') == 'unknown'
        assert result.get('PASSWORD') == 'sweet'
        assert result.get('PATH') == 'first'
class TestEnvFileCombinations(EnvironmentTestCase):
    """Expectations when ``--default-env-file``, ``--env-file`` and ``--env`` are combined."""

    def test_run_with_both_env_files(self, runner, target, env1, env2):
        """Values from the ``--env-file`` file are expected over those of the ``--default-env-file`` file."""
        args = list(target) + ['--default-env-file', env1, '--env-file', env2]
        result = self.run_environ(runner, *args)
        assert result.get('SECRET') == 'unknown'
        assert result.get('PASSWORD') == 'bitter'
        assert result.get('PATH') == 'second'

    def test_run_with_both_env_files_then_overrides(self, runner, target, env1, env2):
        """Explicit ``--env`` values are expected over the values found in both kinds of env files."""
        file_options = ['--default-env-file', env1, '--env-file', env2]
        overrides = ['--env', 'PASSWORD=mine', '--env', 'SECRET=s3cr3t']
        args = list(target) + file_options + overrides
        result = self.run_environ(runner, *args)
        assert result.get('SECRET') == 's3cr3t'
        assert result.get('PASSWORD') == 'mine'
        assert result.get('PATH') == 'second'
class TestEnvVars(EnvironmentTestCase):
    """Expectations for ``--env`` / ``--default-env`` against variables passed through ``environ``."""

    def test_run_no_env(self, runner, target):
        """Without any option, the value given through ``environ`` is expected unchanged."""
        base_environ = {'USER': 'romain'}
        result = self.run_environ(runner, *target, environ=base_environ)
        assert result.get('USER') == 'romain'

    def test_run_env(self, runner, target):
        """``--env`` is expected to win over the value given through ``environ``."""
        args = list(target) + ['--env', 'USER=serious']
        result = self.run_environ(runner, *args, environ={'USER': 'romain'})
        assert result.get('USER') == 'serious'

    def test_run_env_mixed(self, runner, target):
        """Several ``--env`` values are added next to ``environ``; the double-quoted value is expected unquoted."""
        args = list(target) + ['--env', 'ONE=1', '--env', 'TWO="2"']
        result = self.run_environ(runner, *args, environ={'USER': 'romain'})
        assert result.get('USER') == 'romain'
        assert result.get('ONE') == '1'
        assert result.get('TWO') == '2'

    def test_run_default_env(self, runner, target):
        """``--default-env`` is expected only when nothing else sets the variable; ``--env`` beats both."""
        default_option = ['--default-env', 'USER=clown']

        # Variable not given through ``environ``: the default value is expected.
        result = self.run_environ(runner, *(list(target) + default_option))
        assert result.get('USER') == 'clown'

        # Variable given through ``environ``: that value is expected over the default.
        result = self.run_environ(runner, *(list(target) + default_option), environ={'USER': 'romain'})
        assert result.get('USER') == 'romain'

        # Explicit --env is expected over both the default and ``environ``.
        args = list(target) + ['--env', 'USER=serious'] + default_option
        result = self.run_environ(runner, *args, environ={'USER': 'romain'})
        assert result.get('USER') == 'serious'

View File

@ -0,0 +1,20 @@
from bonobo import __version__
from bonobo.util.testing import all_runners
@all_runners
def test_version(runner):
    """Check ``version`` output: prefixed with 'bonobo ' by default and with ``-q``, unprefixed with ``-qq``.

    The package version string must be present in the output in all three cases.
    """
    cases = (
        ((), True),
        (('-q', ), True),
        (('-qq', ), False),
    )
    for flags, expect_prefix in cases:
        stdout, _ = runner('version', *flags)
        text = stdout.strip()
        if expect_prefix:
            assert text.startswith('bonobo ')
        else:
            assert not text.startswith('bonobo ')
        assert __version__ in text

View File

@ -1,5 +1,10 @@
from unittest.mock import MagicMock
import pytest
from bonobo import Bag, Graph
from bonobo.strategies import NaiveStrategy
from bonobo.execution.contexts.node import NodeExecutionContext
from bonobo.execution.strategies import NaiveStrategy
from bonobo.util.testing import BufferingNodeExecutionContext, BufferingGraphExecutionContext
@ -179,3 +184,44 @@ def test_node_tuple_dict():
assert len(output) == 2
assert output[0] == ('foo', 'bar', {'id': 1})
assert output[1] == ('foo', 'baz', {'id': 2})
def test_node_lifecycle_natural():
    """Walk a node execution context through start then stop, checking its state flags at each step."""
    context = NodeExecutionContext(MagicMock())

    # Fresh context: no flag is set.
    assert not (context.started or context.stopped or context.killed or context.alive)

    # Stopping before starting is an error and leaves the flags untouched.
    with pytest.raises(RuntimeError):
        context.stop()
    assert not (context.started or context.stopped or context.killed or context.alive)

    # After start: started and alive only.
    context.start()
    assert context.started and context.alive
    assert not (context.stopped or context.killed)

    # After stop: started and stopped, no longer alive, never killed.
    context.stop()
    assert context.started and context.stopped
    assert not (context.alive or context.killed)
def test_node_lifecycle_with_kill():
    """Walk a node execution context through start, kill then stop, checking its state flags at each step."""
    context = NodeExecutionContext(MagicMock())

    # Fresh context: no flag is set.
    assert not (context.started or context.stopped or context.killed or context.alive)

    # Killing before starting is an error and leaves the flags untouched.
    with pytest.raises(RuntimeError):
        context.kill()
    assert not (context.started or context.stopped or context.killed or context.alive)

    # After start: started and alive only.
    context.start()
    assert context.started and context.alive
    assert not (context.stopped or context.killed)

    # After kill: flagged as killed but still alive and not yet stopped.
    context.kill()
    assert context.started and context.killed and context.alive
    assert not context.stopped

    # After stop: stopped and no longer alive; the killed flag stays set.
    context.stop()
    assert context.started and context.killed and context.stopped
    assert not context.alive

View File

@ -0,0 +1,18 @@
from unittest.mock import Mock
from bonobo.execution import events
def test_names():
    # This may look useless, but these names are becoming the plugin API: the point is to make sure nothing
    # changes here without being noticed.
    for suffix in ('start', 'started', 'tick', 'stop', 'stopped', 'kill'):
        constant = getattr(events, suffix.upper())
        assert constant == 'execution.' + suffix
def test_event_object():
    # Pins the event object's interface: the object given to the constructor must be exposed as ``.context``.
    context = Mock()
    event = events.ExecutionEvent(context)
    assert event.context is context

Some files were not shown because too many files have changed in this diff Show More