diff --git a/.travis.yml b/.travis.yml index 3eb10b2..27eebb7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ python: - 3.5-dev - 3.6 - 3.6-dev + - 3.7-dev - nightly install: - make install-dev diff --git a/MANIFEST.in b/MANIFEST.in index ab30e9a..4c2c662 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ include *.txt +include *.py-tpl diff --git a/Makefile b/Makefile index 50039c7..8ae38cd 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Generated by Medikit 0.4a5 on 2017-10-28. +# Generated by Medikit 0.4.1 on 2017-11-04. # All changes will be overriden. PACKAGE ?= bonobo diff --git a/Projectfile b/Projectfile index 6873522..ca6a0f4 100644 --- a/Projectfile +++ b/Projectfile @@ -29,36 +29,43 @@ python.setup( 'bonobo = bonobo.commands:entrypoint', ], 'bonobo.commands': [ - 'convert = bonobo.commands.convert:register', - 'init = bonobo.commands.init:register', - 'inspect = bonobo.commands.inspect:register', - 'run = bonobo.commands.run:register', - 'version = bonobo.commands.version:register', - 'download = bonobo.commands.download:register', + 'convert = bonobo.commands.convert:ConvertCommand', + 'init = bonobo.commands.init:InitCommand', + 'inspect = bonobo.commands.inspect:InspectCommand', + 'run = bonobo.commands.run:RunCommand', + 'version = bonobo.commands.version:VersionCommand', + 'download = bonobo.commands.download:DownloadCommand', ], } ) python.add_requirements( - 'colorama >=0.3,<1.0', - 'fs >=2.0,<3.0', + 'fs >=2.0,<2.1', + 'jinja2 >=2.9,<2.10', + 'mondrian >=0.4,<0.5', 'packaging >=16,<17', - 'psutil >=5.2,<6.0', + 'psutil >=5.4,<6.0', 'requests >=2.0,<3.0', - 'stevedore >=1.21,<2.0', - 'python-dotenv >=0.7.1,<1.0', + 'stevedore >=1.27,<1.28', + 'whistle >=1.0,<1.1', dev=[ - 'cookiecutter >=1.5,<1.6', 'pytest-sugar >=0.8,<0.9', 'pytest-timeout >=1,<2', ], docker=[ - 'bonobo-docker', + 'bonobo-docker >=0.5.0', ], jupyter=[ 'jupyter >=1.0,<1.1', 'ipywidgets >=6.0.0,<7', - ] + ], + sqlalchemy=[ + 'bonobo-sqlalchemy >=0.5.1', + 
], ) +# Following requirements are not enforced, because some dependencies enforce them so we don't want to break +# the packaging in case it changes in dep. +python.add_requirements('colorama >=0.3', ) + # vim: ft=python: diff --git a/bonobo/__init__.py b/bonobo/__init__.py index 3c15c18..0ac9bc3 100644 --- a/bonobo/__init__.py +++ b/bonobo/__init__.py @@ -9,6 +9,7 @@ import sys assert (sys.version_info >= (3, 5)), 'Python 3.5+ is required to use Bonobo.' + from bonobo._api import * from bonobo._api import __all__ from bonobo._version import __version__ diff --git a/bonobo/_api.py b/bonobo/_api.py index a2c3856..af92868 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,27 +1,43 @@ -import logging - +from bonobo.execution.strategies import create_strategy from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ PickleReader, PickleWriter, PrettyPrinter, RateLimited, Tee, arg0_to_kwargs, count, identity, kwargs_to_arg0, noop from bonobo.nodes import LdjsonReader, LdjsonWriter -from bonobo.strategies import create_strategy from bonobo.structs import Bag, ErrorBag, Graph, Token from bonobo.util import get_name +from bonobo.util.environ import parse_args, get_argument_parser __all__ = [] def register_api(x, __all__=__all__): + """Register a function as being part of Bonobo's API, then returns the original function.""" __all__.append(get_name(x)) return x +def register_graph_api(x, __all__=__all__): + """ + Register a function as being part of Bonobo's API, after checking that its signature contains the right parameters + to work correctly, then returns the original function. + """ + from inspect import signature + parameters = list(signature(x).parameters) + required_parameters = {'plugins', 'services', 'strategy'} + assert parameters[0] == 'graph', 'First parameter of a graph api function must be "graph".' 
+ assert required_parameters.intersection( + parameters) == required_parameters, 'Graph api functions must define the following parameters: ' + ', '.join( + sorted(required_parameters)) + + return register_api(x, __all__=__all__) + + def register_api_group(*args): for attr in args: register_api(attr) -@register_api -def run(graph, strategy=None, plugins=None, services=None): +@register_graph_api +def run(graph, *, plugins=None, services=None, strategy=None): """ Main entry point of bonobo. It takes a graph and creates all the necessary plumbery around to execute it. @@ -36,12 +52,11 @@ def run(graph, strategy=None, plugins=None, services=None): You'll probably want to provide a services dictionary mapping service names to service instances. :param Graph graph: The :class:`Graph` to execute. - :param str strategy: The :class:`bonobo.strategies.base.Strategy` to use. + :param str strategy: The :class:`bonobo.execution.strategies.base.Strategy` to use. :param list plugins: The list of plugins to enhance execution. :param dict services: The implementations of services this graph will use. :return bonobo.execution.graph.GraphExecutionContext: """ - strategy = create_strategy(strategy) plugins = plugins or [] @@ -50,7 +65,10 @@ def run(graph, strategy=None, plugins=None, services=None): if not settings.QUIET.get(): # pragma: no cover if _is_interactive_console(): - from bonobo.ext.console import ConsoleOutputPlugin + import mondrian + mondrian.setup(excepthook=True) + + from bonobo.plugins.console import ConsoleOutputPlugin if ConsoleOutputPlugin not in plugins: plugins.append(ConsoleOutputPlugin) @@ -58,6 +76,7 @@ def run(graph, strategy=None, plugins=None, services=None): try: from bonobo.ext.jupyter import JupyterOutputPlugin except ImportError: + import logging logging.warning( 'Failed to load jupyter widget. 
Easiest way is to install the optional "jupyter" ' 'dependencies with «pip install bonobo[jupyter]», but you can also install a specific ' @@ -67,13 +86,34 @@ def run(graph, strategy=None, plugins=None, services=None): if JupyterOutputPlugin not in plugins: plugins.append(JupyterOutputPlugin) + import logging + logging.getLogger().setLevel(settings.LOGGING_LEVEL.get()) + strategy = create_strategy(strategy) return strategy.execute(graph, plugins=plugins, services=services) -# bonobo.structs +def _inspect_as_graph(graph): + return graph._repr_dot_() + + +_inspect_formats = {'graph': _inspect_as_graph} + + +@register_graph_api +def inspect(graph, *, plugins=None, services=None, strategy=None, format): + if not format in _inspect_formats: + raise NotImplementedError( + 'Output format {} not implemented. Choices are: {}.'.format( + format, ', '.join(sorted(_inspect_formats.keys())) + ) + ) + print(_inspect_formats[format](graph)) + + +# data structures register_api_group(Bag, ErrorBag, Graph, Token) -# bonobo.strategies +# execution strategies register_api(create_strategy) @@ -102,7 +142,7 @@ def open_fs(fs_url=None, *args, **kwargs): return _open_fs(expanduser(str(fs_url)), *args, **kwargs) -# bonobo.nodes +# standard transformations register_api_group( CsvReader, CsvWriter, @@ -149,3 +189,6 @@ def get_examples_path(*pathsegments): @register_api def open_examples_fs(*pathsegments): return open_fs(get_examples_path(*pathsegments)) + + +register_api_group(get_argument_parser, parse_args) diff --git a/bonobo/_version.py b/bonobo/_version.py index 93b60a1..2724bac 100644 --- a/bonobo/_version.py +++ b/bonobo/_version.py @@ -1 +1 @@ -__version__ = '0.5.1' +__version__ = '0.6.dev0' diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index 4e183a3..a482b53 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -1,11 +1,23 @@ import argparse +import logging -from bonobo import logging, settings - -logger = logging.get_logger() +import 
mondrian +from bonobo import settings +from bonobo.commands.base import BaseCommand, BaseGraphCommand def entrypoint(args=None): + """ + Main callable for "bonobo" entrypoint. + + Will load commands from "bonobo.commands" entrypoints, using stevedore. + + """ + + mondrian.setup(excepthook=True) + logger = logging.getLogger() + logger.setLevel(settings.LOGGING_LEVEL.get()) + parser = argparse.ArgumentParser() parser.add_argument('--debug', '-D', action='store_true') @@ -17,7 +29,15 @@ def entrypoint(args=None): def register_extension(ext, commands=commands): try: parser = subparsers.add_parser(ext.name) - commands[ext.name] = ext.plugin(parser) + if isinstance(ext.plugin, type) and issubclass(ext.plugin, BaseCommand): + # current way, class based. + cmd = ext.plugin() + cmd.add_arguments(parser) + cmd.__name__ = ext.name + commands[ext.name] = cmd.handle + else: + # old school, function based. + commands[ext.name] = ext.plugin(parser) except Exception: logger.exception('Error while loading command {}.'.format(ext.name)) @@ -25,11 +45,17 @@ def entrypoint(args=None): mgr = ExtensionManager(namespace='bonobo.commands') mgr.map(register_extension) - args = parser.parse_args(args).__dict__ - if args.pop('debug', False): + parsed_args = parser.parse_args(args).__dict__ + + if parsed_args.pop('debug', False): settings.DEBUG.set(True) settings.LOGGING_LEVEL.set(logging.DEBUG) - logging.set_level(settings.LOGGING_LEVEL.get()) + logger.setLevel(settings.LOGGING_LEVEL.get()) - logger.debug('Command: ' + args['command'] + ' Arguments: ' + repr(args)) - commands[args.pop('command')](**args) + logger.debug('Command: ' + parsed_args['command'] + ' Arguments: ' + repr(parsed_args)) + + # Get command handler, execute, rince. 
+ command = commands[parsed_args.pop('command')] + command(**parsed_args) + + return 0 diff --git a/bonobo/commands/base.py b/bonobo/commands/base.py new file mode 100644 index 0000000..da2967f --- /dev/null +++ b/bonobo/commands/base.py @@ -0,0 +1,129 @@ +import argparse +import logging +import runpy +import sys +from contextlib import contextmanager + +import bonobo.util.environ +from bonobo.util import get_name +from bonobo.util.environ import get_argument_parser, parse_args + + +class BaseCommand: + """ + Base class for CLI commands. + + """ + + @property + def logger(self): + try: + return self._logger + except AttributeError: + self._logger = logging.getLogger(get_name(self)) + return self._logger + + def add_arguments(self, parser): + """ + Entry point for subclassed commands to add custom arguments. + """ + pass + + def handle(self, *args, **options): + """ + The actual logic of the command. Subclasses must implement this method. + """ + raise NotImplementedError('Subclasses of BaseCommand must provide a handle() method') + + +class BaseGraphCommand(BaseCommand): + """ + Base class for CLI commands that depends on a graph definition, either from a file or from a module. + + """ + required = True + handler = None + + def add_arguments(self, parser): + # target arguments (cannot provide both). + source_group = parser.add_mutually_exclusive_group(required=self.required) + source_group.add_argument('file', nargs='?', type=str) + source_group.add_argument('-m', dest='mod', type=str) + + # add arguments to enforce system environment. 
+ parser = get_argument_parser(parser) + + return parser + + def parse_options(self, **options): + return options + + def handle(self, file, mod, **options): + options = self.parse_options(**options) + with self.read(file, mod, **options) as (graph, graph_execution_options, options): + return self.do_handle(graph, **graph_execution_options, **options) + + def do_handle(self, graph, **options): + if not self.handler: + raise RuntimeError('{} has no handler defined.'.format(get_name(self))) + return self.handler(graph, **options) + + @contextmanager + def read(self, file, mod, **options): + _graph, _graph_execution_options = None, None + + def _record(graph, **graph_execution_options): + nonlocal _graph, _graph_execution_options + _graph, _graph_execution_options = graph, graph_execution_options + + with _override_runner(_record), parse_args(options) as options: + _argv = sys.argv + try: + if file: + sys.argv = [file] + self._run_path(file) + elif mod: + sys.argv = [mod] + self._run_module(mod) + else: + raise RuntimeError('No target provided.') + finally: + sys.argv = _argv + + if _graph is None: + raise RuntimeError('Could not find graph.') + + yield _graph, _graph_execution_options, options + + def _run_path(self, file): + return runpy.run_path(file, run_name='__main__') + + def _run_module(self, mod): + return runpy.run_module(mod, run_name='__main__') + + +@contextmanager +def _override_runner(runner): + """ + Context manager that monkey patches `bonobo.run` function with our current command logic. + + :param runner: the callable that will handle the `run()` logic. + """ + import bonobo + + _get_argument_parser = bonobo.util.environ.get_argument_parser + _run = bonobo.run + try: + # Original get_argument_parser would create or update an argument parser with environment options, but here we + # already had them parsed so let's patch with something that creates an empty one instead. 
+ def get_argument_parser(parser=None): + return parser or argparse.ArgumentParser() + + bonobo.util.environ.get_argument_parser = get_argument_parser + bonobo.run = runner + + yield runner + finally: + # Restore our saved values. + bonobo.util.environ.get_argument_parser = _get_argument_parser + bonobo.run = _run diff --git a/bonobo/commands/convert.py b/bonobo/commands/convert.py index e9039fd..198dce0 100644 --- a/bonobo/commands/convert.py +++ b/bonobo/commands/convert.py @@ -1,83 +1,84 @@ import bonobo +from bonobo.commands import BaseCommand from bonobo.registry import READER, WRITER, default_registry from bonobo.util.resolvers import _resolve_transformations, _resolve_options -def execute( - input_filename, - output_filename, - reader=None, - reader_option=None, - writer=None, - writer_option=None, - option=None, - transformation=None, -): - reader_factory = default_registry.get_reader_factory_for(input_filename, format=reader) - reader_options = _resolve_options((option or []) + (reader_option or [])) +class ConvertCommand(BaseCommand): + def add_arguments(self, parser): + parser.add_argument('input_filename', help='Input filename.') + parser.add_argument('output_filename', help='Output filename.') + parser.add_argument( + '--' + READER, + '-r', + help='Choose the reader factory if it cannot be detected from extension, or if detection is wrong.' + ) + parser.add_argument( + '--' + WRITER, + '-w', + help= + 'Choose the writer factory if it cannot be detected from extension, or if detection is wrong (use - for console pretty print).' + ) + parser.add_argument( + '--transformation', + '-t', + dest='transformation', + action='append', + help='Add a transformation between input and output (can be used multiple times, order is preserved).', + ) + parser.add_argument( + '--option', + '-O', + dest='option', + action='append', + help='Add a named option to both reader and writer factories (i.e. 
foo="bar").', + ) + parser.add_argument( + '--' + READER + '-option', + '-' + READER[0].upper(), + dest=READER + '_option', + action='append', + help='Add a named option to the reader factory.', + ) + parser.add_argument( + '--' + WRITER + '-option', + '-' + WRITER[0].upper(), + dest=WRITER + '_option', + action='append', + help='Add a named option to the writer factory.', + ) - if output_filename == '-': - writer_factory = bonobo.PrettyPrinter - else: - writer_factory = default_registry.get_writer_factory_for(output_filename, format=writer) - writer_options = _resolve_options((option or []) + (writer_option or [])) + def handle( + self, + input_filename, + output_filename, + reader=None, + reader_option=None, + writer=None, + writer_option=None, + option=None, + transformation=None + ): + reader_factory = default_registry.get_reader_factory_for(input_filename, format=reader) + reader_options = _resolve_options((option or []) + (reader_option or [])) - transformations = _resolve_transformations(transformation) + if output_filename == '-': + writer_factory = bonobo.PrettyPrinter + else: + writer_factory = default_registry.get_writer_factory_for(output_filename, format=writer) + writer_options = _resolve_options((option or []) + (writer_option or [])) - graph = bonobo.Graph() - graph.add_chain( - reader_factory(input_filename, **reader_options), - *transformations, - writer_factory(output_filename, **writer_options), - ) + transformations = _resolve_transformations(transformation) - return bonobo.run( - graph, services={ - 'fs': bonobo.open_fs(), - } - ) + graph = bonobo.Graph() + graph.add_chain( + reader_factory(input_filename, **reader_options), + *transformations, + writer_factory(output_filename, **writer_options), + ) - -def register(parser): - parser.add_argument('input-filename', help='Input filename.') - parser.add_argument('output-filename', help='Output filename.') - parser.add_argument( - '--' + READER, - '-r', - help='Choose the reader factory if it 
cannot be detected from extension, or if detection is wrong.' - ) - parser.add_argument( - '--' + WRITER, - '-w', - help= - 'Choose the writer factory if it cannot be detected from extension, or if detection is wrong (use - for console pretty print).' - ) - parser.add_argument( - '--transformation', - '-t', - dest='transformation', - action='append', - help='Add a transformation between input and output (can be used multiple times, order is preserved).', - ) - parser.add_argument( - '--option', - '-O', - dest='option', - action='append', - help='Add a named option to both reader and writer factories (i.e. foo="bar").', - ) - parser.add_argument( - '--' + READER + '-option', - '-' + READER[0].upper(), - dest=READER + '_option', - action='append', - help='Add a named option to the reader factory.', - ) - parser.add_argument( - '--' + WRITER + '-option', - '-' + WRITER[0].upper(), - dest=WRITER + '_option', - action='append', - help='Add a named option to the writer factory.', - ) - return execute + return bonobo.run( + graph, services={ + 'fs': bonobo.open_fs(), + } + ) diff --git a/bonobo/commands/download.py b/bonobo/commands/download.py index fd51951..96b1c2f 100644 --- a/bonobo/commands/download.py +++ b/bonobo/commands/download.py @@ -4,36 +4,30 @@ import re import requests import bonobo +from bonobo.commands import BaseCommand EXAMPLES_BASE_URL = 'https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/' """The URL to our git repository, in raw mode.""" -def _write_response(response, fout): - """Read the response and write it to the output stream in chunks.""" - for chunk in response.iter_content(io.DEFAULT_BUFFER_SIZE): - fout.write(chunk) +class DownloadCommand(BaseCommand): + def handle(self, *, path, **options): + if not path.startswith('examples'): + raise ValueError('Download command currently supports examples only') + examples_path = re.sub('^examples/', '', path) + output_path = bonobo.get_examples_path(examples_path) + with 
_open_url(EXAMPLES_BASE_URL + examples_path) as response, open(output_path, 'wb') as fout: + for chunk in response.iter_content(io.DEFAULT_BUFFER_SIZE): + fout.write(chunk) + self.logger.info('Download saved to {}'.format(output_path)) + + def add_arguments(self, parser): + parser.add_argument('path', help='The relative path of the thing to download.') def _open_url(url): """Open a HTTP connection to the URL and return a file-like object.""" response = requests.get(url, stream=True) if response.status_code != 200: - raise IOError('unable to download {}, HTTP {}'.format(url, response.status_code)) + raise IOError('Unable to download {}, HTTP {}'.format(url, response.status_code)) return response - - -def execute(path, *args, **kwargs): - path = path.lstrip('/') - if not path.startswith('examples'): - raise ValueError('download command currently supports examples only') - examples_path = re.sub('^examples/', '', path) - output_path = bonobo.get_examples_path(examples_path) - with _open_url(EXAMPLES_BASE_URL + examples_path) as response, open(output_path, 'wb') as fout: - _write_response(response, fout) - print('saved to {}'.format(output_path)) - - -def register(parser): - parser.add_argument('path', help='The relative path of the thing to download.') - return execute diff --git a/bonobo/commands/init.py b/bonobo/commands/init.py index e69156c..6d4b217 100644 --- a/bonobo/commands/init.py +++ b/bonobo/commands/init.py @@ -1,28 +1,74 @@ import os -def execute(name, branch): - try: - from cookiecutter.main import cookiecutter - except ImportError as exc: - raise ImportError( - 'You must install "cookiecutter" to use this command.\n\n $ pip install cookiecutter\n' - ) from exc +from jinja2 import Environment, FileSystemLoader - overwrite_if_exists = False - project_path = os.path.join(os.getcwd(), name) - if os.path.isdir(project_path) and not os.listdir(project_path): - overwrite_if_exists = True - - return cookiecutter( - 
'https://github.com/python-bonobo/cookiecutter-bonobo.git', - extra_context={'name': name}, - no_input=True, - checkout=branch, - overwrite_if_exists=overwrite_if_exists - ) +from bonobo.commands import BaseCommand -def register(parser): - parser.add_argument('name') - parser.add_argument('--branch', '-b', default='master') - return execute +class InitCommand(BaseCommand): + TEMPLATES = {'bare', 'default'} + TEMPLATES_PATH = os.path.join(os.path.dirname(__file__), 'templates') + + def add_arguments(self, parser): + parser.add_argument('filename') + parser.add_argument('--force', '-f', default=False, action='store_true') + + target_group = parser.add_mutually_exclusive_group(required=False) + target_group.add_argument('--template', '-t', choices=self.TEMPLATES, default='default') + target_group.add_argument('--package', '-p', action='store_true', default=False) + + def create_file_from_template(self, *, template, filename): + template_name = template + name, ext = os.path.splitext(filename) + if ext != '.py': + raise ValueError('Filenames should end with ".py".') + + loader = FileSystemLoader(self.TEMPLATES_PATH) + env = Environment(loader=loader) + template = env.get_template(template_name + '.py-tpl') + + with open(filename, 'w+') as f: + f.write(template.render(name=name)) + + self.logger.info('Generated {} using template {!r}.'.format(filename, template_name)) + + def create_package(self, *, filename): + name, ext = os.path.splitext(filename) + if ext != '': + raise ValueError('Package names should not have an extension.') + + try: + import medikit.commands + except ImportError as exc: + raise ImportError( + 'To initialize a package, you need to install medikit (pip install --upgrade medikit).' 
+ ) from exc + + package_name = os.path.basename(filename) + medikit.commands.handle_init( + os.path.join(os.getcwd(), filename, 'Projectfile'), name=package_name, requirements=['bonobo'] + ) + + self.logger.info('Generated "{}" package with medikit.'.format(package_name)) + self.create_file_from_template(template='default', filename=os.path.join(filename, package_name, '__main__.py')) + + print('Your "{}" package has been created.'.format(package_name)) + print() + print('Install it...') + print() + print(' pip install --editable {}'.format(filename)) + print() + print('Then maybe run the example...') + print() + print(' python -m {}'.format(package_name)) + print() + print('Enjoy!') + + def handle(self, *, template, filename, package=False, force=False): + if os.path.exists(filename) and not force: + raise FileExistsError('Target filename already exists, use --force to override.') + + if package: + self.create_package(filename=filename) + else: + self.create_file_from_template(template=template, filename=filename) diff --git a/bonobo/commands/inspect.py b/bonobo/commands/inspect.py index 1ab6b5b..0e6dcd4 100644 --- a/bonobo/commands/inspect.py +++ b/bonobo/commands/inspect.py @@ -1,40 +1,15 @@ -import json - -from bonobo.commands.run import read, register_generic_run_arguments -from bonobo.constants import BEGIN -from bonobo.util.objects import get_name - -OUTPUT_GRAPHVIZ = 'graphviz' +import bonobo +from bonobo.commands import BaseGraphCommand -def _ident(graph, i): - escaped_index = str(i) - escaped_name = json.dumps(get_name(graph[i])) - return '{{{} [label={}]}}'.format(escaped_index, escaped_name) +class InspectCommand(BaseGraphCommand): + handler = staticmethod(bonobo.inspect) + def add_arguments(self, parser): + super(InspectCommand, self).add_arguments(parser) + parser.add_argument('--graph', '-g', dest='format', action='store_const', const='graph') -def execute(*, output, **kwargs): - graph, plugins, services = read(**kwargs) - - if output == 
OUTPUT_GRAPHVIZ: - print('digraph {') - print(' rankdir = LR;') - print(' "BEGIN" [shape="point"];') - - for i in graph.outputs_of(BEGIN): - print(' "BEGIN" -> ' + _ident(graph, i) + ';') - - for ix in graph.topologically_sorted_indexes: - for iy in graph.outputs_of(ix): - print(' {} -> {};'.format(_ident(graph, ix), _ident(graph, iy))) - - print('}') - else: - raise NotImplementedError('Output type not implemented.') - - -def register(parser): - register_generic_run_arguments(parser) - parser.add_argument('--graph', '-g', dest='output', action='store_const', const=OUTPUT_GRAPHVIZ) - parser.set_defaults(output=OUTPUT_GRAPHVIZ) - return execute + def parse_options(self, **options): + if not options.get('format'): + raise RuntimeError('You must provide a format (try --graph).') + return options diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index be84d18..ce76bfc 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -1,38 +1,57 @@ -import codecs import os -import sys -from importlib.util import spec_from_file_location, module_from_spec -from pathlib import Path - -from dotenv import load_dotenv import bonobo -from bonobo.constants import DEFAULT_SERVICES_ATTR, DEFAULT_SERVICES_FILENAME - -DEFAULT_GRAPH_FILENAMES = ( - '__main__.py', - 'main.py', -) -DEFAULT_GRAPH_ATTR = 'get_graph' +from bonobo.commands import BaseGraphCommand -def get_default_services(filename, services=None): - dirname = os.path.dirname(filename) - services_filename = os.path.join(dirname, DEFAULT_SERVICES_FILENAME) - if os.path.exists(services_filename): - with open(services_filename) as file: - code = compile(file.read(), services_filename, 'exec') - context = { - '__name__': '__bonobo__', - '__file__': services_filename, - } - exec(code, context) +class RunCommand(BaseGraphCommand): + install = False + handler = staticmethod(bonobo.run) - return { - **context[DEFAULT_SERVICES_ATTR](), - **(services or {}), - } - return services or {} + def add_arguments(self, parser): 
+ super(RunCommand, self).add_arguments(parser) + + verbosity_group = parser.add_mutually_exclusive_group() + verbosity_group.add_argument('--quiet', '-q', action='store_true') + verbosity_group.add_argument('--verbose', '-v', action='store_true') + + parser.add_argument('--install', '-I', action='store_true') + + def parse_options(self, *, quiet=False, verbose=False, install=False, **options): + from bonobo import settings + settings.QUIET.set_if_true(quiet) + settings.DEBUG.set_if_true(verbose) + self.install = install + return options + + def _run_path(self, file): + # add install logic + if self.install: + if os.path.isdir(file): + requirements = os.path.join(file, 'requirements.txt') + else: + requirements = os.path.join(os.path.dirname(file), 'requirements.txt') + _install_requirements(requirements) + + return super()._run_path(file) + + def _run_module(self, mod): + # install not implemented for a module, not sure it even make sense. + if self.install: + raise RuntimeError('--install behaviour when running a module is not defined.') + + return super()._run_module(mod) + + +def register_generic_run_arguments(parser, required=True): + """ + Only there for backward compatibility with third party extensions. + TODO: This should be deprecated (using the @deprecated decorator) in 0.7, and removed in 0.8 or 0.9. 
+ """ + dummy_command = BaseGraphCommand() + dummy_command.required = required + dummy_command.add_arguments(parser) + return parser def _install_requirements(requirements): @@ -47,138 +66,3 @@ def _install_requirements(requirements): pip.utils.pkg_resources = importlib.reload(pip.utils.pkg_resources) import site importlib.reload(site) - - -def read( - filename, - module, - install=False, - quiet=False, - verbose=False, - default_env_file=None, - default_env=None, - env_file=None, - env=None -): - import runpy - from bonobo import Graph, settings - - if quiet: - settings.QUIET.set(True) - - if verbose: - settings.DEBUG.set(True) - - if filename: - if os.path.isdir(filename): - if install: - requirements = os.path.join(filename, 'requirements.txt') - _install_requirements(requirements) - - pathname = filename - for filename in DEFAULT_GRAPH_FILENAMES: - filename = os.path.join(pathname, filename) - if os.path.exists(filename): - break - if not os.path.exists(filename): - raise IOError('Could not find entrypoint (candidates: {}).'.format(', '.join(DEFAULT_GRAPH_FILENAMES))) - elif install: - requirements = os.path.join(os.path.dirname(filename), 'requirements.txt') - _install_requirements(requirements) - spec = spec_from_file_location('__bonobo__', filename) - main = sys.modules['__bonobo__'] = module_from_spec(spec) - main.__path__ = [os.path.dirname(filename)] - main.__package__ = '__bonobo__' - spec.loader.exec_module(main) - context = main.__dict__ - elif module: - context = runpy.run_module(module, run_name='__bonobo__') - filename = context['__file__'] - else: - raise RuntimeError('UNEXPECTED: argparse should not allow this.') - - env_dir = Path(filename).parent or Path(module).parent - if default_env_file: - for f in default_env_file: - env_file_path = str(env_dir.joinpath(f)) - load_dotenv(env_file_path) - if default_env: - for e in default_env: - set_env_var(e) - if env_file: - for f in env_file: - env_file_path = str(env_dir.joinpath(f)) - 
load_dotenv(env_file_path, override=True) - if env: - for e in env: - set_env_var(e, override=True) - - graphs = dict((k, v) for k, v in context.items() if isinstance(v, Graph)) - - assert len(graphs) == 1, ( - 'Having zero or more than one graph definition in one file is unsupported for now, ' - 'but it is something that will be implemented in the future.\n\nExpected: 1, got: {}.' - ).format(len(graphs)) - - graph = list(graphs.values())[0] - plugins = [] - services = get_default_services( - filename, context.get(DEFAULT_SERVICES_ATTR)() if DEFAULT_SERVICES_ATTR in context else None - ) - - return graph, plugins, services - - -def set_env_var(e, override=False): - __escape_decoder = codecs.getdecoder('unicode_escape') - ename, evalue = e.split('=', 1) - - def decode_escaped(escaped): - return __escape_decoder(escaped)[0] - - if len(evalue) > 0: - if evalue[0] == evalue[len(evalue) - 1] in ['"', "'"]: - evalue = decode_escaped(evalue[1:-1]) - - if override: - os.environ[ename] = evalue - else: - os.environ.setdefault(ename, evalue) - - -def execute( - filename, - module, - install=False, - quiet=False, - verbose=False, - default_env_file=None, - default_env=None, - env_file=None, - env=None -): - graph, plugins, services = read( - filename, module, install, quiet, verbose, default_env_file, default_env, env_file, env - ) - - return bonobo.run(graph, plugins=plugins, services=services) - - -def register_generic_run_arguments(parser, required=True): - source_group = parser.add_mutually_exclusive_group(required=required) - source_group.add_argument('filename', nargs='?', type=str) - source_group.add_argument('--module', '-m', type=str) - parser.add_argument('--default-env-file', action='append') - parser.add_argument('--default-env', action='append') - parser.add_argument('--env-file', action='append') - parser.add_argument('--env', '-e', action='append') - return parser - - -def register(parser): - parser = register_generic_run_arguments(parser) - verbosity_group = 
parser.add_mutually_exclusive_group() - verbosity_group.add_argument('--quiet', '-q', action='store_true') - verbosity_group.add_argument('--verbose', '-v', action='store_true') - parser.add_argument('--install', '-I', action='store_true') - return execute diff --git a/bonobo/commands/templates/bare.py-tpl b/bonobo/commands/templates/bare.py-tpl new file mode 100644 index 0000000..1ca3019 --- /dev/null +++ b/bonobo/commands/templates/bare.py-tpl @@ -0,0 +1,15 @@ +import bonobo + + +def get_graph(**options): + graph = bonobo.Graph() + return graph + + +def get_services(**options): + return {} + + +if __name__ == '__main__': + with bonobo.parse_args() as options: + bonobo.run(get_graph(**options), services=get_services(**options)) diff --git a/bonobo/commands/templates/default.py-tpl b/bonobo/commands/templates/default.py-tpl new file mode 100644 index 0000000..eaea053 --- /dev/null +++ b/bonobo/commands/templates/default.py-tpl @@ -0,0 +1,55 @@ +import bonobo + + +def extract(): + """Placeholder, change, rename, remove... """ + yield 'hello' + yield 'world' + + +def transform(*args): + """Placeholder, change, rename, remove... """ + yield tuple( + map(str.title, args) + ) + + +def load(*args): + """Placeholder, change, rename, remove... """ + print(*args) + + +def get_graph(**options): + """ + This function builds the graph that needs to be executed. + + :return: bonobo.Graph + + """ + graph = bonobo.Graph() + graph.add_chain(extract, transform, load) + + return graph + + +def get_services(**options): + """ + This function builds the services dictionary, which is a simple dict of names-to-implementation used by bonobo + for runtime injection. + + It will be used on top of the defaults provided by bonobo (fs, http, ...). You can override those defaults, or just + let the framework define them. You can also define your own services and naming is up to you. + + :return: dict + """ + return {} + + +# The __main__ block actually execute the graph. 
+if __name__ == '__main__': + parser = bonobo.get_argument_parser() + with bonobo.parse_args(parser) as options: + bonobo.run( + get_graph(**options), + services=get_services(**options) + ) diff --git a/bonobo/commands/version.py b/bonobo/commands/version.py index 6d4f3e7..3e3239a 100644 --- a/bonobo/commands/version.py +++ b/bonobo/commands/version.py @@ -1,4 +1,30 @@ -def format_version(mod, *, name=None, quiet=False): +from bonobo.commands import BaseCommand + + +class VersionCommand(BaseCommand): + def handle(self, *, all=False, quiet=False): + import bonobo + from bonobo.util.pkgs import bonobo_packages + + print(_format_version(bonobo, quiet=quiet)) + if all: + for name in sorted(bonobo_packages): + if name != 'bonobo': + try: + mod = __import__(name.replace('-', '_')) + try: + print(_format_version(mod, name=name, quiet=quiet)) + except Exception as exc: + print('{} ({})'.format(name, exc)) + except ImportError as exc: + print('{} is not importable ({}).'.format(name, exc)) + + def add_arguments(self, parser): + parser.add_argument('--all', '-a', action='store_true') + parser.add_argument('--quiet', '-q', action='count') + + +def _format_version(mod, *, name=None, quiet=False): from bonobo.util.pkgs import bonobo_packages args = { 'name': name or mod.__name__, @@ -14,27 +40,3 @@ def format_version(mod, *, name=None, quiet=False): return '{version}'.format(**args) raise RuntimeError('Hard to be so quiet...') - - -def execute(all=False, quiet=False): - import bonobo - from bonobo.util.pkgs import bonobo_packages - - print(format_version(bonobo, quiet=quiet)) - if all: - for name in sorted(bonobo_packages): - if name != 'bonobo': - try: - mod = __import__(name.replace('-', '_')) - try: - print(format_version(mod, name=name, quiet=quiet)) - except Exception as exc: - print('{} ({})'.format(name, exc)) - except ImportError as exc: - print('{} is not importable ({}).'.format(name, exc)) - - -def register(parser): - parser.add_argument('--all', '-a', 
action='store_true') - parser.add_argument('--quiet', '-q', action='count') - return execute diff --git a/bonobo/events.py b/bonobo/events.py deleted file mode 100644 index 9a0cbba..0000000 --- a/bonobo/events.py +++ /dev/null @@ -1,3 +0,0 @@ -ON_START = 'bonobo.on_start' -ON_TICK = 'bonobo.on_tick' -ON_STOP = 'bonobo.on_stop' diff --git a/bonobo/examples/__init__.py b/bonobo/examples/__init__.py index 49b1544..e69de29 100644 --- a/bonobo/examples/__init__.py +++ b/bonobo/examples/__init__.py @@ -1,23 +0,0 @@ -def require(package, requirement=None): - requirement = requirement or package - - try: - return __import__(package) - except ImportError: - from colorama import Fore, Style - print( - Fore.YELLOW, - 'This example requires the {!r} package. Install it using:'. - format(requirement), - Style.RESET_ALL, - sep='' - ) - print() - print( - Fore.YELLOW, - ' $ pip install {!s}'.format(requirement), - Style.RESET_ALL, - sep='' - ) - print() - raise diff --git a/bonobo/examples/clock.py b/bonobo/examples/clock.py new file mode 100644 index 0000000..1977cba --- /dev/null +++ b/bonobo/examples/clock.py @@ -0,0 +1,27 @@ +import bonobo +import datetime +import time + + +def extract(): + """Placeholder, change, rename, remove... 
""" + for x in range(60): + if x: + time.sleep(1) + yield datetime.datetime.now() + + +def get_graph(): + graph = bonobo.Graph() + graph.add_chain( + extract, + print, + ) + + return graph + + +if __name__ == '__main__': + parser = bonobo.get_argument_parser() + with bonobo.parse_args(parser): + bonobo.run(get_graph()) diff --git a/bonobo/examples/datasets/coffeeshops.py b/bonobo/examples/datasets/coffeeshops.py index dc3db52..fd754ef 100644 --- a/bonobo/examples/datasets/coffeeshops.py +++ b/bonobo/examples/datasets/coffeeshops.py @@ -14,7 +14,7 @@ Extracts a list of parisian bars where you can buy a coffee for a reasonable pri """ import bonobo -from bonobo.commands.run import get_default_services +from bonobo.commands import get_default_services from bonobo.ext.opendatasoft import OpenDataSoftAPI filename = 'coffeeshops.txt' diff --git a/bonobo/examples/datasets/fablabs.py b/bonobo/examples/datasets/fablabs.py index 986aea9..d03775b 100644 --- a/bonobo/examples/datasets/fablabs.py +++ b/bonobo/examples/datasets/fablabs.py @@ -19,7 +19,7 @@ import json from colorama import Fore, Style import bonobo -from bonobo.commands.run import get_default_services +from bonobo.commands import get_default_services from bonobo.ext.opendatasoft import OpenDataSoftAPI try: diff --git a/bonobo/examples/environ.py b/bonobo/examples/environ.py new file mode 100644 index 0000000..280d2e1 --- /dev/null +++ b/bonobo/examples/environ.py @@ -0,0 +1,27 @@ +""" +This transformation extracts the environment and prints it, sorted alphabetically, one item per line. + +Used in the bonobo tests around environment management. 
+ +""" +import os + +import bonobo + + +def extract_environ(): + """Yield all the system environment.""" + yield from sorted(os.environ.items()) + + +def get_graph(): + graph = bonobo.Graph() + graph.add_chain(extract_environ, print) + + return graph + + +if __name__ == '__main__': + parser = bonobo.get_argument_parser() + with bonobo.parse_args(parser): + bonobo.run(get_graph()) diff --git a/bonobo/examples/environment/env_files/.env_one b/bonobo/examples/environment/env_files/.env_one deleted file mode 100644 index 65f2b17..0000000 --- a/bonobo/examples/environment/env_files/.env_one +++ /dev/null @@ -1,3 +0,0 @@ -MY_SECRET=321 -TEST_USER_PASSWORD=sweetpassword -PATH=marzo \ No newline at end of file diff --git a/bonobo/examples/environment/env_files/.env_two b/bonobo/examples/environment/env_files/.env_two deleted file mode 100644 index 672d6d2..0000000 --- a/bonobo/examples/environment/env_files/.env_two +++ /dev/null @@ -1,2 +0,0 @@ -TEST_USER_PASSWORD=not_sweet_password -PATH='abril' \ No newline at end of file diff --git a/bonobo/examples/environment/env_files/get_passed_env_file.py b/bonobo/examples/environment/env_files/get_passed_env_file.py deleted file mode 100644 index bb45540..0000000 --- a/bonobo/examples/environment/env_files/get_passed_env_file.py +++ /dev/null @@ -1,23 +0,0 @@ -import os - -import bonobo - - -def extract(): - my_secret = os.getenv('MY_SECRET') - test_user_password = os.getenv('TEST_USER_PASSWORD') - path = os.getenv('PATH') - - yield my_secret - yield test_user_password - yield path - - -def load(s: str): - print(s) - - -graph = bonobo.Graph(extract, load) - -if __name__ == '__main__': - bonobo.run(graph) diff --git a/bonobo/examples/environment/env_vars/__init__.py b/bonobo/examples/environment/env_vars/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bonobo/examples/environment/env_vars/get_passed_env.py b/bonobo/examples/environment/env_vars/get_passed_env.py deleted file mode 100644 index 
e0c6c45..0000000 --- a/bonobo/examples/environment/env_vars/get_passed_env.py +++ /dev/null @@ -1,25 +0,0 @@ -import os - -import bonobo - - -def extract(): - env_test_user = os.getenv('ENV_TEST_USER', 'user') - env_test_number = os.getenv('ENV_TEST_NUMBER', 'number') - env_test_string = os.getenv('ENV_TEST_STRING', 'string') - env_user = os.getenv('USER') - - yield env_test_user - yield env_test_number - yield env_test_string - yield env_user - - -def load(s: str): - print(s) - - -graph = bonobo.Graph(extract, load) - -if __name__ == '__main__': - bonobo.run(graph) diff --git a/bonobo/examples/files/csv_handlers.py b/bonobo/examples/files/csv_handlers.py index 33412c3..555bc67 100644 --- a/bonobo/examples/files/csv_handlers.py +++ b/bonobo/examples/files/csv_handlers.py @@ -1,5 +1,5 @@ import bonobo -from bonobo.commands.run import get_default_services +from bonobo.commands import get_default_services graph = bonobo.Graph( bonobo.CsvReader('datasets/coffeeshops.txt', headers=('item', )), diff --git a/bonobo/examples/files/json_handlers.py b/bonobo/examples/files/json_handlers.py index 27dc38e..f1818cd 100644 --- a/bonobo/examples/files/json_handlers.py +++ b/bonobo/examples/files/json_handlers.py @@ -1,6 +1,6 @@ import bonobo from bonobo import Bag -from bonobo.commands.run import get_default_services +from bonobo.commands import get_default_services def get_fields(**row): diff --git a/bonobo/examples/files/pickle_handlers.py b/bonobo/examples/files/pickle_handlers.py index 71a2b9a..ed2ecd4 100644 --- a/bonobo/examples/files/pickle_handlers.py +++ b/bonobo/examples/files/pickle_handlers.py @@ -28,7 +28,7 @@ messages categorized as spam, and (3) prints the output. 
''' import bonobo -from bonobo.commands.run import get_default_services +from bonobo.commands import get_default_services from fs.tarfs import TarFS diff --git a/bonobo/examples/files/text_handlers.py b/bonobo/examples/files/text_handlers.py index 6ca6ef8..abbae1a 100644 --- a/bonobo/examples/files/text_handlers.py +++ b/bonobo/examples/files/text_handlers.py @@ -1,5 +1,5 @@ import bonobo -from bonobo.commands.run import get_default_services +from bonobo.commands import get_default_services def skip_comments(line): diff --git a/bonobo/examples/tutorials/tut02e02_write.py b/bonobo/examples/tutorials/tut02e02_write.py index c4b065d..a33a11b 100644 --- a/bonobo/examples/tutorials/tut02e02_write.py +++ b/bonobo/examples/tutorials/tut02e02_write.py @@ -8,9 +8,7 @@ def split_one(line): graph = bonobo.Graph( bonobo.FileReader('coffeeshops.txt'), split_one, - bonobo.JsonWriter( - 'coffeeshops.json', fs='fs.output' - ), + bonobo.JsonWriter('coffeeshops.json', fs='fs.output'), ) diff --git a/bonobo/examples/types/__main__.py b/bonobo/examples/types/__main__.py index 3d1549f..ccda1a9 100644 --- a/bonobo/examples/types/__main__.py +++ b/bonobo/examples/types/__main__.py @@ -1,3 +1,7 @@ -from bonobo.util.python import require +import bonobo +from bonobo.examples.types.strings import get_graph -graph = require('strings').graph +if __name__ == '__main__': + parser = bonobo.get_argument_parser() + with bonobo.parse_args(parser): + bonobo.run(get_graph()) diff --git a/bonobo/examples/types/strings.py b/bonobo/examples/types/strings.py index 1903151..2fa765f 100644 --- a/bonobo/examples/types/strings.py +++ b/bonobo/examples/types/strings.py @@ -14,7 +14,7 @@ Example on how to use symple python strings to communicate between transformatio """ from random import randint -from bonobo import Graph +import bonobo def extract(): @@ -31,9 +31,11 @@ def load(s: str): print(s) -graph = Graph(extract, transform, load) +def get_graph(): + return bonobo.Graph(extract, transform, load) + if 
__name__ == '__main__': - from bonobo import run - - run(graph) + parser = bonobo.get_argument_parser() + with bonobo.parse_args(parser): + bonobo.run(get_graph()) diff --git a/bonobo/execution/__init__.py b/bonobo/execution/__init__.py index b8a83dd..43ffbf3 100644 --- a/bonobo/execution/__init__.py +++ b/bonobo/execution/__init__.py @@ -1 +1,5 @@ -from bonobo.execution.graph import GraphExecutionContext, NodeExecutionContext, PluginExecutionContext +import logging + +logger = logging.getLogger(__name__) + +__all__ = [] diff --git a/bonobo/examples/environment/env_files/__init__.py b/bonobo/execution/contexts/__init__.py similarity index 100% rename from bonobo/examples/environment/env_files/__init__.py rename to bonobo/execution/contexts/__init__.py diff --git a/bonobo/execution/base.py b/bonobo/execution/contexts/base.py similarity index 58% rename from bonobo/execution/base.py rename to bonobo/execution/contexts/base.py index b9bce36..3ca580a 100644 --- a/bonobo/execution/base.py +++ b/bonobo/execution/contexts/base.py @@ -1,11 +1,13 @@ -import traceback +import logging +import sys from contextlib import contextmanager -from time import sleep +from logging import WARNING, ERROR +import mondrian from bonobo.config import create_container from bonobo.config.processors import ContextCurrifier +from bonobo.execution import logger from bonobo.util import isconfigurabletype -from bonobo.util.errors import print_error from bonobo.util.objects import Wrapper, get_name @@ -14,7 +16,7 @@ def recoverable(error_handler): try: yield except Exception as exc: # pylint: disable=broad-except - error_handler(exc, traceback.format_exc()) + error_handler(*sys.exc_info(), level=ERROR) @contextmanager @@ -22,13 +24,12 @@ def unrecoverable(error_handler): try: yield except Exception as exc: # pylint: disable=broad-except - error_handler(exc, traceback.format_exc()) + error_handler(*sys.exc_info(), level=ERROR) raise # raise unrecoverableerror from x ? 
class LoopingExecutionContext(Wrapper): - alive = True - PERIOD = 0.25 + PERIOD = 0.5 @property def started(self): @@ -38,6 +39,25 @@ class LoopingExecutionContext(Wrapper): def stopped(self): return self._stopped + @property + def defunct(self): + return self._defunct + + @property + def alive(self): + return self._started and not self._stopped + + @property + def status(self): + """One character status for this node. """ + if self._defunct: + return '!' + if not self.started: + return ' ' + if not self.stopped: + return '+' + return '-' + def __init__(self, wrapped, parent, services=None): super().__init__(wrapped) @@ -52,7 +72,7 @@ class LoopingExecutionContext(Wrapper): else: self.services = None - self._started, self._stopped = False, False + self._started, self._stopped, self._defunct = False, False, False self._stack = None def __enter__(self): @@ -68,21 +88,22 @@ class LoopingExecutionContext(Wrapper): self._started = True - self._stack = ContextCurrifier(self.wrapped, *self._get_initial_context()) - if isconfigurabletype(self.wrapped): - # Not normal to have a partially configured object here, so let's warn the user instead of having get into - # the hard trouble of understanding that by himself. - raise TypeError( - 'The Configurable should be fully instanciated by now, unfortunately I got a PartiallyConfigured object...' - ) - - self._stack.setup(self) + try: + self._stack = ContextCurrifier(self.wrapped, *self._get_initial_context()) + if isconfigurabletype(self.wrapped): + # Not normal to have a partially configured object here, so let's warn the user instead of having get into + # the hard trouble of understanding that by himself. + raise TypeError( + 'The Configurable should be fully instanciated by now, unfortunately I got a PartiallyConfigured object...' + ) + self._stack.setup(self) + except Exception: + return self.fatal(sys.exc_info()) def loop(self): """Generic loop. A bit boring. 
""" while self.alive: self.step() - sleep(self.PERIOD) def step(self): """Left as an exercise for the children.""" @@ -101,12 +122,17 @@ class LoopingExecutionContext(Wrapper): finally: self._stopped = True - def handle_error(self, exc, trace): - return print_error(exc, trace, context=self.wrapped) - def _get_initial_context(self): if self.parent: return self.parent.services.args_for(self.wrapped) if self.services: return self.services.args_for(self.wrapped) return () + + def handle_error(self, exctype, exc, tb, *, level=logging.ERROR): + logging.getLogger(__name__).log(level, repr(self), exc_info=(exctype, exc, tb)) + + def fatal(self, exc_info): + self._defunct = True + self.input.shutdown() + self.handle_error(*exc_info, level=logging.CRITICAL) diff --git a/bonobo/execution/graph.py b/bonobo/execution/contexts/graph.py similarity index 62% rename from bonobo/execution/graph.py rename to bonobo/execution/contexts/graph.py index 77e01fa..55dbf7e 100644 --- a/bonobo/execution/graph.py +++ b/bonobo/execution/contexts/graph.py @@ -1,16 +1,20 @@ -import time from functools import partial +from time import sleep from bonobo.config import create_container from bonobo.constants import BEGIN, END -from bonobo.execution.node import NodeExecutionContext -from bonobo.execution.plugin import PluginExecutionContext +from bonobo.execution import events +from bonobo.execution.contexts.node import NodeExecutionContext +from bonobo.execution.contexts.plugin import PluginExecutionContext +from whistle import EventDispatcher class GraphExecutionContext: NodeExecutionContextType = NodeExecutionContext PluginExecutionContextType = PluginExecutionContext + TICK_PERIOD = 0.25 + @property def started(self): return any(node.started for node in self.nodes) @@ -23,7 +27,8 @@ class GraphExecutionContext: def alive(self): return any(node.alive for node in self.nodes) - def __init__(self, graph, plugins=None, services=None): + def __init__(self, graph, plugins=None, services=None, 
dispatcher=None): + self.dispatcher = dispatcher or EventDispatcher() self.graph = graph self.nodes = [self.create_node_execution_context_for(node) for node in self.graph] self.plugins = [self.create_plugin_execution_context_for(plugin) for plugin in plugins or ()] @@ -53,6 +58,8 @@ class GraphExecutionContext: return self.NodeExecutionContextType(node, parent=self) def create_plugin_execution_context_for(self, plugin): + if isinstance(plugin, type): + plugin = plugin() return self.PluginExecutionContextType(plugin, parent=self) def write(self, *messages): @@ -63,23 +70,46 @@ class GraphExecutionContext: for message in messages: self[i].write(message) + def dispatch(self, name): + self.dispatcher.dispatch(name, events.ExecutionEvent(self)) + def start(self, starter=None): + self.register_plugins() + self.dispatch(events.START) + self.tick(pause=False) for node in self.nodes: if starter is None: node.start() else: starter(node) + self.dispatch(events.STARTED) - def start_plugins(self, starter=None): - for plugin in self.plugins: - if starter is None: - plugin.start() - else: - starter(plugin) + def tick(self, pause=True): + self.dispatch(events.TICK) + if pause: + sleep(self.TICK_PERIOD) + + def kill(self): + self.dispatch(events.KILL) + for node_context in self.nodes: + node_context.kill() + self.tick() def stop(self, stopper=None): - for node in self.nodes: + self.dispatch(events.STOP) + for node_context in self.nodes: if stopper is None: - node.stop() + node_context.stop() else: - stopper(node) + stopper(node_context) + self.tick(pause=False) + self.dispatch(events.STOPPED) + self.unregister_plugins() + + def register_plugins(self): + for plugin_context in self.plugins: + plugin_context.register() + + def unregister_plugins(self): + for plugin_context in self.plugins: + plugin_context.unregister() diff --git a/bonobo/execution/node.py b/bonobo/execution/contexts/node.py similarity index 61% rename from bonobo/execution/node.py rename to 
bonobo/execution/contexts/node.py index 6c52e7d..db2c39a 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/contexts/node.py @@ -1,17 +1,19 @@ -import traceback +import logging +import sys from queue import Empty from time import sleep from types import GeneratorType from bonobo.constants import NOT_MODIFIED, BEGIN, END from bonobo.errors import InactiveReadableError, UnrecoverableError -from bonobo.execution.base import LoopingExecutionContext +from bonobo.execution.contexts.base import LoopingExecutionContext from bonobo.structs.bags import Bag from bonobo.structs.inputs import Input from bonobo.structs.tokens import Token -from bonobo.util import get_name, iserrorbag, isloopbackbag, isbag +from bonobo.util import get_name, iserrorbag, isloopbackbag, isbag, istuple from bonobo.util.compat import deprecated_alias from bonobo.util.statistics import WithStatistics +from mondrian import term class NodeExecutionContext(WithStatistics, LoopingExecutionContext): @@ -20,13 +22,8 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): """ @property - def alive(self): - """todo check if this is right, and where it is used""" - return self._started and not self._stopped - - @property - def alive_str(self): - return '+' if self.alive else '-' + def killed(self): + return self._killed def __init__(self, wrapped, parent=None, services=None, _input=None, _outputs=None): LoopingExecutionContext.__init__(self, wrapped, parent=parent, services=services) @@ -34,13 +31,23 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): self.input = _input or Input() self.outputs = _outputs or [] + self._killed = False def __str__(self): - return self.alive_str + ' ' + self.__name__ + self.get_statistics_as_string(prefix=' ') + return self.__name__ + self.get_statistics_as_string(prefix=' ') def __repr__(self): name, type_name = get_name(self), get_name(type(self)) - return '<{}({}{}){}>'.format(type_name, self.alive_str, name, 
self.get_statistics_as_string(prefix=' ')) + return '<{}({}{}){}>'.format(type_name, self.status, name, self.get_statistics_as_string(prefix=' ')) + + def get_flags_as_string(self): + if self._defunct: + return term.red('[defunct]') + if self.killed: + return term.lightred('[killed]') + if self.stopped: + return term.lightblack('[done]') + return '' def write(self, *messages): """ @@ -89,23 +96,27 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): self.increment('in') return row + def should_loop(self): + return not any((self.defunct, self.killed)) + def loop(self): - while True: + while self.should_loop(): try: self.step() - except KeyboardInterrupt: - raise except InactiveReadableError: break except Empty: sleep(self.PERIOD) continue - except UnrecoverableError as exc: - self.handle_error(exc, traceback.format_exc()) + except UnrecoverableError: + self.handle_error(*sys.exc_info()) self.input.shutdown() break - except Exception as exc: # pylint: disable=broad-except - self.handle_error(exc, traceback.format_exc()) + except Exception: # pylint: disable=broad-except + self.handle_error(*sys.exc_info()) + except BaseException: + self.handle_error(*sys.exc_info()) + break def step(self): # Pull data from the first available input channel. @@ -117,6 +128,15 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): # todo add timer self.handle_results(input_bag, input_bag.apply(self._stack)) + def kill(self): + if not self.started: + raise RuntimeError('Cannot kill a node context that has not started yet.') + + if self.stopped: + raise RuntimeError('Cannot kill a node context that has already stopped.') + + self._killed = True + def handle_results(self, input_bag, results): # self._exec_time += timer.duration # Put data onto output channels @@ -124,6 +144,9 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): if isinstance(results, GeneratorType): while True: try: + # if kill flag was step, stop iterating. 
+ if self._killed: + break result = next(results) except StopIteration: break @@ -137,12 +160,47 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): pass -def _resolve(input_bag, output): - # NotModified means to send the input unmodified to output. - if output is NOT_MODIFIED: - return input_bag +def isflag(param): + return isinstance(param, Token) and param in (NOT_MODIFIED, ) + +def split_tokens(output): + """ + Split an output into token tuple, real output tuple. + + :param output: + :return: tuple, tuple + """ + if isinstance(output, Token): + # just a flag + return (output, ), () + + if not istuple(output): + # no flag + return (), (output, ) + + i = 0 + while isflag(output[i]): + i += 1 + + return output[:i], output[i:] + + +def _resolve(input_bag, output): + """ + This function is key to how bonobo works (and internal, too). It transforms a pair of input/output into what is the + real output. + + :param input_bag: Bag + :param output: mixed + :return: Bag + """ if isbag(output): return output - return Bag(output) + tokens, output = split_tokens(output) + + if len(tokens) == 1 and tokens[0] is NOT_MODIFIED: + return input_bag + + return output if isbag(output) else Bag(output) diff --git a/bonobo/execution/contexts/plugin.py b/bonobo/execution/contexts/plugin.py new file mode 100644 index 0000000..524c2e1 --- /dev/null +++ b/bonobo/execution/contexts/plugin.py @@ -0,0 +1,13 @@ +from bonobo.execution.contexts.base import LoopingExecutionContext + + +class PluginExecutionContext(LoopingExecutionContext): + @property + def dispatcher(self): + return self.parent.dispatcher + + def register(self): + return self.wrapped.register(self.dispatcher) + + def unregister(self): + return self.wrapped.unregister(self.dispatcher) diff --git a/bonobo/execution/events.py b/bonobo/execution/events.py new file mode 100644 index 0000000..3bf3986 --- /dev/null +++ b/bonobo/execution/events.py @@ -0,0 +1,13 @@ +from whistle import Event + +START = 
'execution.start' +STARTED = 'execution.started' +TICK = 'execution.tick' +STOP = 'execution.stop' +STOPPED = 'execution.stopped' +KILL = 'execution.kill' + + +class ExecutionEvent(Event): + def __init__(self, context): + self.context = context diff --git a/bonobo/execution/plugin.py b/bonobo/execution/plugin.py deleted file mode 100644 index 3379fc0..0000000 --- a/bonobo/execution/plugin.py +++ /dev/null @@ -1,26 +0,0 @@ -from bonobo.execution.base import LoopingExecutionContext, recoverable - - -class PluginExecutionContext(LoopingExecutionContext): - PERIOD = 0.5 - - def __init__(self, wrapped, parent): - # Instanciate plugin. This is not yet considered stable, as at some point we may need a way to configure - # plugins, for example if it depends on an external service. - super().__init__(wrapped(self), parent) - - def start(self): - super().start() - - with recoverable(self.handle_error): - self.wrapped.on_start() - - def shutdown(self): - if self.started: - with recoverable(self.handle_error): - self.wrapped.on_stop() - self.alive = False - - def step(self): - with recoverable(self.handle_error): - self.wrapped.on_tick() diff --git a/bonobo/strategies/__init__.py b/bonobo/execution/strategies/__init__.py similarity index 76% rename from bonobo/strategies/__init__.py rename to bonobo/execution/strategies/__init__.py index 1420da6..1c5d50a 100644 --- a/bonobo/strategies/__init__.py +++ b/bonobo/execution/strategies/__init__.py @@ -1,5 +1,5 @@ -from bonobo.strategies.executor import ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy -from bonobo.strategies.naive import NaiveStrategy +from bonobo.execution.strategies.executor import ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy +from bonobo.execution.strategies.naive import NaiveStrategy __all__ = [ 'create_strategy', @@ -21,8 +21,8 @@ def create_strategy(name=None): :param name: :return: Strategy """ - from bonobo.strategies.base import Strategy import logging + from 
bonobo.execution.strategies.base import Strategy if isinstance(name, Strategy): return name @@ -39,4 +39,4 @@ def create_strategy(name=None): 'Invalid strategy {}. Available choices: {}.'.format(repr(name), ', '.join(sorted(STRATEGIES.keys()))) ) from exc - return factory() \ No newline at end of file + return factory() diff --git a/bonobo/strategies/base.py b/bonobo/execution/strategies/base.py similarity index 89% rename from bonobo/strategies/base.py rename to bonobo/execution/strategies/base.py index 47f7db4..0a8d2a5 100644 --- a/bonobo/strategies/base.py +++ b/bonobo/execution/strategies/base.py @@ -1,4 +1,4 @@ -from bonobo.execution.graph import GraphExecutionContext +from bonobo.execution.contexts.graph import GraphExecutionContext class Strategy: diff --git a/bonobo/execution/strategies/executor.py b/bonobo/execution/strategies/executor.py new file mode 100644 index 0000000..ebbaef1 --- /dev/null +++ b/bonobo/execution/strategies/executor.py @@ -0,0 +1,77 @@ +import functools +import logging +import sys +from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor + +from bonobo.structs.bags import Bag +from bonobo.constants import BEGIN, END +from bonobo.execution.strategies.base import Strategy +from bonobo.util import get_name + + +class ExecutorStrategy(Strategy): + """ + Strategy based on a concurrent.futures.Executor subclass (or similar interface). + + """ + + executor_factory = Executor + + def create_executor(self): + return self.executor_factory() + + def execute(self, graph, **kwargs): + context = self.create_graph_execution_context(graph, **kwargs) + context.write(BEGIN, Bag(), END) + + futures = [] + + with self.create_executor() as executor: + try: + context.start(self.get_starter(executor, futures)) + except: + logging.getLogger(__name__ + ).warning('KeyboardInterrupt received. 
Trying to terminate the nodes gracefully.') + + while context.alive: + try: + context.tick() + except KeyboardInterrupt: + logging.getLogger(__name__).warning( + 'KeyboardInterrupt received. Trying to terminate the nodes gracefully.' + ) + context.kill() + break + + context.stop() + + return context + + def get_starter(self, executor, futures): + def starter(node): + @functools.wraps(node) + def _runner(): + try: + with node: + node.loop() + except: + logging.getLogger(__name__).critical( + 'Uncaught exception in node execution for {}.'.format(node), exc_info=True + ) + node.shutdown() + node.stop() + + try: + futures.append(executor.submit(_runner)) + except: + logging.getLogger(__name__).critical('futures.append', exc_info=sys.exc_info()) + + return starter + + +class ThreadPoolExecutorStrategy(ExecutorStrategy): + executor_factory = ThreadPoolExecutor + + +class ProcessPoolExecutorStrategy(ExecutorStrategy): + executor_factory = ProcessPoolExecutor diff --git a/bonobo/strategies/naive.py b/bonobo/execution/strategies/naive.py similarity index 91% rename from bonobo/strategies/naive.py rename to bonobo/execution/strategies/naive.py index 20477c1..bd581ff 100644 --- a/bonobo/strategies/naive.py +++ b/bonobo/execution/strategies/naive.py @@ -1,5 +1,5 @@ from bonobo.constants import BEGIN, END -from bonobo.strategies.base import Strategy +from bonobo.execution.strategies.base import Strategy from bonobo.structs.bags import Bag diff --git a/bonobo/ext/django.py b/bonobo/ext/django.py index 06f31a7..60b583c 100644 --- a/bonobo/ext/django.py +++ b/bonobo/ext/django.py @@ -5,8 +5,7 @@ from django.core.management.base import BaseCommand, OutputWrapper import bonobo import bonobo.util -from bonobo.commands.run import get_default_services -from bonobo.ext.console import ConsoleOutputPlugin +from bonobo.plugins.console import ConsoleOutputPlugin from bonobo.util.term import CLEAR_EOL diff --git a/bonobo/logging.py b/bonobo/logging.py deleted file mode 100644 index 
071fcd3..0000000 --- a/bonobo/logging.py +++ /dev/null @@ -1,86 +0,0 @@ -import logging -import sys -import textwrap -from logging import CRITICAL, DEBUG, ERROR, INFO, WARNING - -from colorama import Fore, Style - -from bonobo import settings -from bonobo.util.term import CLEAR_EOL - -iswindows = (sys.platform == 'win32') - - -def get_format(): - yield '{b}[%(fg)s%(levelname)s{b}][{w}' - yield '{b}][{w}'.join(('%(spent)04d', '%(name)s')) - yield '{b}]' - yield ' %(fg)s%(message)s{r}' - if not iswindows: - yield CLEAR_EOL - - -colors = { - 'b': '' if iswindows else Fore.BLACK, - 'w': '' if iswindows else Fore.LIGHTBLACK_EX, - 'r': '' if iswindows else Style.RESET_ALL, -} -format = (''.join(get_format())).format(**colors) - - -class Filter(logging.Filter): - def filter(self, record): - record.spent = record.relativeCreated // 1000 - if iswindows: - record.fg = '' - elif record.levelname == 'DEBG': - record.fg = Fore.LIGHTBLACK_EX - elif record.levelname == 'INFO': - record.fg = Fore.LIGHTWHITE_EX - elif record.levelname == 'WARN': - record.fg = Fore.LIGHTYELLOW_EX - elif record.levelname == 'ERR ': - record.fg = Fore.LIGHTRED_EX - elif record.levelname == 'CRIT': - record.fg = Fore.RED - else: - record.fg = Fore.LIGHTWHITE_EX - return True - - -class Formatter(logging.Formatter): - def formatException(self, ei): - tb = super().formatException(ei) - if iswindows: - return textwrap.indent(tb, ' | ') - else: - return textwrap.indent(tb, Fore.BLACK + ' | ' + Fore.WHITE) - - -def setup(level): - logging.addLevelName(DEBUG, 'DEBG') - logging.addLevelName(INFO, 'INFO') - logging.addLevelName(WARNING, 'WARN') - logging.addLevelName(ERROR, 'ERR ') - logging.addLevelName(CRITICAL, 'CRIT') - handler = logging.StreamHandler(sys.stderr) - handler.setFormatter(Formatter(format)) - handler.addFilter(Filter()) - root = logging.getLogger() - root.addHandler(handler) - root.setLevel(level) - - -def set_level(level): - logging.getLogger().setLevel(level) - - -def 
get_logger(name='bonobo'): - return logging.getLogger(name) - - -# Compatibility with python logging -getLogger = get_logger - -# Setup formating and level. -setup(level=settings.LOGGING_LEVEL.get()) diff --git a/bonobo/nodes/io/base.py b/bonobo/nodes/io/base.py index af9e609..db0bc80 100644 --- a/bonobo/nodes/io/base.py +++ b/bonobo/nodes/io/base.py @@ -1,4 +1,7 @@ +from fs.errors import ResourceNotFound + from bonobo.config import Configurable, ContextProcessor, Option, Service +from bonobo.errors import UnrecoverableError class FileHandler(Configurable): diff --git a/bonobo/nodes/io/json.py b/bonobo/nodes/io/json.py index 54e7b71..bbb89ad 100644 --- a/bonobo/nodes/io/json.py +++ b/bonobo/nodes/io/json.py @@ -53,12 +53,12 @@ class LdjsonReader(FileReader): def read(self, fs, file): for line in file: - print(line) yield self.loader(line) class LdjsonWriter(FileWriter): """Write a stream of JSON objects, one object per line.""" + def write(self, fs, file, lineno, **row): lineno += 1 # class-level variable file.write(json.dumps(row) + '\n') diff --git a/bonobo/plugins.py b/bonobo/plugins/__init__.py similarity index 67% rename from bonobo/plugins.py rename to bonobo/plugins/__init__.py index 7a0f5d1..897b687 100644 --- a/bonobo/plugins.py +++ b/bonobo/plugins/__init__.py @@ -10,5 +10,14 @@ class Plugin: """ - def __init__(self, context): - self.context = context + def register(self, dispatcher): + """ + :param dispatcher: whistle.EventDispatcher + """ + pass + + def unregister(self, dispatcher): + """ + :param dispatcher: whistle.EventDispatcher + """ + pass diff --git a/bonobo/ext/console.py b/bonobo/plugins/console.py similarity index 71% rename from bonobo/ext/console.py rename to bonobo/plugins/console.py index 0e6abb3..584244c 100644 --- a/bonobo/ext/console.py +++ b/bonobo/plugins/console.py @@ -2,14 +2,145 @@ import io import sys from contextlib import redirect_stdout, redirect_stderr -from colorama import Style, Fore, init - -init(wrap=True) +from colorama 
class ConsoleOutputPlugin(Plugin):
    """
    Outputs status information to the connected stdout. Can be a TTY, with or without support for colors/cursor
    movements, or a non tty (pipe, file, ...). The features are adapted to terminal capabilities.

    On Windows, we'll play a bit differently because we don't know how to manipulate cursor position. We'll only
    display stats at the very end, and there won't be this "buffering" logic we need to display both stats and stdout.

    .. attribute:: prefix

        String prefix of output lines.

    """

    # Standard outputs descriptors backup here, also used to override if needed.
    _stdout = sys.stdout
    _stderr = sys.stderr

    # When the plugin is instantiated, we'll set the real value of this.
    isatty = False

    # Whether we're on windows, or a real operating system.
    iswindows = (sys.platform == 'win32')

    def __init__(self):
        self.isatty = self._stdout.isatty()

    def register(self, dispatcher):
        """Subscribe to the execution events (start, tick, stopped) on ``dispatcher``."""
        dispatcher.add_listener(events.START, self.setup)
        dispatcher.add_listener(events.TICK, self.tick)
        dispatcher.add_listener(events.STOPPED, self.teardown)

    def unregister(self, dispatcher):
        """Remove the listeners added by :meth:`register`, in reverse order."""
        dispatcher.remove_listener(events.STOPPED, self.teardown)
        dispatcher.remove_listener(events.TICK, self.tick)
        dispatcher.remove_listener(events.START, self.setup)

    def setup(self, event):
        """Reset the per-run state and start redirecting stdout/stderr (to buffers, except on Windows)."""
        # TODO this wont work if one instance is registered with more than one context.
        # Two options:
        # - move state to context
        # - forbid registering more than once
        self.prefix = ''
        self.counter = 0
        self._append_cache = ''

        self.stdout = IOBuffer()
        self.redirect_stdout = redirect_stdout(self._stdout if self.iswindows else self.stdout)
        self.redirect_stdout.__enter__()

        self.stderr = IOBuffer()
        self.redirect_stderr = redirect_stderr(self._stderr if self.iswindows else self.stderr)
        self.redirect_stderr.__enter__()

    def tick(self, event):
        """Refresh the status display, only when attached to a non-Windows TTY."""
        if self.isatty and not self.iswindows:
            self._write(event.context, rewind=True)
        else:
            pass  # not a tty, or windows, so we'll ignore stats output

    def teardown(self, event):
        """Write the final status, then restore the original stdout/stderr."""
        try:
            self._write(event.context, rewind=False)
        finally:
            # Always undo the redirections started in setup(), even if the final write fails; otherwise the
            # process would keep writing to our buffers and nothing would reach the real streams anymore.
            self.redirect_stderr.__exit__(None, None, None)
            self.redirect_stdout.__exit__(None, None, None)

    def write(self, context, prefix='', rewind=True, append=None):
        """
        Flush buffered output, then print one status line per node of ``context`` on the real stderr.

        :param context: graph execution context; must support ``len()``, indexing and expose ``.graph``.
        :param prefix: string prepended to each node status line.
        :param rewind: if true, move the cursor back up afterwards so the next call overwrites this output.
        :param append: iterable of ``(key, value)`` pairs rendered as one extra line after the node lines.
        """
        t_cnt = len(context)

        if not self.iswindows:
            # NOTE(review): switch() presumably returns the text buffered so far and starts a fresh buffer;
            # IOBuffer is defined elsewhere in this module -- confirm there.
            for line in self.stdout.switch().split('\n')[:-1]:
                print(line + CLEAR_EOL, file=self._stdout)
            for line in self.stderr.switch().split('\n')[:-1]:
                print(line + CLEAR_EOL, file=self._stderr)

        alive_color = Style.BRIGHT
        dead_color = Style.BRIGHT + Fore.BLACK

        for i in context.graph.topologically_sorted_indexes:
            node = context[i]
            name_suffix = '({})'.format(i) if settings.DEBUG.get() else ''

            liveliness_color = alive_color if node.alive else dead_color
            liveliness_prefix = ' {}{}{} '.format(liveliness_color, node.status, Style.RESET_ALL)
            _line = ''.join(
                (
                    liveliness_prefix,
                    node.name,
                    name_suffix,
                    ' ',
                    node.get_statistics_as_string(),
                    ' ',
                    node.get_flags_as_string(),
                    Style.RESET_ALL,
                    ' ',
                )
            )
            print(prefix + _line + CLEAR_EOL, file=self._stderr)

        if append:
            # todo handle multiline
            print(
                ''.join(
                    (
                        ' `-> ', ' '.join('{}{}{}: {}'.format(Style.BRIGHT, k, Style.RESET_ALL, v) for k, v in append),
                        CLEAR_EOL
                    )
                ),
                file=self._stderr
            )
            # The extra line must be counted so that the rewind below goes up far enough.
            t_cnt += 1

        if rewind:
            print(CLEAR_EOL, file=self._stderr)
            print(MOVE_CURSOR_UP(t_cnt + 2), file=self._stderr)

    def _write(self, context, rewind):
        """Call :meth:`write`, adding a memory usage line when the PROFILE setting is on."""
        if settings.PROFILE.get():
            # Reuse the cached profiling line, recomputing it only when the counter is a multiple of ten
            # (or when nothing has been cached yet).
            if self.counter % 10 and self._append_cache:
                append = self._append_cache
            else:
                self._append_cache = append = (
                    ('Memory', '{0:.2f} Mb'.format(memory_usage())),
                    # ('Total time', '{0} s'.format(execution_time(harness))),
                )
        else:
            append = ()
        self.write(context, prefix=self.prefix, append=append, rewind=rewind)
        self.counter += 1
- iswindows = (sys.platform == 'win32') - - def on_start(self): - self.prefix = '' - self.counter = 0 - self._append_cache = '' - - self.isatty = self._stdout.isatty() - - self.stdout = IOBuffer() - self.redirect_stdout = redirect_stdout(self._stdout if self.iswindows else self.stdout) - self.redirect_stdout.__enter__() - - self.stderr = IOBuffer() - self.redirect_stderr = redirect_stderr(self._stderr if self.iswindows else self.stderr) - self.redirect_stderr.__enter__() - - def on_tick(self): - if self.isatty and not self.iswindows: - self._write(self.context.parent, rewind=True) - else: - pass # not a tty, or windows, so we'll ignore stats output - - def on_stop(self): - self._write(self.context.parent, rewind=False) - self.redirect_stderr.__exit__(None, None, None) - self.redirect_stdout.__exit__(None, None, None) - - def write(self, context, prefix='', rewind=True, append=None): - t_cnt = len(context) - - if not self.iswindows: - for line in self.stdout.switch().split('\n')[:-1]: - print(line + CLEAR_EOL, file=self._stdout) - for line in self.stderr.switch().split('\n')[:-1]: - print(line + CLEAR_EOL, file=self._stderr) - - alive_color = Style.BRIGHT - dead_color = Style.BRIGHT + Fore.BLACK - - for i in context.graph.topologically_sorted_indexes: - node = context[i] - name_suffix = '({})'.format(i) if settings.DEBUG.get() else '' - if node.alive: - _line = ''.join( - ( - ' ', - alive_color, - '+', - Style.RESET_ALL, - ' ', - node.name, - name_suffix, - ' ', - node.get_statistics_as_string(), - Style.RESET_ALL, - ' ', - ) - ) - else: - _line = ''.join( - ( - ' ', - dead_color, - '-', - ' ', - node.name, - name_suffix, - ' ', - node.get_statistics_as_string(), - Style.RESET_ALL, - ' ', - ) - ) - print(prefix + _line + CLEAR_EOL, file=self._stderr) - - if append: - # todo handle multiline - print( - ''.join( - ( - ' `-> ', ' '.join('{}{}{}: {}'.format(Style.BRIGHT, k, Style.RESET_ALL, v) for k, v in append), - CLEAR_EOL - ) - ), - file=self._stderr - ) - t_cnt += 
def set_if_true(self, value):
    """
    Switch this setting to ``True`` when ``value`` is truthy; do nothing otherwise.

    It may look odd, but the main usage is enforcing some settings from command line flags: a flag that was
    not given must not reset a setting that is already enabled.
    """
    if not value:
        return
    self.set(True)
- - """ - - executor_factory = Executor - - def create_executor(self): - return self.executor_factory() - - def execute(self, graph, **kwargs): - context = self.create_graph_execution_context(graph, **kwargs) - context.write(BEGIN, Bag(), END) - - executor = self.create_executor() - - futures = [] - - context.start_plugins(self.get_plugin_starter(executor, futures)) - context.start(self.get_starter(executor, futures)) - - while context.alive: - time.sleep(0.1) - - for plugin_context in context.plugins: - plugin_context.shutdown() - - context.stop() - - executor.shutdown() - - return context - - def get_starter(self, executor, futures): - def starter(node): - def _runner(): - try: - node.start() - except Exception as exc: - print_error(exc, traceback.format_exc(), context=node, method='start') - node.input.on_end() - else: - node.loop() - - try: - node.stop() - except Exception as exc: - print_error(exc, traceback.format_exc(), context=node, method='stop') - - futures.append(executor.submit(_runner)) - - return starter - - def get_plugin_starter(self, executor, futures): - def plugin_starter(plugin): - def _runner(): - with plugin: - try: - plugin.loop() - except Exception as exc: - print_error(exc, traceback.format_exc(), context=plugin) - - futures.append(executor.submit(_runner)) - - return plugin_starter - - -class ThreadPoolExecutorStrategy(ExecutorStrategy): - executor_factory = ThreadPoolExecutor - - -class ProcessPoolExecutorStrategy(ExecutorStrategy): - executor_factory = ProcessPoolExecutor diff --git a/bonobo/strategies/util.py b/bonobo/strategies/util.py deleted file mode 100644 index 8b13789..0000000 --- a/bonobo/strategies/util.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/bonobo/structs/graphs.py b/bonobo/structs/graphs.py index fe7c1df..e89b7e7 100644 --- a/bonobo/structs/graphs.py +++ b/bonobo/structs/graphs.py @@ -1,6 +1,8 @@ +import json from copy import copy from bonobo.constants import BEGIN +from bonobo.util import get_name class Graph: @@ 
def _repr_dot_(self):
    """
    Build the graphviz (DOT language) source describing this graph.

    The result declares a left-to-right digraph with a "BEGIN" point node, one edge from "BEGIN" to each of
    its outputs, then one edge per (node, output) pair, visiting nodes in topological order.
    """

    def node_id(index):
        return _get_graphviz_node_id(self, index)

    lines = ['digraph {', ' rankdir = LR;', ' "BEGIN" [shape="point"];']

    lines.extend(' "BEGIN" -> ' + node_id(target) + ';' for target in self.outputs_of(BEGIN))

    lines.extend(
        ' {} -> {};'.format(node_id(source), node_id(target))
        for source in self.topologically_sorted_indexes
        for target in self.outputs_of(source)
    )

    lines.append('}')

    return '\n'.join(lines)
from abc import ABCMeta, abstractmethod - from queue import Queue from bonobo.constants import BEGIN, END diff --git a/bonobo/util/__init__.py b/bonobo/util/__init__.py index 4ef136e..586fe3b 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -1,4 +1,4 @@ -from bonobo.util.collections import sortedlist, ensure_tuple +from bonobo.util.collections import ensure_tuple, sortedlist, tuplize from bonobo.util.compat import deprecated, deprecated_alias from bonobo.util.inspect import ( inspect_node, @@ -15,13 +15,13 @@ from bonobo.util.inspect import ( istype, ) from bonobo.util.objects import (get_name, get_attribute_or_create, ValueHolder) -from bonobo.util.python import require # Bonobo's util API __all__ = [ 'ValueHolder', 'deprecated', 'deprecated_alias', + 'ensure_tuple', 'get_attribute_or_create', 'get_name', 'inspect_node', @@ -35,5 +35,6 @@ __all__ = [ 'ismethod', 'isoption', 'istype', - 'require', + 'sortedlist', + 'tuplize', ] diff --git a/bonobo/util/collections.py b/bonobo/util/collections.py index d53a7da..31765c4 100644 --- a/bonobo/util/collections.py +++ b/bonobo/util/collections.py @@ -22,9 +22,9 @@ def ensure_tuple(tuple_or_mixed): def tuplize(generator): - """ Takes a generator and make it a tuple-returning function. As a side - effect, it can also decorate any iterator-returning function to force - return value to be a tuple. + """ + Decorates a generator and make it a tuple-returning function. As a side effect, it can also decorate any + iterator-returning function to force return value to be a tuple. 
import argparse
import codecs
import os
import re
import warnings
from contextlib import contextmanager

__escape_decoder = codecs.getdecoder('unicode_escape')
# Raw string, as the pattern contains backslashes that are not valid string escape sequences.
__posix_variable = re.compile(r'\$\{[^\}]*\}')


def parse_var(var):
    """
    Split a ``NAME=value`` string into a ``(name, value)`` tuple.

    If the value is wrapped in matching single or double quotes, the quotes are removed and backslash escapes
    within are decoded.

    :param var: string containing at least one ``=``
    :return: tuple(name, value)
    :raises ValueError: if ``var`` contains no ``=``
    """
    name, value = var.split('=', 1)

    def decode_escaped(escaped):
        return __escape_decoder(escaped)[0]

    # A lone quote character is a regular value, not an empty quoted string.
    if len(value) > 1:
        c = value[0]

        if c in ['"', "'"] and value[-1] == c:
            value = decode_escaped(value[1:-1])

    return name, value


def load_env_from_file(filename):
    """
    Read an env file into a collection of (name, value) tuples.

    Blank lines and lines starting with ``#`` are skipped. This is a generator, so errors are only raised once
    iteration starts.

    :param filename: path of the env file
    :raises FileNotFoundError: if the file does not exist
    :raises SyntaxError: if a meaningful line has no ``=``
    """
    if not os.path.exists(filename):
        raise FileNotFoundError('Environment file {} does not exist.'.format(filename))

    with open(filename) as f:
        for lineno, line in enumerate(f):
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            if '=' not in line:
                raise SyntaxError('Invalid environment file syntax in {} at line {}.'.format(filename, lineno + 1))

            name, value = parse_var(line)

            yield name, value


_parser = None


def get_argument_parser(parser=None):
    """
    Creates an argument parser with arguments to override the system environment.

    :api: bonobo.get_argument_parser

    :param parser: existing ArgumentParser to add the options to (a new one is created if omitted)
    :return: the parser, with the environment related options added
    """
    if parser is None:
        parser = argparse.ArgumentParser()

    # Store globally to be able to warn the user about the fact he's probably wrong not to pass a parser to
    # parse_args(), later.
    global _parser
    _parser = parser

    _parser.add_argument('--default-env-file', '-E', action='append')
    _parser.add_argument('--default-env', action='append')
    _parser.add_argument('--env-file', action='append')
    _parser.add_argument('--env', '-e', action='append')

    return _parser


@contextmanager
def parse_args(mixed=None):
    """
    Context manager to extract and apply environment related options from the provided argparser result.

    A dictionary with unknown options will be yielded, so the remaining options can be used by the caller. On
    exit, every environment variable that was touched is restored to its previous state.

    :api: bonobo.parse_args

    :param mixed: ArgumentParser instance, Namespace, or dict.
    :return: context manager yielding a dict of the options that are not environment related
    """

    if mixed is None:
        if _parser is not None:
            warnings.warn(
                'You are calling bonobo.parse_args() without a parser argument, but it looks like you created a parser before. You probably want to pass your parser to this call, or if creating a new parser here is really what you want to do, please create a new one explicitely to silence this warning.'
            )
        # use the api from bonobo namespace, in case a command patched it.
        import bonobo
        mixed = bonobo.get_argument_parser()

    if isinstance(mixed, argparse.ArgumentParser):
        options = mixed.parse_args()
    else:
        options = mixed

    if not isinstance(options, dict):
        options = options.__dict__

    # make a copy so we don't pollute our parent variables.
    options = dict(options)

    # storage for values before patch (None means "was not set").
    _backup = {}

    # Priority order: --env > --env-file > system > --default-env > --default-env-file
    #
    # * The code below is reading default-env before default-env-file as if the first sets something, default-env-file
    #   won't override it.
    # * Then, env-file is read from before env, as the behaviour will be the opposite (env will override a var even if
    #   env-file sets something.)
    try:
        # Set default environment
        for name, value in map(parse_var, options.pop('default_env', []) or []):
            if name not in os.environ:
                if name not in _backup:
                    _backup[name] = os.environ.get(name, None)
                os.environ[name] = value

        # Read and set default environment from file(s)
        for filename in options.pop('default_env_file', []) or []:
            for name, value in load_env_from_file(filename):
                if name not in os.environ:
                    if name not in _backup:
                        _backup[name] = os.environ.get(name, None)
                    os.environ[name] = value

        # Read and set environment from file(s)
        for filename in options.pop('env_file', []) or []:
            for name, value in load_env_from_file(filename):
                if name not in _backup:
                    _backup[name] = os.environ.get(name, None)
                os.environ[name] = value

        # Set environment
        for name, value in map(parse_var, options.pop('env', []) or []):
            if name not in _backup:
                _backup[name] = os.environ.get(name, None)
            os.environ[name] = value

        yield options
    finally:
        for name, value in _backup.items():
            if value is None:
                # The variable may already have been removed by the code running inside the context, so a
                # plain ``del`` could raise KeyError and mask the real outcome of the block.
                os.environ.pop(name, None)
            else:
                os.environ[name] = value


@contextmanager
def change_working_directory(path):
    """
    Context manager that changes the current working directory to ``path``, and goes back to the previous one
    on exit (even if an exception was raised).

    :param path: target directory (anything that ``str()`` turns into a path)
    """
    old_dir = os.getcwd()
    os.chdir(str(path))
    try:
        yield
    finally:
        os.chdir(old_dir)
- :return: to hell - """ - - from colorama import Fore, Style - - prefix = '{}{} | {}'.format(Fore.RED, Style.BRIGHT, Style.RESET_ALL) - - print( - Style.BRIGHT, - Fore.RED, - type(exc).__name__, - ' (in {}{})'.format(type(context).__name__, '.{}()'.format(method) if method else '') if context else '', - Style.RESET_ALL, - '\n', - indent(_get_error_message(exc), prefix + Style.BRIGHT), - Style.RESET_ALL, - sep='', - file=sys.stderr, - ) - print(prefix, file=sys.stderr) - print(indent(trace, prefix, predicate=lambda line: True), file=sys.stderr) diff --git a/bonobo/util/python.py b/bonobo/util/python.py deleted file mode 100644 index 8648f16..0000000 --- a/bonobo/util/python.py +++ /dev/null @@ -1,31 +0,0 @@ -import inspect -import os -import runpy - - -class _RequiredModule: - def __init__(self, dct): - self.__dict__ = dct - - -class _RequiredModulesRegistry(dict): - @property - def pathname(self): - return os.path.join(os.getcwd(), os.path.dirname(inspect.getfile(inspect.stack()[2][0]))) - - def require(self, name): - if name not in self: - bits = name.split('.') - filename = os.path.join(self.pathname, *bits[:-1], bits[-1] + '.py') - self[name] = _RequiredModule(runpy.run_path(filename, run_name=name)) - return self[name] - - -class WorkingDirectoryModulesRegistry(_RequiredModulesRegistry): - @property - def pathname(self): - return os.getcwd() - - -registry = _RequiredModulesRegistry() -require = registry.require diff --git a/bonobo/util/resolvers.py b/bonobo/util/resolvers.py index 0590fc7..60934d8 100644 --- a/bonobo/util/resolvers.py +++ b/bonobo/util/resolvers.py @@ -4,10 +4,29 @@ This package is considered private, and should only be used within bonobo. 
class _RequiredModule:
    """Exposes the globals dictionary of an executed file as object attributes."""

    def __init__(self, dct):
        self.__dict__ = dct


class _ModulesRegistry(dict):
    """Cache of files executed by dotted name, looked up from the current working directory."""

    @property
    def pathname(self):
        """Base directory used to resolve names (the working directory at the time of the call)."""
        return os.getcwd()

    def require(self, name):
        """
        Return the module-like object for dotted ``name``, running the matching ``.py`` file on first use.

        ``a.b.c`` resolves to ``<pathname>/a/b/c.py``.
        """
        if name in self:
            return self[name]

        *parents, leaf = name.split('.')
        filename = os.path.join(self.pathname, *parents, leaf + '.py')
        self[name] = _RequiredModule(runpy.run_path(filename, run_name=name))
        return self[name]
def runner(f):
    """
    Decorate ``f`` (a callable taking a list of arguments) into a test runner that captures its output.

    The wrapped function takes the arguments as positional parameters, calls ``f`` with them as a list while
    stdout and stderr are redirected to in-memory buffers, and returns ``(stdout, stderr)``. The return value
    of ``f`` itself is ignored.

    :param catch_errors: ``False`` (default) lets exceptions propagate; ``True`` catches any exception; an
        exception class (or tuple of classes) catches only matching exceptions. When an exception is caught,
        ``(stdout, stderr, exception)`` is returned instead.
    """

    @functools.wraps(f)
    def wrapped_runner(*args, catch_errors=False):
        with redirect_stdout(io.StringIO()) as stdout, redirect_stderr(io.StringIO()) as stderr:
            try:
                f(list(args))
            except BaseException as exc:
                if not catch_errors:
                    raise
                # catch_errors may be an exception class (or a tuple of classes), in which case only matching
                # exceptions are swallowed. Checking it with isinstance(..., BaseException) would never match
                # as it holds a class, not an instance.
                if isinstance(catch_errors, (type, tuple)) and not isinstance(exc, catch_errors):
                    raise
                return stdout.getvalue(), stderr.getvalue(), exc
        return stdout.getvalue(), stderr.getvalue()

    return wrapped_runner
@all_runners
@all_environ_targets
class EnvironmentTestCase():
    """Base class for tests running the environ example under a controlled ``os.environ``."""

    def run_quiet(self, runner, *args):
        """Invoke ``runner`` on the ``run --quiet`` command, followed by ``args``."""
        return runner('run', '--quiet', *args)

    def run_environ(self, runner, *args, environ=None):
        """
        Run the command with ``os.environ`` replaced by a minimal environment plus ``environ``.

        Returns the standard output parsed as a dict, each non-empty line being split on its first space.
        """
        patched_environ = {'PATH': '/usr/bin'}
        if environ:
            patched_environ.update(environ)

        with patch.dict('os.environ', patched_environ, clear=True):
            out, err = self.run_quiet(runner, *args)
            # Variables set by the command must not remain in the environment once it returned.
            assert 'SECRET' not in os.environ
            assert 'PASSWORD' not in os.environ
            if 'PATH' in patched_environ:
                assert 'PATH' in os.environ
                assert os.environ['PATH'] == patched_environ['PATH']

        assert err == ''
        return dict(line.split(' ', 1) for line in out.split('\n') if line)
- """ - - def __enter__(self): - self.__start = time.time() - - def __exit__(self, type=None, value=None, traceback=None): - # Error handling here - self.__finish = time.time() - - @property - def duration(self): - return self.__finish - self.__start - - def __str__(self): - return str(int(self.duration * 1000) / 1000.0) + 's' diff --git a/docs/_static/custom.css b/docs/_static/custom.css index f658da9..fa608d1 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,3 +1,19 @@ svg { border: 2px solid green -} \ No newline at end of file +} + +div.related { + width: 940px; + margin: 30px auto 0 auto; +} + +@media screen and (max-width: 875px) { + div.related { + visibility: hidden; + display: none; + } +} + +.brand { + font-family: 'Ubuntu', 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif; +} diff --git a/docs/_templates/base.html b/docs/_templates/base.html index f8ad58a..27ca438 100644 --- a/docs/_templates/base.html +++ b/docs/_templates/base.html @@ -4,17 +4,8 @@ {%- block extrahead %} {{ super() }} + {% endblock %} {%- block footer %} diff --git a/docs/changelog.rst b/docs/changelog.rst index a222414..66a5a05 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,9 @@ Changelog Unreleased :::::::::: +* Cookiecutter usage is removed. Linked to the fact that bonobo now use either a single file (up to you to get python + imports working as you want) or a regular fully fledged python package, we do not need it anymore. + New features ------------ diff --git a/docs/conf.py b/docs/conf.py index 93895a8..07d0424 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -186,3 +186,12 @@ epub_exclude_files = ['search.html'] # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} + +rst_epilog = """ +.. |bonobo| replace:: **Bonobo** + +.. 
|longversion| replace:: v.{version} + +""".format( + version = version, +) diff --git a/docs/extension/jupyter.rst b/docs/extension/jupyter.rst index 6c3385f..ed01d3b 100644 --- a/docs/extension/jupyter.rst +++ b/docs/extension/jupyter.rst @@ -4,8 +4,6 @@ Jupyter Extension There is a builtin plugin that integrates (somewhat minimallistically, for now) bonobo within jupyter notebooks, so you can read the execution status of a graph within a nice (ok, not so nice) html/javascript widget. -See https://github.com/jupyter-widgets/widget-cookiecutter for the base template used. - Installation :::::::::::: diff --git a/docs/install.rst b/docs/install.rst index c006c88..56f18ae 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -5,16 +5,18 @@ Installation Create an ETL project ::::::::::::::::::::: -Creating a project and starting to write code should take less than a minute: +Let's create a job. .. code-block:: shell-session - $ pip install --upgrade bonobo cookiecutter - $ bonobo init my-etl-project - $ bonobo run my-etl-project + $ pip install --upgrade bonobo + $ bonobo init my-etl.py + $ python my-etl.py -Once you bootstrapped a project, you can start editing the default example transformation by editing -`my-etl-project/main.py`. Now, you can head to :doc:`tutorial/index`. +This job only uses one python file, and you can run it using the python interpreter. For bigger jobs or jobs that +relate to multiple files, you should create a python package. + +Now, you can head to :doc:`tutorial/index`. Other installation options diff --git a/docs/reference/commands.rst b/docs/reference/commands.rst index 674d549..ade63be 100644 --- a/docs/reference/commands.rst +++ b/docs/reference/commands.rst @@ -16,16 +16,6 @@ Syntax: `bonobo convert [-r reader] input_filename [-w writer] output_filename` to read from csv and write to csv too (or other format) but adding a geocoder filter that would add some fields. 
-Bonobo Init -::::::::::: - -Create an empty project, ready to use bonobo. - -Syntax: `bonobo init` - -Requires `cookiecutter`. - - Bonobo Inspect :::::::::::::: diff --git a/docs/roadmap.rst b/docs/roadmap.rst deleted file mode 100644 index 182cf71..0000000 --- a/docs/roadmap.rst +++ /dev/null @@ -1,54 +0,0 @@ -Internal roadmap notes -====================== - -Things that should be thought about and/or implemented, but that I don't know where to store. - -Graph and node level plugins -:::::::::::::::::::::::::::: - - * Enhancers or node-level plugins - * Graph level plugins - * Documentation - -Command line interface and environment -:::::::::::::::::::::::::::::::::::::: - -* How do we manage environment ? .env ? -* How do we configure plugins ? - -Services and Processors -::::::::::::::::::::::: - -* ContextProcessors not clean (a bit better, but still not in love with the api) - -Next... -::::::: - -* Release process specialised for bonobo. With changelog production, etc. -* Document how to upgrade version, like, minor need change badges, etc. -* Windows console looks crappy. -* bonobo init --with sqlalchemy,docker; cookiecutter? -* logger, vebosity level - - -External libs that looks good -::::::::::::::::::::::::::::: - -* dask.distributed -* mediator (event dispatcher) - -Version 0.4 -::::::::::: - -* SQLAlchemy 101 - -Design decisions -:::::::::::::::: - -* initialize / finalize better than start / stop ? - -Minor stuff -::::::::::: - -* Should we include datasets in the repo or not? As they may change, grow, and even eventually have licenses we can't use, - it's probably best if we don't. \ No newline at end of file diff --git a/docs/tutorial/1-init.rst b/docs/tutorial/1-init.rst new file mode 100644 index 0000000..780d34d --- /dev/null +++ b/docs/tutorial/1-init.rst @@ -0,0 +1,258 @@ +Part 1: Let's get started! 
+========================== + +To get started with |bonobo|, you need to install it in a working python 3.5+ environment (you should use a +`virtualenv <https://virtualenv.pypa.io/>`_). + +.. code-block:: shell-session + + $ pip install bonobo + +Check that the installation worked, and that you're using a version that matches this tutorial (written for bonobo +|longversion|). + +.. code-block:: shell-session + + $ bonobo version + +See :doc:`/install` for more options. + + +Create an ETL job +::::::::::::::::: + +Since Bonobo 0.6, it's easy to bootstrap a simple ETL job using just one file. + +We'll start here, and the later stages of the tutorial will guide you toward refactoring this to a python package. + +.. code-block:: shell-session + + $ bonobo init tutorial.py + +This will create a simple job in a `tutorial.py` file. Let's run it: + +.. code-block:: shell-session + + $ python tutorial.py + Hello + World + - extract in=1 out=2 [done] + - transform in=2 out=2 [done] + - load in=2 [done] + +If you have a similar result, then congratulations! You just ran your first |bonobo| ETL job. + + +Inspect your graph +:::::::::::::::::: + +The basic building blocks of |bonobo| are **transformations** and **graphs**. + +**Transformations** are simple python callables (like functions) that handle a transformation step for a line of data. + +**Graphs** are a set of transformations, with directional links between them to define the data-flow that will happen +at runtime. + +To inspect the graph of your first transformation (you must install graphviz first to do so), run: + +.. code-block:: shell-session + + $ bonobo inspect --graph tutorial.py | dot -Tpng -o tutorial.png + +Open the generated `tutorial.png` file to have a quick look at the graph. + +.. 
graphviz:: + + digraph { + rankdir = LR; + "BEGIN" [shape="point"]; + "BEGIN" -> {0 [label="extract"]}; + {0 [label="extract"]} -> {1 [label="transform"]}; + {1 [label="transform"]} -> {2 [label="load"]}; + } + +You can easily understand here the structure of your graph. For such a simple graph, it's pretty much useless, but as +you'll write more complex transformations, it will be helpful. + + +Read the Code +::::::::::::: + +Before we write our own job, let's look at the code we have in `tutorial.py`. + + +Import +------ + +.. code-block:: python + + import bonobo + + +The highest level APIs of |bonobo| are all contained within the top level **bonobo** namespace. + +If you're a beginner with the library, stick to using only those APIs (they also are the most stable APIs). + +If you're an advanced user (and you'll be one quite soon), you can safely use second level APIs. + +The third level APIs are considered private, and you should not use them unless you're hacking on |bonobo| directly. + + +Extract +------- + +.. code-block:: python + + def extract(): + yield 'hello' + yield 'world' + +This is a first transformation, written as a python generator, that will send some strings, one after the other, to its +output. + +Transformations that take no input and yield a variable number of outputs are usually called **extractors**. You'll +encounter a few different types, either purely generating the data (like here), using an external service (a +database, for example) or using some filesystem (which is considered an external service too). + +Extractors do not need to have their input connected to anything, and will be called exactly once when the graph is +executed. + + +Transform +--------- + +.. code-block:: python + + def transform(*args): + yield tuple( + map(str.title, args) + ) + +This is a second transformation. It will get called a bunch of times, once for each input row it gets, and apply some +logic on the input to generate the output.
+ +This is the most **generic** case. For each input row, you can generate zero, one or many lines of +output. + + +Load +---- + +.. code-block:: python + + def load(*args): + print(*args) + +This is the third and last transformation in our "hello world" example. It will apply some logic to each row, and have +absolutely no output. + +Transformations that take input and yield nothing are also called **loaders**. Like extractors, you'll encounter +different types, to work with various external systems. + +Please note that as a convenience and because the cost is marginal, most builtin `loaders` will send their +inputs to their output, so you can easily chain more than one loader, or apply more transformations after a given +loader was applied. + + +Graph Factory +------------- + +.. code-block:: python + + def get_graph(**options): + graph = bonobo.Graph() + graph.add_chain(extract, transform, load) + return graph + +All our transformations were defined above, but nothing ties them together, for now. + +This "graph factory" function is in charge of the creation and configuration of a :class:`bonobo.Graph` instance, that +will be executed later. + +By no means is |bonobo| limited to simple graphs like this one. You can add as many chains as you want, and each chain +can contain as many nodes as you want. + + +Services Factory +---------------- + +.. code-block:: python + + def get_services(**options): + return {} + +This is the "services factory", that we'll use later to connect to external systems. Let's skip this one, for now. + +(we'll dive into this topic in :doc:`4-services`) + + +Main Block +---------- + +.. code-block:: python + + if __name__ == '__main__': + parser = bonobo.get_argument_parser() + with bonobo.parse_args(parser) as options: + bonobo.run( + get_graph(**options), + services=get_services(**options) + ) + +Here, the real thing happens.
+ +Without diving into too many details for now, using the :func:`bonobo.parse_args` context manager will allow our job to +be configurable, later, and although we don't really need it right now, it does no harm either. + +Reading the output +:::::::::::::::::: + +Let's run this job once again: + +.. code-block:: shell-session + + $ python tutorial.py + Hello + World + - extract in=1 out=2 [done] + - transform in=2 out=2 [done] + - load in=2 [done] + +The console output contains two things. + +* First, it contains the real output of your job (what was :func:`print`-ed to `sys.stdout`). +* Second, it displays the execution status (on `sys.stderr`). Each line contains a "status" character, the node name, + numbers and a human readable status. This status will evolve in real time, and lets you follow a job's progress + while it's running. + + * Status character: + + * “ ” means that the node was not yet started. + * “`-`” means that the node finished its execution. + * “`+`” means that the node is currently running. + * “`!`” means that the node had problems running. + + * Numerical statistics: + + * “`in=...`” shows the input lines count, also known as the number of calls to your transformation. + * “`out=...`” shows the output lines count. + * “`read=...`” shows the count of reads applied to an external system, if the transformation supports it. + * “`write=...`” shows the count of writes applied to an external system, if the transformation supports it. + * “`err=...`” shows the count of exceptions that happened while running the transformation. Note that an exception will abort + a call, but the execution will move to the next row. + + +Moving forward +:::::::::::::: + +That's all for this first step. + +You now know: + +* How to create a new job file. +* How to inspect the content of a job file. +* What should go in a job file. +* How to execute a job file. +* How to read the console output.
+ +**Next:** :doc:`2-jobs` diff --git a/docs/tutorial/2-jobs.rst b/docs/tutorial/2-jobs.rst new file mode 100644 index 0000000..c3a6c8b --- /dev/null +++ b/docs/tutorial/2-jobs.rst @@ -0,0 +1,12 @@ +Part 2: Writing ETL Jobs +======================== + + +Moving forward +:::::::::::::: + +You now know: + +* How to ... + +**Next:** :doc:`3-files` diff --git a/docs/tutorial/3-files.rst b/docs/tutorial/3-files.rst new file mode 100644 index 0000000..adcc334 --- /dev/null +++ b/docs/tutorial/3-files.rst @@ -0,0 +1,12 @@ +Part 3: Working with Files +========================== + + +Moving forward +:::::::::::::: + +You now know: + +* How to ... + +**Next:** :doc:`4-services` diff --git a/docs/tutorial/4-services.rst b/docs/tutorial/4-services.rst new file mode 100644 index 0000000..e39f15b --- /dev/null +++ b/docs/tutorial/4-services.rst @@ -0,0 +1,210 @@ +Part 4: Services and Configurables +================================== + +.. note:: + + This section lacks completeness, sorry for that (but you can still read it!). + +In the last section, we used a few new tools. + +Class-based transformations and configurables +::::::::::::::::::::::::::::::::::::::::::::: + +Bonobo is a bit dumb. If something is callable, it considers it can be used as a transformation, and it's up to the +user to provide callables that logically fit in a graph. + +You can use plain python objects with a `__call__()` method, and it will just work. + +As a lot of transformations need common machinery, there are a few tools to quickly build transformations, most of +them requiring your class to subclass :class:`bonobo.config.Configurable`. + +Configurables allow you to use the following features: + +* You can add **Options** (using the :class:`bonobo.config.Option` descriptor). Options can be positional, or keyword + based, can have a default value and will be consumed from the constructor arguments. + + ..
code-block:: python + + from bonobo.config import Configurable, Option + + class PrefixIt(Configurable): + prefix = Option(str, positional=True, default='>>>') + + def call(self, row): + return self.prefix + ' ' + row + + prefixer = PrefixIt('$') + +* You can add **Services** (using the :class:`bonobo.config.Service` descriptor). Services are a subclass of + :class:`bonobo.config.Option`, sharing the same basics, but specialized in the definition of "named services" that + will be resolved at runtime (a.k.a for which we will provide an implementation at runtime). We'll dive more into that + in the next section + + .. code-block:: python + + from bonobo.config import Configurable, Option, Service + + class HttpGet(Configurable): + url = Option(default='https://jsonplaceholder.typicode.com/users') + http = Service('http.client') + + def call(self, http): + resp = http.get(self.url) + + for row in resp.json(): + yield row + + http_get = HttpGet() + + +* You can add **Methods** (using the :class:`bonobo.config.Method` descriptor). :class:`bonobo.config.Method` is a + subclass of :class:`bonobo.config.Option` that allows to pass callable parameters, either to the class constructor, + or using the class as a decorator. + + .. code-block:: python + + from bonobo.config import Configurable, Method + + class Applier(Configurable): + apply = Method() + + def call(self, row): + return self.apply(row) + + @Applier + def Prefixer(self, row): + return 'Hello, ' + row + + prefixer = Prefixer() + +* You can add **ContextProcessors**, which are an advanced feature we won't introduce here. If you're familiar with + pytest, you can think of them as pytest fixtures, execution wise. + +Services +:::::::: + +The motivation behind services is mostly separation of concerns, testability and deployability. + +Usually, your transformations will depend on services (like a filesystem, an http client, a database, a rest api, ...). 
+Those services can very well be hardcoded in the transformations, but there are two main drawbacks: + +* You won't be able to change the implementation depending on the current environment (development laptop versus + production servers, bug-hunting session versus execution, etc.) +* You won't be able to test your transformations without testing the associated services. + +To overcome those caveats of hardcoding things, we define Services in the configurable, which are basically +string-options of the service names, and we provide an implementation at the last moment possible. + +There are two ways of providing implementations: + +* Either file-wide, by providing a `get_services()` function that returns a dict of named implementations (we did so + with filesystems in the previous step, :doc:`3-files`) +* Or directory-wide, by providing a `get_services()` function in a specially named `_services.py` file. + +The first is simpler if you only have one transformation graph in one file, the second lets you group coherent +transformations together in a directory and share the implementations. + +Let's see how to use it, starting from the previous service example: + +.. code-block:: python + + from bonobo.config import Configurable, Option, Service + + class HttpGet(Configurable): + url = Option(default='https://jsonplaceholder.typicode.com/users') + http = Service('http.client') + + def call(self, http): + resp = http.get(self.url) + + for row in resp.json(): + yield row + +We defined an "http.client" service, that obviously should have a `get()` method, returning responses that have a +`json()` method. + +Let's provide two implementations for that. The first one will be using `requests `_, +that coincidentally satisfies the described interface: + +..
code-block:: python + + import bonobo + import requests + + def get_services(): + return { + 'http.client': requests + } + + graph = bonobo.Graph( + HttpGet(), + print, + ) + +If you run this code, you should see some mock data returned by the webservice we called (assuming it's up and you can +reach it). + +Now, the second implementation will replace that with a mock, used for testing purposes: + +.. code-block:: python + + class HttpResponseStub: + def json(self): + return [ + {'id': 1, 'name': 'Leanne Graham', 'username': 'Bret', 'email': 'Sincere@april.biz', 'address': {'street': 'Kulas Light', 'suite': 'Apt. 556', 'city': 'Gwenborough', 'zipcode': '92998-3874', 'geo': {'lat': '-37.3159', 'lng': '81.1496'}}, 'phone': '1-770-736-8031 x56442', 'website': 'hildegard.org', 'company': {'name': 'Romaguera-Crona', 'catchPhrase': 'Multi-layered client-server neural-net', 'bs': 'harness real-time e-markets'}}, + {'id': 2, 'name': 'Ervin Howell', 'username': 'Antonette', 'email': 'Shanna@melissa.tv', 'address': {'street': 'Victor Plains', 'suite': 'Suite 879', 'city': 'Wisokyburgh', 'zipcode': '90566-7771', 'geo': {'lat': '-43.9509', 'lng': '-34.4618'}}, 'phone': '010-692-6593 x09125', 'website': 'anastasia.net', 'company': {'name': 'Deckow-Crist', 'catchPhrase': 'Proactive didactic contingency', 'bs': 'synergize scalable supply-chains'}}, + ] + + class HttpStub: + def get(self, url): + return HttpResponseStub() + + def get_services(): + return { + 'http.client': HttpStub() + } + + graph = bonobo.Graph( + HttpGet(), + print, + ) + +The `Graph` definition staying the exact same, you can easily substitute the `_services.py` file depending on your +environment (the way you're doing this is out of bonobo scope and heavily depends on your usual way of managing +configuration files on different platforms). 
+ +Starting with bonobo 0.5 (not yet released), you will be able to use service injections with function-based +transformations too, using the `bonobo.config.requires` decorator to mark a dependency. + +.. code-block:: python + + from bonobo.config import requires + + @requires('http.client') + def http_get(http): + resp = http.get('https://jsonplaceholder.typicode.com/users') + + for row in resp.json(): + yield row + + +Read more +::::::::: + +* :doc:`/guide/services` +* :doc:`/reference/api_config` + +Next +:::: + +:doc:`5-packaging`. + + +Moving forward +:::::::::::::: + +You now know: + +* How to ... + +**Next:** :doc:`5-packaging` diff --git a/docs/tutorial/5-packaging.rst b/docs/tutorial/5-packaging.rst new file mode 100644 index 0000000..bf4537b --- /dev/null +++ b/docs/tutorial/5-packaging.rst @@ -0,0 +1,11 @@ +Part 5: Projects and Packaging +============================== + + +Moving forward +:::::::::::::: + +You now know: + +* How to ... + diff --git a/docs/tutorial/django.rst b/docs/tutorial/django.rst new file mode 100644 index 0000000..1be4f52 --- /dev/null +++ b/docs/tutorial/django.rst @@ -0,0 +1,3 @@ +Working with Django +=================== + diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst index 4ba99c2..111c543 100644 --- a/docs/tutorial/index.rst +++ b/docs/tutorial/index.rst @@ -17,47 +17,43 @@ Bonobo uses simple python and should be quick and easy to learn. Tutorial :::::::: -.. note:: +.. toctree:: + :maxdepth: 1 - Good documentation is not easy to write. We do our best to make it better and better. + 1-init + 2-jobs + 3-files + 4-services + 5-packaging - Although all content here should be accurate, you may feel a lack of completeness, for which we plead guilty and - apologize. - - If you're stuck, please come and ask on our `slack channel `_, we'll figure - something out. - - If you're not stuck but had trouble understanding something, please consider contributing to the docs (via GitHub - pull requests). +More +:::: ..
toctree:: - :maxdepth: 2 - - tut01 - tut02 - tut03 - tut04 + :maxdepth: 1 + django + notebooks + sqlalchemy What's next? :::::::::::: -Read a few examples -------------------- +* :doc:`The Bonobo Guide <../guide/index>` +* :doc:`Extensions <../extension/index>` -* :doc:`../reference/examples` -Read about best development practices -------------------------------------- +We're there! +:::::::::::: -* :doc:`../guide/index` -* :doc:`../guide/purity` +Good documentation is not easy to write. -Read about integrating external tools with bonobo -------------------------------------------------- +Although all content here should be accurate, you may feel a lack of completeness, for which we plead guilty and +apologize. -* :doc:`../extension/docker`: run transformation graphs in isolated containers. -* :doc:`../extension/jupyter`: run transformations within jupyter notebooks. -* :doc:`../extension/selenium`: crawl the web using a real browser and work with the gathered data. -* :doc:`../extension/sqlalchemy`: everything you need to interract with SQL databases. +If you're stuck, please come to the `Bonobo Slack Channel `_ and we'll figure it +out. + +If you're not stuck but had trouble understanding something, please consider contributing to the docs (using GitHub +pull requests). diff --git a/docs/tutorial/notebooks.rst b/docs/tutorial/notebooks.rst new file mode 100644 index 0000000..ed59121 --- /dev/null +++ b/docs/tutorial/notebooks.rst @@ -0,0 +1,4 @@ +Working with Jupyter Notebooks +============================== + + diff --git a/docs/tutorial/sqlalchemy.rst b/docs/tutorial/sqlalchemy.rst new file mode 100644 index 0000000..359fbd5 --- /dev/null +++ b/docs/tutorial/sqlalchemy.rst @@ -0,0 +1,4 @@ +Working with SQL Databases +========================== + + diff --git a/docs/tutorial/tut01.rst b/docs/tutorial/tut01.rst index 3d6f9eb..836ddad 100644 --- a/docs/tutorial/tut01.rst +++ b/docs/tutorial/tut01.rst @@ -1,8 +1,7 @@ Let's get started! 
================== -To begin with Bonobo, you need to install it in a working python 3.5+ environment, and you'll also need cookiecutter -to bootstrap your project. +To get started with Bonobo, you need to install it in a working python 3.5+ environment: .. code-block:: shell-session @@ -14,21 +13,24 @@ See :doc:`/install` for more options. Create an empty project ::::::::::::::::::::::: -Your ETL code will live in ETL projects, which are basically a bunch of files, including python code, that bonobo -can run. +Your ETL code will live in standard python files and packages. .. code-block:: shell-session - $ bonobo init tutorial + $ bonobo create tutorial.py -This will create a `tutorial` directory (`content description here `_). +This will create a simple example job in a `tutorial.py` file. -To run this project, use: +Now, try to execute it: .. code-block:: shell-session - $ bonobo run tutorial + $ python tutorial.py +Congratulations, you just ran your first ETL job! + + +.. todo:: XXX **CHANGES NEEDED BELOW THIS POINTS BEFORE 0.6** XXX Write a first transformation :::::::::::::::::::::::::::: @@ -131,9 +133,9 @@ Rewrite it using builtins There is a much simpler way to describe an equivalent graph: .. literalinclude:: ../../bonobo/examples/tutorials/tut01e02.py - :language: python +:language: python -The `extract()` generator has been replaced by a list, as Bonobo will interpret non-callable iterables as a no-input + The `extract()` generator has been replaced by a list, as Bonobo will interpret non-callable iterables as a no-input generator. 
This example is also available in :mod:`bonobo.examples.tutorials.tut01e02`, and you can also run it as a module: diff --git a/requirements-dev.txt b/requirements-dev.txt index 4e005a7..8000471 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,34 +1,26 @@ -e .[dev] alabaster==0.7.10 -arrow==0.10.0 babel==2.5.1 -binaryornot==0.4.4 certifi==2017.7.27.1 chardet==3.0.4 -click==6.7 -cookiecutter==1.5.1 coverage==4.4.1 docutils==0.14 -future==0.16.0 idna==2.6 imagesize==0.7.1 -jinja2-time==0.2.0 jinja2==2.9.6 markupsafe==1.0 -poyo==0.4.1 py==1.4.34 pygments==2.2.0 pytest-cov==2.5.1 pytest-sugar==0.8.0 pytest-timeout==1.2.0 pytest==3.2.3 -python-dateutil==2.6.1 -pytz==2017.2 +pytz==2017.3 requests==2.18.4 six==1.11.0 snowballstemmer==1.2.1 -sphinx==1.6.4 +sphinx==1.6.5 sphinxcontrib-websupport==1.0.1 termcolor==1.1.0 urllib3==1.22 -whichcraft==0.4.1 +yapf==0.19.0 diff --git a/requirements-docker.txt b/requirements-docker.txt index 54bac73..8eb4059 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -12,7 +12,7 @@ packaging==16.8 pbr==3.1.1 psutil==5.4.0 pyparsing==2.2.0 -pytz==2017.2 +pytz==2017.3 requests==2.18.4 six==1.11.0 stevedore==1.27.1 diff --git a/requirements-jupyter.txt b/requirements-jupyter.txt index 2ad75ab..2c499ad 100644 --- a/requirements-jupyter.txt +++ b/requirements-jupyter.txt @@ -13,13 +13,13 @@ jinja2==2.9.6 jsonschema==2.6.0 jupyter-client==5.1.0 jupyter-console==5.2.0 -jupyter-core==4.3.0 +jupyter-core==4.4.0 jupyter==1.0.0 markupsafe==1.0 -mistune==0.7.4 +mistune==0.8 nbconvert==5.3.1 nbformat==4.4.0 -notebook==5.2.0 +notebook==5.2.1 pandocfilters==1.4.2 parso==0.1.0 pexpect==4.2.1 @@ -28,7 +28,7 @@ prompt-toolkit==1.0.15 ptyprocess==0.5.2 pygments==2.2.0 python-dateutil==2.6.1 -pyzmq==16.0.2 +pyzmq==16.0.3 qtconsole==4.3.1 simplegeneric==0.8.1 six==1.11.0 diff --git a/requirements-sqlalchemy.txt b/requirements-sqlalchemy.txt new file mode 100644 index 0000000..d33c754 --- /dev/null +++ 
b/requirements-sqlalchemy.txt @@ -0,0 +1,18 @@ +-e .[sqlalchemy] +appdirs==1.4.3 +bonobo-sqlalchemy==0.5.1 +certifi==2017.7.27.1 +chardet==3.0.4 +colorama==0.3.9 +fs==2.0.12 +idna==2.6 +packaging==16.8 +pbr==3.1.1 +psutil==5.4.0 +pyparsing==2.2.0 +pytz==2017.3 +requests==2.18.4 +six==1.11.0 +sqlalchemy==1.1.15 +stevedore==1.27.1 +urllib3==1.22 diff --git a/requirements.txt b/requirements.txt index 7384e3f..3a5c70d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,17 +2,19 @@ appdirs==1.4.3 certifi==2017.7.27.1 chardet==3.0.4 -click==6.7 colorama==0.3.9 fs==2.0.12 idna==2.6 +jinja2==2.9.6 +markupsafe==1.0 +mondrian==0.4.0 packaging==16.8 pbr==3.1.1 psutil==5.4.0 pyparsing==2.2.0 -python-dotenv==0.7.1 -pytz==2017.2 +pytz==2017.3 requests==2.18.4 six==1.11.0 stevedore==1.27.1 urllib3==1.22 +whistle==1.0.0 diff --git a/setup.py b/setup.py index 29c6255..d9eb59b 100644 --- a/setup.py +++ b/setup.py @@ -53,23 +53,24 @@ setup( packages=find_packages(exclude=['ez_setup', 'example', 'test']), include_package_data=True, install_requires=[ - 'colorama (>= 0.3, < 1.0)', 'fs (>= 2.0, < 3.0)', 'packaging (>= 16, < 17)', 'psutil (>= 5.2, < 6.0)', - 'python-dotenv (>= 0.7.1, < 1.0)', 'requests (>= 2.0, < 3.0)', 'stevedore (>= 1.21, < 2.0)' + 'colorama (>= 0.3)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'mondrian (>= 0.4, < 0.5)', + 'packaging (>= 16, < 17)', 'psutil (>= 5.4, < 6.0)', 'requests (>= 2.0, < 3.0)', 'stevedore (>= 1.27, < 1.28)', + 'whistle (>= 1.0, < 1.1)' ], extras_require={ 'dev': [ - 'cookiecutter (>= 1.5, < 1.6)', 'coverage (>= 4.4, < 5.0)', 'pytest (>= 3.1, < 4.0)', - 'pytest-cov (>= 2.5, < 3.0)', 'pytest-sugar (>= 0.8, < 0.9)', 'pytest-timeout (>= 1, < 2)', - 'sphinx (>= 1.6, < 2.0)' + 'coverage (>= 4.4, < 5.0)', 'pytest (>= 3.1, < 4.0)', 'pytest-cov (>= 2.5, < 3.0)', + 'pytest-sugar (>= 0.8, < 0.9)', 'pytest-timeout (>= 1, < 2)', 'sphinx (>= 1.6, < 2.0)', 'yapf' ], - 'docker': ['bonobo-docker'], - 'jupyter': ['ipywidgets (>= 6.0.0, < 7)', 
'jupyter (>= 1.0, < 1.1)'] + 'docker': ['bonobo-docker (>= 0.5.0)'], + 'jupyter': ['ipywidgets (>= 6.0.0, < 7)', 'jupyter (>= 1.0, < 1.1)'], + 'sqlalchemy': ['bonobo-sqlalchemy (>= 0.5.1)'] }, entry_points={ 'bonobo.commands': [ - 'convert = bonobo.commands.convert:register', 'init = bonobo.commands.init:register', - 'inspect = bonobo.commands.inspect:register', 'run = bonobo.commands.run:register', - 'version = bonobo.commands.version:register', 'download = bonobo.commands.download:register' + 'convert = bonobo.commands.convert:ConvertCommand', 'init = bonobo.commands.init:InitCommand', + 'inspect = bonobo.commands.inspect:InspectCommand', 'run = bonobo.commands.run:RunCommand', + 'version = bonobo.commands.version:VersionCommand', 'download = bonobo.commands.download:DownloadCommand' ], 'console_scripts': ['bonobo = bonobo.commands:entrypoint'] }, diff --git a/tests/commands/test_clibasics.py b/tests/commands/test_clibasics.py new file mode 100644 index 0000000..1fc292b --- /dev/null +++ b/tests/commands/test_clibasics.py @@ -0,0 +1,25 @@ +import pkg_resources + +from bonobo.util.testing import all_runners + + +def test_entrypoint(): + commands = {} + + for command in pkg_resources.iter_entry_points('bonobo.commands'): + commands[command.name] = command + + assert not { + 'convert', + 'init', + 'inspect', + 'run', + 'version', + }.difference(set(commands)) + + +@all_runners +def test_no_command(runner): + _, err, exc = runner(catch_errors=True) + assert type(exc) == SystemExit + assert 'error: the following arguments are required: command' in err \ No newline at end of file diff --git a/tests/commands/test_convert.py b/tests/commands/test_convert.py new file mode 100644 index 0000000..ed6f9e2 --- /dev/null +++ b/tests/commands/test_convert.py @@ -0,0 +1,19 @@ +import sys + +import pytest + +from bonobo.util.environ import change_working_directory +from bonobo.util.testing import all_runners + + +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="python 3.5 
does not preserve kwargs order and this cant pass for now") +@all_runners +def test_convert(runner, tmpdir): + csv_content = 'id;name\n1;Romain' + tmpdir.join('in.csv').write(csv_content) + + with change_working_directory(tmpdir): + runner('convert', 'in.csv', 'out.csv') + + assert tmpdir.join('out.csv').read().strip() == csv_content diff --git a/tests/commands/test_download.py b/tests/commands/test_download.py new file mode 100644 index 0000000..83b0ef4 --- /dev/null +++ b/tests/commands/test_download.py @@ -0,0 +1,44 @@ +import io +from unittest.mock import patch + +import pytest + +from bonobo.commands.download import EXAMPLES_BASE_URL +from bonobo.util.testing import all_runners + + +@all_runners +def test_download_works_for_examples(runner): + expected_bytes = b'hello world' + + class MockResponse(object): + def __init__(self): + self.status_code = 200 + + def iter_content(self, *args, **kwargs): + return [expected_bytes] + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + pass + + fout = io.BytesIO() + fout.close = lambda: None + + with patch('bonobo.commands.download._open_url') as mock_open_url, \ + patch('bonobo.commands.download.open') as mock_open: + mock_open_url.return_value = MockResponse() + mock_open.return_value = fout + runner('download', 'examples/datasets/coffeeshops.txt') + expected_url = EXAMPLES_BASE_URL + 'datasets/coffeeshops.txt' + mock_open_url.assert_called_once_with(expected_url) + + assert fout.getvalue() == expected_bytes + + +@all_runners +def test_download_fails_non_example(runner): + with pytest.raises(ValueError): + runner('download', 'something/entirely/different.txt') \ No newline at end of file diff --git a/tests/commands/test_init.py b/tests/commands/test_init.py new file mode 100644 index 0000000..626f5e8 --- /dev/null +++ b/tests/commands/test_init.py @@ -0,0 +1,29 @@ +import os + +import pytest + +from bonobo.commands.init import InitCommand +from bonobo.util.testing import all_runners + + 
+@all_runners +def test_init_file(runner, tmpdir): + target = tmpdir.join('foo.py') + target_filename = str(target) + runner('init', target_filename) + assert os.path.exists(target_filename) + + out, err = runner('run', target_filename) + assert out.replace('\n', ' ').strip() == 'Hello World' + assert not err + + +@all_runners +@pytest.mark.parametrize('template', InitCommand.TEMPLATES) +def test_init_file_templates(runner, template, tmpdir): + target = tmpdir.join('foo.py') + target_filename = str(target) + runner('init', target_filename) + assert os.path.exists(target_filename) + out, err = runner('run', target_filename) + assert not err diff --git a/tests/commands/test_run.py b/tests/commands/test_run.py new file mode 100644 index 0000000..69e4f94 --- /dev/null +++ b/tests/commands/test_run.py @@ -0,0 +1,48 @@ +import os +from unittest.mock import patch + +from bonobo import get_examples_path +from bonobo.util.testing import all_runners + + +@all_runners +def test_run(runner): + out, err = runner('run', '--quiet', get_examples_path('types/strings.py')) + out = out.split('\n') + assert out[0].startswith('Foo ') + assert out[1].startswith('Bar ') + assert out[2].startswith('Baz ') + + +@all_runners +def test_run_module(runner): + out, err = runner('run', '--quiet', '-m', 'bonobo.examples.types.strings') + out = out.split('\n') + assert out[0].startswith('Foo ') + assert out[1].startswith('Bar ') + assert out[2].startswith('Baz ') + + +@all_runners +def test_run_path(runner): + out, err = runner('run', '--quiet', get_examples_path('types')) + out = out.split('\n') + assert out[0].startswith('Foo ') + assert out[1].startswith('Bar ') + assert out[2].startswith('Baz ') + + +@all_runners +def test_install_requirements_for_dir(runner): + dirname = get_examples_path('types') + with patch('bonobo.commands.run._install_requirements') as install_mock: + runner('run', '--install', dirname) + install_mock.assert_called_once_with(os.path.join(dirname, 'requirements.txt')) + + 
+@all_runners +def test_install_requirements_for_file(runner): + dirname = get_examples_path('types') + with patch('bonobo.commands.run._install_requirements') as install_mock: + runner('run', '--install', os.path.join(dirname, 'strings.py')) + install_mock.assert_called_once_with(os.path.join(dirname, 'requirements.txt')) diff --git a/tests/commands/test_run_environ.py b/tests/commands/test_run_environ.py new file mode 100644 index 0000000..1d966be --- /dev/null +++ b/tests/commands/test_run_environ.py @@ -0,0 +1,109 @@ +import pytest + +from bonobo.util.testing import EnvironmentTestCase + + +@pytest.fixture +def env1(tmpdir): + env_file = tmpdir.join('.env_one') + env_file.write('\n'.join(( + 'SECRET=unknown', + 'PASSWORD=sweet', + 'PATH=first', + ))) + return str(env_file) + + +@pytest.fixture +def env2(tmpdir): + env_file = tmpdir.join('.env_two') + env_file.write('\n'.join(( + 'PASSWORD=bitter', + "PATH='second'", + ))) + return str(env_file) + + +class TestDefaultEnvFile(EnvironmentTestCase): + def test_run_with_default_env_file(self, runner, target, env1): + env = self.run_environ(runner, *target, '--default-env-file', env1) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'sweet' + assert env.get('PATH') == '/usr/bin' + + def test_run_with_multiple_default_env_files(self, runner, target, env1, env2): + env = self.run_environ(runner, *target, '--default-env-file', env1, '--default-env-file', env2) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'sweet' + assert env.get('PATH') == '/usr/bin' + + env = self.run_environ(runner, *target, '--default-env-file', env2, '--default-env-file', env1) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'bitter' + assert env.get('PATH') == '/usr/bin' + + +class TestEnvFile(EnvironmentTestCase): + def test_run_with_file(self, runner, target, env1): + env = self.run_environ(runner, *target, '--env-file', env1) + assert env.get('SECRET') == 'unknown' + 
assert env.get('PASSWORD') == 'sweet' + assert env.get('PATH') == 'first' + + def test_run_with_multiple_files(self, runner, target, env1, env2): + env = self.run_environ(runner, *target, '--env-file', env1, '--env-file', env2) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'bitter' + assert env.get('PATH') == 'second' + + env = self.run_environ(runner, *target, '--env-file', env2, '--env-file', env1) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'sweet' + assert env.get('PATH') == 'first' + + +class TestEnvFileCombinations(EnvironmentTestCase): + def test_run_with_both_env_files(self, runner, target, env1, env2): + env = self.run_environ(runner, *target, '--default-env-file', env1, '--env-file', env2) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'bitter' + assert env.get('PATH') == 'second' + + def test_run_with_both_env_files_then_overrides(self, runner, target, env1, env2): + env = self.run_environ( + runner, *target, '--default-env-file', env1, '--env-file', env2, '--env', 'PASSWORD=mine', '--env', + 'SECRET=s3cr3t' + ) + assert env.get('SECRET') == 's3cr3t' + assert env.get('PASSWORD') == 'mine' + assert env.get('PATH') == 'second' + + +class TestEnvVars(EnvironmentTestCase): + def test_run_no_env(self, runner, target): + env = self.run_environ(runner, *target, environ={'USER': 'romain'}) + assert env.get('USER') == 'romain' + + def test_run_env(self, runner, target): + env = self.run_environ(runner, *target, '--env', 'USER=serious', environ={'USER': 'romain'}) + assert env.get('USER') == 'serious' + + def test_run_env_mixed(self, runner, target): + env = self.run_environ(runner, *target, '--env', 'ONE=1', '--env', 'TWO="2"', environ={'USER': 'romain'}) + assert env.get('USER') == 'romain' + assert env.get('ONE') == '1' + assert env.get('TWO') == '2' + + def test_run_default_env(self, runner, target): + env = self.run_environ(runner, *target, '--default-env', 'USER=clown') + assert 
env.get('USER') == 'clown' + + env = self.run_environ(runner, *target, '--default-env', 'USER=clown', environ={'USER': 'romain'}) + assert env.get('USER') == 'romain' + + env = self.run_environ( + runner, *target, '--env', 'USER=serious', '--default-env', 'USER=clown', environ={ + 'USER': 'romain' + } + ) + assert env.get('USER') == 'serious' diff --git a/tests/commands/test_version.py b/tests/commands/test_version.py new file mode 100644 index 0000000..1ee893f --- /dev/null +++ b/tests/commands/test_version.py @@ -0,0 +1,20 @@ +from bonobo import __version__ +from bonobo.util.testing import all_runners + + +@all_runners +def test_version(runner): + out, err = runner('version') + out = out.strip() + assert out.startswith('bonobo ') + assert __version__ in out + + out, err = runner('version', '-q') + out = out.strip() + assert out.startswith('bonobo ') + assert __version__ in out + + out, err = runner('version', '-qq') + out = out.strip() + assert not out.startswith('bonobo ') + assert __version__ in out \ No newline at end of file diff --git a/tests/execution/test_node.py b/tests/execution/contexts/test_node.py similarity index 77% rename from tests/execution/test_node.py rename to tests/execution/contexts/test_node.py index fef385c..ef29c6e 100644 --- a/tests/execution/test_node.py +++ b/tests/execution/contexts/test_node.py @@ -1,5 +1,10 @@ +from unittest.mock import MagicMock + +import pytest + from bonobo import Bag, Graph -from bonobo.strategies import NaiveStrategy +from bonobo.execution.contexts.node import NodeExecutionContext +from bonobo.execution.strategies import NaiveStrategy from bonobo.util.testing import BufferingNodeExecutionContext, BufferingGraphExecutionContext @@ -179,3 +184,44 @@ def test_node_tuple_dict(): assert len(output) == 2 assert output[0] == ('foo', 'bar', {'id': 1}) assert output[1] == ('foo', 'baz', {'id': 2}) + + +def test_node_lifecycle_natural(): + func = MagicMock() + + ctx = NodeExecutionContext(func) + assert not 
any((ctx.started, ctx.stopped, ctx.killed, ctx.alive)) + + # cannot stop before start + with pytest.raises(RuntimeError): + ctx.stop() + assert not any((ctx.started, ctx.stopped, ctx.killed, ctx.alive)) + + # turn the key + ctx.start() + assert all((ctx.started, ctx.alive)) and not any((ctx.stopped, ctx.killed)) + + ctx.stop() + assert all((ctx.started, ctx.stopped)) and not any((ctx.alive, ctx.killed)) + + +def test_node_lifecycle_with_kill(): + func = MagicMock() + + ctx = NodeExecutionContext(func) + assert not any((ctx.started, ctx.stopped, ctx.killed, ctx.alive)) + + # cannot kill before start + with pytest.raises(RuntimeError): + ctx.kill() + assert not any((ctx.started, ctx.stopped, ctx.killed, ctx.alive)) + + # turn the key + ctx.start() + assert all((ctx.started, ctx.alive)) and not any((ctx.stopped, ctx.killed)) + + ctx.kill() + assert all((ctx.started, ctx.killed, ctx.alive)) and not ctx.stopped + + ctx.stop() + assert all((ctx.started, ctx.killed, ctx.stopped)) and not ctx.alive diff --git a/tests/execution/test_events.py b/tests/execution/test_events.py new file mode 100644 index 0000000..6fbc405 --- /dev/null +++ b/tests/execution/test_events.py @@ -0,0 +1,18 @@ +from unittest.mock import Mock + +from bonobo.execution import events + + +def test_names(): + # This test looks useless, but as it's becoming the plugin API, I want to make sure that nothing changes here, or + # notice it otherwise. + for name in 'start', 'started', 'tick', 'stop', 'stopped', 'kill': + event_name = getattr(events, name.upper()) + assert event_name == '.'.join(('execution', name)) + + +def test_event_object(): + # Same logic as above.
+ c = Mock() + e = events.ExecutionEvent(c) + assert e.context is c diff --git a/tests/features/test_not_modified.py b/tests/features/test_not_modified.py new file mode 100644 index 0000000..5b1b673 --- /dev/null +++ b/tests/features/test_not_modified.py @@ -0,0 +1,24 @@ +from bonobo.constants import NOT_MODIFIED +from bonobo.util.testing import BufferingNodeExecutionContext + + +def useless(*args, **kwargs): + return NOT_MODIFIED + + +def test_not_modified(): + input_messages = [ + ('foo', 'bar'), + { + 'foo': 'bar' + }, + ('foo', { + 'bar': 'baz' + }), + (), + ] + + with BufferingNodeExecutionContext(useless) as context: + context.write_sync(*input_messages) + + assert context.get_buffer() == input_messages diff --git a/tests/io/test_csv.py b/tests/io/test_csv.py index 473d243..b0b91c5 100644 --- a/tests/io/test_csv.py +++ b/tests/io/test_csv.py @@ -1,8 +1,7 @@ import pytest -from bonobo import Bag, CsvReader, CsvWriter, settings -from bonobo.constants import BEGIN, END -from bonobo.execution.node import NodeExecutionContext +from bonobo import CsvReader, CsvWriter, settings +from bonobo.execution.contexts.node import NodeExecutionContext from bonobo.util.testing import FilesystemTester, BufferingNodeExecutionContext csv_tester = FilesystemTester('csv') @@ -51,11 +50,9 @@ def test_read_csv_from_file_kwargs(tmpdir): 'a': 'a foo', 'b': 'b foo', 'c': 'c foo', - }, - { + }, { 'a': 'a bar', 'b': 'b bar', 'c': 'c bar', } ] - diff --git a/tests/io/test_file.py b/tests/io/test_file.py index d7645e7..5fc2823 100644 --- a/tests/io/test_file.py +++ b/tests/io/test_file.py @@ -2,7 +2,7 @@ import pytest from bonobo import Bag, FileReader, FileWriter from bonobo.constants import BEGIN, END -from bonobo.execution.node import NodeExecutionContext +from bonobo.execution.contexts.node import NodeExecutionContext from bonobo.util.testing import BufferingNodeExecutionContext, FilesystemTester txt_tester = FilesystemTester('txt') diff --git a/tests/io/test_json.py 
b/tests/io/test_json.py index 726d35f..b5b0781 100644 --- a/tests/io/test_json.py +++ b/tests/io/test_json.py @@ -2,7 +2,7 @@ import pytest from bonobo import JsonReader, JsonWriter, settings from bonobo import LdjsonReader, LdjsonWriter -from bonobo.execution.node import NodeExecutionContext +from bonobo.execution.contexts.node import NodeExecutionContext from bonobo.util.testing import FilesystemTester, BufferingNodeExecutionContext json_tester = FilesystemTester('json') @@ -41,8 +41,7 @@ stream_json_tester.input_data = '''{"foo": "bar"}\n{"baz": "boz"}''' def test_read_stream_json(tmpdir): fs, filename, services = stream_json_tester.get_services_for_reader(tmpdir) - with BufferingNodeExecutionContext(LdjsonReader(filename), - services=services) as context: + with BufferingNodeExecutionContext(LdjsonReader(filename), services=services) as context: context.write_sync(tuple()) actual = context.get_buffer() @@ -53,10 +52,13 @@ def test_read_stream_json(tmpdir): def test_write_stream_json(tmpdir): fs, filename, services = stream_json_tester.get_services_for_reader(tmpdir) - with BufferingNodeExecutionContext(LdjsonWriter(filename), - services=services) as context: - context.write_sync({'foo': 'bar'}) - context.write_sync({'baz': 'boz'}) + with BufferingNodeExecutionContext(LdjsonWriter(filename), services=services) as context: + context.write_sync( + { + 'foo': 'bar' + }, + {'baz': 'boz'}, + ) expected = '''{"foo": "bar"}\n{"baz": "boz"}\n''' with fs.open(filename) as fin: diff --git a/tests/io/test_pickle.py b/tests/io/test_pickle.py index 1f95309..8416a9f 100644 --- a/tests/io/test_pickle.py +++ b/tests/io/test_pickle.py @@ -3,7 +3,7 @@ import pickle import pytest from bonobo import Bag, PickleReader, PickleWriter -from bonobo.execution.node import NodeExecutionContext +from bonobo.execution.contexts.node import NodeExecutionContext from bonobo.util.testing import BufferingNodeExecutionContext, FilesystemTester pickle_tester = FilesystemTester('pkl', mode='wb') 
diff --git a/tests/plugins/test_console.py b/tests/plugins/test_console.py new file mode 100644 index 0000000..543d341 --- /dev/null +++ b/tests/plugins/test_console.py @@ -0,0 +1,36 @@ +from unittest.mock import MagicMock + +import bonobo +from bonobo.execution import events +from bonobo.execution.contexts.graph import GraphExecutionContext +from bonobo.plugins.console import ConsoleOutputPlugin +from whistle import EventDispatcher + + +def test_register_unregister(): + plugin = ConsoleOutputPlugin() + dispatcher = EventDispatcher() + + plugin.register(dispatcher) + assert plugin.setup in dispatcher.get_listeners(events.START) + assert plugin.tick in dispatcher.get_listeners(events.TICK) + assert plugin.teardown in dispatcher.get_listeners(events.STOPPED) + plugin.unregister(dispatcher) + assert plugin.setup not in dispatcher.get_listeners(events.START) + assert plugin.tick not in dispatcher.get_listeners(events.TICK) + assert plugin.teardown not in dispatcher.get_listeners(events.STOPPED) + + +def test_one_pass(): + plugin = ConsoleOutputPlugin() + dispatcher = EventDispatcher() + plugin.register(dispatcher) + + graph = bonobo.Graph() + context = MagicMock(spec=GraphExecutionContext(graph)) + + dispatcher.dispatch(events.START, events.ExecutionEvent(context)) + dispatcher.dispatch(events.TICK, events.ExecutionEvent(context)) + dispatcher.dispatch(events.STOPPED, events.ExecutionEvent(context)) + + plugin.unregister(dispatcher) diff --git a/tests/test_basicusage.py b/tests/test_basicusage.py index 58a1212..7772af3 100644 --- a/tests/test_basicusage.py +++ b/tests/test_basicusage.py @@ -1,8 +1,9 @@ +from unittest.mock import patch + import pytest import bonobo -from bonobo.execution import GraphExecutionContext -from unittest.mock import patch +from bonobo.execution.contexts.graph import GraphExecutionContext @pytest.mark.timeout(2) diff --git a/tests/test_commands.py b/tests/test_commands.py deleted file mode 100644 index c78fa5f..0000000 --- 
a/tests/test_commands.py +++ /dev/null @@ -1,427 +0,0 @@ -import functools -import io -import os -import runpy -import sys -from contextlib import redirect_stdout, redirect_stderr -from unittest.mock import patch, Mock - -import pkg_resources -import pytest -from cookiecutter.exceptions import OutputDirExistsException - -from bonobo import __main__, __version__, get_examples_path -from bonobo.commands import entrypoint -from bonobo.commands.run import DEFAULT_GRAPH_FILENAMES -from bonobo.commands.download import EXAMPLES_BASE_URL - - -def runner(f): - @functools.wraps(f) - def wrapped_runner(*args, catch_errors=False): - with redirect_stdout(io.StringIO()) as stdout, redirect_stderr(io.StringIO()) as stderr: - try: - f(list(args)) - except BaseException as exc: - if not catch_errors: - raise - elif isinstance(catch_errors, BaseException) and not isinstance(exc, catch_errors): - raise - return stdout.getvalue(), stderr.getvalue(), exc - return stdout.getvalue(), stderr.getvalue() - - return wrapped_runner - - -@runner -def runner_entrypoint(args): - """ Run bonobo using the python command entrypoint directly (bonobo.commands.entrypoint). 
""" - return entrypoint(args) - - -@runner -def runner_module(args): - """ Run bonobo using the bonobo.__main__ file, which is equivalent as doing "python -m bonobo ...".""" - with patch.object(sys, 'argv', ['bonobo', *args]): - return runpy.run_path(__main__.__file__, run_name='__main__') - - -all_runners = pytest.mark.parametrize('runner', [runner_entrypoint, runner_module]) -single_runner = pytest.mark.parametrize('runner', [runner_module]) - - -def test_entrypoint(): - commands = {} - - for command in pkg_resources.iter_entry_points('bonobo.commands'): - commands[command.name] = command - - assert not { - 'convert', - 'init', - 'inspect', - 'run', - 'version', - }.difference(set(commands)) - - -@all_runners -def test_no_command(runner): - _, err, exc = runner(catch_errors=True) - assert type(exc) == SystemExit - assert 'error: the following arguments are required: command' in err - - -@all_runners -def test_init(runner, tmpdir): - name = 'project' - tmpdir.chdir() - runner('init', name) - assert os.path.isdir(name) - assert set(os.listdir(name)) & set(DEFAULT_GRAPH_FILENAMES) - -@single_runner -def test_init_in_empty_then_nonempty_directory(runner, tmpdir): - name = 'project' - tmpdir.chdir() - os.mkdir(name) - - # run in empty dir - runner('init', name) - assert set(os.listdir(name)) & set(DEFAULT_GRAPH_FILENAMES) - - # run in non empty dir - with pytest.raises(OutputDirExistsException): - runner('init', name) - - -@single_runner -def test_init_within_empty_directory(runner, tmpdir): - tmpdir.chdir() - runner('init', '.') - assert set(os.listdir()) & set(DEFAULT_GRAPH_FILENAMES) - - -@all_runners -def test_run(runner): - out, err = runner('run', '--quiet', get_examples_path('types/strings.py')) - out = out.split('\n') - assert out[0].startswith('Foo ') - assert out[1].startswith('Bar ') - assert out[2].startswith('Baz ') - - -@all_runners -def test_run_module(runner): - out, err = runner('run', '--quiet', '-m', 'bonobo.examples.types.strings') - out = 
out.split('\n') - assert out[0].startswith('Foo ') - assert out[1].startswith('Bar ') - assert out[2].startswith('Baz ') - - -@all_runners -def test_run_path(runner): - out, err = runner('run', '--quiet', get_examples_path('types')) - out = out.split('\n') - assert out[0].startswith('Foo ') - assert out[1].startswith('Bar ') - assert out[2].startswith('Baz ') - - -@all_runners -def test_install_requirements_for_dir(runner): - dirname = get_examples_path('types') - with patch('bonobo.commands.run._install_requirements') as install_mock: - runner('run', '--install', dirname) - install_mock.assert_called_once_with(os.path.join(dirname, 'requirements.txt')) - - -@all_runners -def test_install_requirements_for_file(runner): - dirname = get_examples_path('types') - with patch('bonobo.commands.run._install_requirements') as install_mock: - runner('run', '--install', os.path.join(dirname, 'strings.py')) - install_mock.assert_called_once_with(os.path.join(dirname, 'requirements.txt')) - - -@all_runners -def test_version(runner): - out, err = runner('version') - out = out.strip() - assert out.startswith('bonobo ') - assert __version__ in out - - -@all_runners -def test_download_works_for_examples(runner): - expected_bytes = b'hello world' - - class MockResponse(object): - def __init__(self): - self.status_code = 200 - - def iter_content(self, *args, **kwargs): - return [expected_bytes] - - def __enter__(self): - return self - - def __exit__(self, *args, **kwargs): - pass - - fout = io.BytesIO() - fout.close = lambda: None - - with patch('bonobo.commands.download._open_url') as mock_open_url, \ - patch('bonobo.commands.download.open') as mock_open: - mock_open_url.return_value = MockResponse() - mock_open.return_value = fout - runner('download', 'examples/datasets/coffeeshops.txt') - expected_url = EXAMPLES_BASE_URL + 'datasets/coffeeshops.txt' - mock_open_url.assert_called_once_with(expected_url) - - assert fout.getvalue() == expected_bytes - - -@all_runners -def 
test_download_fails_non_example(runner): - with pytest.raises(ValueError): - runner('download', '/something/entirely/different.txt') - - -@all_runners -class TestDefaultEnvFile(object): - def test_run_file_with_default_env_file(self, runner): - out, err = runner( - 'run', '--quiet', '--default-env-file', '.env_one', - get_examples_path('environment/env_files/get_passed_env_file.py') - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'sweetpassword' - assert out[2] != 'marzo' - - def test_run_file_with_multiple_default_env_files(self, runner): - out, err = runner( - 'run', '--quiet', '--default-env-file', '.env_one', '--default-env-file', '.env_two', - get_examples_path('environment/env_files/get_passed_env_file.py') - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'sweetpassword' - assert out[2] != 'marzo' - - def test_run_module_with_default_env_file(self, runner): - out, err = runner( - 'run', '--quiet', '-m', 'bonobo.examples.environment.env_files.get_passed_env_file', '--default-env-file', - '.env_one' - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'sweetpassword' - assert out[2] != 'marzo' - - def test_run_module_with_multiple_default_env_files(self, runner): - out, err = runner( - 'run', - '--quiet', - '-m', - 'bonobo.examples.environment.env_files.get_passed_env_file', - '--default-env-file', - '.env_one', - '--default-env-file', - '.env_two', - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'sweetpassword' - assert out[2] != 'marzo' - - -@all_runners -class TestEnvFile(object): - def test_run_file_with_file(self, runner): - out, err = runner( - 'run', - '--quiet', - get_examples_path('environment/env_files/get_passed_env_file.py'), - '--env-file', - '.env_one', - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'sweetpassword' - assert out[2] == 'marzo' - - def test_run_file_with_multiple_files(self, runner): - out, err = runner( - 'run', - 
'--quiet', - get_examples_path('environment/env_files/get_passed_env_file.py'), - '--env-file', - '.env_one', - '--env-file', - '.env_two', - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'not_sweet_password' - assert out[2] == 'abril' - - def test_run_module_with_file(self, runner): - out, err = runner( - 'run', - '--quiet', - '-m', - 'bonobo.examples.environment.env_files.get_passed_env_file', - '--env-file', - '.env_one', - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'sweetpassword' - assert out[2] == 'marzo' - - def test_run_module_with_multiple_files(self, runner): - out, err = runner( - 'run', - '--quiet', - '-m', - 'bonobo.examples.environment.env_files.get_passed_env_file', - '--env-file', - '.env_one', - '--env-file', - '.env_two', - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'not_sweet_password' - assert out[2] == 'abril' - - -@all_runners -class TestEnvFileCombinations: - def test_run_file_with_default_env_file_and_env_file(self, runner): - out, err = runner( - 'run', - '--quiet', - get_examples_path('environment/env_files/get_passed_env_file.py'), - '--default-env-file', - '.env_one', - '--env-file', - '.env_two', - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'not_sweet_password' - assert out[2] == 'abril' - - def test_run_file_with_default_env_file_and_env_file_and_env_vars(self, runner): - out, err = runner( - 'run', - '--quiet', - get_examples_path('environment/env_files/get_passed_env_file.py'), - '--default-env-file', - '.env_one', - '--env-file', - '.env_two', - '--env', - 'TEST_USER_PASSWORD=SWEETpassWORD', - '--env', - 'MY_SECRET=444', - ) - out = out.split('\n') - assert out[0] == '444' - assert out[1] == 'SWEETpassWORD' - assert out[2] == 'abril' - - -@all_runners -class TestDefaultEnvVars: - def test_run_file_with_default_env_var(self, runner): - out, err = runner( - 'run', '--quiet', - 
get_examples_path('environment/env_vars/get_passed_env.py'), '--default-env', 'USER=clowncity', '--env', - 'USER=ted' - ) - out = out.split('\n') - assert out[0] == 'user' - assert out[1] == 'number' - assert out[2] == 'string' - assert out[3] != 'clowncity' - - def test_run_file_with_default_env_vars(self, runner): - out, err = runner( - 'run', '--quiet', - get_examples_path('environment/env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123', '--env', - 'ENV_TEST_USER=cwandrews', '--default-env', "ENV_TEST_STRING='my_test_string'" - ) - out = out.split('\n') - assert out[0] == 'cwandrews' - assert out[1] == '123' - assert out[2] == 'my_test_string' - - def test_run_module_with_default_env_var(self, runner): - out, err = runner( - 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', - 'ENV_TEST_NUMBER=123', '--default-env', 'ENV_TEST_STRING=string' - ) - out = out.split('\n') - assert out[0] == 'cwandrews' - assert out[1] == '123' - assert out[2] != 'string' - - def test_run_module_with_default_env_vars(self, runner): - out, err = runner( - 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', - 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--default-env', "ENV_TEST_STRING='string'" - ) - out = out.split('\n') - assert out[0] == 'cwandrews' - assert out[1] == '123' - assert out[2] != 'string' - - -@all_runners -class TestEnvVars: - def test_run_file_with_env_var(self, runner): - out, err = runner( - 'run', '--quiet', - get_examples_path('environment/env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123' - ) - out = out.split('\n') - assert out[0] != 'test_user' - assert out[1] == '123' - assert out[2] == 'my_test_string' - - def test_run_file_with_env_vars(self, runner): - out, err = runner( - 'run', '--quiet', - get_examples_path('environment/env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123', '--env', - 'ENV_TEST_USER=cwandrews', '--env', 
"ENV_TEST_STRING='my_test_string'" - ) - out = out.split('\n') - assert out[0] == 'cwandrews' - assert out[1] == '123' - assert out[2] == 'my_test_string' - - def test_run_module_with_env_var(self, runner): - out, err = runner( - 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', - 'ENV_TEST_NUMBER=123' - ) - out = out.split('\n') - assert out[0] == 'cwandrews' - assert out[1] == '123' - assert out[2] == 'my_test_string' - - def test_run_module_with_env_vars(self, runner): - out, err = runner( - 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', - 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" - ) - out = out.split('\n') - assert out[0] == 'cwandrews' - assert out[1] == '123' - assert out[2] == 'my_test_string' diff --git a/tests/test_execution.py b/tests/test_execution.py index 6fb33e4..84f40c5 100644 --- a/tests/test_execution.py +++ b/tests/test_execution.py @@ -1,7 +1,7 @@ from bonobo.config.processors import ContextProcessor from bonobo.constants import BEGIN, END -from bonobo.execution.graph import GraphExecutionContext -from bonobo.strategies import NaiveStrategy +from bonobo.execution.contexts.graph import GraphExecutionContext +from bonobo.execution.strategies import NaiveStrategy from bonobo.structs import Bag, Graph diff --git a/tests/util/test_python.py b/tests/util/test_python.py deleted file mode 100644 index 6b1b591..0000000 --- a/tests/util/test_python.py +++ /dev/null @@ -1,6 +0,0 @@ -from bonobo.util.python import require - - -def test_require(): - dummy = require('requireable.dummy') - assert dummy.foo == 'bar'