From f9c809f626497dfaa80d10f37de3973ad9feac07 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 29 Oct 2017 12:05:29 +0100 Subject: [PATCH 01/37] [core] Change the token parsing part in prevision of different flags. --- bonobo/execution/node.py | 47 +++++++++++++++++++++++++---- bonobo/nodes/io/json.py | 1 - tests/features/test_not_modified.py | 24 +++++++++++++++ 3 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 tests/features/test_not_modified.py diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index 6c52e7d..22582e6 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -9,7 +9,7 @@ from bonobo.execution.base import LoopingExecutionContext from bonobo.structs.bags import Bag from bonobo.structs.inputs import Input from bonobo.structs.tokens import Token -from bonobo.util import get_name, iserrorbag, isloopbackbag, isbag +from bonobo.util import get_name, iserrorbag, isloopbackbag, isbag, istuple from bonobo.util.compat import deprecated_alias from bonobo.util.statistics import WithStatistics @@ -137,12 +137,47 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): pass -def _resolve(input_bag, output): - # NotModified means to send the input unmodified to output. - if output is NOT_MODIFIED: - return input_bag +def isflag(param): + return isinstance(param, Token) and param in (NOT_MODIFIED,) + +def split_tokens(output): + """ + Split an output into token tuple, real output tuple. + + :param output: + :return: tuple, tuple + """ + if isinstance(output, Token): + # just a flag + return (output,), () + + if not istuple(output): + # no flag + return (), (output,) + + i = 0 + while isflag(output[i]): + i += 1 + + return output[:i], output[i:] + + +def _resolve(input_bag, output): + """ + This function is key to how bonobo works (and internal, too). It transforms a pair of input/output into what is the + real output. 
+ + :param input_bag: Bag + :param output: mixed + :return: Bag + """ if isbag(output): return output - return Bag(output) + tokens, output = split_tokens(output) + + if len(tokens) == 1 and tokens[0] is NOT_MODIFIED: + return input_bag + + return output if isbag(output) else Bag(output) diff --git a/bonobo/nodes/io/json.py b/bonobo/nodes/io/json.py index 54e7b71..404cdcb 100644 --- a/bonobo/nodes/io/json.py +++ b/bonobo/nodes/io/json.py @@ -53,7 +53,6 @@ class LdjsonReader(FileReader): def read(self, fs, file): for line in file: - print(line) yield self.loader(line) diff --git a/tests/features/test_not_modified.py b/tests/features/test_not_modified.py new file mode 100644 index 0000000..ddc537b --- /dev/null +++ b/tests/features/test_not_modified.py @@ -0,0 +1,24 @@ +from bonobo.constants import NOT_MODIFIED +from bonobo.util.testing import BufferingNodeExecutionContext + + +def useless(*args, **kwargs): + return NOT_MODIFIED + + +def test_not_modified(): + input_messages = [ + ('foo', 'bar'), + {'foo': 'bar'}, + ('foo', {'bar': 'baz'}), + (), + ] + + with BufferingNodeExecutionContext(useless) as context: + context.write_sync(*input_messages) + + assert context.get_buffer() == input_messages + + + + From cac6920040177648ab5da8f64ac5f32ab4b2c064 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 29 Oct 2017 12:13:52 +0100 Subject: [PATCH 02/37] Minor test change. 
--- tests/io/test_json.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/io/test_json.py b/tests/io/test_json.py index 726d35f..a3b25f5 100644 --- a/tests/io/test_json.py +++ b/tests/io/test_json.py @@ -20,10 +20,10 @@ def test_write_json_ioformat_arg0(tmpdir): @pytest.mark.parametrize('add_kwargs', ( - {}, - { - 'ioformat': settings.IOFORMAT_KWARGS, - }, + {}, + { + 'ioformat': settings.IOFORMAT_KWARGS, + }, )) def test_write_json_kwargs(tmpdir, add_kwargs): fs, filename, services = json_tester.get_services_for_writer(tmpdir) @@ -55,8 +55,10 @@ def test_write_stream_json(tmpdir): with BufferingNodeExecutionContext(LdjsonWriter(filename), services=services) as context: - context.write_sync({'foo': 'bar'}) - context.write_sync({'baz': 'boz'}) + context.write_sync( + {'foo': 'bar'}, + {'baz': 'boz'}, + ) expected = '''{"foo": "bar"}\n{"baz": "boz"}\n''' with fs.open(filename) as fin: From 8351897e3a21921df7d57e002b03cc8471686caa Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 29 Oct 2017 19:23:50 +0100 Subject: [PATCH 03/37] [core] Refactoring of commands to move towards a more pythonic way of running the jobs. Commands are now classes, and bonobo "graph" related commands now hooks into bonobo.run() calls so it will use what you actually put in your __main__ block. 
--- MANIFEST.in | 1 + Makefile | 2 +- Projectfile | 23 +-- bonobo/_api.py | 7 +- bonobo/commands/__init__.py | 166 ++++++++++++++++- bonobo/commands/convert.py | 138 +++++++-------- bonobo/commands/download.py | 37 ++-- bonobo/commands/init.py | 51 +++--- bonobo/commands/inspect.py | 47 ++--- bonobo/commands/run.py | 215 ++++++----------------- bonobo/commands/templates/default.py-tpl | 50 ++++++ bonobo/commands/version.py | 52 +++--- bonobo/examples/datasets/coffeeshops.py | 2 +- bonobo/examples/datasets/fablabs.py | 2 +- bonobo/examples/files/csv_handlers.py | 2 +- bonobo/examples/files/json_handlers.py | 2 +- bonobo/examples/files/pickle_handlers.py | 2 +- bonobo/examples/files/text_handlers.py | 2 +- bonobo/ext/django.py | 2 +- bonobo/settings.py | 6 + bonobo/structs/graphs.py | 26 +++ requirements-dev.txt | 2 +- requirements-docker.txt | 2 + requirements-jupyter.txt | 2 +- requirements.txt | 2 + setup.py | 11 +- 26 files changed, 483 insertions(+), 371 deletions(-) create mode 100644 bonobo/commands/templates/default.py-tpl diff --git a/MANIFEST.in b/MANIFEST.in index ab30e9a..4c2c662 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ include *.txt +include *.py-tpl diff --git a/Makefile b/Makefile index 50039c7..bb68335 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Generated by Medikit 0.4a5 on 2017-10-28. +# Generated by Medikit 0.4a5 on 2017-10-29. # All changes will be overriden. 
PACKAGE ?= bonobo diff --git a/Projectfile b/Projectfile index 6873522..d730328 100644 --- a/Projectfile +++ b/Projectfile @@ -29,24 +29,25 @@ python.setup( 'bonobo = bonobo.commands:entrypoint', ], 'bonobo.commands': [ - 'convert = bonobo.commands.convert:register', - 'init = bonobo.commands.init:register', - 'inspect = bonobo.commands.inspect:register', - 'run = bonobo.commands.run:register', - 'version = bonobo.commands.version:register', - 'download = bonobo.commands.download:register', + 'convert = bonobo.commands.convert:ConvertCommand', + 'init = bonobo.commands.init:InitCommand', + 'inspect = bonobo.commands.inspect:InspectCommand', + 'run = bonobo.commands.run:RunCommand', + 'version = bonobo.commands.version:VersionCommand', + 'download = bonobo.commands.download:DownloadCommand', ], } ) python.add_requirements( - 'colorama >=0.3,<1.0', - 'fs >=2.0,<3.0', + 'colorama >=0.3,<0.4', + 'fs >=2.0,<2.1', + 'jinja2 >=2.9,<2.10', 'packaging >=16,<17', - 'psutil >=5.2,<6.0', + 'psutil >=5.4,<6.0', + 'python-dotenv >=0.7,<0.8', 'requests >=2.0,<3.0', - 'stevedore >=1.21,<2.0', - 'python-dotenv >=0.7.1,<1.0', + 'stevedore >=1.27,<1.28', dev=[ 'cookiecutter >=1.5,<1.6', 'pytest-sugar >=0.8,<0.9', diff --git a/bonobo/_api.py b/bonobo/_api.py index a2c3856..fb1ef78 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,5 +1,3 @@ -import logging - from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ PickleReader, PickleWriter, PrettyPrinter, RateLimited, Tee, arg0_to_kwargs, count, identity, kwargs_to_arg0, noop from bonobo.nodes import LdjsonReader, LdjsonWriter @@ -21,7 +19,7 @@ def register_api_group(*args): @register_api -def run(graph, strategy=None, plugins=None, services=None): +def run(graph, *, plugins=None, services=None, **options): """ Main entry point of bonobo. It takes a graph and creates all the necessary plumbery around to execute it. 
@@ -41,7 +39,7 @@ def run(graph, strategy=None, plugins=None, services=None): :param dict services: The implementations of services this graph will use. :return bonobo.execution.graph.GraphExecutionContext: """ - strategy = create_strategy(strategy) + strategy = create_strategy(options.pop('strategy', None)) plugins = plugins or [] @@ -58,6 +56,7 @@ def run(graph, strategy=None, plugins=None, services=None): try: from bonobo.ext.jupyter import JupyterOutputPlugin except ImportError: + import logging logging.warning( 'Failed to load jupyter widget. Easiest way is to install the optional "jupyter" ' 'dependencies with «pip install bonobo[jupyter]», but you can also install a specific ' diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index 4e183a3..be877d7 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -1,10 +1,97 @@ import argparse +import codecs +import os +import os.path +import runpy +from contextlib import contextmanager +from functools import partial -from bonobo import logging, settings +from bonobo import settings, logging +from bonobo.constants import DEFAULT_SERVICES_FILENAME, DEFAULT_SERVICES_ATTR +from bonobo.util import get_name logger = logging.get_logger() +class BaseCommand: + @property + def logger(self): + try: + return self._logger + except AttributeError: + self._logger = logging.get_logger(get_name(self)) + return self._logger + + def add_arguments(self, parser): + """ + Entry point for subclassed commands to add custom arguments. + """ + pass + + def handle(self, *args, **options): + """ + The actual logic of the command. Subclasses must implement this method. + """ + raise NotImplementedError('Subclasses of BaseCommand must provide a handle() method') + + +class BaseGraphCommand(BaseCommand): + required = True + + def add_arguments(self, parser): + # target arguments (cannot provide both). 
+ source_group = parser.add_mutually_exclusive_group(required=self.required) + source_group.add_argument('file', nargs='?', type=str) + source_group.add_argument('-m', dest='mod', type=str) + + # arguments to enforce system environment. + parser.add_argument('--default-env-file', action='append') + parser.add_argument('--default-env', action='append') + parser.add_argument('--env-file', action='append') + parser.add_argument('--env', '-e', action='append') + + return parser + + def _run_path(self, file): + return runpy.run_path(file, run_name='__main__') + + def _run_module(self, mod): + return runpy.run_module(mod, run_name='__main__') + + def read(self, *, file, mod, **options): + + """ + + get_default_services( + filename, context.get(DEFAULT_SERVICES_ATTR)() if DEFAULT_SERVICES_ATTR in context else None + ) + + """ + + _graph, _options = None, None + + def _record(graph, **options): + nonlocal _graph, _options + _graph, _options = graph, options + + with _override_runner(_record), _override_environment(): + if file: + self._run_path(file) + elif mod: + self._run_module(mod) + else: + raise RuntimeError('No target provided.') + + if _graph is None: + raise RuntimeError('Could not find graph.') + + + return _graph, _options + + def handle(self, *args, **options): + pass + + def entrypoint(args=None): parser = argparse.ArgumentParser() parser.add_argument('--debug', '-D', action='store_true') @@ -17,7 +104,15 @@ def entrypoint(args=None): def register_extension(ext, commands=commands): try: parser = subparsers.add_parser(ext.name) - commands[ext.name] = ext.plugin(parser) + if isinstance(ext.plugin, type) and issubclass(ext.plugin, BaseCommand): + # current way, class based. + cmd = ext.plugin() + cmd.add_arguments(parser) + cmd.__name__ = ext.name + commands[ext.name] = cmd.handle + else: + # old school, function based. 
+ commands[ext.name] = ext.plugin(parser) except Exception: logger.exception('Error while loading command {}.'.format(ext.name)) @@ -33,3 +128,70 @@ def entrypoint(args=None): logger.debug('Command: ' + args['command'] + ' Arguments: ' + repr(args)) commands[args.pop('command')](**args) + + +@contextmanager +def _override_runner(runner): + import bonobo + _runner_backup = bonobo.run + try: + bonobo.run = runner + yield runner + finally: + bonobo.run = _runner_backup + + +@contextmanager +def _override_environment(root_dir=None, **options): + yield + return + if default_env_file: + for f in default_env_file: + env_file_path = str(env_dir.joinpath(f)) + load_dotenv(env_file_path) + if default_env: + for e in default_env: + set_env_var(e) + if env_file: + for f in env_file: + env_file_path = str(env_dir.joinpath(f)) + load_dotenv(env_file_path, override=True) + if env: + for e in env: + set_env_var(e, override=True) + + +def get_default_services(filename, services=None): + dirname = os.path.dirname(filename) + services_filename = os.path.join(dirname, DEFAULT_SERVICES_FILENAME) + if os.path.exists(services_filename): + with open(services_filename) as file: + code = compile(file.read(), services_filename, 'exec') + context = { + '__name__': '__services__', + '__file__': services_filename, + } + exec(code, context) + + return { + **context[DEFAULT_SERVICES_ATTR](), + **(services or {}), + } + return services or {} + + +def set_env_var(e, override=False): + __escape_decoder = codecs.getdecoder('unicode_escape') + ename, evalue = e.split('=', 1) + + def decode_escaped(escaped): + return __escape_decoder(escaped)[0] + + if len(evalue) > 0: + if evalue[0] == evalue[len(evalue) - 1] in ['"', "'"]: + evalue = decode_escaped(evalue[1:-1]) + + if override: + os.environ[ename] = evalue + else: + os.environ.setdefault(ename, evalue) \ No newline at end of file diff --git a/bonobo/commands/convert.py b/bonobo/commands/convert.py index e9039fd..918d81e 100644 --- 
a/bonobo/commands/convert.py +++ b/bonobo/commands/convert.py @@ -1,83 +1,75 @@ import bonobo +from bonobo.commands import BaseCommand from bonobo.registry import READER, WRITER, default_registry from bonobo.util.resolvers import _resolve_transformations, _resolve_options -def execute( - input_filename, - output_filename, - reader=None, - reader_option=None, - writer=None, - writer_option=None, - option=None, - transformation=None, -): - reader_factory = default_registry.get_reader_factory_for(input_filename, format=reader) - reader_options = _resolve_options((option or []) + (reader_option or [])) +class ConvertCommand(BaseCommand): + def add_arguments(self, parser): + parser.add_argument('input-filename', help='Input filename.') + parser.add_argument('output-filename', help='Output filename.') + parser.add_argument( + '--' + READER, + '-r', + help='Choose the reader factory if it cannot be detected from extension, or if detection is wrong.' + ) + parser.add_argument( + '--' + WRITER, + '-w', + help= + 'Choose the writer factory if it cannot be detected from extension, or if detection is wrong (use - for console pretty print).' + ) + parser.add_argument( + '--transformation', + '-t', + dest='transformation', + action='append', + help='Add a transformation between input and output (can be used multiple times, order is preserved).', + ) + parser.add_argument( + '--option', + '-O', + dest='option', + action='append', + help='Add a named option to both reader and writer factories (i.e. 
foo="bar").', + ) + parser.add_argument( + '--' + READER + '-option', + '-' + READER[0].upper(), + dest=READER + '_option', + action='append', + help='Add a named option to the reader factory.', + ) + parser.add_argument( + '--' + WRITER + '-option', + '-' + WRITER[0].upper(), + dest=WRITER + '_option', + action='append', + help='Add a named option to the writer factory.', + ) - if output_filename == '-': - writer_factory = bonobo.PrettyPrinter - else: - writer_factory = default_registry.get_writer_factory_for(output_filename, format=writer) - writer_options = _resolve_options((option or []) + (writer_option or [])) + def handle(self, input_filename, output_filename, reader=None, reader_option=None, writer=None, writer_option=None, + option=None, transformation=None): + reader_factory = default_registry.get_reader_factory_for(input_filename, format=reader) + reader_options = _resolve_options((option or []) + (reader_option or [])) - transformations = _resolve_transformations(transformation) + if output_filename == '-': + writer_factory = bonobo.PrettyPrinter + else: + writer_factory = default_registry.get_writer_factory_for(output_filename, format=writer) + writer_options = _resolve_options((option or []) + (writer_option or [])) - graph = bonobo.Graph() - graph.add_chain( - reader_factory(input_filename, **reader_options), - *transformations, - writer_factory(output_filename, **writer_options), - ) + transformations = _resolve_transformations(transformation) - return bonobo.run( - graph, services={ - 'fs': bonobo.open_fs(), - } - ) + graph = bonobo.Graph() + graph.add_chain( + reader_factory(input_filename, **reader_options), + *transformations, + writer_factory(output_filename, **writer_options), + ) - -def register(parser): - parser.add_argument('input-filename', help='Input filename.') - parser.add_argument('output-filename', help='Output filename.') - parser.add_argument( - '--' + READER, - '-r', - help='Choose the reader factory if it cannot be detected from 
extension, or if detection is wrong.' - ) - parser.add_argument( - '--' + WRITER, - '-w', - help= - 'Choose the writer factory if it cannot be detected from extension, or if detection is wrong (use - for console pretty print).' - ) - parser.add_argument( - '--transformation', - '-t', - dest='transformation', - action='append', - help='Add a transformation between input and output (can be used multiple times, order is preserved).', - ) - parser.add_argument( - '--option', - '-O', - dest='option', - action='append', - help='Add a named option to both reader and writer factories (i.e. foo="bar").', - ) - parser.add_argument( - '--' + READER + '-option', - '-' + READER[0].upper(), - dest=READER + '_option', - action='append', - help='Add a named option to the reader factory.', - ) - parser.add_argument( - '--' + WRITER + '-option', - '-' + WRITER[0].upper(), - dest=WRITER + '_option', - action='append', - help='Add a named option to the writer factory.', - ) - return execute + return bonobo.run( + graph, services={ + 'fs': bonobo.open_fs(), + } + ) diff --git a/bonobo/commands/download.py b/bonobo/commands/download.py index fd51951..9333db4 100644 --- a/bonobo/commands/download.py +++ b/bonobo/commands/download.py @@ -4,36 +4,31 @@ import re import requests import bonobo +from bonobo.commands import BaseCommand EXAMPLES_BASE_URL = 'https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/' """The URL to our git repository, in raw mode.""" -def _write_response(response, fout): - """Read the response and write it to the output stream in chunks.""" - for chunk in response.iter_content(io.DEFAULT_BUFFER_SIZE): - fout.write(chunk) +class DownloadCommand(BaseCommand): + def handle(self, *, path, **options): + path = path.lstrip('/') + if not path.startswith('examples'): + raise ValueError('Download command currently supports examples only') + examples_path = re.sub('^examples/', '', path) + output_path = bonobo.get_examples_path(examples_path) + with 
_open_url(EXAMPLES_BASE_URL + examples_path) as response, open(output_path, 'wb') as fout: + for chunk in response.iter_content(io.DEFAULT_BUFFER_SIZE): + fout.write(chunk) + self.logger.info('Download saved to {}'.format(output_path)) + + def add_arguments(self, parser): + parser.add_argument('path', help='The relative path of the thing to download.') def _open_url(url): """Open a HTTP connection to the URL and return a file-like object.""" response = requests.get(url, stream=True) if response.status_code != 200: - raise IOError('unable to download {}, HTTP {}'.format(url, response.status_code)) + raise IOError('Unable to download {}, HTTP {}'.format(url, response.status_code)) return response - - -def execute(path, *args, **kwargs): - path = path.lstrip('/') - if not path.startswith('examples'): - raise ValueError('download command currently supports examples only') - examples_path = re.sub('^examples/', '', path) - output_path = bonobo.get_examples_path(examples_path) - with _open_url(EXAMPLES_BASE_URL + examples_path) as response, open(output_path, 'wb') as fout: - _write_response(response, fout) - print('saved to {}'.format(output_path)) - - -def register(parser): - parser.add_argument('path', help='The relative path of the thing to download.') - return execute diff --git a/bonobo/commands/init.py b/bonobo/commands/init.py index e69156c..c0c50f1 100644 --- a/bonobo/commands/init.py +++ b/bonobo/commands/init.py @@ -1,28 +1,33 @@ import os -def execute(name, branch): - try: - from cookiecutter.main import cookiecutter - except ImportError as exc: - raise ImportError( - 'You must install "cookiecutter" to use this command.\n\n $ pip install cookiecutter\n' - ) from exc +from jinja2 import Environment, FileSystemLoader - overwrite_if_exists = False - project_path = os.path.join(os.getcwd(), name) - if os.path.isdir(project_path) and not os.listdir(project_path): - overwrite_if_exists = True - - return cookiecutter( - 
'https://github.com/python-bonobo/cookiecutter-bonobo.git', - extra_context={'name': name}, - no_input=True, - checkout=branch, - overwrite_if_exists=overwrite_if_exists - ) +from bonobo.commands import BaseCommand -def register(parser): - parser.add_argument('name') - parser.add_argument('--branch', '-b', default='master') - return execute +class InitCommand(BaseCommand): + TEMPLATES = {'job'} + TEMPLATES_PATH = os.path.join(os.path.dirname(__file__), 'templates') + + def add_arguments(self, parser): + parser.add_argument('template', choices=self.TEMPLATES) + parser.add_argument('filename') + parser.add_argument('--force', '-f', default=False, action='store_true') + + def handle(self, *, template, filename, force=False): + template_name = template + name, ext = os.path.splitext(filename) + if ext != '.py': + raise ValueError('Filenames should end with ".py".') + + loader = FileSystemLoader(self.TEMPLATES_PATH) + env = Environment(loader=loader) + template = env.get_template(template_name + '.py-tpl') + + if os.path.exists(filename) and not force: + raise FileExistsError('Target filename already exists, use --force to override.') + + with open(filename, 'w+') as f: + f.write(template.render(name=name)) + + self.logger.info('Generated {} using template {!r}.'.format(filename, template_name)) diff --git a/bonobo/commands/inspect.py b/bonobo/commands/inspect.py index 1ab6b5b..9a802d9 100644 --- a/bonobo/commands/inspect.py +++ b/bonobo/commands/inspect.py @@ -1,40 +1,21 @@ -import json +from bonobo.commands import BaseGraphCommand -from bonobo.commands.run import read, register_generic_run_arguments -from bonobo.constants import BEGIN -from bonobo.util.objects import get_name - -OUTPUT_GRAPHVIZ = 'graphviz' +OUTPUT_GRAPH = 'graphviz' -def _ident(graph, i): - escaped_index = str(i) - escaped_name = json.dumps(get_name(graph[i])) - return '{{{} [label={}]}}'.format(escaped_index, escaped_name) +class InspectCommand(BaseGraphCommand): + def add_arguments(self, parser): + 
super(InspectCommand, self).add_arguments(parser) + parser.add_argument('--graph', '-g', dest='output', action='store_const', const=OUTPUT_GRAPH) + def handle(self, output=None, **options): + if output is None: + raise ValueError('Output type must be provided (try --graph/-g).') -def execute(*, output, **kwargs): - graph, plugins, services = read(**kwargs) + graph, params = self.read(**options) - if output == OUTPUT_GRAPHVIZ: - print('digraph {') - print(' rankdir = LR;') - print(' "BEGIN" [shape="point"];') + if output == OUTPUT_GRAPH: + print(graph._repr_dot_()) + else: + raise NotImplementedError('Output type not implemented.') - for i in graph.outputs_of(BEGIN): - print(' "BEGIN" -> ' + _ident(graph, i) + ';') - - for ix in graph.topologically_sorted_indexes: - for iy in graph.outputs_of(ix): - print(' {} -> {};'.format(_ident(graph, ix), _ident(graph, iy))) - - print('}') - else: - raise NotImplementedError('Output type not implemented.') - - -def register(parser): - register_generic_run_arguments(parser) - parser.add_argument('--graph', '-g', dest='output', action='store_const', const=OUTPUT_GRAPHVIZ) - parser.set_defaults(output=OUTPUT_GRAPHVIZ) - return execute diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index be84d18..799816f 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -1,38 +1,60 @@ -import codecs import os -import sys -from importlib.util import spec_from_file_location, module_from_spec -from pathlib import Path - -from dotenv import load_dotenv import bonobo -from bonobo.constants import DEFAULT_SERVICES_ATTR, DEFAULT_SERVICES_FILENAME - -DEFAULT_GRAPH_FILENAMES = ( - '__main__.py', - 'main.py', -) -DEFAULT_GRAPH_ATTR = 'get_graph' +from bonobo.commands import BaseGraphCommand -def get_default_services(filename, services=None): - dirname = os.path.dirname(filename) - services_filename = os.path.join(dirname, DEFAULT_SERVICES_FILENAME) - if os.path.exists(services_filename): - with open(services_filename) as file: 
- code = compile(file.read(), services_filename, 'exec') - context = { - '__name__': '__bonobo__', - '__file__': services_filename, - } - exec(code, context) +class RunCommand(BaseGraphCommand): + install = False - return { - **context[DEFAULT_SERVICES_ATTR](), - **(services or {}), - } - return services or {} + def add_arguments(self, parser): + super(RunCommand, self).add_arguments(parser) + + verbosity_group = parser.add_mutually_exclusive_group() + verbosity_group.add_argument('--quiet', '-q', action='store_true') + verbosity_group.add_argument('--verbose', '-v', action='store_true') + + parser.add_argument('--install', '-I', action='store_true') + + def _run_path(self, file): + if self.install: + if os.path.isdir(file): + requirements = os.path.join(file, 'requirements.txt') + else: + requirements = os.path.join(os.path.dirname(file), 'requirements.txt') + _install_requirements(requirements) + + return super()._run_path(file) + + def _run_module(self, mod): + if self.install: + raise RuntimeError('--install behaviour when running a module is not defined.') + + return super()._run_module(mod) + + def handle(self, *args, quiet=False, verbose=False, install=False, **options): + from bonobo import settings + + settings.QUIET.set_if_true(quiet) + settings.DEBUG.set_if_true(verbose) + self.install = install + + graph, params = self.read(**options) + + params['plugins'] = set(params.pop('plugins', ())).union(set(options.pop('plugins', ()))) + + return bonobo.run(graph, **params) + + +def register_generic_run_arguments(parser, required=True): + """ + Only there for backward compatibility with third party extensions. + TODO: This should be deprecated (using the @deprecated decorator) in 0.7, and removed in 0.8 or 0.9. 
+ """ + dummy_command = BaseGraphCommand() + dummy_command.required = required + dummy_command.add_arguments(parser) + return parser def _install_requirements(requirements): @@ -47,138 +69,3 @@ def _install_requirements(requirements): pip.utils.pkg_resources = importlib.reload(pip.utils.pkg_resources) import site importlib.reload(site) - - -def read( - filename, - module, - install=False, - quiet=False, - verbose=False, - default_env_file=None, - default_env=None, - env_file=None, - env=None -): - import runpy - from bonobo import Graph, settings - - if quiet: - settings.QUIET.set(True) - - if verbose: - settings.DEBUG.set(True) - - if filename: - if os.path.isdir(filename): - if install: - requirements = os.path.join(filename, 'requirements.txt') - _install_requirements(requirements) - - pathname = filename - for filename in DEFAULT_GRAPH_FILENAMES: - filename = os.path.join(pathname, filename) - if os.path.exists(filename): - break - if not os.path.exists(filename): - raise IOError('Could not find entrypoint (candidates: {}).'.format(', '.join(DEFAULT_GRAPH_FILENAMES))) - elif install: - requirements = os.path.join(os.path.dirname(filename), 'requirements.txt') - _install_requirements(requirements) - spec = spec_from_file_location('__bonobo__', filename) - main = sys.modules['__bonobo__'] = module_from_spec(spec) - main.__path__ = [os.path.dirname(filename)] - main.__package__ = '__bonobo__' - spec.loader.exec_module(main) - context = main.__dict__ - elif module: - context = runpy.run_module(module, run_name='__bonobo__') - filename = context['__file__'] - else: - raise RuntimeError('UNEXPECTED: argparse should not allow this.') - - env_dir = Path(filename).parent or Path(module).parent - if default_env_file: - for f in default_env_file: - env_file_path = str(env_dir.joinpath(f)) - load_dotenv(env_file_path) - if default_env: - for e in default_env: - set_env_var(e) - if env_file: - for f in env_file: - env_file_path = str(env_dir.joinpath(f)) - 
load_dotenv(env_file_path, override=True) - if env: - for e in env: - set_env_var(e, override=True) - - graphs = dict((k, v) for k, v in context.items() if isinstance(v, Graph)) - - assert len(graphs) == 1, ( - 'Having zero or more than one graph definition in one file is unsupported for now, ' - 'but it is something that will be implemented in the future.\n\nExpected: 1, got: {}.' - ).format(len(graphs)) - - graph = list(graphs.values())[0] - plugins = [] - services = get_default_services( - filename, context.get(DEFAULT_SERVICES_ATTR)() if DEFAULT_SERVICES_ATTR in context else None - ) - - return graph, plugins, services - - -def set_env_var(e, override=False): - __escape_decoder = codecs.getdecoder('unicode_escape') - ename, evalue = e.split('=', 1) - - def decode_escaped(escaped): - return __escape_decoder(escaped)[0] - - if len(evalue) > 0: - if evalue[0] == evalue[len(evalue) - 1] in ['"', "'"]: - evalue = decode_escaped(evalue[1:-1]) - - if override: - os.environ[ename] = evalue - else: - os.environ.setdefault(ename, evalue) - - -def execute( - filename, - module, - install=False, - quiet=False, - verbose=False, - default_env_file=None, - default_env=None, - env_file=None, - env=None -): - graph, plugins, services = read( - filename, module, install, quiet, verbose, default_env_file, default_env, env_file, env - ) - - return bonobo.run(graph, plugins=plugins, services=services) - - -def register_generic_run_arguments(parser, required=True): - source_group = parser.add_mutually_exclusive_group(required=required) - source_group.add_argument('filename', nargs='?', type=str) - source_group.add_argument('--module', '-m', type=str) - parser.add_argument('--default-env-file', action='append') - parser.add_argument('--default-env', action='append') - parser.add_argument('--env-file', action='append') - parser.add_argument('--env', '-e', action='append') - return parser - - -def register(parser): - parser = register_generic_run_arguments(parser) - verbosity_group = 
parser.add_mutually_exclusive_group() - verbosity_group.add_argument('--quiet', '-q', action='store_true') - verbosity_group.add_argument('--verbose', '-v', action='store_true') - parser.add_argument('--install', '-I', action='store_true') - return execute diff --git a/bonobo/commands/templates/default.py-tpl b/bonobo/commands/templates/default.py-tpl new file mode 100644 index 0000000..1d8d6a5 --- /dev/null +++ b/bonobo/commands/templates/default.py-tpl @@ -0,0 +1,50 @@ +import bonobo + +def extract(): + """Placeholder, change, rename, remove... """ + yield 'hello' + yield 'world' + + +def transform(*args): + """Placeholder, change, rename, remove... """ + yield tuple( + map(str.title, args) + ) + + +def load(*args): + """Placeholder, change, rename, remove... """ + print(*args) + + +def get_graph(): + """ + This function builds the graph that needs to be executed. + + :return: bonobo.Graph + + """ + graph = bonobo.Graph() + graph.add_chain(extract, transform, load) + + return graph + + +def get_services(): + """ + This function builds the services dictionary, which is a simple dict of names-to-implementation used by bonobo + for runtime injection. + + It will be used on top of the defaults provided by bonobo (fs, http, ...). You can override those defaults, or just + let the framework define them. You can also define your own services and naming is up to you. + + :return: dict + """ + return {} + + +# The __main__ block actually execute the graph. +if __name__ == '__main__': + # Although you're not required to use it, bonobo's graph related commands will hook to this call (inspect, run, ...). 
+ bonobo.run(get_graph(), services=get_services()) diff --git a/bonobo/commands/version.py b/bonobo/commands/version.py index 6d4f3e7..3e3239a 100644 --- a/bonobo/commands/version.py +++ b/bonobo/commands/version.py @@ -1,4 +1,30 @@ -def format_version(mod, *, name=None, quiet=False): +from bonobo.commands import BaseCommand + + +class VersionCommand(BaseCommand): + def handle(self, *, all=False, quiet=False): + import bonobo + from bonobo.util.pkgs import bonobo_packages + + print(_format_version(bonobo, quiet=quiet)) + if all: + for name in sorted(bonobo_packages): + if name != 'bonobo': + try: + mod = __import__(name.replace('-', '_')) + try: + print(_format_version(mod, name=name, quiet=quiet)) + except Exception as exc: + print('{} ({})'.format(name, exc)) + except ImportError as exc: + print('{} is not importable ({}).'.format(name, exc)) + + def add_arguments(self, parser): + parser.add_argument('--all', '-a', action='store_true') + parser.add_argument('--quiet', '-q', action='count') + + +def _format_version(mod, *, name=None, quiet=False): from bonobo.util.pkgs import bonobo_packages args = { 'name': name or mod.__name__, @@ -14,27 +40,3 @@ def format_version(mod, *, name=None, quiet=False): return '{version}'.format(**args) raise RuntimeError('Hard to be so quiet...') - - -def execute(all=False, quiet=False): - import bonobo - from bonobo.util.pkgs import bonobo_packages - - print(format_version(bonobo, quiet=quiet)) - if all: - for name in sorted(bonobo_packages): - if name != 'bonobo': - try: - mod = __import__(name.replace('-', '_')) - try: - print(format_version(mod, name=name, quiet=quiet)) - except Exception as exc: - print('{} ({})'.format(name, exc)) - except ImportError as exc: - print('{} is not importable ({}).'.format(name, exc)) - - -def register(parser): - parser.add_argument('--all', '-a', action='store_true') - parser.add_argument('--quiet', '-q', action='count') - return execute diff --git a/bonobo/examples/datasets/coffeeshops.py 
b/bonobo/examples/datasets/coffeeshops.py index dc3db52..fd754ef 100644 --- a/bonobo/examples/datasets/coffeeshops.py +++ b/bonobo/examples/datasets/coffeeshops.py @@ -14,7 +14,7 @@ Extracts a list of parisian bars where you can buy a coffee for a reasonable pri """ import bonobo -from bonobo.commands.run import get_default_services +from bonobo.commands import get_default_services from bonobo.ext.opendatasoft import OpenDataSoftAPI filename = 'coffeeshops.txt' diff --git a/bonobo/examples/datasets/fablabs.py b/bonobo/examples/datasets/fablabs.py index 986aea9..d03775b 100644 --- a/bonobo/examples/datasets/fablabs.py +++ b/bonobo/examples/datasets/fablabs.py @@ -19,7 +19,7 @@ import json from colorama import Fore, Style import bonobo -from bonobo.commands.run import get_default_services +from bonobo.commands import get_default_services from bonobo.ext.opendatasoft import OpenDataSoftAPI try: diff --git a/bonobo/examples/files/csv_handlers.py b/bonobo/examples/files/csv_handlers.py index 33412c3..555bc67 100644 --- a/bonobo/examples/files/csv_handlers.py +++ b/bonobo/examples/files/csv_handlers.py @@ -1,5 +1,5 @@ import bonobo -from bonobo.commands.run import get_default_services +from bonobo.commands import get_default_services graph = bonobo.Graph( bonobo.CsvReader('datasets/coffeeshops.txt', headers=('item', )), diff --git a/bonobo/examples/files/json_handlers.py b/bonobo/examples/files/json_handlers.py index 27dc38e..f1818cd 100644 --- a/bonobo/examples/files/json_handlers.py +++ b/bonobo/examples/files/json_handlers.py @@ -1,6 +1,6 @@ import bonobo from bonobo import Bag -from bonobo.commands.run import get_default_services +from bonobo.commands import get_default_services def get_fields(**row): diff --git a/bonobo/examples/files/pickle_handlers.py b/bonobo/examples/files/pickle_handlers.py index 71a2b9a..ed2ecd4 100644 --- a/bonobo/examples/files/pickle_handlers.py +++ b/bonobo/examples/files/pickle_handlers.py @@ -28,7 +28,7 @@ messages categorized as spam, 
and (3) prints the output. ''' import bonobo -from bonobo.commands.run import get_default_services +from bonobo.commands import get_default_services from fs.tarfs import TarFS diff --git a/bonobo/examples/files/text_handlers.py b/bonobo/examples/files/text_handlers.py index 6ca6ef8..abbae1a 100644 --- a/bonobo/examples/files/text_handlers.py +++ b/bonobo/examples/files/text_handlers.py @@ -1,5 +1,5 @@ import bonobo -from bonobo.commands.run import get_default_services +from bonobo.commands import get_default_services def skip_comments(line): diff --git a/bonobo/ext/django.py b/bonobo/ext/django.py index 06f31a7..d35d131 100644 --- a/bonobo/ext/django.py +++ b/bonobo/ext/django.py @@ -5,7 +5,7 @@ from django.core.management.base import BaseCommand, OutputWrapper import bonobo import bonobo.util -from bonobo.commands.run import get_default_services +from bonobo.commands import get_default_services from bonobo.ext.console import ConsoleOutputPlugin from bonobo.util.term import CLEAR_EOL diff --git a/bonobo/settings.py b/bonobo/settings.py index ef4be2d..05d2089 100644 --- a/bonobo/settings.py +++ b/bonobo/settings.py @@ -51,6 +51,12 @@ class Setting: raise ValidationError('Invalid value {!r} for setting {}.'.format(value, self.name)) self.value = value + def set_if_true(self, value): + """Sets the value to true if it is actually true. 
May sound strange but the main usage is enforcing some + settings from command line.""" + if value: + self.set(True) + def get(self): try: return self.value diff --git a/bonobo/structs/graphs.py b/bonobo/structs/graphs.py index fe7c1df..e89b7e7 100644 --- a/bonobo/structs/graphs.py +++ b/bonobo/structs/graphs.py @@ -1,6 +1,8 @@ +import json from copy import copy from bonobo.constants import BEGIN +from bonobo.util import get_name class Graph: @@ -110,6 +112,24 @@ class Graph: self._topologcally_sorted_indexes_cache = tuple(filter(lambda i: type(i) is int, reversed(order))) return self._topologcally_sorted_indexes_cache + def _repr_dot_(self): + src = [ + 'digraph {', + ' rankdir = LR;', + ' "BEGIN" [shape="point"];', + ] + + for i in self.outputs_of(BEGIN): + src.append(' "BEGIN" -> ' + _get_graphviz_node_id(self, i) + ';') + + for ix in self.topologically_sorted_indexes: + for iy in self.outputs_of(ix): + src.append(' {} -> {};'.format(_get_graphviz_node_id(self, ix), _get_graphviz_node_id(self, iy))) + + src.append('}') + + return '\n'.join(src) + def _resolve_index(self, mixed): """ Find the index based on various strategies for a node, probably an input or output of chain. Supported inputs are indexes, node values or names. 
""" @@ -126,3 +146,9 @@ class Graph: return self.nodes.index(mixed) raise ValueError('Cannot find node matching {!r}.'.format(mixed)) + + +def _get_graphviz_node_id(graph, i): + escaped_index = str(i) + escaped_name = json.dumps(get_name(graph[i])) + return '{{{} [label={}]}}'.format(escaped_index, escaped_name) diff --git a/requirements-dev.txt b/requirements-dev.txt index 4e005a7..553fefc 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -27,7 +27,7 @@ pytz==2017.2 requests==2.18.4 six==1.11.0 snowballstemmer==1.2.1 -sphinx==1.6.4 +sphinx==1.6.5 sphinxcontrib-websupport==1.0.1 termcolor==1.1.0 urllib3==1.22 diff --git a/requirements-docker.txt b/requirements-docker.txt index 54bac73..d6f1160 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -3,6 +3,7 @@ appdirs==1.4.3 bonobo-docker==0.5.0 certifi==2017.7.27.1 chardet==3.0.4 +click==6.7 colorama==0.3.9 docker-pycreds==0.2.1 docker==2.3.0 @@ -12,6 +13,7 @@ packaging==16.8 pbr==3.1.1 psutil==5.4.0 pyparsing==2.2.0 +python-dotenv==0.7.1 pytz==2017.2 requests==2.18.4 six==1.11.0 diff --git a/requirements-jupyter.txt b/requirements-jupyter.txt index 2ad75ab..4e1d024 100644 --- a/requirements-jupyter.txt +++ b/requirements-jupyter.txt @@ -16,7 +16,7 @@ jupyter-console==5.2.0 jupyter-core==4.3.0 jupyter==1.0.0 markupsafe==1.0 -mistune==0.7.4 +mistune==0.8 nbconvert==5.3.1 nbformat==4.4.0 notebook==5.2.0 diff --git a/requirements.txt b/requirements.txt index 7384e3f..0d69f1a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,8 @@ click==6.7 colorama==0.3.9 fs==2.0.12 idna==2.6 +jinja2==2.9.6 +markupsafe==1.0 packaging==16.8 pbr==3.1.1 psutil==5.4.0 diff --git a/setup.py b/setup.py index 29c6255..0c01b43 100644 --- a/setup.py +++ b/setup.py @@ -53,8 +53,9 @@ setup( packages=find_packages(exclude=['ez_setup', 'example', 'test']), include_package_data=True, install_requires=[ - 'colorama (>= 0.3, < 1.0)', 'fs (>= 2.0, < 3.0)', 'packaging (>= 16, < 17)', 'psutil (>= 5.2, < 6.0)', 
- 'python-dotenv (>= 0.7.1, < 1.0)', 'requests (>= 2.0, < 3.0)', 'stevedore (>= 1.21, < 2.0)' + 'colorama (>= 0.3, < 0.4)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'packaging (>= 16, < 17)', + 'psutil (>= 5.4, < 6.0)', 'python-dotenv (>= 0.7, < 0.8)', 'requests (>= 2.0, < 3.0)', + 'stevedore (>= 1.27, < 1.28)' ], extras_require={ 'dev': [ @@ -67,9 +68,9 @@ setup( }, entry_points={ 'bonobo.commands': [ - 'convert = bonobo.commands.convert:register', 'init = bonobo.commands.init:register', - 'inspect = bonobo.commands.inspect:register', 'run = bonobo.commands.run:register', - 'version = bonobo.commands.version:register', 'download = bonobo.commands.download:register' + 'convert = bonobo.commands.convert:ConvertCommand', 'init = bonobo.commands.init:InitCommand', + 'inspect = bonobo.commands.inspect:InspectCommand', 'run = bonobo.commands.run:RunCommand', + 'version = bonobo.commands.version:VersionCommand', 'download = bonobo.commands.download:DownloadCommand' ], 'console_scripts': ['bonobo = bonobo.commands:entrypoint'] }, From c770287466c377f8b9c3c823df5faf95fd3e5188 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 29 Oct 2017 23:46:39 +0100 Subject: [PATCH 04/37] [core] still refactoring env-related stuff towards using __main__ blocks (but with argparser, if needed). 
--- Projectfile | 7 ++- bonobo/_api.py | 61 +++++++++++++++++++++++- bonobo/_version.py | 2 +- bonobo/commands/__init__.py | 93 ++++++++++++++++--------------------- bonobo/commands/init.py | 4 +- bonobo/commands/run.py | 4 +- tests/test_commands.py | 31 ------------- 7 files changed, 110 insertions(+), 92 deletions(-) diff --git a/Projectfile b/Projectfile index d730328..eb230d9 100644 --- a/Projectfile +++ b/Projectfile @@ -54,12 +54,15 @@ python.add_requirements( 'pytest-timeout >=1,<2', ], docker=[ - 'bonobo-docker', + 'bonobo-docker >=0.5.0', ], jupyter=[ 'jupyter >=1.0,<1.1', 'ipywidgets >=6.0.0,<7', - ] + ], + sqlalchemy=[ + 'bonobo-sqlalchemy >=0.5.1', + ], ) # vim: ft=python: diff --git a/bonobo/_api.py b/bonobo/_api.py index fb1ef78..43e9a29 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,3 +1,7 @@ +import argparse +from contextlib import contextmanager + +import os from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ PickleReader, PickleWriter, PrettyPrinter, RateLimited, Tee, arg0_to_kwargs, count, identity, kwargs_to_arg0, noop from bonobo.nodes import LdjsonReader, LdjsonWriter @@ -19,7 +23,60 @@ def register_api_group(*args): @register_api -def run(graph, *, plugins=None, services=None, **options): +def get_argument_parser(parser=None): + if parser is None: + import argparse + parser = argparse.ArgumentParser() + + parser.add_argument('--default-env-file', action='append') + parser.add_argument('--default-env', action='append') + parser.add_argument('--env-file', action='append') + parser.add_argument('--env', '-e', action='append') + + return parser + + +@register_api +@contextmanager +def parse_args(parser, *, args=None, namespace=None): + options = parser.parse_args(args=args, namespace=namespace) + + with patch_environ(options) as options: + yield options + + +@register_api +@contextmanager +def patch_environ(options): + from dotenv import load_dotenv + from bonobo.commands import 
set_env_var + + options = options if isinstance(options, dict) else options.__dict__ + + default_env_file = options.pop('default_env_file', []) + default_env = options.pop('default_env', []) + env_file = options.pop('env_file', []) + env = options.pop('env', []) + + if default_env_file: + for f in default_env_file: + load_dotenv(os.path.join(os.getcwd(), f)) + if default_env: + for e in default_env: + set_env_var(e) + if env_file: + for f in env_file: + load_dotenv(os.path.join(os.getcwd(), f), override=True) + if env: + for e in env: + set_env_var(e, override=True) + + yield options + ## TODO XXX put it back !!! + + +@register_api +def run(graph, *, plugins=None, services=None, strategy=None): """ Main entry point of bonobo. It takes a graph and creates all the necessary plumbery around to execute it. @@ -39,7 +96,7 @@ def run(graph, *, plugins=None, services=None, **options): :param dict services: The implementations of services this graph will use. :return bonobo.execution.graph.GraphExecutionContext: """ - strategy = create_strategy(options.pop('strategy', None)) + strategy = create_strategy(strategy) plugins = plugins or [] diff --git a/bonobo/_version.py b/bonobo/_version.py index 93b60a1..ebc2ff2 100644 --- a/bonobo/_version.py +++ b/bonobo/_version.py @@ -1 +1 @@ -__version__ = '0.5.1' +__version__ = '0.6-dev' diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index be877d7..c5d4908 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -3,10 +3,10 @@ import codecs import os import os.path import runpy +import sys from contextlib import contextmanager -from functools import partial -from bonobo import settings, logging +from bonobo import settings, logging, get_argument_parser, patch_environ from bonobo.constants import DEFAULT_SERVICES_FILENAME, DEFAULT_SERVICES_ATTR from bonobo.util import get_name @@ -44,11 +44,8 @@ class BaseGraphCommand(BaseCommand): source_group.add_argument('file', nargs='?', type=str) 
source_group.add_argument('-m', dest='mod', type=str) - # arguments to enforce system environment. - parser.add_argument('--default-env-file', action='append') - parser.add_argument('--default-env', action='append') - parser.add_argument('--env-file', action='append') - parser.add_argument('--env', '-e', action='append') + # add arguments to enforce system environment. + parser = get_argument_parser(parser) return parser @@ -58,34 +55,30 @@ class BaseGraphCommand(BaseCommand): def _run_module(self, mod): return runpy.run_module(mod, run_name='__main__') - def read(self, *, file, mod, **options): - - """ - - get_default_services( - filename, context.get(DEFAULT_SERVICES_ATTR)() if DEFAULT_SERVICES_ATTR in context else None - ) - - """ - + def read(self, *, file, mod, args=None, **options): _graph, _options = None, None def _record(graph, **options): nonlocal _graph, _options _graph, _options = graph, options - with _override_runner(_record), _override_environment(): - if file: - self._run_path(file) - elif mod: - self._run_module(mod) - else: - raise RuntimeError('No target provided.') + with _override_runner(_record), patch_environ(options): + _argv = sys.argv + try: + if file: + sys.argv = [file] + list(args) if args else [file] + self._run_path(file) + elif mod: + sys.argv = [mod, *(args or ())] + self._run_module(mod) + else: + raise RuntimeError('No target provided.') + finally: + sys.argv = _argv if _graph is None: raise RuntimeError('Could not find graph.') - return _graph, _options def handle(self, *args, **options): @@ -120,45 +113,41 @@ def entrypoint(args=None): mgr = ExtensionManager(namespace='bonobo.commands') mgr.map(register_extension) - args = parser.parse_args(args).__dict__ - if args.pop('debug', False): + parsed_args, remaining = parser.parse_known_args(args) + parsed_args = parsed_args.__dict__ + + if parsed_args.pop('debug', False): settings.DEBUG.set(True) settings.LOGGING_LEVEL.set(logging.DEBUG) 
logging.set_level(settings.LOGGING_LEVEL.get()) - logger.debug('Command: ' + args['command'] + ' Arguments: ' + repr(args)) - commands[args.pop('command')](**args) + logger.debug('Command: ' + parsed_args['command'] + ' Arguments: ' + repr(parsed_args)) + + # Get command handler + command = commands[parsed_args.pop('command')] + + if len(remaining): + command(_remaining_args=remaining, **parsed_args) + else: + command(**parsed_args) @contextmanager def _override_runner(runner): import bonobo - _runner_backup = bonobo.run + _get_argument_parser = bonobo.get_argument_parser + _run = bonobo.run try: + def get_argument_parser(parser=None): + return parser or argparse.ArgumentParser() + + bonobo.get_argument_parser = get_argument_parser bonobo.run = runner + yield runner finally: - bonobo.run = _runner_backup - - -@contextmanager -def _override_environment(root_dir=None, **options): - yield - return - if default_env_file: - for f in default_env_file: - env_file_path = str(env_dir.joinpath(f)) - load_dotenv(env_file_path) - if default_env: - for e in default_env: - set_env_var(e) - if env_file: - for f in env_file: - env_file_path = str(env_dir.joinpath(f)) - load_dotenv(env_file_path, override=True) - if env: - for e in env: - set_env_var(e, override=True) + bonobo.get_argument_parser = _get_argument_parser + bonobo.run = _run def get_default_services(filename, services=None): @@ -194,4 +183,4 @@ def set_env_var(e, override=False): if override: os.environ[ename] = evalue else: - os.environ.setdefault(ename, evalue) \ No newline at end of file + os.environ.setdefault(ename, evalue) diff --git a/bonobo/commands/init.py b/bonobo/commands/init.py index c0c50f1..6c6c2ff 100644 --- a/bonobo/commands/init.py +++ b/bonobo/commands/init.py @@ -6,13 +6,13 @@ from bonobo.commands import BaseCommand class InitCommand(BaseCommand): - TEMPLATES = {'job'} + TEMPLATES = {'default'} TEMPLATES_PATH = os.path.join(os.path.dirname(__file__), 'templates') def add_arguments(self, parser): - 
parser.add_argument('template', choices=self.TEMPLATES) parser.add_argument('filename') parser.add_argument('--force', '-f', default=False, action='store_true') + parser.add_argument('--template', '-t', choices=self.TEMPLATES, default='default') def handle(self, *, template, filename, force=False): template_name = template diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index 799816f..514bb5d 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -32,14 +32,14 @@ class RunCommand(BaseGraphCommand): return super()._run_module(mod) - def handle(self, *args, quiet=False, verbose=False, install=False, **options): + def handle(self, quiet=False, verbose=False, install=False, _remaining_args=None, **options): from bonobo import settings settings.QUIET.set_if_true(quiet) settings.DEBUG.set_if_true(verbose) self.install = install - graph, params = self.read(**options) + graph, params = self.read(args=_remaining_args, **options) params['plugins'] = set(params.pop('plugins', ())).union(set(options.pop('plugins', ()))) diff --git a/tests/test_commands.py b/tests/test_commands.py index c78fa5f..fe55f87 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -12,7 +12,6 @@ from cookiecutter.exceptions import OutputDirExistsException from bonobo import __main__, __version__, get_examples_path from bonobo.commands import entrypoint -from bonobo.commands.run import DEFAULT_GRAPH_FILENAMES from bonobo.commands.download import EXAMPLES_BASE_URL @@ -72,36 +71,6 @@ def test_no_command(runner): assert 'error: the following arguments are required: command' in err -@all_runners -def test_init(runner, tmpdir): - name = 'project' - tmpdir.chdir() - runner('init', name) - assert os.path.isdir(name) - assert set(os.listdir(name)) & set(DEFAULT_GRAPH_FILENAMES) - -@single_runner -def test_init_in_empty_then_nonempty_directory(runner, tmpdir): - name = 'project' - tmpdir.chdir() - os.mkdir(name) - - # run in empty dir - runner('init', name) - assert 
set(os.listdir(name)) & set(DEFAULT_GRAPH_FILENAMES) - - # run in non empty dir - with pytest.raises(OutputDirExistsException): - runner('init', name) - - -@single_runner -def test_init_within_empty_directory(runner, tmpdir): - tmpdir.chdir() - runner('init', '.') - assert set(os.listdir()) & set(DEFAULT_GRAPH_FILENAMES) - - @all_runners def test_run(runner): out, err = runner('run', '--quiet', get_examples_path('types/strings.py')) From b6c7d598dc8f047efe8cb7b0edbf693cc1705e03 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 30 Oct 2017 08:28:18 +0100 Subject: [PATCH 05/37] =?UTF-8?q?[core]=20Simplification:=20as=20truthfull?= =?UTF-8?q?y=20stated=20by=20Maik=20at=20Pycon.DE=20sprint=20=C2=ABlets=20?= =?UTF-8?q?try=20not=20to=20turn=20python=20into=20javascript=C2=BB.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bonobo/commands/__init__.py | 19 ------------------- bonobo/util/__init__.py | 2 -- bonobo/util/python.py | 31 ------------------------------- 3 files changed, 52 deletions(-) delete mode 100644 bonobo/util/python.py diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index c5d4908..56d615d 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -150,25 +150,6 @@ def _override_runner(runner): bonobo.run = _run -def get_default_services(filename, services=None): - dirname = os.path.dirname(filename) - services_filename = os.path.join(dirname, DEFAULT_SERVICES_FILENAME) - if os.path.exists(services_filename): - with open(services_filename) as file: - code = compile(file.read(), services_filename, 'exec') - context = { - '__name__': '__services__', - '__file__': services_filename, - } - exec(code, context) - - return { - **context[DEFAULT_SERVICES_ATTR](), - **(services or {}), - } - return services or {} - - def set_env_var(e, override=False): __escape_decoder = codecs.getdecoder('unicode_escape') ename, evalue = e.split('=', 1) diff --git 
a/bonobo/util/__init__.py b/bonobo/util/__init__.py index 4ef136e..27e50b2 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -15,7 +15,6 @@ from bonobo.util.inspect import ( istype, ) from bonobo.util.objects import (get_name, get_attribute_or_create, ValueHolder) -from bonobo.util.python import require # Bonobo's util API __all__ = [ @@ -35,5 +34,4 @@ __all__ = [ 'ismethod', 'isoption', 'istype', - 'require', ] diff --git a/bonobo/util/python.py b/bonobo/util/python.py deleted file mode 100644 index 8648f16..0000000 --- a/bonobo/util/python.py +++ /dev/null @@ -1,31 +0,0 @@ -import inspect -import os -import runpy - - -class _RequiredModule: - def __init__(self, dct): - self.__dict__ = dct - - -class _RequiredModulesRegistry(dict): - @property - def pathname(self): - return os.path.join(os.getcwd(), os.path.dirname(inspect.getfile(inspect.stack()[2][0]))) - - def require(self, name): - if name not in self: - bits = name.split('.') - filename = os.path.join(self.pathname, *bits[:-1], bits[-1] + '.py') - self[name] = _RequiredModule(runpy.run_path(filename, run_name=name)) - return self[name] - - -class WorkingDirectoryModulesRegistry(_RequiredModulesRegistry): - @property - def pathname(self): - return os.getcwd() - - -registry = _RequiredModulesRegistry() -require = registry.require From 40a745fe083f620b98fe72b112ee3b5cc48026c2 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 1 Nov 2017 12:46:03 +0100 Subject: [PATCH 06/37] Module registry reimported as it is needed for "bonobo convert". 
--- bonobo/_version.py | 2 +- bonobo/structs/inputs.py | 1 - bonobo/util/__init__.py | 5 ++++- bonobo/util/collections.py | 6 +++--- bonobo/util/resolvers.py | 23 +++++++++++++++++++++-- tests/util/test_python.py | 6 ------ 6 files changed, 29 insertions(+), 14 deletions(-) delete mode 100644 tests/util/test_python.py diff --git a/bonobo/_version.py b/bonobo/_version.py index ebc2ff2..2724bac 100644 --- a/bonobo/_version.py +++ b/bonobo/_version.py @@ -1 +1 @@ -__version__ = '0.6-dev' +__version__ = '0.6.dev0' diff --git a/bonobo/structs/inputs.py b/bonobo/structs/inputs.py index 7cfe12f..9b3cd14 100644 --- a/bonobo/structs/inputs.py +++ b/bonobo/structs/inputs.py @@ -15,7 +15,6 @@ # limitations under the License. from abc import ABCMeta, abstractmethod - from queue import Queue from bonobo.constants import BEGIN, END diff --git a/bonobo/util/__init__.py b/bonobo/util/__init__.py index 27e50b2..586fe3b 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -1,4 +1,4 @@ -from bonobo.util.collections import sortedlist, ensure_tuple +from bonobo.util.collections import ensure_tuple, sortedlist, tuplize from bonobo.util.compat import deprecated, deprecated_alias from bonobo.util.inspect import ( inspect_node, @@ -21,6 +21,7 @@ __all__ = [ 'ValueHolder', 'deprecated', 'deprecated_alias', + 'ensure_tuple', 'get_attribute_or_create', 'get_name', 'inspect_node', @@ -34,4 +35,6 @@ __all__ = [ 'ismethod', 'isoption', 'istype', + 'sortedlist', + 'tuplize', ] diff --git a/bonobo/util/collections.py b/bonobo/util/collections.py index d53a7da..31765c4 100644 --- a/bonobo/util/collections.py +++ b/bonobo/util/collections.py @@ -22,9 +22,9 @@ def ensure_tuple(tuple_or_mixed): def tuplize(generator): - """ Takes a generator and make it a tuple-returning function. As a side - effect, it can also decorate any iterator-returning function to force - return value to be a tuple. + """ + Decorates a generator and make it a tuple-returning function. 
As a side effect, it can also decorate any + iterator-returning function to force return value to be a tuple. >>> tuplized_lambda = tuplize(lambda: [1, 2, 3]) >>> tuplized_lambda() diff --git a/bonobo/util/resolvers.py b/bonobo/util/resolvers.py index 0590fc7..c4a1a90 100644 --- a/bonobo/util/resolvers.py +++ b/bonobo/util/resolvers.py @@ -4,10 +4,29 @@ This package is considered private, and should only be used within bonobo. """ import json +import os +import runpy import bonobo from bonobo.util.collections import tuplize -from bonobo.util.python import WorkingDirectoryModulesRegistry + + +class _RequiredModule: + def __init__(self, dct): + self.__dict__ = dct + + +class _ModulesRegistry(dict): + @property + def pathname(self): + return os.getcwd() + + def require(self, name): + if name not in self: + bits = name.split('.') + filename = os.path.join(self.pathname, *bits[:-1], bits[-1] + '.py') + self[name] = _RequiredModule(runpy.run_path(filename, run_name=name)) + return self[name] def _parse_option(option): @@ -52,7 +71,7 @@ def _resolve_transformations(transformations): :param transformations: tuple(str) :return: tuple(object) """ - registry = WorkingDirectoryModulesRegistry() + registry = _ModulesRegistry() for t in transformations: try: mod, attr = t.split(':', 1) diff --git a/tests/util/test_python.py b/tests/util/test_python.py deleted file mode 100644 index 6b1b591..0000000 --- a/tests/util/test_python.py +++ /dev/null @@ -1,6 +0,0 @@ -from bonobo.util.python import require - - -def test_require(): - dummy = require('requireable.dummy') - assert dummy.foo == 'bar' From 69bb3cb09161e88319a90710e42a48f3677e39d4 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 1 Nov 2017 13:12:19 +0100 Subject: [PATCH 07/37] Switch logger setup to mondrian (deps). 
--- Projectfile | 9 ++++++-- docs/roadmap.rst | 54 ------------------------------------------------ 2 files changed, 7 insertions(+), 56 deletions(-) delete mode 100644 docs/roadmap.rst diff --git a/Projectfile b/Projectfile index eb230d9..bb22b87 100644 --- a/Projectfile +++ b/Projectfile @@ -40,16 +40,15 @@ python.setup( ) python.add_requirements( - 'colorama >=0.3,<0.4', 'fs >=2.0,<2.1', 'jinja2 >=2.9,<2.10', + 'mondrian >=0.2,<0.3', 'packaging >=16,<17', 'psutil >=5.4,<6.0', 'python-dotenv >=0.7,<0.8', 'requests >=2.0,<3.0', 'stevedore >=1.27,<1.28', dev=[ - 'cookiecutter >=1.5,<1.6', 'pytest-sugar >=0.8,<0.9', 'pytest-timeout >=1,<2', ], @@ -65,4 +64,10 @@ python.add_requirements( ], ) +# Following requirements are not enforced, because some dependencies enforce them so we don't want to break +# the packaging in case it changes in dep. +python.add_requirements( + 'colorama >=0.3', +) + # vim: ft=python: diff --git a/docs/roadmap.rst b/docs/roadmap.rst deleted file mode 100644 index 182cf71..0000000 --- a/docs/roadmap.rst +++ /dev/null @@ -1,54 +0,0 @@ -Internal roadmap notes -====================== - -Things that should be thought about and/or implemented, but that I don't know where to store. - -Graph and node level plugins -:::::::::::::::::::::::::::: - - * Enhancers or node-level plugins - * Graph level plugins - * Documentation - -Command line interface and environment -:::::::::::::::::::::::::::::::::::::: - -* How do we manage environment ? .env ? -* How do we configure plugins ? - -Services and Processors -::::::::::::::::::::::: - -* ContextProcessors not clean (a bit better, but still not in love with the api) - -Next... -::::::: - -* Release process specialised for bonobo. With changelog production, etc. -* Document how to upgrade version, like, minor need change badges, etc. -* Windows console looks crappy. -* bonobo init --with sqlalchemy,docker; cookiecutter? 
-* logger, vebosity level - - -External libs that looks good -::::::::::::::::::::::::::::: - -* dask.distributed -* mediator (event dispatcher) - -Version 0.4 -::::::::::: - -* SQLAlchemy 101 - -Design decisions -:::::::::::::::: - -* initialize / finalize better than start / stop ? - -Minor stuff -::::::::::: - -* Should we include datasets in the repo or not? As they may change, grow, and even eventually have licenses we can't use, - it's probably best if we don't. \ No newline at end of file From e6596cf3f361537b686528c398019a8f0487e2e7 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 1 Nov 2017 13:19:24 +0100 Subject: [PATCH 08/37] Removes cookiecutter. --- docs/changelog.rst | 3 +++ docs/extension/jupyter.rst | 2 -- docs/install.rst | 14 ++++++++------ docs/reference/commands.rst | 10 ---------- docs/tutorial/tut01.rst | 22 ++++++++++++---------- tests/test_commands.py | 3 +-- 6 files changed, 24 insertions(+), 30 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index a222414..66a5a05 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,9 @@ Changelog Unreleased :::::::::: +* Cookiecutter usage is removed. Linked to the fact that bonobo now uses either a single file (up to you to get python + imports working as you want) or a regular fully fledged python package, we do not need it anymore. + New features ------------ diff --git a/docs/extension/jupyter.rst b/docs/extension/jupyter.rst index 6c3385f..ed01d3b 100644 --- a/docs/extension/jupyter.rst +++ b/docs/extension/jupyter.rst @@ -4,8 +4,6 @@ Jupyter Extension There is a builtin plugin that integrates (somewhat minimallistically, for now) bonobo within jupyter notebooks, so you can read the execution status of a graph within a nice (ok, not so nice) html/javascript widget. -See https://github.com/jupyter-widgets/widget-cookiecutter for the base template used.
- Installation :::::::::::: diff --git a/docs/install.rst b/docs/install.rst index c006c88..56f18ae 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -5,16 +5,18 @@ Installation Create an ETL project ::::::::::::::::::::: -Creating a project and starting to write code should take less than a minute: +Let's create a job. .. code-block:: shell-session - $ pip install --upgrade bonobo cookiecutter - $ bonobo init my-etl-project - $ bonobo run my-etl-project + $ pip install --upgrade bonobo + $ bonobo create my-etl.py + $ python my-etl.py -Once you bootstrapped a project, you can start editing the default example transformation by editing -`my-etl-project/main.py`. Now, you can head to :doc:`tutorial/index`. +This job only uses one python file, and you can run it using the python interpreter. For bigger jobs or jobs that + relate to multiple files, you should create a python package. + +Now, you can head to :doc:`tutorial/index`. Other installation options diff --git a/docs/reference/commands.rst b/docs/reference/commands.rst index 674d549..ade63be 100644 --- a/docs/reference/commands.rst +++ b/docs/reference/commands.rst @@ -16,16 +16,6 @@ Syntax: `bonobo convert [-r reader] input_filename [-w writer] output_filename` to read from csv and write to csv too (or other format) but adding a geocoder filter that would add some fields. -Bonobo Init -::::::::::: - -Create an empty project, ready to use bonobo. - -Syntax: `bonobo init` - -Requires `cookiecutter`. - - Bonobo Inspect :::::::::::::: diff --git a/docs/tutorial/tut01.rst b/docs/tutorial/tut01.rst index 3d6f9eb..836ddad 100644 --- a/docs/tutorial/tut01.rst +++ b/docs/tutorial/tut01.rst @@ -1,8 +1,7 @@ Let's get started! ================== -To begin with Bonobo, you need to install it in a working python 3.5+ environment, and you'll also need cookiecutter -to bootstrap your project. +To get started with Bonobo, you need to install it in a working python 3.5+ environment: ..
code-block:: shell-session @@ -14,21 +13,24 @@ See :doc:`/install` for more options. Create an empty project ::::::::::::::::::::::: -Your ETL code will live in ETL projects, which are basically a bunch of files, including python code, that bonobo -can run. +Your ETL code will live in standard python files and packages. .. code-block:: shell-session - $ bonobo init tutorial + $ bonobo create tutorial.py -This will create a `tutorial` directory (`content description here `_). +This will create a simple example job in a `tutorial.py` file. -To run this project, use: +Now, try to execute it: .. code-block:: shell-session - $ bonobo run tutorial + $ python tutorial.py +Congratulations, you just ran your first ETL job! + + +.. todo:: XXX **CHANGES NEEDED BELOW THIS POINTS BEFORE 0.6** XXX Write a first transformation :::::::::::::::::::::::::::: @@ -131,9 +133,9 @@ Rewrite it using builtins There is a much simpler way to describe an equivalent graph: .. literalinclude:: ../../bonobo/examples/tutorials/tut01e02.py - :language: python +:language: python -The `extract()` generator has been replaced by a list, as Bonobo will interpret non-callable iterables as a no-input + The `extract()` generator has been replaced by a list, as Bonobo will interpret non-callable iterables as a no-input generator. 
This example is also available in :mod:`bonobo.examples.tutorials.tut01e02`, and you can also run it as a module: diff --git a/tests/test_commands.py b/tests/test_commands.py index fe55f87..64d530b 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -4,11 +4,10 @@ import os import runpy import sys from contextlib import redirect_stdout, redirect_stderr -from unittest.mock import patch, Mock +from unittest.mock import patch import pkg_resources import pytest -from cookiecutter.exceptions import OutputDirExistsException from bonobo import __main__, __version__, get_examples_path from bonobo.commands import entrypoint From e06b616251afca480321c43129e69cac3b28a746 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 1 Nov 2017 18:46:45 +0100 Subject: [PATCH 09/37] Refactoring the runner to go more towards standard python, also adds the ability to use bonobo argument parser from standard python execution. --- Projectfile | 4 +- bonobo/_api.py | 79 ++--- bonobo/commands/__init__.py | 136 +------- bonobo/commands/base.py | 129 +++++++ bonobo/commands/convert.py | 13 +- bonobo/commands/inspect.py | 22 +- bonobo/commands/run.py | 23 +- bonobo/examples/environ.py | 28 ++ bonobo/examples/tutorials/tut02e02_write.py | 4 +- bonobo/examples/types/__main__.py | 8 +- bonobo/examples/types/strings.py | 12 +- bonobo/execution/node.py | 6 +- bonobo/nodes/io/json.py | 1 + bonobo/util/environ.py | 154 +++++++++ tests/features/test_not_modified.py | 12 +- tests/io/test_csv.py | 4 +- tests/io/test_json.py | 18 +- tests/test_commands.py | 354 +++++++------------- 18 files changed, 537 insertions(+), 470 deletions(-) create mode 100644 bonobo/commands/base.py create mode 100644 bonobo/examples/environ.py create mode 100644 bonobo/util/environ.py diff --git a/Projectfile b/Projectfile index bb22b87..3a10d0b 100644 --- a/Projectfile +++ b/Projectfile @@ -66,8 +66,6 @@ python.add_requirements( # Following requirements are not enforced, because some dependencies enforce 
them so we don't want to break # the packaging in case it changes in dep. -python.add_requirements( - 'colorama >=0.3', -) +python.add_requirements('colorama >=0.3', ) # vim: ft=python: diff --git a/bonobo/_api.py b/bonobo/_api.py index 43e9a29..f1e5da5 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,13 +1,10 @@ -import argparse -from contextlib import contextmanager - -import os from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ PickleReader, PickleWriter, PrettyPrinter, RateLimited, Tee, arg0_to_kwargs, count, identity, kwargs_to_arg0, noop from bonobo.nodes import LdjsonReader, LdjsonWriter from bonobo.strategies import create_strategy from bonobo.structs import Bag, ErrorBag, Graph, Token from bonobo.util import get_name +from bonobo.util.environ import parse_args, get_argument_parser __all__ = [] @@ -22,59 +19,6 @@ def register_api_group(*args): register_api(attr) -@register_api -def get_argument_parser(parser=None): - if parser is None: - import argparse - parser = argparse.ArgumentParser() - - parser.add_argument('--default-env-file', action='append') - parser.add_argument('--default-env', action='append') - parser.add_argument('--env-file', action='append') - parser.add_argument('--env', '-e', action='append') - - return parser - - -@register_api -@contextmanager -def parse_args(parser, *, args=None, namespace=None): - options = parser.parse_args(args=args, namespace=namespace) - - with patch_environ(options) as options: - yield options - - -@register_api -@contextmanager -def patch_environ(options): - from dotenv import load_dotenv - from bonobo.commands import set_env_var - - options = options if isinstance(options, dict) else options.__dict__ - - default_env_file = options.pop('default_env_file', []) - default_env = options.pop('default_env', []) - env_file = options.pop('env_file', []) - env = options.pop('env', []) - - if default_env_file: - for f in default_env_file: - 
load_dotenv(os.path.join(os.getcwd(), f)) - if default_env: - for e in default_env: - set_env_var(e) - if env_file: - for f in env_file: - load_dotenv(os.path.join(os.getcwd(), f), override=True) - if env: - for e in env: - set_env_var(e, override=True) - - yield options - ## TODO XXX put it back !!! - - @register_api def run(graph, *, plugins=None, services=None, strategy=None): """ @@ -126,6 +70,24 @@ def run(graph, *, plugins=None, services=None, strategy=None): return strategy.execute(graph, plugins=plugins, services=services) +def _inspect_as_graph(graph): + return graph._repr_dot_() + + +_inspect_formats = {'graph': _inspect_as_graph} + + +@register_api +def inspect(graph, *, format): + if not format in _inspect_formats: + raise NotImplementedError( + 'Output format {} not implemented. Choices are: {}.'.format( + format, ', '.join(sorted(_inspect_formats.keys())) + ) + ) + print(_inspect_formats[format](graph)) + + # bonobo.structs register_api_group(Bag, ErrorBag, Graph, Token) @@ -205,3 +167,6 @@ def get_examples_path(*pathsegments): @register_api def open_examples_fs(*pathsegments): return open_fs(get_examples_path(*pathsegments)) + + +register_api_group(get_argument_parser, parse_args) diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index 56d615d..cd9559c 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -1,91 +1,19 @@ import argparse -import codecs -import os -import os.path -import runpy -import sys -from contextlib import contextmanager -from bonobo import settings, logging, get_argument_parser, patch_environ -from bonobo.constants import DEFAULT_SERVICES_FILENAME, DEFAULT_SERVICES_ATTR -from bonobo.util import get_name +from bonobo import settings, logging +from bonobo.commands.base import BaseCommand, BaseGraphCommand logger = logging.get_logger() -class BaseCommand: - @property - def logger(self): - try: - return self._logger - except AttributeError: - self._logger = 
logging.get_logger(get_name(self)) - return self._logger - - def add_arguments(self, parser): - """ - Entry point for subclassed commands to add custom arguments. - """ - pass - - def handle(self, *args, **options): - """ - The actual logic of the command. Subclasses must implement this method. - """ - raise NotImplementedError('Subclasses of BaseCommand must provide a handle() method') - - -class BaseGraphCommand(BaseCommand): - required = True - - def add_arguments(self, parser): - # target arguments (cannot provide both). - source_group = parser.add_mutually_exclusive_group(required=self.required) - source_group.add_argument('file', nargs='?', type=str) - source_group.add_argument('-m', dest='mod', type=str) - - # add arguments to enforce system environment. - parser = get_argument_parser(parser) - - return parser - - def _run_path(self, file): - return runpy.run_path(file, run_name='__main__') - - def _run_module(self, mod): - return runpy.run_module(mod, run_name='__main__') - - def read(self, *, file, mod, args=None, **options): - _graph, _options = None, None - - def _record(graph, **options): - nonlocal _graph, _options - _graph, _options = graph, options - - with _override_runner(_record), patch_environ(options): - _argv = sys.argv - try: - if file: - sys.argv = [file] + list(args) if args else [file] - self._run_path(file) - elif mod: - sys.argv = [mod, *(args or ())] - self._run_module(mod) - else: - raise RuntimeError('No target provided.') - finally: - sys.argv = _argv - - if _graph is None: - raise RuntimeError('Could not find graph.') - - return _graph, _options - - def handle(self, *args, **options): - pass - - def entrypoint(args=None): + """ + Main callable for "bonobo" entrypoint. + + Will load commands from "bonobo.commands" entrypoints, using stevedore. 
+ + """ + parser = argparse.ArgumentParser() parser.add_argument('--debug', '-D', action='store_true') @@ -113,8 +41,7 @@ def entrypoint(args=None): mgr = ExtensionManager(namespace='bonobo.commands') mgr.map(register_extension) - parsed_args, remaining = parser.parse_known_args(args) - parsed_args = parsed_args.__dict__ + parsed_args = parser.parse_args(args).__dict__ if parsed_args.pop('debug', False): settings.DEBUG.set(True) @@ -123,45 +50,6 @@ def entrypoint(args=None): logger.debug('Command: ' + parsed_args['command'] + ' Arguments: ' + repr(parsed_args)) - # Get command handler + # Get command handler, execute, rince. command = commands[parsed_args.pop('command')] - - if len(remaining): - command(_remaining_args=remaining, **parsed_args) - else: - command(**parsed_args) - - -@contextmanager -def _override_runner(runner): - import bonobo - _get_argument_parser = bonobo.get_argument_parser - _run = bonobo.run - try: - def get_argument_parser(parser=None): - return parser or argparse.ArgumentParser() - - bonobo.get_argument_parser = get_argument_parser - bonobo.run = runner - - yield runner - finally: - bonobo.get_argument_parser = _get_argument_parser - bonobo.run = _run - - -def set_env_var(e, override=False): - __escape_decoder = codecs.getdecoder('unicode_escape') - ename, evalue = e.split('=', 1) - - def decode_escaped(escaped): - return __escape_decoder(escaped)[0] - - if len(evalue) > 0: - if evalue[0] == evalue[len(evalue) - 1] in ['"', "'"]: - evalue = decode_escaped(evalue[1:-1]) - - if override: - os.environ[ename] = evalue - else: - os.environ.setdefault(ename, evalue) + command(**parsed_args) diff --git a/bonobo/commands/base.py b/bonobo/commands/base.py new file mode 100644 index 0000000..43513b0 --- /dev/null +++ b/bonobo/commands/base.py @@ -0,0 +1,129 @@ +import argparse +import runpy +import sys +from contextlib import contextmanager + +import bonobo.util.environ +from bonobo import logging +from bonobo.util.environ import get_argument_parser, 
parse_args +from bonobo.util import get_name + + +class BaseCommand: + """ + Base class for CLI commands. + + """ + + @property + def logger(self): + try: + return self._logger + except AttributeError: + self._logger = logging.get_logger(get_name(self)) + return self._logger + + def add_arguments(self, parser): + """ + Entry point for subclassed commands to add custom arguments. + """ + pass + + def handle(self, *args, **options): + """ + The actual logic of the command. Subclasses must implement this method. + """ + raise NotImplementedError('Subclasses of BaseCommand must provide a handle() method') + + +class BaseGraphCommand(BaseCommand): + """ + Base class for CLI commands that depends on a graph definition, either from a file or from a module. + + """ + required = True + handler = None + + def add_arguments(self, parser): + # target arguments (cannot provide both). + source_group = parser.add_mutually_exclusive_group(required=self.required) + source_group.add_argument('file', nargs='?', type=str) + source_group.add_argument('-m', dest='mod', type=str) + + # add arguments to enforce system environment. 
+ parser = get_argument_parser(parser) + + return parser + + def parse_options(self, **options): + return options + + def handle(self, file, mod, **options): + options = self.parse_options(**options) + with self.read(file, mod, **options) as (graph, graph_execution_options, options): + return self.do_handle(graph, **graph_execution_options, **options) + + def do_handle(self, graph, **options): + if not self.handler: + raise RuntimeError('{} has no handler defined.'.format(get_name(self))) + return self.handler(graph, **options) + + @contextmanager + def read(self, file, mod, **options): + _graph, _graph_execution_options = None, None + + def _record(graph, **graph_execution_options): + nonlocal _graph, _graph_execution_options + _graph, _graph_execution_options = graph, graph_execution_options + + with _override_runner(_record), parse_args(options) as options: + _argv = sys.argv + try: + if file: + sys.argv = [file] + self._run_path(file) + elif mod: + sys.argv = [mod] + self._run_module(mod) + else: + raise RuntimeError('No target provided.') + finally: + sys.argv = _argv + + if _graph is None: + raise RuntimeError('Could not find graph.') + + yield _graph, _graph_execution_options, options + + def _run_path(self, file): + return runpy.run_path(file, run_name='__main__') + + def _run_module(self, mod): + return runpy.run_module(mod, run_name='__main__') + + +@contextmanager +def _override_runner(runner): + """ + Context manager that monkey patches `bonobo.run` function with our current command logic. + + :param runner: the callable that will handle the `run()` logic. + """ + import bonobo + + _get_argument_parser = bonobo.util.environ.get_argument_parser + _run = bonobo.run + try: + # Original get_argument_parser would create or update an argument parser with environment options, but here we + # already had them parsed so let's patch with something that creates an empty one instead. 
+ def get_argument_parser(parser=None): + return parser or argparse.ArgumentParser() + + bonobo.util.environ.get_argument_parser = get_argument_parser + bonobo.run = runner + + yield runner + finally: + # Restore our saved values. + bonobo.util.environ.get_argument_parser = _get_argument_parser + bonobo.run = _run diff --git a/bonobo/commands/convert.py b/bonobo/commands/convert.py index 918d81e..faf175c 100644 --- a/bonobo/commands/convert.py +++ b/bonobo/commands/convert.py @@ -48,8 +48,17 @@ class ConvertCommand(BaseCommand): help='Add a named option to the writer factory.', ) - def handle(self, input_filename, output_filename, reader=None, reader_option=None, writer=None, writer_option=None, - option=None, transformation=None): + def handle( + self, + input_filename, + output_filename, + reader=None, + reader_option=None, + writer=None, + writer_option=None, + option=None, + transformation=None + ): reader_factory = default_registry.get_reader_factory_for(input_filename, format=reader) reader_options = _resolve_options((option or []) + (reader_option or [])) diff --git a/bonobo/commands/inspect.py b/bonobo/commands/inspect.py index 9a802d9..0e6dcd4 100644 --- a/bonobo/commands/inspect.py +++ b/bonobo/commands/inspect.py @@ -1,21 +1,15 @@ +import bonobo from bonobo.commands import BaseGraphCommand -OUTPUT_GRAPH = 'graphviz' - class InspectCommand(BaseGraphCommand): + handler = staticmethod(bonobo.inspect) + def add_arguments(self, parser): super(InspectCommand, self).add_arguments(parser) - parser.add_argument('--graph', '-g', dest='output', action='store_const', const=OUTPUT_GRAPH) - - def handle(self, output=None, **options): - if output is None: - raise ValueError('Output type must be provided (try --graph/-g).') - - graph, params = self.read(**options) - - if output == OUTPUT_GRAPH: - print(graph._repr_dot_()) - else: - raise NotImplementedError('Output type not implemented.') + parser.add_argument('--graph', '-g', dest='format', action='store_const', 
const='graph') + def parse_options(self, **options): + if not options.get('format'): + raise RuntimeError('You must provide a format (try --graph).') + return options diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index 514bb5d..ce76bfc 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -6,6 +6,7 @@ from bonobo.commands import BaseGraphCommand class RunCommand(BaseGraphCommand): install = False + handler = staticmethod(bonobo.run) def add_arguments(self, parser): super(RunCommand, self).add_arguments(parser) @@ -16,7 +17,15 @@ class RunCommand(BaseGraphCommand): parser.add_argument('--install', '-I', action='store_true') + def parse_options(self, *, quiet=False, verbose=False, install=False, **options): + from bonobo import settings + settings.QUIET.set_if_true(quiet) + settings.DEBUG.set_if_true(verbose) + self.install = install + return options + def _run_path(self, file): + # add install logic if self.install: if os.path.isdir(file): requirements = os.path.join(file, 'requirements.txt') @@ -27,24 +36,12 @@ class RunCommand(BaseGraphCommand): return super()._run_path(file) def _run_module(self, mod): + # install not implemented for a module, not sure it even make sense. 
if self.install: raise RuntimeError('--install behaviour when running a module is not defined.') return super()._run_module(mod) - def handle(self, quiet=False, verbose=False, install=False, _remaining_args=None, **options): - from bonobo import settings - - settings.QUIET.set_if_true(quiet) - settings.DEBUG.set_if_true(verbose) - self.install = install - - graph, params = self.read(args=_remaining_args, **options) - - params['plugins'] = set(params.pop('plugins', ())).union(set(options.pop('plugins', ()))) - - return bonobo.run(graph, **params) - def register_generic_run_arguments(parser, required=True): """ diff --git a/bonobo/examples/environ.py b/bonobo/examples/environ.py new file mode 100644 index 0000000..ea1b39b --- /dev/null +++ b/bonobo/examples/environ.py @@ -0,0 +1,28 @@ +import os + +import bonobo + + +def extract_environ(): + yield from sorted(os.environ.items()) + + +def get_graph(): + """ + This function builds the graph that needs to be executed. + + :return: bonobo.Graph + + """ + graph = bonobo.Graph() + graph.add_chain(extract_environ, print) + + return graph + + +# The __main__ block actually execute the graph. 
+if __name__ == '__main__': + parser = bonobo.get_argument_parser() + parser.add_argument('-v', action='append', dest='vars') + with bonobo.parse_args(parser): + bonobo.run(get_graph()) diff --git a/bonobo/examples/tutorials/tut02e02_write.py b/bonobo/examples/tutorials/tut02e02_write.py index c4b065d..a33a11b 100644 --- a/bonobo/examples/tutorials/tut02e02_write.py +++ b/bonobo/examples/tutorials/tut02e02_write.py @@ -8,9 +8,7 @@ def split_one(line): graph = bonobo.Graph( bonobo.FileReader('coffeeshops.txt'), split_one, - bonobo.JsonWriter( - 'coffeeshops.json', fs='fs.output' - ), + bonobo.JsonWriter('coffeeshops.json', fs='fs.output'), ) diff --git a/bonobo/examples/types/__main__.py b/bonobo/examples/types/__main__.py index 3d1549f..ccda1a9 100644 --- a/bonobo/examples/types/__main__.py +++ b/bonobo/examples/types/__main__.py @@ -1,3 +1,7 @@ -from bonobo.util.python import require +import bonobo +from bonobo.examples.types.strings import get_graph -graph = require('strings').graph +if __name__ == '__main__': + parser = bonobo.get_argument_parser() + with bonobo.parse_args(parser): + bonobo.run(get_graph()) diff --git a/bonobo/examples/types/strings.py b/bonobo/examples/types/strings.py index 1903151..2fa765f 100644 --- a/bonobo/examples/types/strings.py +++ b/bonobo/examples/types/strings.py @@ -14,7 +14,7 @@ Example on how to use symple python strings to communicate between transformatio """ from random import randint -from bonobo import Graph +import bonobo def extract(): @@ -31,9 +31,11 @@ def load(s: str): print(s) -graph = Graph(extract, transform, load) +def get_graph(): + return bonobo.Graph(extract, transform, load) + if __name__ == '__main__': - from bonobo import run - - run(graph) + parser = bonobo.get_argument_parser() + with bonobo.parse_args(parser): + bonobo.run(get_graph()) diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index 22582e6..e727531 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -138,7 +138,7 
@@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): def isflag(param): - return isinstance(param, Token) and param in (NOT_MODIFIED,) + return isinstance(param, Token) and param in (NOT_MODIFIED, ) def split_tokens(output): @@ -150,11 +150,11 @@ def split_tokens(output): """ if isinstance(output, Token): # just a flag - return (output,), () + return (output, ), () if not istuple(output): # no flag - return (), (output,) + return (), (output, ) i = 0 while isflag(output[i]): diff --git a/bonobo/nodes/io/json.py b/bonobo/nodes/io/json.py index 404cdcb..bbb89ad 100644 --- a/bonobo/nodes/io/json.py +++ b/bonobo/nodes/io/json.py @@ -58,6 +58,7 @@ class LdjsonReader(FileReader): class LdjsonWriter(FileWriter): """Write a stream of JSON objects, one object per line.""" + def write(self, fs, file, lineno, **row): lineno += 1 # class-level variable file.write(json.dumps(row) + '\n') diff --git a/bonobo/util/environ.py b/bonobo/util/environ.py new file mode 100644 index 0000000..16f7c9c --- /dev/null +++ b/bonobo/util/environ.py @@ -0,0 +1,154 @@ +import argparse +import codecs +import os +import re +import warnings +from contextlib import contextmanager + +__escape_decoder = codecs.getdecoder('unicode_escape') +__posix_variable = re.compile('\$\{[^\}]*\}') + + +def parse_var(var): + name, value = var.split('=', 1) + + def decode_escaped(escaped): + return __escape_decoder(escaped)[0] + + if len(value) > 1: + c = value[0] + + if c in ['"', "'"] and value[-1] == c: + value = decode_escaped(value[1:-1]) + + return name, value + + +def load_env_from_file(filename): + """ + Read an env file into a collection of (name, value) tuples. 
+ """ + if not os.path.exists(filename): + raise FileNotFoundError('Environment file {} does not exist.'.format(filename)) + + with open(filename) as f: + for lineno, line in enumerate(f): + line = line.strip() + if not line or line.startswith('#'): + continue + if '=' not in line: + raise SyntaxError('Invalid environment file syntax in {} at line {}.'.format(filename, lineno + 1)) + + name, value = parse_var(line) + + yield name, value + + +_parser = None + + +def get_argument_parser(parser=None): + """ + Creates an argument parser with arguments to override the system environment. + + :api: bonobo.get_argument_parser + + :param _parser: + :return: + """ + if parser is None: + import argparse + parser = argparse.ArgumentParser() + + # Store globally to be able to warn the user about the fact he's probably wrong not to pass a parser to + # parse_args(), later. + global _parser + _parser = parser + + _parser.add_argument('--default-env-file', '-E', action='append') + _parser.add_argument('--default-env', action='append') + _parser.add_argument('--env-file', action='append') + _parser.add_argument('--env', '-e', action='append') + + return _parser + + +@contextmanager +def parse_args(mixed=None): + """ + Context manager to extract and apply environment related options from the provided argparser result. + + A dictionnary with unknown options will be yielded, so the remaining options can be used by the caller. + + :api: bonobo.patch_environ + + :param mixed: ArgumentParser instance, Namespace, or dict. + :return: + """ + + if mixed is None: + global _parser + if _parser is not None: + warnings.warn( + 'You are calling bonobo.parse_args() without a parser argument, but it looks like you created a parser before. You probably want to pass your parser to this call, or if creating a new parser here is really what you want to do, please create a new one explicitely to silence this warning.' + ) + # use the api from bonobo namespace, in case a command patched it. 
+ import bonobo + mixed = bonobo.get_argument_parser() + + if isinstance(mixed, argparse.ArgumentParser): + options = mixed.parse_args() + else: + options = mixed + + if not isinstance(options, dict): + options = options.__dict__ + + # make a copy so we don't polute our parent variables. + options = dict(options) + + # storage for values before patch. + _backup = {} + + # Priority order: --env > --env-file > system > --default-env > --default-env-file + # + # * The code below is reading default-env before default-env-file as if the first sets something, default-env-file + # won't override it. + # * Then, env-file is read from before env, as the behaviour will be the oposite (env will override a var even if + # env-file sets something.) + try: + # Set default environment + for name, value in map(parse_var, options.pop('default_env', []) or []): + if not name in os.environ: + if not name in _backup: + _backup[name] = os.environ.get(name, None) + os.environ[name] = value + + # Read and set default environment from file(s) + for filename in options.pop('default_env_file', []) or []: + for name, value in load_env_from_file(filename): + if not name in os.environ: + if not name in _backup: + _backup[name] = os.environ.get(name, None) + os.environ[name] = value + + # Read and set environment from file(s) + for filename in options.pop('env_file', []) or []: + for name, value in load_env_from_file(filename): + if not name in _backup: + _backup[name] = os.environ.get(name, None) + os.environ[name] = value + + # Set environment + for name, value in map(parse_var, options.pop('env', []) or []): + if not name in _backup: + _backup[name] = os.environ.get(name, None) + os.environ[name] = value + + yield options + finally: + for name, value in _backup.items(): + if value is None: + del os.environ[name] + else: + os.environ[name] = value diff --git a/tests/features/test_not_modified.py b/tests/features/test_not_modified.py index ddc537b..5b1b673 100644 --- 
a/tests/features/test_not_modified.py +++ b/tests/features/test_not_modified.py @@ -9,8 +9,12 @@ def useless(*args, **kwargs): def test_not_modified(): input_messages = [ ('foo', 'bar'), - {'foo': 'bar'}, - ('foo', {'bar': 'baz'}), + { + 'foo': 'bar' + }, + ('foo', { + 'bar': 'baz' + }), (), ] @@ -18,7 +22,3 @@ def test_not_modified(): context.write_sync(*input_messages) assert context.get_buffer() == input_messages - - - - diff --git a/tests/io/test_csv.py b/tests/io/test_csv.py index 473d243..291c241 100644 --- a/tests/io/test_csv.py +++ b/tests/io/test_csv.py @@ -51,11 +51,9 @@ def test_read_csv_from_file_kwargs(tmpdir): 'a': 'a foo', 'b': 'b foo', 'c': 'c foo', - }, - { + }, { 'a': 'a bar', 'b': 'b bar', 'c': 'c bar', } ] - diff --git a/tests/io/test_json.py b/tests/io/test_json.py index a3b25f5..b72a3de 100644 --- a/tests/io/test_json.py +++ b/tests/io/test_json.py @@ -20,10 +20,10 @@ def test_write_json_ioformat_arg0(tmpdir): @pytest.mark.parametrize('add_kwargs', ( - {}, - { - 'ioformat': settings.IOFORMAT_KWARGS, - }, + {}, + { + 'ioformat': settings.IOFORMAT_KWARGS, + }, )) def test_write_json_kwargs(tmpdir, add_kwargs): fs, filename, services = json_tester.get_services_for_writer(tmpdir) @@ -41,8 +41,7 @@ stream_json_tester.input_data = '''{"foo": "bar"}\n{"baz": "boz"}''' def test_read_stream_json(tmpdir): fs, filename, services = stream_json_tester.get_services_for_reader(tmpdir) - with BufferingNodeExecutionContext(LdjsonReader(filename), - services=services) as context: + with BufferingNodeExecutionContext(LdjsonReader(filename), services=services) as context: context.write_sync(tuple()) actual = context.get_buffer() @@ -53,10 +52,11 @@ def test_read_stream_json(tmpdir): def test_write_stream_json(tmpdir): fs, filename, services = stream_json_tester.get_services_for_reader(tmpdir) - with BufferingNodeExecutionContext(LdjsonWriter(filename), - services=services) as context: + with BufferingNodeExecutionContext(LdjsonWriter(filename), services=services) 
as context: context.write_sync( - {'foo': 'bar'}, + { + 'foo': 'bar' + }, {'baz': 'boz'}, ) diff --git a/tests/test_commands.py b/tests/test_commands.py index 64d530b..4bfc8ba 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -45,7 +45,6 @@ def runner_module(args): all_runners = pytest.mark.parametrize('runner', [runner_entrypoint, runner_module]) -single_runner = pytest.mark.parametrize('runner', [runner_module]) def test_entrypoint(): @@ -158,238 +157,141 @@ def test_download_fails_non_example(runner): runner('download', '/something/entirely/different.txt') -@all_runners -class TestDefaultEnvFile(object): - def test_run_file_with_default_env_file(self, runner): - out, err = runner( - 'run', '--quiet', '--default-env-file', '.env_one', - get_examples_path('environment/env_files/get_passed_env_file.py') - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'sweetpassword' - assert out[2] != 'marzo' +@pytest.fixture +def env1(tmpdir): + env_file = tmpdir.join('.env_one') + env_file.write('\n'.join(( + 'SECRET=unknown', + 'PASSWORD=sweet', + 'PATH=first', + ))) + return str(env_file) - def test_run_file_with_multiple_default_env_files(self, runner): - out, err = runner( - 'run', '--quiet', '--default-env-file', '.env_one', '--default-env-file', '.env_two', - get_examples_path('environment/env_files/get_passed_env_file.py') - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'sweetpassword' - assert out[2] != 'marzo' - def test_run_module_with_default_env_file(self, runner): - out, err = runner( - 'run', '--quiet', '-m', 'bonobo.examples.environment.env_files.get_passed_env_file', '--default-env-file', - '.env_one' - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'sweetpassword' - assert out[2] != 'marzo' +@pytest.fixture +def env2(tmpdir): + env_file = tmpdir.join('.env_two') + env_file.write('\n'.join(( + 'PASSWORD=bitter', + "PATH='second'", + ))) + return str(env_file) - def 
test_run_module_with_multiple_default_env_files(self, runner): - out, err = runner( - 'run', - '--quiet', + +all_environ_targets = pytest.mark.parametrize( + 'target', [ + (get_examples_path('environ.py'), ), + ( '-m', - 'bonobo.examples.environment.env_files.get_passed_env_file', - '--default-env-file', - '.env_one', - '--default-env-file', - '.env_two', - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'sweetpassword' - assert out[2] != 'marzo' + 'bonobo.examples.environ', + ), + ] +) @all_runners -class TestEnvFile(object): - def test_run_file_with_file(self, runner): - out, err = runner( - 'run', - '--quiet', - get_examples_path('environment/env_files/get_passed_env_file.py'), - '--env-file', - '.env_one', +@all_environ_targets +class EnvironmentTestCase(): + def run_quiet(self, runner, *args): + return runner('run', '--quiet', *args) + + def run_environ(self, runner, *args, environ=None): + _environ = {'PATH': '/usr/bin'} + if environ: + _environ.update(environ) + + with patch.dict('os.environ', _environ, clear=True): + out, err = self.run_quiet(runner, *args) + assert 'SECRET' not in os.environ + assert 'PASSWORD' not in os.environ + if 'PATH' in _environ: + assert 'PATH' in os.environ + assert os.environ['PATH'] == _environ['PATH'] + + assert err == '' + return dict(map(lambda line: line.split(' ', 1), filter(None, out.split('\n')))) + + +class TestDefaultEnvFile(EnvironmentTestCase): + def test_run_with_default_env_file(self, runner, target, env1): + env = self.run_environ(runner, *target, '--default-env-file', env1) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'sweet' + assert env.get('PATH') == '/usr/bin' + + def test_run_with_multiple_default_env_files(self, runner, target, env1, env2): + env = self.run_environ(runner, *target, '--default-env-file', env1, '--default-env-file', env2) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'sweet' + assert env.get('PATH') == '/usr/bin' + + 
env = self.run_environ(runner, *target, '--default-env-file', env2, '--default-env-file', env1) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'bitter' + assert env.get('PATH') == '/usr/bin' + + +class TestEnvFile(EnvironmentTestCase): + def test_run_with_file(self, runner, target, env1): + env = self.run_environ(runner, *target, '--env-file', env1) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'sweet' + assert env.get('PATH') == 'first' + + def test_run_with_multiple_files(self, runner, target, env1, env2): + env = self.run_environ(runner, *target, '--env-file', env1, '--env-file', env2) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'bitter' + assert env.get('PATH') == 'second' + + env = self.run_environ(runner, *target, '--env-file', env2, '--env-file', env1) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'sweet' + assert env.get('PATH') == 'first' + + +class TestEnvFileCombinations(EnvironmentTestCase): + def test_run_with_both_env_files(self, runner, target, env1, env2): + env = self.run_environ(runner, *target, '--default-env-file', env1, '--env-file', env2) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'bitter' + assert env.get('PATH') == 'second' + + def test_run_with_both_env_files_then_overrides(self, runner, target, env1, env2): + env = self.run_environ( + runner, *target, '--default-env-file', env1, '--env-file', env2, '--env', 'PASSWORD=mine', '--env', + 'SECRET=s3cr3t' ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'sweetpassword' - assert out[2] == 'marzo' + assert env.get('SECRET') == 's3cr3t' + assert env.get('PASSWORD') == 'mine' + assert env.get('PATH') == 'second' - def test_run_file_with_multiple_files(self, runner): - out, err = runner( - 'run', - '--quiet', - get_examples_path('environment/env_files/get_passed_env_file.py'), - '--env-file', - '.env_one', - '--env-file', - '.env_two', + +class 
TestEnvVars(EnvironmentTestCase): + def test_run_no_env(self, runner, target): + env = self.run_environ(runner, *target, environ={'USER': 'romain'}) + assert env.get('USER') == 'romain' + + def test_run_env(self, runner, target): + env = self.run_environ(runner, *target, '--env', 'USER=serious', environ={'USER': 'romain'}) + assert env.get('USER') == 'serious' + + def test_run_env_mixed(self, runner, target): + env = self.run_environ(runner, *target, '--env', 'ONE=1', '--env', 'TWO="2"', environ={'USER': 'romain'}) + assert env.get('USER') == 'romain' + assert env.get('ONE') == '1' + assert env.get('TWO') == '2' + + def test_run_default_env(self, runner, target): + env = self.run_environ(runner, *target, '--default-env', 'USER=clown') + assert env.get('USER') == 'clown' + + env = self.run_environ(runner, *target, '--default-env', 'USER=clown', environ={'USER': 'romain'}) + assert env.get('USER') == 'romain' + + env = self.run_environ( + runner, *target, '--env', 'USER=serious', '--default-env', 'USER=clown', environ={ + 'USER': 'romain' + } ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'not_sweet_password' - assert out[2] == 'abril' - - def test_run_module_with_file(self, runner): - out, err = runner( - 'run', - '--quiet', - '-m', - 'bonobo.examples.environment.env_files.get_passed_env_file', - '--env-file', - '.env_one', - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'sweetpassword' - assert out[2] == 'marzo' - - def test_run_module_with_multiple_files(self, runner): - out, err = runner( - 'run', - '--quiet', - '-m', - 'bonobo.examples.environment.env_files.get_passed_env_file', - '--env-file', - '.env_one', - '--env-file', - '.env_two', - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'not_sweet_password' - assert out[2] == 'abril' - - -@all_runners -class TestEnvFileCombinations: - def test_run_file_with_default_env_file_and_env_file(self, runner): - out, err = runner( - 'run', - '--quiet', 
- get_examples_path('environment/env_files/get_passed_env_file.py'), - '--default-env-file', - '.env_one', - '--env-file', - '.env_two', - ) - out = out.split('\n') - assert out[0] == '321' - assert out[1] == 'not_sweet_password' - assert out[2] == 'abril' - - def test_run_file_with_default_env_file_and_env_file_and_env_vars(self, runner): - out, err = runner( - 'run', - '--quiet', - get_examples_path('environment/env_files/get_passed_env_file.py'), - '--default-env-file', - '.env_one', - '--env-file', - '.env_two', - '--env', - 'TEST_USER_PASSWORD=SWEETpassWORD', - '--env', - 'MY_SECRET=444', - ) - out = out.split('\n') - assert out[0] == '444' - assert out[1] == 'SWEETpassWORD' - assert out[2] == 'abril' - - -@all_runners -class TestDefaultEnvVars: - def test_run_file_with_default_env_var(self, runner): - out, err = runner( - 'run', '--quiet', - get_examples_path('environment/env_vars/get_passed_env.py'), '--default-env', 'USER=clowncity', '--env', - 'USER=ted' - ) - out = out.split('\n') - assert out[0] == 'user' - assert out[1] == 'number' - assert out[2] == 'string' - assert out[3] != 'clowncity' - - def test_run_file_with_default_env_vars(self, runner): - out, err = runner( - 'run', '--quiet', - get_examples_path('environment/env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123', '--env', - 'ENV_TEST_USER=cwandrews', '--default-env', "ENV_TEST_STRING='my_test_string'" - ) - out = out.split('\n') - assert out[0] == 'cwandrews' - assert out[1] == '123' - assert out[2] == 'my_test_string' - - def test_run_module_with_default_env_var(self, runner): - out, err = runner( - 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', - 'ENV_TEST_NUMBER=123', '--default-env', 'ENV_TEST_STRING=string' - ) - out = out.split('\n') - assert out[0] == 'cwandrews' - assert out[1] == '123' - assert out[2] != 'string' - - def test_run_module_with_default_env_vars(self, runner): - out, err = runner( - 'run', '--quiet', '-m', 
'bonobo.examples.environment.env_vars.get_passed_env', '--env', - 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--default-env', "ENV_TEST_STRING='string'" - ) - out = out.split('\n') - assert out[0] == 'cwandrews' - assert out[1] == '123' - assert out[2] != 'string' - - -@all_runners -class TestEnvVars: - def test_run_file_with_env_var(self, runner): - out, err = runner( - 'run', '--quiet', - get_examples_path('environment/env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123' - ) - out = out.split('\n') - assert out[0] != 'test_user' - assert out[1] == '123' - assert out[2] == 'my_test_string' - - def test_run_file_with_env_vars(self, runner): - out, err = runner( - 'run', '--quiet', - get_examples_path('environment/env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123', '--env', - 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" - ) - out = out.split('\n') - assert out[0] == 'cwandrews' - assert out[1] == '123' - assert out[2] == 'my_test_string' - - def test_run_module_with_env_var(self, runner): - out, err = runner( - 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', - 'ENV_TEST_NUMBER=123' - ) - out = out.split('\n') - assert out[0] == 'cwandrews' - assert out[1] == '123' - assert out[2] == 'my_test_string' - - def test_run_module_with_env_vars(self, runner): - out, err = runner( - 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', - 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" - ) - out = out.split('\n') - assert out[0] == 'cwandrews' - assert out[1] == '123' - assert out[2] == 'my_test_string' + assert env.get('USER') == 'serious' From ac6cba02cb472fdadf90da7f1b7dd32cee5f6366 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 1 Nov 2017 18:48:53 +0100 Subject: [PATCH 10/37] Remove files in examples that are not used anymore. 
--- .../examples/environment/env_files/.env_one | 3 --- .../examples/environment/env_files/.env_two | 2 -- .../environment/env_files/__init__.py | 0 .../env_files/get_passed_env_file.py | 23 ----------------- .../examples/environment/env_vars/__init__.py | 0 .../environment/env_vars/get_passed_env.py | 25 ------------------- 6 files changed, 53 deletions(-) delete mode 100644 bonobo/examples/environment/env_files/.env_one delete mode 100644 bonobo/examples/environment/env_files/.env_two delete mode 100644 bonobo/examples/environment/env_files/__init__.py delete mode 100644 bonobo/examples/environment/env_files/get_passed_env_file.py delete mode 100644 bonobo/examples/environment/env_vars/__init__.py delete mode 100644 bonobo/examples/environment/env_vars/get_passed_env.py diff --git a/bonobo/examples/environment/env_files/.env_one b/bonobo/examples/environment/env_files/.env_one deleted file mode 100644 index 65f2b17..0000000 --- a/bonobo/examples/environment/env_files/.env_one +++ /dev/null @@ -1,3 +0,0 @@ -MY_SECRET=321 -TEST_USER_PASSWORD=sweetpassword -PATH=marzo \ No newline at end of file diff --git a/bonobo/examples/environment/env_files/.env_two b/bonobo/examples/environment/env_files/.env_two deleted file mode 100644 index 672d6d2..0000000 --- a/bonobo/examples/environment/env_files/.env_two +++ /dev/null @@ -1,2 +0,0 @@ -TEST_USER_PASSWORD=not_sweet_password -PATH='abril' \ No newline at end of file diff --git a/bonobo/examples/environment/env_files/__init__.py b/bonobo/examples/environment/env_files/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bonobo/examples/environment/env_files/get_passed_env_file.py b/bonobo/examples/environment/env_files/get_passed_env_file.py deleted file mode 100644 index bb45540..0000000 --- a/bonobo/examples/environment/env_files/get_passed_env_file.py +++ /dev/null @@ -1,23 +0,0 @@ -import os - -import bonobo - - -def extract(): - my_secret = os.getenv('MY_SECRET') - test_user_password = 
os.getenv('TEST_USER_PASSWORD') - path = os.getenv('PATH') - - yield my_secret - yield test_user_password - yield path - - -def load(s: str): - print(s) - - -graph = bonobo.Graph(extract, load) - -if __name__ == '__main__': - bonobo.run(graph) diff --git a/bonobo/examples/environment/env_vars/__init__.py b/bonobo/examples/environment/env_vars/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bonobo/examples/environment/env_vars/get_passed_env.py b/bonobo/examples/environment/env_vars/get_passed_env.py deleted file mode 100644 index e0c6c45..0000000 --- a/bonobo/examples/environment/env_vars/get_passed_env.py +++ /dev/null @@ -1,25 +0,0 @@ -import os - -import bonobo - - -def extract(): - env_test_user = os.getenv('ENV_TEST_USER', 'user') - env_test_number = os.getenv('ENV_TEST_NUMBER', 'number') - env_test_string = os.getenv('ENV_TEST_STRING', 'string') - env_user = os.getenv('USER') - - yield env_test_user - yield env_test_number - yield env_test_string - yield env_user - - -def load(s: str): - print(s) - - -graph = bonobo.Graph(extract, load) - -if __name__ == '__main__': - bonobo.run(graph) From b5a8af3efe5fd159e9a33b54d2ae561406b45811 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 1 Nov 2017 18:49:29 +0100 Subject: [PATCH 11/37] Remove unused argument. --- bonobo/examples/environ.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bonobo/examples/environ.py b/bonobo/examples/environ.py index ea1b39b..17d4e36 100644 --- a/bonobo/examples/environ.py +++ b/bonobo/examples/environ.py @@ -23,6 +23,5 @@ def get_graph(): # The __main__ block actually execute the graph. 
if __name__ == '__main__': parser = bonobo.get_argument_parser() - parser.add_argument('-v', action='append', dest='vars') with bonobo.parse_args(parser): bonobo.run(get_graph()) From 5cc514e490bc2c3527e39e16f00ce93f2def2c1a Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 1 Nov 2017 18:53:27 +0100 Subject: [PATCH 12/37] Update dependencies, remove python-dotenv. --- Makefile | 2 +- Projectfile | 1 - requirements-dev.txt | 12 ++---------- requirements-docker.txt | 4 +--- requirements-jupyter.txt | 4 ++-- requirements-sqlalchemy.txt | 18 ++++++++++++++++++ requirements.txt | 5 ++--- setup.py | 15 +++++++-------- 8 files changed, 33 insertions(+), 28 deletions(-) create mode 100644 requirements-sqlalchemy.txt diff --git a/Makefile b/Makefile index bb68335..89f16e8 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Generated by Medikit 0.4a5 on 2017-10-29. +# Generated by Medikit 0.4a9 on 2017-11-01. # All changes will be overriden. PACKAGE ?= bonobo diff --git a/Projectfile b/Projectfile index 3a10d0b..c51ac38 100644 --- a/Projectfile +++ b/Projectfile @@ -45,7 +45,6 @@ python.add_requirements( 'mondrian >=0.2,<0.3', 'packaging >=16,<17', 'psutil >=5.4,<6.0', - 'python-dotenv >=0.7,<0.8', 'requests >=2.0,<3.0', 'stevedore >=1.27,<1.28', dev=[ diff --git a/requirements-dev.txt b/requirements-dev.txt index 553fefc..8000471 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,29 +1,21 @@ -e .[dev] alabaster==0.7.10 -arrow==0.10.0 babel==2.5.1 -binaryornot==0.4.4 certifi==2017.7.27.1 chardet==3.0.4 -click==6.7 -cookiecutter==1.5.1 coverage==4.4.1 docutils==0.14 -future==0.16.0 idna==2.6 imagesize==0.7.1 -jinja2-time==0.2.0 jinja2==2.9.6 markupsafe==1.0 -poyo==0.4.1 py==1.4.34 pygments==2.2.0 pytest-cov==2.5.1 pytest-sugar==0.8.0 pytest-timeout==1.2.0 pytest==3.2.3 -python-dateutil==2.6.1 -pytz==2017.2 +pytz==2017.3 requests==2.18.4 six==1.11.0 snowballstemmer==1.2.1 @@ -31,4 +23,4 @@ sphinx==1.6.5 sphinxcontrib-websupport==1.0.1 
termcolor==1.1.0 urllib3==1.22 -whichcraft==0.4.1 +yapf==0.19.0 diff --git a/requirements-docker.txt b/requirements-docker.txt index d6f1160..8eb4059 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -3,7 +3,6 @@ appdirs==1.4.3 bonobo-docker==0.5.0 certifi==2017.7.27.1 chardet==3.0.4 -click==6.7 colorama==0.3.9 docker-pycreds==0.2.1 docker==2.3.0 @@ -13,8 +12,7 @@ packaging==16.8 pbr==3.1.1 psutil==5.4.0 pyparsing==2.2.0 -python-dotenv==0.7.1 -pytz==2017.2 +pytz==2017.3 requests==2.18.4 six==1.11.0 stevedore==1.27.1 diff --git a/requirements-jupyter.txt b/requirements-jupyter.txt index 4e1d024..1978875 100644 --- a/requirements-jupyter.txt +++ b/requirements-jupyter.txt @@ -13,7 +13,7 @@ jinja2==2.9.6 jsonschema==2.6.0 jupyter-client==5.1.0 jupyter-console==5.2.0 -jupyter-core==4.3.0 +jupyter-core==4.4.0 jupyter==1.0.0 markupsafe==1.0 mistune==0.8 @@ -28,7 +28,7 @@ prompt-toolkit==1.0.15 ptyprocess==0.5.2 pygments==2.2.0 python-dateutil==2.6.1 -pyzmq==16.0.2 +pyzmq==16.0.3 qtconsole==4.3.1 simplegeneric==0.8.1 six==1.11.0 diff --git a/requirements-sqlalchemy.txt b/requirements-sqlalchemy.txt new file mode 100644 index 0000000..7083f9e --- /dev/null +++ b/requirements-sqlalchemy.txt @@ -0,0 +1,18 @@ +-e .[sqlalchemy] +appdirs==1.4.3 +bonobo-sqlalchemy==0.5.1 +certifi==2017.7.27.1 +chardet==3.0.4 +colorama==0.3.9 +fs==2.0.12 +idna==2.6 +packaging==16.8 +pbr==3.1.1 +psutil==5.4.0 +pyparsing==2.2.0 +pytz==2017.3 +requests==2.18.4 +six==1.11.0 +sqlalchemy==1.1.14 +stevedore==1.27.1 +urllib3==1.22 diff --git a/requirements.txt b/requirements.txt index 0d69f1a..0b8ada0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,18 +2,17 @@ appdirs==1.4.3 certifi==2017.7.27.1 chardet==3.0.4 -click==6.7 colorama==0.3.9 fs==2.0.12 idna==2.6 jinja2==2.9.6 markupsafe==1.0 +mondrian==0.2.0 packaging==16.8 pbr==3.1.1 psutil==5.4.0 pyparsing==2.2.0 -python-dotenv==0.7.1 -pytz==2017.2 +pytz==2017.3 requests==2.18.4 six==1.11.0 stevedore==1.27.1 diff --git 
a/setup.py b/setup.py index 0c01b43..2b949c0 100644 --- a/setup.py +++ b/setup.py @@ -53,18 +53,17 @@ setup( packages=find_packages(exclude=['ez_setup', 'example', 'test']), include_package_data=True, install_requires=[ - 'colorama (>= 0.3, < 0.4)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'packaging (>= 16, < 17)', - 'psutil (>= 5.4, < 6.0)', 'python-dotenv (>= 0.7, < 0.8)', 'requests (>= 2.0, < 3.0)', - 'stevedore (>= 1.27, < 1.28)' + 'colorama (>= 0.3)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'mondrian (>= 0.2, < 0.3)', + 'packaging (>= 16, < 17)', 'psutil (>= 5.4, < 6.0)', 'requests (>= 2.0, < 3.0)', 'stevedore (>= 1.27, < 1.28)' ], extras_require={ 'dev': [ - 'cookiecutter (>= 1.5, < 1.6)', 'coverage (>= 4.4, < 5.0)', 'pytest (>= 3.1, < 4.0)', - 'pytest-cov (>= 2.5, < 3.0)', 'pytest-sugar (>= 0.8, < 0.9)', 'pytest-timeout (>= 1, < 2)', - 'sphinx (>= 1.6, < 2.0)' + 'coverage (>= 4.4, < 5.0)', 'pytest (>= 3.1, < 4.0)', 'pytest-cov (>= 2.5, < 3.0)', + 'pytest-sugar (>= 0.8, < 0.9)', 'pytest-timeout (>= 1, < 2)', 'sphinx (>= 1.6, < 2.0)', 'yapf' ], - 'docker': ['bonobo-docker'], - 'jupyter': ['ipywidgets (>= 6.0.0, < 7)', 'jupyter (>= 1.0, < 1.1)'] + 'docker': ['bonobo-docker (>= 0.5.0)'], + 'jupyter': ['ipywidgets (>= 6.0.0, < 7)', 'jupyter (>= 1.0, < 1.1)'], + 'sqlalchemy': ['bonobo-sqlalchemy (>= 0.5.1)'] }, entry_points={ 'bonobo.commands': [ From ddd84c4f505e2e51b0a05c0e9d02e48851a7397d Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 1 Nov 2017 18:56:44 +0100 Subject: [PATCH 13/37] [examples] comments. --- bonobo/examples/environ.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bonobo/examples/environ.py b/bonobo/examples/environ.py index 17d4e36..280d2e1 100644 --- a/bonobo/examples/environ.py +++ b/bonobo/examples/environ.py @@ -1,26 +1,26 @@ +""" +This transformation extracts the environment and prints it, sorted alphabetically, one item per line. 
+ +Used in the bonobo tests around environment management. + +""" import os import bonobo def extract_environ(): + """Yield all the system environment.""" yield from sorted(os.environ.items()) def get_graph(): - """ - This function builds the graph that needs to be executed. - - :return: bonobo.Graph - - """ graph = bonobo.Graph() graph.add_chain(extract_environ, print) return graph -# The __main__ block actually execute the graph. if __name__ == '__main__': parser = bonobo.get_argument_parser() with bonobo.parse_args(parser): From 23404d06ddd06a545fa62df68c8782109337c242 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 1 Nov 2017 19:02:48 +0100 Subject: [PATCH 14/37] Still cleaning up. --- bonobo/events.py | 3 --- bonobo/examples/__init__.py | 23 ----------------------- tests/test_commands.py | 10 ++++++++++ 3 files changed, 10 insertions(+), 26 deletions(-) delete mode 100644 bonobo/events.py diff --git a/bonobo/events.py b/bonobo/events.py deleted file mode 100644 index 9a0cbba..0000000 --- a/bonobo/events.py +++ /dev/null @@ -1,3 +0,0 @@ -ON_START = 'bonobo.on_start' -ON_TICK = 'bonobo.on_tick' -ON_STOP = 'bonobo.on_stop' diff --git a/bonobo/examples/__init__.py b/bonobo/examples/__init__.py index 49b1544..e69de29 100644 --- a/bonobo/examples/__init__.py +++ b/bonobo/examples/__init__.py @@ -1,23 +0,0 @@ -def require(package, requirement=None): - requirement = requirement or package - - try: - return __import__(package) - except ImportError: - from colorama import Fore, Style - print( - Fore.YELLOW, - 'This example requires the {!r} package. Install it using:'. 
- format(requirement), - Style.RESET_ALL, - sep='' - ) - print() - print( - Fore.YELLOW, - ' $ pip install {!s}'.format(requirement), - Style.RESET_ALL, - sep='' - ) - print() - raise diff --git a/tests/test_commands.py b/tests/test_commands.py index 4bfc8ba..8a2c9b8 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -119,6 +119,16 @@ def test_version(runner): assert out.startswith('bonobo ') assert __version__ in out + out, err = runner('version', '-q') + out = out.strip() + assert out.startswith('bonobo ') + assert __version__ in out + + out, err = runner('version', '-qq') + out = out.strip() + assert not out.startswith('bonobo ') + assert __version__ in out + @all_runners def test_download_works_for_examples(runner): From 8e85fa04e222df4a83122e71d3df8a50c7c8e576 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 1 Nov 2017 19:45:35 +0100 Subject: [PATCH 15/37] Adds the ability to initialize a package from bonobo init. --- bonobo/commands/__init__.py | 9 ++++++- bonobo/commands/init.py | 49 +++++++++++++++++++++++++++++++++---- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index cd9559c..2f1c7cd 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -1,7 +1,9 @@ import argparse +import traceback from bonobo import settings, logging from bonobo.commands.base import BaseCommand, BaseGraphCommand +from bonobo.util.errors import print_error logger = logging.get_logger() @@ -52,4 +54,9 @@ def entrypoint(args=None): # Get command handler, execute, rince. 
command = commands[parsed_args.pop('command')] - command(**parsed_args) + + try: + command(**parsed_args) + except Exception as exc: + print_error(exc, traceback.format_exc()) + return 255 diff --git a/bonobo/commands/init.py b/bonobo/commands/init.py index 6c6c2ff..e5d11d2 100644 --- a/bonobo/commands/init.py +++ b/bonobo/commands/init.py @@ -12,9 +12,12 @@ class InitCommand(BaseCommand): def add_arguments(self, parser): parser.add_argument('filename') parser.add_argument('--force', '-f', default=False, action='store_true') - parser.add_argument('--template', '-t', choices=self.TEMPLATES, default='default') - def handle(self, *, template, filename, force=False): + target_group = parser.add_mutually_exclusive_group(required=True) + target_group.add_argument('--template', '-t', choices=self.TEMPLATES, default='default') + target_group.add_argument('--package', '-p', action='store_true', default=False) + + def create_file_from_template(self, *, template, filename): template_name = template name, ext = os.path.splitext(filename) if ext != '.py': @@ -24,10 +27,46 @@ class InitCommand(BaseCommand): env = Environment(loader=loader) template = env.get_template(template_name + '.py-tpl') - if os.path.exists(filename) and not force: - raise FileExistsError('Target filename already exists, use --force to override.') - with open(filename, 'w+') as f: f.write(template.render(name=name)) self.logger.info('Generated {} using template {!r}.'.format(filename, template_name)) + + def create_package(self, *, filename): + name, ext = os.path.splitext(filename) + if ext != '': + raise ValueError('Package names should not have an extension.') + + try: + import medikit.commands + except ImportError as exc: + raise ImportError( + 'To initialize a package, you need to install medikit (pip install --upgrade medikit).') from exc + + package_name = os.path.basename(filename) + medikit.commands.handle_init(os.path.join(os.getcwd(), filename, 'Projectfile'), name=package_name, + 
requirements=['bonobo']) + + self.logger.info('Generated "{}" package with medikit.'.format(package_name)) + self.create_file_from_template(template='default', filename=os.path.join(filename, package_name, '__main__.py')) + + print('Your "{}" package has been created.'.format(package_name)) + print() + print('Install it...') + print() + print(' pip install --editable {}'.format(filename)) + print() + print('Then maybe run the example...') + print() + print(' python -m {}'.format(package_name)) + print() + print('Enjoy!') + + def handle(self, *, template, filename, package=False, force=False): + if os.path.exists(filename) and not force: + raise FileExistsError('Target filename already exists, use --force to override.') + + if package: + self.create_package(filename=filename) + else: + self.create_file_from_template(template=template, filename=filename) From 7035cc01e058115ce9d903c0f1e44e7d4f8c384b Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 1 Nov 2017 19:52:34 +0100 Subject: [PATCH 16/37] Adds argument parser support in default template. --- bonobo/commands/templates/default.py-tpl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bonobo/commands/templates/default.py-tpl b/bonobo/commands/templates/default.py-tpl index 1d8d6a5..c72efb5 100644 --- a/bonobo/commands/templates/default.py-tpl +++ b/bonobo/commands/templates/default.py-tpl @@ -1,5 +1,6 @@ import bonobo + def extract(): """Placeholder, change, rename, remove... """ yield 'hello' @@ -46,5 +47,6 @@ def get_services(): # The __main__ block actually execute the graph. if __name__ == '__main__': - # Although you're not required to use it, bonobo's graph related commands will hook to this call (inspect, run, ...). 
- bonobo.run(get_graph(), services=get_services()) + parser = bonobo.get_argument_parser() + with bonobo.parse_args(parser): + bonobo.run(get_graph(), services=get_services()) From 58923f4a84f16e5a53ccb8736361ac762aed7182 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 1 Nov 2017 19:57:51 +0100 Subject: [PATCH 17/37] [logging] Switching to mondrian, who got all our formating code. --- bonobo/commands/__init__.py | 10 +++-- bonobo/commands/base.py | 6 +-- bonobo/logging.py | 86 ------------------------------------- 3 files changed, 9 insertions(+), 93 deletions(-) delete mode 100644 bonobo/logging.py diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index 2f1c7cd..c015d78 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -1,12 +1,12 @@ import argparse import traceback +import logging +import mondrian -from bonobo import settings, logging +from bonobo import settings from bonobo.commands.base import BaseCommand, BaseGraphCommand from bonobo.util.errors import print_error -logger = logging.get_logger() - def entrypoint(args=None): """ @@ -16,6 +16,8 @@ def entrypoint(args=None): """ + logger = mondrian.getLogger() + parser = argparse.ArgumentParser() parser.add_argument('--debug', '-D', action='store_true') @@ -48,7 +50,7 @@ def entrypoint(args=None): if parsed_args.pop('debug', False): settings.DEBUG.set(True) settings.LOGGING_LEVEL.set(logging.DEBUG) - logging.set_level(settings.LOGGING_LEVEL.get()) + logger.setLevel(settings.LOGGING_LEVEL.get()) logger.debug('Command: ' + parsed_args['command'] + ' Arguments: ' + repr(parsed_args)) diff --git a/bonobo/commands/base.py b/bonobo/commands/base.py index 43513b0..da2967f 100644 --- a/bonobo/commands/base.py +++ b/bonobo/commands/base.py @@ -1,12 +1,12 @@ import argparse +import logging import runpy import sys from contextlib import contextmanager import bonobo.util.environ -from bonobo import logging -from bonobo.util.environ import get_argument_parser, 
parse_args from bonobo.util import get_name +from bonobo.util.environ import get_argument_parser, parse_args class BaseCommand: @@ -20,7 +20,7 @@ class BaseCommand: try: return self._logger except AttributeError: - self._logger = logging.get_logger(get_name(self)) + self._logger = logging.getLogger(get_name(self)) return self._logger def add_arguments(self, parser): diff --git a/bonobo/logging.py b/bonobo/logging.py deleted file mode 100644 index 071fcd3..0000000 --- a/bonobo/logging.py +++ /dev/null @@ -1,86 +0,0 @@ -import logging -import sys -import textwrap -from logging import CRITICAL, DEBUG, ERROR, INFO, WARNING - -from colorama import Fore, Style - -from bonobo import settings -from bonobo.util.term import CLEAR_EOL - -iswindows = (sys.platform == 'win32') - - -def get_format(): - yield '{b}[%(fg)s%(levelname)s{b}][{w}' - yield '{b}][{w}'.join(('%(spent)04d', '%(name)s')) - yield '{b}]' - yield ' %(fg)s%(message)s{r}' - if not iswindows: - yield CLEAR_EOL - - -colors = { - 'b': '' if iswindows else Fore.BLACK, - 'w': '' if iswindows else Fore.LIGHTBLACK_EX, - 'r': '' if iswindows else Style.RESET_ALL, -} -format = (''.join(get_format())).format(**colors) - - -class Filter(logging.Filter): - def filter(self, record): - record.spent = record.relativeCreated // 1000 - if iswindows: - record.fg = '' - elif record.levelname == 'DEBG': - record.fg = Fore.LIGHTBLACK_EX - elif record.levelname == 'INFO': - record.fg = Fore.LIGHTWHITE_EX - elif record.levelname == 'WARN': - record.fg = Fore.LIGHTYELLOW_EX - elif record.levelname == 'ERR ': - record.fg = Fore.LIGHTRED_EX - elif record.levelname == 'CRIT': - record.fg = Fore.RED - else: - record.fg = Fore.LIGHTWHITE_EX - return True - - -class Formatter(logging.Formatter): - def formatException(self, ei): - tb = super().formatException(ei) - if iswindows: - return textwrap.indent(tb, ' | ') - else: - return textwrap.indent(tb, Fore.BLACK + ' | ' + Fore.WHITE) - - -def setup(level): - logging.addLevelName(DEBUG, 
'DEBG') - logging.addLevelName(INFO, 'INFO') - logging.addLevelName(WARNING, 'WARN') - logging.addLevelName(ERROR, 'ERR ') - logging.addLevelName(CRITICAL, 'CRIT') - handler = logging.StreamHandler(sys.stderr) - handler.setFormatter(Formatter(format)) - handler.addFilter(Filter()) - root = logging.getLogger() - root.addHandler(handler) - root.setLevel(level) - - -def set_level(level): - logging.getLogger().setLevel(level) - - -def get_logger(name='bonobo'): - return logging.getLogger(name) - - -# Compatibility with python logging -getLogger = get_logger - -# Setup formating and level. -setup(level=settings.LOGGING_LEVEL.get()) From fb86bc950716973888fc509e0a2582253f5f5343 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Thu, 2 Nov 2017 00:08:09 +0100 Subject: [PATCH 18/37] [errors] Move error handling in transformations to use mondrian. --- Makefile | 2 +- Projectfile | 2 +- bonobo/__init__.py | 1 + bonobo/_api.py | 7 +++++-- bonobo/commands/__init__.py | 16 +++++++--------- bonobo/commands/download.py | 1 - bonobo/commands/init.py | 10 ++++++---- bonobo/execution/__init__.py | 4 +++- bonobo/execution/base.py | 19 +++++++++++++------ bonobo/execution/node.py | 6 ++++-- bonobo/ext/django.py | 1 - bonobo/settings.py | 1 + requirements.txt | 2 +- setup.py | 2 +- tests/test_commands.py | 2 +- 15 files changed, 45 insertions(+), 31 deletions(-) diff --git a/Makefile b/Makefile index 89f16e8..14825bf 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Generated by Medikit 0.4a9 on 2017-11-01. +# Generated by Medikit 0.4a10 on 2017-11-01. # All changes will be overriden. 
PACKAGE ?= bonobo diff --git a/Projectfile b/Projectfile index c51ac38..3a48797 100644 --- a/Projectfile +++ b/Projectfile @@ -42,7 +42,7 @@ python.setup( python.add_requirements( 'fs >=2.0,<2.1', 'jinja2 >=2.9,<2.10', - 'mondrian >=0.2,<0.3', + 'mondrian >=0.3,<0.4', 'packaging >=16,<17', 'psutil >=5.4,<6.0', 'requests >=2.0,<3.0', diff --git a/bonobo/__init__.py b/bonobo/__init__.py index 3c15c18..0ac9bc3 100644 --- a/bonobo/__init__.py +++ b/bonobo/__init__.py @@ -9,6 +9,7 @@ import sys assert (sys.version_info >= (3, 5)), 'Python 3.5+ is required to use Bonobo.' + from bonobo._api import * from bonobo._api import __all__ from bonobo._version import __version__ diff --git a/bonobo/_api.py b/bonobo/_api.py index f1e5da5..d4dbda2 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -40,7 +40,6 @@ def run(graph, *, plugins=None, services=None, strategy=None): :param dict services: The implementations of services this graph will use. :return bonobo.execution.graph.GraphExecutionContext: """ - strategy = create_strategy(strategy) plugins = plugins or [] @@ -49,6 +48,10 @@ def run(graph, *, plugins=None, services=None, strategy=None): if not settings.QUIET.get(): # pragma: no cover if _is_interactive_console(): + import mondrian + mondrian.setup() + mondrian.setupExceptHook() + from bonobo.ext.console import ConsoleOutputPlugin if ConsoleOutputPlugin not in plugins: plugins.append(ConsoleOutputPlugin) @@ -67,7 +70,7 @@ def run(graph, *, plugins=None, services=None, strategy=None): if JupyterOutputPlugin not in plugins: plugins.append(JupyterOutputPlugin) - return strategy.execute(graph, plugins=plugins, services=services) + return create_strategy(strategy).execute(graph, plugins=plugins, services=services) def _inspect_as_graph(graph): diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index c015d78..39cfa05 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -1,11 +1,9 @@ import argparse -import traceback import logging 
-import mondrian +import mondrian from bonobo import settings from bonobo.commands.base import BaseCommand, BaseGraphCommand -from bonobo.util.errors import print_error def entrypoint(args=None): @@ -16,7 +14,10 @@ def entrypoint(args=None): """ - logger = mondrian.getLogger() + mondrian.setup() + mondrian.setupExceptHook() + + logger = logging.getLogger() parser = argparse.ArgumentParser() parser.add_argument('--debug', '-D', action='store_true') @@ -56,9 +57,6 @@ def entrypoint(args=None): # Get command handler, execute, rince. command = commands[parsed_args.pop('command')] + command(**parsed_args) - try: - command(**parsed_args) - except Exception as exc: - print_error(exc, traceback.format_exc()) - return 255 + return 0 diff --git a/bonobo/commands/download.py b/bonobo/commands/download.py index 9333db4..96b1c2f 100644 --- a/bonobo/commands/download.py +++ b/bonobo/commands/download.py @@ -12,7 +12,6 @@ EXAMPLES_BASE_URL = 'https://raw.githubusercontent.com/python-bonobo/bonobo/mast class DownloadCommand(BaseCommand): def handle(self, *, path, **options): - path = path.lstrip('/') if not path.startswith('examples'): raise ValueError('Download command currently supports examples only') examples_path = re.sub('^examples/', '', path) diff --git a/bonobo/commands/init.py b/bonobo/commands/init.py index e5d11d2..8c50b16 100644 --- a/bonobo/commands/init.py +++ b/bonobo/commands/init.py @@ -13,7 +13,7 @@ class InitCommand(BaseCommand): parser.add_argument('filename') parser.add_argument('--force', '-f', default=False, action='store_true') - target_group = parser.add_mutually_exclusive_group(required=True) + target_group = parser.add_mutually_exclusive_group(required=False) target_group.add_argument('--template', '-t', choices=self.TEMPLATES, default='default') target_group.add_argument('--package', '-p', action='store_true', default=False) @@ -41,11 +41,13 @@ class InitCommand(BaseCommand): import medikit.commands except ImportError as exc: raise ImportError( - 'To 
initialize a package, you need to install medikit (pip install --upgrade medikit).') from exc + 'To initialize a package, you need to install medikit (pip install --upgrade medikit).' + ) from exc package_name = os.path.basename(filename) - medikit.commands.handle_init(os.path.join(os.getcwd(), filename, 'Projectfile'), name=package_name, - requirements=['bonobo']) + medikit.commands.handle_init( + os.path.join(os.getcwd(), filename, 'Projectfile'), name=package_name, requirements=['bonobo'] + ) self.logger.info('Generated "{}" package with medikit.'.format(package_name)) self.create_file_from_template(template='default', filename=os.path.join(filename, package_name, '__main__.py')) diff --git a/bonobo/execution/__init__.py b/bonobo/execution/__init__.py index b8a83dd..eea436a 100644 --- a/bonobo/execution/__init__.py +++ b/bonobo/execution/__init__.py @@ -1 +1,3 @@ -from bonobo.execution.graph import GraphExecutionContext, NodeExecutionContext, PluginExecutionContext +import logging + +logger = logging.getLogger(__name__) diff --git a/bonobo/execution/base.py b/bonobo/execution/base.py index b9bce36..d469631 100644 --- a/bonobo/execution/base.py +++ b/bonobo/execution/base.py @@ -1,12 +1,17 @@ -import traceback +import logging from contextlib import contextmanager +from logging import WARNING, ERROR from time import sleep +import sys + +import mondrian + from bonobo.config import create_container from bonobo.config.processors import ContextCurrifier from bonobo.util import isconfigurabletype -from bonobo.util.errors import print_error from bonobo.util.objects import Wrapper, get_name +from bonobo.execution import logger @contextmanager @@ -14,7 +19,7 @@ def recoverable(error_handler): try: yield except Exception as exc: # pylint: disable=broad-except - error_handler(exc, traceback.format_exc()) + error_handler(*sys.exc_info(), level=ERROR) @contextmanager @@ -22,7 +27,7 @@ def unrecoverable(error_handler): try: yield except Exception as exc: # pylint: 
disable=broad-except - error_handler(exc, traceback.format_exc()) + error_handler(*sys.exc_info(), level=ERROR) raise # raise unrecoverableerror from x ? @@ -101,8 +106,10 @@ class LoopingExecutionContext(Wrapper): finally: self._stopped = True - def handle_error(self, exc, trace): - return print_error(exc, trace, context=self.wrapped) + def handle_error(self, exctype, exc, tb): + mondrian.excepthook( + exctype, exc, tb, level=WARNING, context='{} in {}'.format(exctype.__name__, get_name(self)), logger=logger + ) def _get_initial_context(self): if self.parent: diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index e727531..7781a78 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -3,6 +3,8 @@ from queue import Empty from time import sleep from types import GeneratorType +import sys + from bonobo.constants import NOT_MODIFIED, BEGIN, END from bonobo.errors import InactiveReadableError, UnrecoverableError from bonobo.execution.base import LoopingExecutionContext @@ -101,11 +103,11 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): sleep(self.PERIOD) continue except UnrecoverableError as exc: - self.handle_error(exc, traceback.format_exc()) + self.handle_error(*sys.exc_info()) self.input.shutdown() break except Exception as exc: # pylint: disable=broad-except - self.handle_error(exc, traceback.format_exc()) + self.handle_error(*sys.exc_info()) def step(self): # Pull data from the first available input channel. 
diff --git a/bonobo/ext/django.py b/bonobo/ext/django.py index d35d131..d9d17f7 100644 --- a/bonobo/ext/django.py +++ b/bonobo/ext/django.py @@ -5,7 +5,6 @@ from django.core.management.base import BaseCommand, OutputWrapper import bonobo import bonobo.util -from bonobo.commands import get_default_services from bonobo.ext.console import ConsoleOutputPlugin from bonobo.util.term import CLEAR_EOL diff --git a/bonobo/settings.py b/bonobo/settings.py index 05d2089..fdc4412 100644 --- a/bonobo/settings.py +++ b/bonobo/settings.py @@ -1,4 +1,5 @@ import logging + import os from bonobo.errors import ValidationError diff --git a/requirements.txt b/requirements.txt index 0b8ada0..35ab601 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ fs==2.0.12 idna==2.6 jinja2==2.9.6 markupsafe==1.0 -mondrian==0.2.0 +mondrian==0.3.0 packaging==16.8 pbr==3.1.1 psutil==5.4.0 diff --git a/setup.py b/setup.py index 2b949c0..9bfecb1 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ setup( packages=find_packages(exclude=['ez_setup', 'example', 'test']), include_package_data=True, install_requires=[ - 'colorama (>= 0.3)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'mondrian (>= 0.2, < 0.3)', + 'colorama (>= 0.3)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'mondrian (>= 0.3, < 0.4)', 'packaging (>= 16, < 17)', 'psutil (>= 5.4, < 6.0)', 'requests (>= 2.0, < 3.0)', 'stevedore (>= 1.27, < 1.28)' ], extras_require={ diff --git a/tests/test_commands.py b/tests/test_commands.py index 8a2c9b8..e7e3523 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -164,7 +164,7 @@ def test_download_works_for_examples(runner): @all_runners def test_download_fails_non_example(runner): with pytest.raises(ValueError): - runner('download', '/something/entirely/different.txt') + runner('download', 'something/entirely/different.txt') @pytest.fixture From 23542dc675b251418e1aa7ac2473a12831634038 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Thu, 2 Nov 2017 
00:17:03 +0100 Subject: [PATCH 19/37] Removing old error handler. --- bonobo/strategies/executor.py | 18 +++++++++------- bonobo/util/errors.py | 39 ----------------------------------- 2 files changed, 11 insertions(+), 46 deletions(-) delete mode 100644 bonobo/util/errors.py diff --git a/bonobo/strategies/executor.py b/bonobo/strategies/executor.py index 3bfabc6..24ca154 100644 --- a/bonobo/strategies/executor.py +++ b/bonobo/strategies/executor.py @@ -1,11 +1,15 @@ import time + +import sys + +import mondrian import traceback from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor +from bonobo.util import get_name from bonobo.constants import BEGIN, END from bonobo.strategies.base import Strategy from bonobo.structs.bags import Bag -from bonobo.util.errors import print_error class ExecutorStrategy(Strategy): @@ -47,16 +51,16 @@ class ExecutorStrategy(Strategy): def _runner(): try: node.start() - except Exception as exc: - print_error(exc, traceback.format_exc(), context=node, method='start') + except Exception: + mondrian.excepthook(*sys.exc_info(), context='Could not start node {}.'.format(get_name(node))) node.input.on_end() else: node.loop() try: node.stop() - except Exception as exc: - print_error(exc, traceback.format_exc(), context=node, method='stop') + except Exception: + mondrian.excepthook(*sys.exc_info(), context='Could not stop node {}.'.format(get_name(node))) futures.append(executor.submit(_runner)) @@ -68,8 +72,8 @@ class ExecutorStrategy(Strategy): with plugin: try: plugin.loop() - except Exception as exc: - print_error(exc, traceback.format_exc(), context=plugin) + except Exception: + mondrian.excepthook(*sys.exc_info(), context='In plugin loop for {}...'.format(get_name(plugin))) futures.append(executor.submit(_runner)) diff --git a/bonobo/util/errors.py b/bonobo/util/errors.py deleted file mode 100644 index cae2789..0000000 --- a/bonobo/util/errors.py +++ /dev/null @@ -1,39 +0,0 @@ -import sys -from textwrap import 
indent - - -def _get_error_message(exc): - if hasattr(exc, '__str__'): - message = str(exc) - return message[0].upper() + message[1:] - return '\n'.join(exc.args), - - -def print_error(exc, trace, context=None, method=None): - """ - Error handler. Whatever happens in a plugin or component, if it looks like an exception, taste like an exception - or somehow make me think it is an exception, I'll handle it. - - :param exc: the culprit - :param trace: Hercule Poirot's logbook. - :return: to hell - """ - - from colorama import Fore, Style - - prefix = '{}{} | {}'.format(Fore.RED, Style.BRIGHT, Style.RESET_ALL) - - print( - Style.BRIGHT, - Fore.RED, - type(exc).__name__, - ' (in {}{})'.format(type(context).__name__, '.{}()'.format(method) if method else '') if context else '', - Style.RESET_ALL, - '\n', - indent(_get_error_message(exc), prefix + Style.BRIGHT), - Style.RESET_ALL, - sep='', - file=sys.stderr, - ) - print(prefix, file=sys.stderr) - print(indent(trace, prefix, predicate=lambda line: True), file=sys.stderr) From 9037d946546878076419ff7404698fc14f3d791c Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Thu, 2 Nov 2017 07:25:38 +0100 Subject: [PATCH 20/37] Fix imports. 
--- bonobo/execution/__init__.py | 2 ++ bonobo/execution/base.py | 7 ++----- bonobo/strategies/executor.py | 4 +++- bonobo/util/testing.py | 2 +- tests/io/test_csv.py | 3 +-- tests/test_basicusage.py | 5 +++-- 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/bonobo/execution/__init__.py b/bonobo/execution/__init__.py index eea436a..43ffbf3 100644 --- a/bonobo/execution/__init__.py +++ b/bonobo/execution/__init__.py @@ -1,3 +1,5 @@ import logging logger = logging.getLogger(__name__) + +__all__ = [] diff --git a/bonobo/execution/base.py b/bonobo/execution/base.py index d469631..08375a8 100644 --- a/bonobo/execution/base.py +++ b/bonobo/execution/base.py @@ -1,17 +1,14 @@ -import logging +import sys from contextlib import contextmanager from logging import WARNING, ERROR from time import sleep -import sys - import mondrian - from bonobo.config import create_container from bonobo.config.processors import ContextCurrifier +from bonobo.execution import logger from bonobo.util import isconfigurabletype from bonobo.util.objects import Wrapper, get_name -from bonobo.execution import logger @contextmanager diff --git a/bonobo/strategies/executor.py b/bonobo/strategies/executor.py index 24ca154..8c27d40 100644 --- a/bonobo/strategies/executor.py +++ b/bonobo/strategies/executor.py @@ -73,7 +73,9 @@ class ExecutorStrategy(Strategy): try: plugin.loop() except Exception: - mondrian.excepthook(*sys.exc_info(), context='In plugin loop for {}...'.format(get_name(plugin))) + mondrian.excepthook( + *sys.exc_info(), context='In plugin loop for {}...'.format(get_name(plugin)) + ) futures.append(executor.submit(_runner)) diff --git a/bonobo/util/testing.py b/bonobo/util/testing.py index 6fc7d60..7dc8f38 100644 --- a/bonobo/util/testing.py +++ b/bonobo/util/testing.py @@ -1,7 +1,7 @@ from contextlib import contextmanager from bonobo import open_fs, Token -from bonobo.execution import GraphExecutionContext +from bonobo.execution.graph import GraphExecutionContext from 
bonobo.execution.node import NodeExecutionContext diff --git a/tests/io/test_csv.py b/tests/io/test_csv.py index 291c241..1c4c6cc 100644 --- a/tests/io/test_csv.py +++ b/tests/io/test_csv.py @@ -1,7 +1,6 @@ import pytest -from bonobo import Bag, CsvReader, CsvWriter, settings -from bonobo.constants import BEGIN, END +from bonobo import CsvReader, CsvWriter, settings from bonobo.execution.node import NodeExecutionContext from bonobo.util.testing import FilesystemTester, BufferingNodeExecutionContext diff --git a/tests/test_basicusage.py b/tests/test_basicusage.py index 58a1212..f002d36 100644 --- a/tests/test_basicusage.py +++ b/tests/test_basicusage.py @@ -1,8 +1,9 @@ +from unittest.mock import patch + import pytest import bonobo -from bonobo.execution import GraphExecutionContext -from unittest.mock import patch +from bonobo.execution.graph import GraphExecutionContext @pytest.mark.timeout(2) From d988d304742c5d41028859289a0cd31b787820bf Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Thu, 2 Nov 2017 08:25:30 +0100 Subject: [PATCH 21/37] Update to mondrian 0.4a0. --- Makefile | 2 +- Projectfile | 2 +- requirements.txt | 2 +- setup.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 14825bf..8baafb1 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Generated by Medikit 0.4a10 on 2017-11-01. +# Generated by Medikit 0.4a10 on 2017-11-02. # All changes will be overriden. 
PACKAGE ?= bonobo diff --git a/Projectfile b/Projectfile index 3a48797..a91bfe5 100644 --- a/Projectfile +++ b/Projectfile @@ -42,7 +42,7 @@ python.setup( python.add_requirements( 'fs >=2.0,<2.1', 'jinja2 >=2.9,<2.10', - 'mondrian >=0.3,<0.4', + 'mondrian ==0.4a0', 'packaging >=16,<17', 'psutil >=5.4,<6.0', 'requests >=2.0,<3.0', diff --git a/requirements.txt b/requirements.txt index 35ab601..82f5cdb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ fs==2.0.12 idna==2.6 jinja2==2.9.6 markupsafe==1.0 -mondrian==0.3.0 +mondrian==0.4a0 packaging==16.8 pbr==3.1.1 psutil==5.4.0 diff --git a/setup.py b/setup.py index 9bfecb1..219e0e6 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ setup( packages=find_packages(exclude=['ez_setup', 'example', 'test']), include_package_data=True, install_requires=[ - 'colorama (>= 0.3)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'mondrian (>= 0.3, < 0.4)', + 'colorama (>= 0.3)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'mondrian (== 0.4a0)', 'packaging (>= 16, < 17)', 'psutil (>= 5.4, < 6.0)', 'requests (>= 2.0, < 3.0)', 'stevedore (>= 1.27, < 1.28)' ], extras_require={ From 6bd1130e34021888d63cdc840e109254b1ce5f39 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 4 Nov 2017 11:20:15 +0100 Subject: [PATCH 22/37] [core] Refactoring to use an event dispatcher in the main thread. Plugins now run in the main thread, instead of their own threads, and the API changed to use an event dispatcher approach instead of a static class interface. 
--- Makefile | 2 +- Projectfile | 3 +- bonobo/_api.py | 10 ++- bonobo/commands/__init__.py | 4 +- bonobo/examples/clock.py | 26 +++++++ bonobo/execution/base.py | 17 ++++- bonobo/execution/events.py | 13 ++++ bonobo/execution/graph.py | 52 ++++++++++--- bonobo/execution/node.py | 54 +++++++++----- bonobo/execution/plugin.py | 27 ++----- bonobo/ext/django.py | 2 +- bonobo/{plugins.py => plugins/__init__.py} | 13 +++- bonobo/{ext => plugins}/console.py | 86 +++++++++++++--------- bonobo/strategies/executor.py | 64 +++++----------- bonobo/strategies/util.py | 1 - requirements.txt | 3 +- setup.py | 4 +- 17 files changed, 233 insertions(+), 148 deletions(-) create mode 100644 bonobo/examples/clock.py create mode 100644 bonobo/execution/events.py rename bonobo/{plugins.py => plugins/__init__.py} (67%) rename bonobo/{ext => plugins}/console.py (83%) delete mode 100644 bonobo/strategies/util.py diff --git a/Makefile b/Makefile index 8baafb1..483467d 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Generated by Medikit 0.4a10 on 2017-11-02. +# Generated by Medikit 0.4a10 on 2017-11-03. # All changes will be overriden. 
PACKAGE ?= bonobo diff --git a/Projectfile b/Projectfile index a91bfe5..7aa05b5 100644 --- a/Projectfile +++ b/Projectfile @@ -42,11 +42,12 @@ python.setup( python.add_requirements( 'fs >=2.0,<2.1', 'jinja2 >=2.9,<2.10', - 'mondrian ==0.4a0', + 'mondrian ==0.4a1', 'packaging >=16,<17', 'psutil >=5.4,<6.0', 'requests >=2.0,<3.0', 'stevedore >=1.27,<1.28', + 'whistle ==1.0a3', dev=[ 'pytest-sugar >=0.8,<0.9', 'pytest-timeout >=1,<2', diff --git a/bonobo/_api.py b/bonobo/_api.py index d4dbda2..9a82f41 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -49,10 +49,9 @@ def run(graph, *, plugins=None, services=None, strategy=None): if not settings.QUIET.get(): # pragma: no cover if _is_interactive_console(): import mondrian - mondrian.setup() - mondrian.setupExceptHook() + mondrian.setup(excepthook=True) - from bonobo.ext.console import ConsoleOutputPlugin + from bonobo.plugins.console import ConsoleOutputPlugin if ConsoleOutputPlugin not in plugins: plugins.append(ConsoleOutputPlugin) @@ -70,7 +69,10 @@ def run(graph, *, plugins=None, services=None, strategy=None): if JupyterOutputPlugin not in plugins: plugins.append(JupyterOutputPlugin) - return create_strategy(strategy).execute(graph, plugins=plugins, services=services) + import logging + logging.getLogger().setLevel(settings.LOGGING_LEVEL.get()) + strategy = create_strategy(strategy) + return strategy.execute(graph, plugins=plugins, services=services) def _inspect_as_graph(graph): diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index 39cfa05..f42d5c6 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -14,9 +14,7 @@ def entrypoint(args=None): """ - mondrian.setup() - mondrian.setupExceptHook() - + mondrian.setup(excepthook=True) logger = logging.getLogger() parser = argparse.ArgumentParser() diff --git a/bonobo/examples/clock.py b/bonobo/examples/clock.py new file mode 100644 index 0000000..765f077 --- /dev/null +++ b/bonobo/examples/clock.py @@ -0,0 +1,26 @@ +import 
bonobo +import datetime +import time + + +def extract(): + """Placeholder, change, rename, remove... """ + for x in range(60): + if x: + time.sleep(1) + yield datetime.datetime.now() + + +def get_graph(): + graph = bonobo.Graph() + graph.add_chain( + extract, + print, + ) + + return graph + +if __name__ == '__main__': + parser = bonobo.get_argument_parser() + with bonobo.parse_args(parser): + bonobo.run(get_graph()) diff --git a/bonobo/execution/base.py b/bonobo/execution/base.py index 08375a8..74dd89b 100644 --- a/bonobo/execution/base.py +++ b/bonobo/execution/base.py @@ -29,8 +29,7 @@ def unrecoverable(error_handler): class LoopingExecutionContext(Wrapper): - alive = True - PERIOD = 0.25 + PERIOD = 0.5 @property def started(self): @@ -40,6 +39,19 @@ class LoopingExecutionContext(Wrapper): def stopped(self): return self._stopped + @property + def alive(self): + return self._started and not self._stopped + + @property + def status(self): + """One character status for this node. """ + if not self.started: + return ' ' + if not self.stopped: + return '+' + return '-' + def __init__(self, wrapped, parent, services=None): super().__init__(wrapped) @@ -84,7 +96,6 @@ class LoopingExecutionContext(Wrapper): """Generic loop. A bit boring. 
""" while self.alive: self.step() - sleep(self.PERIOD) def step(self): """Left as an exercise for the children.""" diff --git a/bonobo/execution/events.py b/bonobo/execution/events.py new file mode 100644 index 0000000..036e879 --- /dev/null +++ b/bonobo/execution/events.py @@ -0,0 +1,13 @@ +from whistle import Event + +START = 'execution.start' +STARTED = 'execution.started' +TICK = 'execution.tick' +STOP = 'execution.stop' +STOPPED = 'execution.stopped' +KILL = 'execution.kill' + + +class ExecutionEvent(Event): + def __init__(self, graph_context): + self.graph_context = graph_context diff --git a/bonobo/execution/graph.py b/bonobo/execution/graph.py index 77e01fa..deaa150 100644 --- a/bonobo/execution/graph.py +++ b/bonobo/execution/graph.py @@ -1,8 +1,11 @@ -import time from functools import partial +from time import sleep + +from whistle import EventDispatcher from bonobo.config import create_container from bonobo.constants import BEGIN, END +from bonobo.execution import events from bonobo.execution.node import NodeExecutionContext from bonobo.execution.plugin import PluginExecutionContext @@ -11,6 +14,8 @@ class GraphExecutionContext: NodeExecutionContextType = NodeExecutionContext PluginExecutionContextType = PluginExecutionContext + TICK_PERIOD = 0.25 + @property def started(self): return any(node.started for node in self.nodes) @@ -23,7 +28,8 @@ class GraphExecutionContext: def alive(self): return any(node.alive for node in self.nodes) - def __init__(self, graph, plugins=None, services=None): + def __init__(self, graph, plugins=None, services=None, dispatcher=None): + self.dispatcher = dispatcher or EventDispatcher() self.graph = graph self.nodes = [self.create_node_execution_context_for(node) for node in self.graph] self.plugins = [self.create_plugin_execution_context_for(plugin) for plugin in plugins or ()] @@ -53,6 +59,8 @@ class GraphExecutionContext: return self.NodeExecutionContextType(node, parent=self) def create_plugin_execution_context_for(self, 
plugin): + if isinstance(plugin, type): + plugin = plugin() return self.PluginExecutionContextType(plugin, parent=self) def write(self, *messages): @@ -63,23 +71,45 @@ class GraphExecutionContext: for message in messages: self[i].write(message) + def dispatch(self, name): + self.dispatcher.dispatch(name, events.ExecutionEvent(self)) + def start(self, starter=None): + self.register_plugins() + self.dispatch(events.START) + self.tick() for node in self.nodes: if starter is None: node.start() else: starter(node) + self.dispatch(events.STARTED) - def start_plugins(self, starter=None): - for plugin in self.plugins: - if starter is None: - plugin.start() - else: - starter(plugin) + def tick(self): + self.dispatch(events.TICK) + sleep(self.TICK_PERIOD) + + def kill(self): + self.dispatch(events.KILL) + for node_context in self.nodes: + node_context.kill() + self.tick() def stop(self, stopper=None): - for node in self.nodes: + self.dispatch(events.STOP) + for node_context in self.nodes: if stopper is None: - node.stop() + node_context.stop() else: - stopper(node) + stopper(node_context) + self.tick() + self.dispatch(events.STOPPED) + self.unregister_plugins() + + def register_plugins(self): + for plugin_context in self.plugins: + plugin_context.register() + + def unregister_plugins(self): + for plugin_context in self.plugins: + plugin_context.unregister() diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index 7781a78..fdb0c9f 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -1,10 +1,9 @@ -import traceback +import sys +import threading from queue import Empty from time import sleep from types import GeneratorType -import sys - from bonobo.constants import NOT_MODIFIED, BEGIN, END from bonobo.errors import InactiveReadableError, UnrecoverableError from bonobo.execution.base import LoopingExecutionContext @@ -22,13 +21,8 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): """ @property - def alive(self): - """todo 
check if this is right, and where it is used""" - return self._started and not self._stopped - - @property - def alive_str(self): - return '+' if self.alive else '-' + def killed(self): + return self._killed def __init__(self, wrapped, parent=None, services=None, _input=None, _outputs=None): LoopingExecutionContext.__init__(self, wrapped, parent=parent, services=services) @@ -36,13 +30,19 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): self.input = _input or Input() self.outputs = _outputs or [] + self._killed = False def __str__(self): - return self.alive_str + ' ' + self.__name__ + self.get_statistics_as_string(prefix=' ') + return self.__name__ + self.get_statistics_as_string(prefix=' ') def __repr__(self): name, type_name = get_name(self), get_name(type(self)) - return '<{}({}{}){}>'.format(type_name, self.alive_str, name, self.get_statistics_as_string(prefix=' ')) + return '<{}({}{}){}>'.format(type_name, self.status, name, self.get_statistics_as_string(prefix=' ')) + + def get_flags_as_string(self): + if self.killed: + return '[killed]' + return '' def write(self, *messages): """ @@ -92,22 +92,26 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): return row def loop(self): - while True: + while not self._killed: try: self.step() except KeyboardInterrupt: - raise + self.handle_error(*sys.exc_info()) + break except InactiveReadableError: break except Empty: sleep(self.PERIOD) continue - except UnrecoverableError as exc: + except UnrecoverableError: self.handle_error(*sys.exc_info()) self.input.shutdown() break - except Exception as exc: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except self.handle_error(*sys.exc_info()) + except BaseException: + self.handle_error(*sys.exc_info()) + break def step(self): # Pull data from the first available input channel. 
@@ -119,6 +123,15 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): # todo add timer self.handle_results(input_bag, input_bag.apply(self._stack)) + def kill(self): + if not self.started: + raise RuntimeError('Cannot kill a node context that has not started yet.') + + if self.stopped: + raise RuntimeError('Cannot kill a node context that has already stopped.') + + self._killed = True + def handle_results(self, input_bag, results): # self._exec_time += timer.duration # Put data onto output channels @@ -126,6 +139,9 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): if isinstance(results, GeneratorType): while True: try: + # if kill flag was step, stop iterating. + if self._killed: + break result = next(results) except StopIteration: break @@ -140,7 +156,7 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): def isflag(param): - return isinstance(param, Token) and param in (NOT_MODIFIED, ) + return isinstance(param, Token) and param in (NOT_MODIFIED,) def split_tokens(output): @@ -152,11 +168,11 @@ def split_tokens(output): """ if isinstance(output, Token): # just a flag - return (output, ), () + return (output,), () if not istuple(output): # no flag - return (), (output, ) + return (), (output,) i = 0 while isflag(output[i]): diff --git a/bonobo/execution/plugin.py b/bonobo/execution/plugin.py index 3379fc0..f552724 100644 --- a/bonobo/execution/plugin.py +++ b/bonobo/execution/plugin.py @@ -2,25 +2,12 @@ from bonobo.execution.base import LoopingExecutionContext, recoverable class PluginExecutionContext(LoopingExecutionContext): - PERIOD = 0.5 + @property + def dispatcher(self): + return self.parent.dispatcher - def __init__(self, wrapped, parent): - # Instanciate plugin. This is not yet considered stable, as at some point we may need a way to configure - # plugins, for example if it depends on an external service. 
- super().__init__(wrapped(self), parent) + def register(self): + return self.wrapped.register(self.dispatcher) - def start(self): - super().start() - - with recoverable(self.handle_error): - self.wrapped.on_start() - - def shutdown(self): - if self.started: - with recoverable(self.handle_error): - self.wrapped.on_stop() - self.alive = False - - def step(self): - with recoverable(self.handle_error): - self.wrapped.on_tick() + def unregister(self): + return self.wrapped.unregister(self.dispatcher) diff --git a/bonobo/ext/django.py b/bonobo/ext/django.py index d9d17f7..60b583c 100644 --- a/bonobo/ext/django.py +++ b/bonobo/ext/django.py @@ -5,7 +5,7 @@ from django.core.management.base import BaseCommand, OutputWrapper import bonobo import bonobo.util -from bonobo.ext.console import ConsoleOutputPlugin +from bonobo.plugins.console import ConsoleOutputPlugin from bonobo.util.term import CLEAR_EOL diff --git a/bonobo/plugins.py b/bonobo/plugins/__init__.py similarity index 67% rename from bonobo/plugins.py rename to bonobo/plugins/__init__.py index 7a0f5d1..897b687 100644 --- a/bonobo/plugins.py +++ b/bonobo/plugins/__init__.py @@ -10,5 +10,14 @@ class Plugin: """ - def __init__(self, context): - self.context = context + def register(self, dispatcher): + """ + :param dispatcher: whistle.EventDispatcher + """ + pass + + def unregister(self, dispatcher): + """ + :param dispatcher: whistle.EventDispatcher + """ + pass diff --git a/bonobo/ext/console.py b/bonobo/plugins/console.py similarity index 83% rename from bonobo/ext/console.py rename to bonobo/plugins/console.py index 0e6abb3..814894b 100644 --- a/bonobo/ext/console.py +++ b/bonobo/plugins/console.py @@ -2,38 +2,14 @@ import io import sys from contextlib import redirect_stdout, redirect_stderr -from colorama import Style, Fore, init - -init(wrap=True) +from colorama import Style, Fore, init as initialize_colorama_output_wrappers from bonobo import settings +from bonobo.execution import events from bonobo.plugins 
import Plugin from bonobo.util.term import CLEAR_EOL, MOVE_CURSOR_UP - -class IOBuffer(): - """ - The role of IOBuffer is to overcome the problem of multiple threads wanting to write to stdout at the same time. It - works a bit like a videogame: there are two buffers, one that is used to write, and one which is used to read from. - On each cycle, we swap the buffers, and the console plugin handle output of the one which is not anymore "active". - - """ - - def __init__(self): - self.current = io.StringIO() - self.write = self.current.write - - def switch(self): - previous = self.current - self.current = io.StringIO() - self.write = self.current.write - try: - return previous.getvalue() - finally: - previous.close() - - def flush(self): - self.current.flush() +initialize_colorama_output_wrappers(wrap=True) class ConsoleOutputPlugin(Plugin): @@ -60,13 +36,24 @@ class ConsoleOutputPlugin(Plugin): # Whether we're on windows, or a real operating system. iswindows = (sys.platform == 'win32') - def on_start(self): + def __init__(self): + self.isatty = self._stdout.isatty() + + def register(self, dispatcher): + dispatcher.add_listener(events.START, self.setup) + dispatcher.add_listener(events.TICK, self.tick) + dispatcher.add_listener(events.STOPPED, self.teardown) + + def unregister(self, dispatcher): + dispatcher.remove_listener(events.STOPPED, self.teardown) + dispatcher.remove_listener(events.TICK, self.tick) + dispatcher.remove_listener(events.START, self.setup) + + def setup(self, event): self.prefix = '' self.counter = 0 self._append_cache = '' - self.isatty = self._stdout.isatty() - self.stdout = IOBuffer() self.redirect_stdout = redirect_stdout(self._stdout if self.iswindows else self.stdout) self.redirect_stdout.__enter__() @@ -75,14 +62,14 @@ class ConsoleOutputPlugin(Plugin): self.redirect_stderr = redirect_stderr(self._stderr if self.iswindows else self.stderr) self.redirect_stderr.__enter__() - def on_tick(self): + def tick(self, event): if self.isatty and 
not self.iswindows: - self._write(self.context.parent, rewind=True) + self._write(event.graph_context, rewind=True) else: pass # not a tty, or windows, so we'll ignore stats output - def on_stop(self): - self._write(self.context.parent, rewind=False) + def teardown(self, event): + self._write(event.graph_context, rewind=False) self.redirect_stderr.__exit__(None, None, None) self.redirect_stdout.__exit__(None, None, None) @@ -113,6 +100,8 @@ class ConsoleOutputPlugin(Plugin): name_suffix, ' ', node.get_statistics_as_string(), + ' ', + node.get_flags_as_string(), Style.RESET_ALL, ' ', ) @@ -128,6 +117,8 @@ class ConsoleOutputPlugin(Plugin): name_suffix, ' ', node.get_statistics_as_string(), + ' ', + node.get_flags_as_string(), Style.RESET_ALL, ' ', ) @@ -166,7 +157,32 @@ class ConsoleOutputPlugin(Plugin): self.counter += 1 +class IOBuffer(): + """ + The role of IOBuffer is to overcome the problem of multiple threads wanting to write to stdout at the same time. It + works a bit like a videogame: there are two buffers, one that is used to write, and one which is used to read from. + On each cycle, we swap the buffers, and the console plugin handle output of the one which is not anymore "active". 
+ + """ + + def __init__(self): + self.current = io.StringIO() + self.write = self.current.write + + def switch(self): + previous = self.current + self.current = io.StringIO() + self.write = self.current.write + try: + return previous.getvalue() + finally: + previous.close() + + def flush(self): + self.current.flush() + + def memory_usage(): import os, psutil process = psutil.Process(os.getpid()) - return process.memory_info()[0] / float(2**20) + return process.memory_info()[0] / float(2 ** 20) diff --git a/bonobo/strategies/executor.py b/bonobo/strategies/executor.py index 8c27d40..e5ffdc0 100644 --- a/bonobo/strategies/executor.py +++ b/bonobo/strategies/executor.py @@ -1,10 +1,8 @@ -import time - +import functools +import logging import sys - -import mondrian -import traceback -from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor +from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor, wait, FIRST_EXCEPTION +from time import sleep from bonobo.util import get_name from bonobo.constants import BEGIN, END @@ -27,60 +25,38 @@ class ExecutorStrategy(Strategy): context = self.create_graph_execution_context(graph, **kwargs) context.write(BEGIN, Bag(), END) - executor = self.create_executor() - futures = [] - context.start_plugins(self.get_plugin_starter(executor, futures)) - context.start(self.get_starter(executor, futures)) + with self.create_executor() as executor: + context.start(self.get_starter(executor, futures)) - while context.alive: - time.sleep(0.1) + while context.alive: + try: + context.tick() + except KeyboardInterrupt: + logging.getLogger(__name__).warning('KeyboardInterrupt received. 
Trying to terminate the nodes gracefully.') + context.kill() + break - for plugin_context in context.plugins: - plugin_context.shutdown() - - context.stop() - - executor.shutdown() + context.stop() return context def get_starter(self, executor, futures): def starter(node): + @functools.wraps(node) def _runner(): try: - node.start() - except Exception: - mondrian.excepthook(*sys.exc_info(), context='Could not start node {}.'.format(get_name(node))) - node.input.on_end() - else: - node.loop() - - try: - node.stop() - except Exception: - mondrian.excepthook(*sys.exc_info(), context='Could not stop node {}.'.format(get_name(node))) + with node: + node.loop() + except BaseException as exc: + logging.getLogger(__name__).info('Got {} in {} runner.'.format(get_name(exc), node), + exc_info=sys.exc_info()) futures.append(executor.submit(_runner)) return starter - def get_plugin_starter(self, executor, futures): - def plugin_starter(plugin): - def _runner(): - with plugin: - try: - plugin.loop() - except Exception: - mondrian.excepthook( - *sys.exc_info(), context='In plugin loop for {}...'.format(get_name(plugin)) - ) - - futures.append(executor.submit(_runner)) - - return plugin_starter - class ThreadPoolExecutorStrategy(ExecutorStrategy): executor_factory = ThreadPoolExecutor diff --git a/bonobo/strategies/util.py b/bonobo/strategies/util.py deleted file mode 100644 index 8b13789..0000000 --- a/bonobo/strategies/util.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/requirements.txt b/requirements.txt index 82f5cdb..f579bdc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ fs==2.0.12 idna==2.6 jinja2==2.9.6 markupsafe==1.0 -mondrian==0.4a0 +mondrian==0.3.0 packaging==16.8 pbr==3.1.1 psutil==5.4.0 @@ -17,3 +17,4 @@ requests==2.18.4 six==1.11.0 stevedore==1.27.1 urllib3==1.22 +whistle==1.0a3 diff --git a/setup.py b/setup.py index 219e0e6..b914b72 100644 --- a/setup.py +++ b/setup.py @@ -53,8 +53,8 @@ setup( packages=find_packages(exclude=['ez_setup', 
'example', 'test']), include_package_data=True, install_requires=[ - 'colorama (>= 0.3)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'mondrian (== 0.4a0)', - 'packaging (>= 16, < 17)', 'psutil (>= 5.4, < 6.0)', 'requests (>= 2.0, < 3.0)', 'stevedore (>= 1.27, < 1.28)' + 'colorama (>= 0.3)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'mondrian', 'packaging (>= 16, < 17)', + 'psutil (>= 5.4, < 6.0)', 'requests (>= 2.0, < 3.0)', 'stevedore (>= 1.27, < 1.28)', 'whistle (== 1.0a3)' ], extras_require={ 'dev': [ From 5f300225a858a25a37445b9305353675c9573534 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 4 Nov 2017 12:10:28 +0100 Subject: [PATCH 23/37] Update requirements with first whistle stable. --- Makefile | 2 +- Projectfile | 2 +- requirements-jupyter.txt | 2 +- requirements-sqlalchemy.txt | 2 +- requirements.txt | 4 ++-- setup.py | 5 +++-- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 483467d..ccd73ba 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Generated by Medikit 0.4a10 on 2017-11-03. +# Generated by Medikit 0.4a11 on 2017-11-04. # All changes will be overriden. 
PACKAGE ?= bonobo diff --git a/Projectfile b/Projectfile index 7aa05b5..d0e800e 100644 --- a/Projectfile +++ b/Projectfile @@ -47,7 +47,7 @@ python.add_requirements( 'psutil >=5.4,<6.0', 'requests >=2.0,<3.0', 'stevedore >=1.27,<1.28', - 'whistle ==1.0a3', + 'whistle >=1.0,<1.1', dev=[ 'pytest-sugar >=0.8,<0.9', 'pytest-timeout >=1,<2', diff --git a/requirements-jupyter.txt b/requirements-jupyter.txt index 1978875..2c499ad 100644 --- a/requirements-jupyter.txt +++ b/requirements-jupyter.txt @@ -19,7 +19,7 @@ markupsafe==1.0 mistune==0.8 nbconvert==5.3.1 nbformat==4.4.0 -notebook==5.2.0 +notebook==5.2.1 pandocfilters==1.4.2 parso==0.1.0 pexpect==4.2.1 diff --git a/requirements-sqlalchemy.txt b/requirements-sqlalchemy.txt index 7083f9e..d33c754 100644 --- a/requirements-sqlalchemy.txt +++ b/requirements-sqlalchemy.txt @@ -13,6 +13,6 @@ pyparsing==2.2.0 pytz==2017.3 requests==2.18.4 six==1.11.0 -sqlalchemy==1.1.14 +sqlalchemy==1.1.15 stevedore==1.27.1 urllib3==1.22 diff --git a/requirements.txt b/requirements.txt index f579bdc..dd9696d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ fs==2.0.12 idna==2.6 jinja2==2.9.6 markupsafe==1.0 -mondrian==0.3.0 +mondrian==0.4a1 packaging==16.8 pbr==3.1.1 psutil==5.4.0 @@ -17,4 +17,4 @@ requests==2.18.4 six==1.11.0 stevedore==1.27.1 urllib3==1.22 -whistle==1.0a3 +whistle==1.0.0 diff --git a/setup.py b/setup.py index b914b72..d622505 100644 --- a/setup.py +++ b/setup.py @@ -53,8 +53,9 @@ setup( packages=find_packages(exclude=['ez_setup', 'example', 'test']), include_package_data=True, install_requires=[ - 'colorama (>= 0.3)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'mondrian', 'packaging (>= 16, < 17)', - 'psutil (>= 5.4, < 6.0)', 'requests (>= 2.0, < 3.0)', 'stevedore (>= 1.27, < 1.28)', 'whistle (== 1.0a3)' + 'colorama (>= 0.3)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'mondrian (== 0.4a1)', + 'packaging (>= 16, < 17)', 'psutil (>= 5.4, < 6.0)', 'requests (>= 2.0, < 3.0)', 'stevedore (>= 
1.27, < 1.28)', + 'whistle (>= 1.0, < 1.1)' ], extras_require={ 'dev': [ From 1108b319dbf31f801b45f61b2c01b27f6c9bbb09 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 4 Nov 2017 12:15:18 +0100 Subject: [PATCH 24/37] Adds 3.7-dev target to travis runner. --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 3eb10b2..27eebb7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ python: - 3.5-dev - 3.6 - 3.6-dev + - 3.7-dev - nightly install: - make install-dev From 017bb4a9a6e0fda84da5980db1b548479b6aa2cf Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 4 Nov 2017 12:24:30 +0100 Subject: [PATCH 25/37] Adds a test for default file init command. --- bonobo/examples/clock.py | 5 +++-- bonobo/execution/node.py | 6 +++--- bonobo/plugins/console.py | 2 +- bonobo/strategies/executor.py | 9 ++++++--- tests/test_commands.py | 11 +++++++++++ 5 files changed, 24 insertions(+), 9 deletions(-) diff --git a/bonobo/examples/clock.py b/bonobo/examples/clock.py index 765f077..1977cba 100644 --- a/bonobo/examples/clock.py +++ b/bonobo/examples/clock.py @@ -14,12 +14,13 @@ def extract(): def get_graph(): graph = bonobo.Graph() graph.add_chain( - extract, - print, + extract, + print, ) return graph + if __name__ == '__main__': parser = bonobo.get_argument_parser() with bonobo.parse_args(parser): diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index fdb0c9f..7771812 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -156,7 +156,7 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): def isflag(param): - return isinstance(param, Token) and param in (NOT_MODIFIED,) + return isinstance(param, Token) and param in (NOT_MODIFIED, ) def split_tokens(output): @@ -168,11 +168,11 @@ def split_tokens(output): """ if isinstance(output, Token): # just a flag - return (output,), () + return (output, ), () if not istuple(output): # no flag - return (), (output,) + return (), 
(output, ) i = 0 while isflag(output[i]): diff --git a/bonobo/plugins/console.py b/bonobo/plugins/console.py index 814894b..0548d68 100644 --- a/bonobo/plugins/console.py +++ b/bonobo/plugins/console.py @@ -185,4 +185,4 @@ class IOBuffer(): def memory_usage(): import os, psutil process = psutil.Process(os.getpid()) - return process.memory_info()[0] / float(2 ** 20) + return process.memory_info()[0] / float(2**20) diff --git a/bonobo/strategies/executor.py b/bonobo/strategies/executor.py index e5ffdc0..49c5d4a 100644 --- a/bonobo/strategies/executor.py +++ b/bonobo/strategies/executor.py @@ -34,7 +34,9 @@ class ExecutorStrategy(Strategy): try: context.tick() except KeyboardInterrupt: - logging.getLogger(__name__).warning('KeyboardInterrupt received. Trying to terminate the nodes gracefully.') + logging.getLogger(__name__).warning( + 'KeyboardInterrupt received. Trying to terminate the nodes gracefully.' + ) context.kill() break @@ -50,8 +52,9 @@ class ExecutorStrategy(Strategy): with node: node.loop() except BaseException as exc: - logging.getLogger(__name__).info('Got {} in {} runner.'.format(get_name(exc), node), - exc_info=sys.exc_info()) + logging.getLogger(__name__).info( + 'Got {} in {} runner.'.format(get_name(exc), node), exc_info=sys.exc_info() + ) futures.append(executor.submit(_runner)) diff --git a/tests/test_commands.py b/tests/test_commands.py index e7e3523..2877648 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -112,6 +112,17 @@ def test_install_requirements_for_file(runner): install_mock.assert_called_once_with(os.path.join(dirname, 'requirements.txt')) +@all_runners +def test_init_file(runner, tmpdir): + target = tmpdir.join('foo.py') + runner('init', str(target)) + assert os.path.exists(target) + + out, err = runner('run', str(target)) + assert out.replace('\n', ' ').strip() == 'Hello World' + assert not err + + @all_runners def test_version(runner): out, err = runner('version') From 0b8168f7daa7619779c29e1ff78d26b2188b97c8 Mon 
Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 4 Nov 2017 12:29:46 +0100 Subject: [PATCH 26/37] [tests] Fix path usage for python 3.5 --- tests/test_commands.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_commands.py b/tests/test_commands.py index 2877648..64f3363 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -115,10 +115,11 @@ def test_install_requirements_for_file(runner): @all_runners def test_init_file(runner, tmpdir): target = tmpdir.join('foo.py') - runner('init', str(target)) - assert os.path.exists(target) + target_filename = str(target) + runner('init', target_filename) + assert os.path.exists(target_filename) - out, err = runner('run', str(target)) + out, err = runner('run', target_filename) assert out.replace('\n', ' ').strip() == 'Hello World' assert not err @@ -201,7 +202,7 @@ def env2(tmpdir): all_environ_targets = pytest.mark.parametrize( 'target', [ - (get_examples_path('environ.py'), ), + (get_examples_path('environ.py'),), ( '-m', 'bonobo.examples.environ', From 2c9729c4cadeafe1472b9492419b61e417b60ca2 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 4 Nov 2017 13:01:43 +0100 Subject: [PATCH 27/37] Remove the sleep() in tick() that causes a minimum execution time of 2*PERIOD, more explicit status display and a small test case for console plugin. 
--- bonobo/execution/graph.py | 9 +++--- bonobo/execution/node.py | 2 ++ bonobo/plugins/console.py | 57 +++++++++++++---------------------- tests/plugins/test_console.py | 36 ++++++++++++++++++++++ 4 files changed, 64 insertions(+), 40 deletions(-) create mode 100644 tests/plugins/test_console.py diff --git a/bonobo/execution/graph.py b/bonobo/execution/graph.py index deaa150..0c5bc36 100644 --- a/bonobo/execution/graph.py +++ b/bonobo/execution/graph.py @@ -77,7 +77,7 @@ class GraphExecutionContext: def start(self, starter=None): self.register_plugins() self.dispatch(events.START) - self.tick() + self.tick(pause=False) for node in self.nodes: if starter is None: node.start() @@ -85,9 +85,10 @@ class GraphExecutionContext: starter(node) self.dispatch(events.STARTED) - def tick(self): + def tick(self, pause=True): self.dispatch(events.TICK) - sleep(self.TICK_PERIOD) + if pause: + sleep(self.TICK_PERIOD) def kill(self): self.dispatch(events.KILL) @@ -102,7 +103,7 @@ class GraphExecutionContext: node_context.stop() else: stopper(node_context) - self.tick() + self.tick(pause=False) self.dispatch(events.STOPPED) self.unregister_plugins() diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index 7771812..daa035f 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -42,6 +42,8 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): def get_flags_as_string(self): if self.killed: return '[killed]' + if self.stopped: + return '[done]' return '' def write(self, *messages): diff --git a/bonobo/plugins/console.py b/bonobo/plugins/console.py index 0548d68..dc511b7 100644 --- a/bonobo/plugins/console.py +++ b/bonobo/plugins/console.py @@ -30,7 +30,7 @@ class ConsoleOutputPlugin(Plugin): _stdout = sys.stdout _stderr = sys.stderr - # When the plugin is started, we'll set the real value of this. + # When the plugin is instanciated, we'll set the real value of this. isatty = False # Whether we're on windows, or a real operating system. 
@@ -50,6 +50,10 @@ class ConsoleOutputPlugin(Plugin): dispatcher.remove_listener(events.START, self.setup) def setup(self, event): + # TODO this wont work if one instance is registered with more than one context. + # Two options: + # - move state to context + # - forbid registering more than once self.prefix = '' self.counter = 0 self._append_cache = '' @@ -88,41 +92,22 @@ class ConsoleOutputPlugin(Plugin): for i in context.graph.topologically_sorted_indexes: node = context[i] name_suffix = '({})'.format(i) if settings.DEBUG.get() else '' - if node.alive: - _line = ''.join( - ( - ' ', - alive_color, - '+', - Style.RESET_ALL, - ' ', - node.name, - name_suffix, - ' ', - node.get_statistics_as_string(), - ' ', - node.get_flags_as_string(), - Style.RESET_ALL, - ' ', - ) - ) - else: - _line = ''.join( - ( - ' ', - dead_color, - '-', - ' ', - node.name, - name_suffix, - ' ', - node.get_statistics_as_string(), - ' ', - node.get_flags_as_string(), - Style.RESET_ALL, - ' ', - ) + + liveliness_color = alive_color if node.alive else dead_color + liveliness_prefix = ' {}{}{} '.format(liveliness_color, node.status, Style.RESET_ALL) + _line = ''.join( + ( + liveliness_prefix, + node.name, + name_suffix, + ' ', + node.get_statistics_as_string(), + ' ', + node.get_flags_as_string(), + Style.RESET_ALL, + ' ', ) + ) print(prefix + _line + CLEAR_EOL, file=self._stderr) if append: @@ -185,4 +170,4 @@ class IOBuffer(): def memory_usage(): import os, psutil process = psutil.Process(os.getpid()) - return process.memory_info()[0] / float(2**20) + return process.memory_info()[0] / float(2 ** 20) diff --git a/tests/plugins/test_console.py b/tests/plugins/test_console.py new file mode 100644 index 0000000..4a34f7a --- /dev/null +++ b/tests/plugins/test_console.py @@ -0,0 +1,36 @@ +from unittest.mock import MagicMock + +import bonobo +from bonobo.execution import events +from bonobo.execution.graph import GraphExecutionContext +from bonobo.plugins.console import ConsoleOutputPlugin +from 
whistle import EventDispatcher + + +def test_register_unregister(): + plugin = ConsoleOutputPlugin() + dispatcher = EventDispatcher() + + plugin.register(dispatcher) + assert plugin.setup in dispatcher.get_listeners(events.START) + assert plugin.tick in dispatcher.get_listeners(events.TICK) + assert plugin.teardown in dispatcher.get_listeners(events.STOPPED) + plugin.unregister(dispatcher) + assert plugin.setup not in dispatcher.get_listeners(events.START) + assert plugin.tick not in dispatcher.get_listeners(events.TICK) + assert plugin.teardown not in dispatcher.get_listeners(events.STOPPED) + + +def test_one_pass(): + plugin = ConsoleOutputPlugin() + dispatcher = EventDispatcher() + plugin.register(dispatcher) + + graph = bonobo.Graph() + context = MagicMock(spec=GraphExecutionContext(graph)) + + dispatcher.dispatch(events.START, events.ExecutionEvent(context)) + dispatcher.dispatch(events.TICK, events.ExecutionEvent(context)) + dispatcher.dispatch(events.STOPPED, events.ExecutionEvent(context)) + + plugin.unregister(dispatcher) From 28884231b32d5e7659fff0287c4ee765b7f1b712 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 4 Nov 2017 13:13:47 +0100 Subject: [PATCH 28/37] [core] Moves bonobo.execution context related package to new bonobo.execution.contexts package, also moves bonobo.strategies to new bonobo.execution.strategies package, so everything related to execution is now contained under the bonobo.execution package. 
--- bonobo/_api.py | 10 +++++----- bonobo/execution/contexts/__init__.py | 0 bonobo/execution/{ => contexts}/base.py | 1 - bonobo/execution/{ => contexts}/graph.py | 7 +++---- bonobo/execution/{ => contexts}/node.py | 3 +-- bonobo/execution/{ => contexts}/plugin.py | 2 +- bonobo/{ => execution}/strategies/__init__.py | 8 ++++---- bonobo/{ => execution}/strategies/base.py | 2 +- bonobo/{ => execution}/strategies/executor.py | 9 ++++----- bonobo/{ => execution}/strategies/naive.py | 2 +- bonobo/plugins/console.py | 2 +- bonobo/util/testing.py | 4 ++-- tests/execution/test_node.py | 2 +- tests/io/test_csv.py | 2 +- tests/io/test_file.py | 2 +- tests/io/test_json.py | 2 +- tests/io/test_pickle.py | 2 +- tests/plugins/test_console.py | 2 +- tests/test_basicusage.py | 2 +- tests/test_commands.py | 2 +- tests/test_execution.py | 4 ++-- 21 files changed, 33 insertions(+), 37 deletions(-) create mode 100644 bonobo/execution/contexts/__init__.py rename bonobo/execution/{ => contexts}/base.py (99%) rename bonobo/execution/{ => contexts}/graph.py (96%) rename bonobo/execution/{ => contexts}/node.py (98%) rename bonobo/execution/{ => contexts}/plugin.py (80%) rename bonobo/{ => execution}/strategies/__init__.py (76%) rename bonobo/{ => execution}/strategies/base.py (89%) rename bonobo/{ => execution}/strategies/executor.py (94%) rename bonobo/{ => execution}/strategies/naive.py (91%) diff --git a/bonobo/_api.py b/bonobo/_api.py index 9a82f41..816dfda 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,7 +1,7 @@ +from bonobo.execution.strategies import create_strategy from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ PickleReader, PickleWriter, PrettyPrinter, RateLimited, Tee, arg0_to_kwargs, count, identity, kwargs_to_arg0, noop from bonobo.nodes import LdjsonReader, LdjsonWriter -from bonobo.strategies import create_strategy from bonobo.structs import Bag, ErrorBag, Graph, Token from bonobo.util import get_name 
from bonobo.util.environ import parse_args, get_argument_parser @@ -35,7 +35,7 @@ def run(graph, *, plugins=None, services=None, strategy=None): You'll probably want to provide a services dictionary mapping service names to service instances. :param Graph graph: The :class:`Graph` to execute. - :param str strategy: The :class:`bonobo.strategies.base.Strategy` to use. + :param str strategy: The :class:`bonobo.execution.strategies.base.Strategy` to use. :param list plugins: The list of plugins to enhance execution. :param dict services: The implementations of services this graph will use. :return bonobo.execution.graph.GraphExecutionContext: @@ -93,10 +93,10 @@ def inspect(graph, *, format): print(_inspect_formats[format](graph)) -# bonobo.structs +# data structures register_api_group(Bag, ErrorBag, Graph, Token) -# bonobo.strategies +# execution strategies register_api(create_strategy) @@ -125,7 +125,7 @@ def open_fs(fs_url=None, *args, **kwargs): return _open_fs(expanduser(str(fs_url)), *args, **kwargs) -# bonobo.nodes +# standard transformations register_api_group( CsvReader, CsvWriter, diff --git a/bonobo/execution/contexts/__init__.py b/bonobo/execution/contexts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/execution/base.py b/bonobo/execution/contexts/base.py similarity index 99% rename from bonobo/execution/base.py rename to bonobo/execution/contexts/base.py index 74dd89b..39598bc 100644 --- a/bonobo/execution/base.py +++ b/bonobo/execution/contexts/base.py @@ -1,7 +1,6 @@ import sys from contextlib import contextmanager from logging import WARNING, ERROR -from time import sleep import mondrian from bonobo.config import create_container diff --git a/bonobo/execution/graph.py b/bonobo/execution/contexts/graph.py similarity index 96% rename from bonobo/execution/graph.py rename to bonobo/execution/contexts/graph.py index 0c5bc36..55dbf7e 100644 --- a/bonobo/execution/graph.py +++ b/bonobo/execution/contexts/graph.py @@ -1,13 +1,12 
@@ from functools import partial from time import sleep -from whistle import EventDispatcher - from bonobo.config import create_container from bonobo.constants import BEGIN, END from bonobo.execution import events -from bonobo.execution.node import NodeExecutionContext -from bonobo.execution.plugin import PluginExecutionContext +from bonobo.execution.contexts.node import NodeExecutionContext +from bonobo.execution.contexts.plugin import PluginExecutionContext +from whistle import EventDispatcher class GraphExecutionContext: diff --git a/bonobo/execution/node.py b/bonobo/execution/contexts/node.py similarity index 98% rename from bonobo/execution/node.py rename to bonobo/execution/contexts/node.py index daa035f..8511825 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/contexts/node.py @@ -1,12 +1,11 @@ import sys -import threading from queue import Empty from time import sleep from types import GeneratorType from bonobo.constants import NOT_MODIFIED, BEGIN, END from bonobo.errors import InactiveReadableError, UnrecoverableError -from bonobo.execution.base import LoopingExecutionContext +from bonobo.execution.contexts.base import LoopingExecutionContext from bonobo.structs.bags import Bag from bonobo.structs.inputs import Input from bonobo.structs.tokens import Token diff --git a/bonobo/execution/plugin.py b/bonobo/execution/contexts/plugin.py similarity index 80% rename from bonobo/execution/plugin.py rename to bonobo/execution/contexts/plugin.py index f552724..524c2e1 100644 --- a/bonobo/execution/plugin.py +++ b/bonobo/execution/contexts/plugin.py @@ -1,4 +1,4 @@ -from bonobo.execution.base import LoopingExecutionContext, recoverable +from bonobo.execution.contexts.base import LoopingExecutionContext class PluginExecutionContext(LoopingExecutionContext): diff --git a/bonobo/strategies/__init__.py b/bonobo/execution/strategies/__init__.py similarity index 76% rename from bonobo/strategies/__init__.py rename to bonobo/execution/strategies/__init__.py 
index 1420da6..1c5d50a 100644 --- a/bonobo/strategies/__init__.py +++ b/bonobo/execution/strategies/__init__.py @@ -1,5 +1,5 @@ -from bonobo.strategies.executor import ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy -from bonobo.strategies.naive import NaiveStrategy +from bonobo.execution.strategies.executor import ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy +from bonobo.execution.strategies.naive import NaiveStrategy __all__ = [ 'create_strategy', @@ -21,8 +21,8 @@ def create_strategy(name=None): :param name: :return: Strategy """ - from bonobo.strategies.base import Strategy import logging + from bonobo.execution.strategies.base import Strategy if isinstance(name, Strategy): return name @@ -39,4 +39,4 @@ def create_strategy(name=None): 'Invalid strategy {}. Available choices: {}.'.format(repr(name), ', '.join(sorted(STRATEGIES.keys()))) ) from exc - return factory() \ No newline at end of file + return factory() diff --git a/bonobo/strategies/base.py b/bonobo/execution/strategies/base.py similarity index 89% rename from bonobo/strategies/base.py rename to bonobo/execution/strategies/base.py index 47f7db4..0a8d2a5 100644 --- a/bonobo/strategies/base.py +++ b/bonobo/execution/strategies/base.py @@ -1,4 +1,4 @@ -from bonobo.execution.graph import GraphExecutionContext +from bonobo.execution.contexts.graph import GraphExecutionContext class Strategy: diff --git a/bonobo/strategies/executor.py b/bonobo/execution/strategies/executor.py similarity index 94% rename from bonobo/strategies/executor.py rename to bonobo/execution/strategies/executor.py index 49c5d4a..ebaba32 100644 --- a/bonobo/strategies/executor.py +++ b/bonobo/execution/strategies/executor.py @@ -1,13 +1,12 @@ import functools import logging import sys -from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor, wait, FIRST_EXCEPTION -from time import sleep +from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor -from bonobo.util import 
get_name -from bonobo.constants import BEGIN, END -from bonobo.strategies.base import Strategy from bonobo.structs.bags import Bag +from bonobo.constants import BEGIN, END +from bonobo.execution.strategies.base import Strategy +from bonobo.util import get_name class ExecutorStrategy(Strategy): diff --git a/bonobo/strategies/naive.py b/bonobo/execution/strategies/naive.py similarity index 91% rename from bonobo/strategies/naive.py rename to bonobo/execution/strategies/naive.py index 20477c1..bd581ff 100644 --- a/bonobo/strategies/naive.py +++ b/bonobo/execution/strategies/naive.py @@ -1,5 +1,5 @@ from bonobo.constants import BEGIN, END -from bonobo.strategies.base import Strategy +from bonobo.execution.strategies.base import Strategy from bonobo.structs.bags import Bag diff --git a/bonobo/plugins/console.py b/bonobo/plugins/console.py index dc511b7..d5f9914 100644 --- a/bonobo/plugins/console.py +++ b/bonobo/plugins/console.py @@ -170,4 +170,4 @@ class IOBuffer(): def memory_usage(): import os, psutil process = psutil.Process(os.getpid()) - return process.memory_info()[0] / float(2 ** 20) + return process.memory_info()[0] / float(2**20) diff --git a/bonobo/util/testing.py b/bonobo/util/testing.py index 7dc8f38..6dd2c8a 100644 --- a/bonobo/util/testing.py +++ b/bonobo/util/testing.py @@ -1,8 +1,8 @@ from contextlib import contextmanager from bonobo import open_fs, Token -from bonobo.execution.graph import GraphExecutionContext -from bonobo.execution.node import NodeExecutionContext +from bonobo.execution.contexts.graph import GraphExecutionContext +from bonobo.execution.contexts.node import NodeExecutionContext @contextmanager diff --git a/tests/execution/test_node.py b/tests/execution/test_node.py index fef385c..34a8ae1 100644 --- a/tests/execution/test_node.py +++ b/tests/execution/test_node.py @@ -1,5 +1,5 @@ from bonobo import Bag, Graph -from bonobo.strategies import NaiveStrategy +from bonobo.execution.strategies import NaiveStrategy from bonobo.util.testing 
import BufferingNodeExecutionContext, BufferingGraphExecutionContext diff --git a/tests/io/test_csv.py b/tests/io/test_csv.py index 1c4c6cc..b0b91c5 100644 --- a/tests/io/test_csv.py +++ b/tests/io/test_csv.py @@ -1,7 +1,7 @@ import pytest from bonobo import CsvReader, CsvWriter, settings -from bonobo.execution.node import NodeExecutionContext +from bonobo.execution.contexts.node import NodeExecutionContext from bonobo.util.testing import FilesystemTester, BufferingNodeExecutionContext csv_tester = FilesystemTester('csv') diff --git a/tests/io/test_file.py b/tests/io/test_file.py index d7645e7..5fc2823 100644 --- a/tests/io/test_file.py +++ b/tests/io/test_file.py @@ -2,7 +2,7 @@ import pytest from bonobo import Bag, FileReader, FileWriter from bonobo.constants import BEGIN, END -from bonobo.execution.node import NodeExecutionContext +from bonobo.execution.contexts.node import NodeExecutionContext from bonobo.util.testing import BufferingNodeExecutionContext, FilesystemTester txt_tester = FilesystemTester('txt') diff --git a/tests/io/test_json.py b/tests/io/test_json.py index b72a3de..b5b0781 100644 --- a/tests/io/test_json.py +++ b/tests/io/test_json.py @@ -2,7 +2,7 @@ import pytest from bonobo import JsonReader, JsonWriter, settings from bonobo import LdjsonReader, LdjsonWriter -from bonobo.execution.node import NodeExecutionContext +from bonobo.execution.contexts.node import NodeExecutionContext from bonobo.util.testing import FilesystemTester, BufferingNodeExecutionContext json_tester = FilesystemTester('json') diff --git a/tests/io/test_pickle.py b/tests/io/test_pickle.py index 1f95309..8416a9f 100644 --- a/tests/io/test_pickle.py +++ b/tests/io/test_pickle.py @@ -3,7 +3,7 @@ import pickle import pytest from bonobo import Bag, PickleReader, PickleWriter -from bonobo.execution.node import NodeExecutionContext +from bonobo.execution.contexts.node import NodeExecutionContext from bonobo.util.testing import BufferingNodeExecutionContext, FilesystemTester 
pickle_tester = FilesystemTester('pkl', mode='wb') diff --git a/tests/plugins/test_console.py b/tests/plugins/test_console.py index 4a34f7a..543d341 100644 --- a/tests/plugins/test_console.py +++ b/tests/plugins/test_console.py @@ -2,7 +2,7 @@ from unittest.mock import MagicMock import bonobo from bonobo.execution import events -from bonobo.execution.graph import GraphExecutionContext +from bonobo.execution.contexts.graph import GraphExecutionContext from bonobo.plugins.console import ConsoleOutputPlugin from whistle import EventDispatcher diff --git a/tests/test_basicusage.py b/tests/test_basicusage.py index f002d36..7772af3 100644 --- a/tests/test_basicusage.py +++ b/tests/test_basicusage.py @@ -3,7 +3,7 @@ from unittest.mock import patch import pytest import bonobo -from bonobo.execution.graph import GraphExecutionContext +from bonobo.execution.contexts.graph import GraphExecutionContext @pytest.mark.timeout(2) diff --git a/tests/test_commands.py b/tests/test_commands.py index 64f3363..255bae7 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -202,7 +202,7 @@ def env2(tmpdir): all_environ_targets = pytest.mark.parametrize( 'target', [ - (get_examples_path('environ.py'),), + (get_examples_path('environ.py'), ), ( '-m', 'bonobo.examples.environ', diff --git a/tests/test_execution.py b/tests/test_execution.py index 6fb33e4..84f40c5 100644 --- a/tests/test_execution.py +++ b/tests/test_execution.py @@ -1,7 +1,7 @@ from bonobo.config.processors import ContextProcessor from bonobo.constants import BEGIN, END -from bonobo.execution.graph import GraphExecutionContext -from bonobo.strategies import NaiveStrategy +from bonobo.execution.contexts.graph import GraphExecutionContext +from bonobo.execution.strategies import NaiveStrategy from bonobo.structs import Bag, Graph From 83fc1743fcdeb23eba5a7e571533a9b7da0dd114 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 4 Nov 2017 13:20:53 +0100 Subject: [PATCH 29/37] Small changes in events, and 
associated tests. --- bonobo/execution/events.py | 4 ++-- bonobo/plugins/console.py | 8 ++++---- tests/execution/{ => contexts}/test_node.py | 0 tests/execution/test_events.py | 17 +++++++++++++++++ 4 files changed, 23 insertions(+), 6 deletions(-) rename tests/execution/{ => contexts}/test_node.py (100%) create mode 100644 tests/execution/test_events.py diff --git a/bonobo/execution/events.py b/bonobo/execution/events.py index 036e879..3bf3986 100644 --- a/bonobo/execution/events.py +++ b/bonobo/execution/events.py @@ -9,5 +9,5 @@ KILL = 'execution.kill' class ExecutionEvent(Event): - def __init__(self, graph_context): - self.graph_context = graph_context + def __init__(self, context): + self.context = context diff --git a/bonobo/plugins/console.py b/bonobo/plugins/console.py index d5f9914..584244c 100644 --- a/bonobo/plugins/console.py +++ b/bonobo/plugins/console.py @@ -68,12 +68,12 @@ class ConsoleOutputPlugin(Plugin): def tick(self, event): if self.isatty and not self.iswindows: - self._write(event.graph_context, rewind=True) + self._write(event.context, rewind=True) else: pass # not a tty, or windows, so we'll ignore stats output def teardown(self, event): - self._write(event.graph_context, rewind=False) + self._write(event.context, rewind=False) self.redirect_stderr.__exit__(None, None, None) self.redirect_stdout.__exit__(None, None, None) @@ -127,7 +127,7 @@ class ConsoleOutputPlugin(Plugin): print(CLEAR_EOL, file=self._stderr) print(MOVE_CURSOR_UP(t_cnt + 2), file=self._stderr) - def _write(self, graph_context, rewind): + def _write(self, context, rewind): if settings.PROFILE.get(): if self.counter % 10 and self._append_cache: append = self._append_cache @@ -138,7 +138,7 @@ class ConsoleOutputPlugin(Plugin): ) else: append = () - self.write(graph_context, prefix=self.prefix, append=append, rewind=rewind) + self.write(context, prefix=self.prefix, append=append, rewind=rewind) self.counter += 1 diff --git a/tests/execution/test_node.py 
b/tests/execution/contexts/test_node.py similarity index 100% rename from tests/execution/test_node.py rename to tests/execution/contexts/test_node.py diff --git a/tests/execution/test_events.py b/tests/execution/test_events.py new file mode 100644 index 0000000..8abeb57 --- /dev/null +++ b/tests/execution/test_events.py @@ -0,0 +1,17 @@ +from unittest.mock import Mock + +from bonobo.execution import events + + +def test_names(): + # This test looks useless, but as it's becoming the pliugin API, I want to make sure that nothing changes here, or + # notice it otherwise. + for name in 'start', 'started', 'tick', 'stop', 'stopped', 'kill': + event_name = getattr(events, name.upper()) + assert event_name == '.'.join(('execution', name)) + +def test_event_object(): + # Same logic as above. + c = Mock() + e = events.ExecutionEvent(c) + assert e.context is c From 25e919ab969772adc47a5c92ba3e08da110eaf87 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 4 Nov 2017 13:36:54 +0100 Subject: [PATCH 30/37] [tests] adds node context lifecycle test.( --- tests/execution/contexts/test_node.py | 48 +++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/execution/contexts/test_node.py b/tests/execution/contexts/test_node.py index 34a8ae1..648743b 100644 --- a/tests/execution/contexts/test_node.py +++ b/tests/execution/contexts/test_node.py @@ -1,4 +1,9 @@ +from unittest.mock import MagicMock + +import pytest + from bonobo import Bag, Graph +from bonobo.execution.contexts.node import NodeExecutionContext from bonobo.execution.strategies import NaiveStrategy from bonobo.util.testing import BufferingNodeExecutionContext, BufferingGraphExecutionContext @@ -179,3 +184,46 @@ def test_node_tuple_dict(): assert len(output) == 2 assert output[0] == ('foo', 'bar', {'id': 1}) assert output[1] == ('foo', 'baz', {'id': 2}) + +def test_node_lifecycle_natural(): + func = MagicMock() + + ctx = NodeExecutionContext(func) + assert not any((ctx.started, ctx.stopped, 
ctx.killed, ctx.alive)) + + # cannot stop before start + with pytest.raises(RuntimeError): + ctx.stop() + assert not any((ctx.started, ctx.stopped, ctx.killed, ctx.alive)) + + # turn the key + ctx.start() + assert all((ctx.started, ctx.alive)) and not any((ctx.stopped, ctx.killed)) + + ctx.stop() + assert all((ctx.started, ctx.stopped)) and not any((ctx.alive, ctx.killed)) + +def test_node_lifecycle_with_kill(): + func = MagicMock() + + ctx = NodeExecutionContext(func) + assert not any((ctx.started, ctx.stopped, ctx.killed, ctx.alive)) + + # cannot kill before start + with pytest.raises(RuntimeError): + ctx.kill() + assert not any((ctx.started, ctx.stopped, ctx.killed, ctx.alive)) + + # turn the key + ctx.start() + assert all((ctx.started, ctx.alive)) and not any((ctx.stopped, ctx.killed)) + + ctx.kill() + assert all((ctx.started, ctx.killed, ctx.alive)) and not ctx.stopped + + ctx.stop() + assert all((ctx.started, ctx.killed, ctx.stopped)) and not ctx.alive + + + + From 0b969d31e0b008fdbf8d86ecc8294f72eb26a3af Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 4 Nov 2017 14:55:08 +0100 Subject: [PATCH 31/37] Adds basic test for convert command. 
--- bonobo/commands/convert.py | 4 +- bonobo/execution/contexts/base.py | 42 ++-- bonobo/execution/contexts/node.py | 16 +- bonobo/execution/strategies/executor.py | 19 +- bonobo/nodes/io/base.py | 3 + bonobo/util/environ.py | 10 + bonobo/util/resolvers.py | 1 + bonobo/util/testing.py | 78 +++++- tests/commands/test_clibasics.py | 25 ++ tests/commands/test_convert.py | 13 + tests/commands/test_download.py | 44 ++++ tests/commands/test_init.py | 15 ++ tests/commands/test_run.py | 48 ++++ tests/commands/test_run_environ.py | 109 ++++++++ tests/commands/test_version.py | 20 ++ tests/execution/contexts/test_node.py | 6 +- tests/execution/test_events.py | 1 + tests/test_commands.py | 319 ------------------------ 18 files changed, 420 insertions(+), 353 deletions(-) create mode 100644 tests/commands/test_clibasics.py create mode 100644 tests/commands/test_convert.py create mode 100644 tests/commands/test_download.py create mode 100644 tests/commands/test_init.py create mode 100644 tests/commands/test_run.py create mode 100644 tests/commands/test_run_environ.py create mode 100644 tests/commands/test_version.py delete mode 100644 tests/test_commands.py diff --git a/bonobo/commands/convert.py b/bonobo/commands/convert.py index faf175c..198dce0 100644 --- a/bonobo/commands/convert.py +++ b/bonobo/commands/convert.py @@ -6,8 +6,8 @@ from bonobo.util.resolvers import _resolve_transformations, _resolve_options class ConvertCommand(BaseCommand): def add_arguments(self, parser): - parser.add_argument('input-filename', help='Input filename.') - parser.add_argument('output-filename', help='Output filename.') + parser.add_argument('input_filename', help='Input filename.') + parser.add_argument('output_filename', help='Output filename.') parser.add_argument( '--' + READER, '-r', diff --git a/bonobo/execution/contexts/base.py b/bonobo/execution/contexts/base.py index 39598bc..3ca580a 100644 --- a/bonobo/execution/contexts/base.py +++ b/bonobo/execution/contexts/base.py @@ -1,3 +1,4 @@ 
+import logging import sys from contextlib import contextmanager from logging import WARNING, ERROR @@ -38,6 +39,10 @@ class LoopingExecutionContext(Wrapper): def stopped(self): return self._stopped + @property + def defunct(self): + return self._defunct + @property def alive(self): return self._started and not self._stopped @@ -45,6 +50,8 @@ class LoopingExecutionContext(Wrapper): @property def status(self): """One character status for this node. """ + if self._defunct: + return '!' if not self.started: return ' ' if not self.stopped: @@ -65,7 +72,7 @@ class LoopingExecutionContext(Wrapper): else: self.services = None - self._started, self._stopped = False, False + self._started, self._stopped, self._defunct = False, False, False self._stack = None def __enter__(self): @@ -81,15 +88,17 @@ class LoopingExecutionContext(Wrapper): self._started = True - self._stack = ContextCurrifier(self.wrapped, *self._get_initial_context()) - if isconfigurabletype(self.wrapped): - # Not normal to have a partially configured object here, so let's warn the user instead of having get into - # the hard trouble of understanding that by himself. - raise TypeError( - 'The Configurable should be fully instanciated by now, unfortunately I got a PartiallyConfigured object...' - ) - - self._stack.setup(self) + try: + self._stack = ContextCurrifier(self.wrapped, *self._get_initial_context()) + if isconfigurabletype(self.wrapped): + # Not normal to have a partially configured object here, so let's warn the user instead of having get into + # the hard trouble of understanding that by himself. + raise TypeError( + 'The Configurable should be fully instanciated by now, unfortunately I got a PartiallyConfigured object...' + ) + self._stack.setup(self) + except Exception: + return self.fatal(sys.exc_info()) def loop(self): """Generic loop. A bit boring. 
""" @@ -113,14 +122,17 @@ class LoopingExecutionContext(Wrapper): finally: self._stopped = True - def handle_error(self, exctype, exc, tb): - mondrian.excepthook( - exctype, exc, tb, level=WARNING, context='{} in {}'.format(exctype.__name__, get_name(self)), logger=logger - ) - def _get_initial_context(self): if self.parent: return self.parent.services.args_for(self.wrapped) if self.services: return self.services.args_for(self.wrapped) return () + + def handle_error(self, exctype, exc, tb, *, level=logging.ERROR): + logging.getLogger(__name__).log(level, repr(self), exc_info=(exctype, exc, tb)) + + def fatal(self, exc_info): + self._defunct = True + self.input.shutdown() + self.handle_error(*exc_info, level=logging.CRITICAL) diff --git a/bonobo/execution/contexts/node.py b/bonobo/execution/contexts/node.py index 8511825..db2c39a 100644 --- a/bonobo/execution/contexts/node.py +++ b/bonobo/execution/contexts/node.py @@ -1,3 +1,4 @@ +import logging import sys from queue import Empty from time import sleep @@ -12,6 +13,7 @@ from bonobo.structs.tokens import Token from bonobo.util import get_name, iserrorbag, isloopbackbag, isbag, istuple from bonobo.util.compat import deprecated_alias from bonobo.util.statistics import WithStatistics +from mondrian import term class NodeExecutionContext(WithStatistics, LoopingExecutionContext): @@ -39,10 +41,12 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): return '<{}({}{}){}>'.format(type_name, self.status, name, self.get_statistics_as_string(prefix=' ')) def get_flags_as_string(self): + if self._defunct: + return term.red('[defunct]') if self.killed: - return '[killed]' + return term.lightred('[killed]') if self.stopped: - return '[done]' + return term.lightblack('[done]') return '' def write(self, *messages): @@ -92,13 +96,13 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): self.increment('in') return row + def should_loop(self): + return not any((self.defunct, self.killed)) + def 
loop(self): - while not self._killed: + while self.should_loop(): try: self.step() - except KeyboardInterrupt: - self.handle_error(*sys.exc_info()) - break except InactiveReadableError: break except Empty: diff --git a/bonobo/execution/strategies/executor.py b/bonobo/execution/strategies/executor.py index ebaba32..ebbaef1 100644 --- a/bonobo/execution/strategies/executor.py +++ b/bonobo/execution/strategies/executor.py @@ -27,7 +27,11 @@ class ExecutorStrategy(Strategy): futures = [] with self.create_executor() as executor: - context.start(self.get_starter(executor, futures)) + try: + context.start(self.get_starter(executor, futures)) + except: + logging.getLogger(__name__ + ).warning('KeyboardInterrupt received. Trying to terminate the nodes gracefully.') while context.alive: try: @@ -50,12 +54,17 @@ class ExecutorStrategy(Strategy): try: with node: node.loop() - except BaseException as exc: - logging.getLogger(__name__).info( - 'Got {} in {} runner.'.format(get_name(exc), node), exc_info=sys.exc_info() + except: + logging.getLogger(__name__).critical( + 'Uncaught exception in node execution for {}.'.format(node), exc_info=True ) + node.shutdown() + node.stop() - futures.append(executor.submit(_runner)) + try: + futures.append(executor.submit(_runner)) + except: + logging.getLogger(__name__).critical('futures.append', exc_info=sys.exc_info()) return starter diff --git a/bonobo/nodes/io/base.py b/bonobo/nodes/io/base.py index af9e609..db0bc80 100644 --- a/bonobo/nodes/io/base.py +++ b/bonobo/nodes/io/base.py @@ -1,4 +1,7 @@ +from fs.errors import ResourceNotFound + from bonobo.config import Configurable, ContextProcessor, Option, Service +from bonobo.errors import UnrecoverableError class FileHandler(Configurable): diff --git a/bonobo/util/environ.py b/bonobo/util/environ.py index 16f7c9c..b344d29 100644 --- a/bonobo/util/environ.py +++ b/bonobo/util/environ.py @@ -152,3 +152,13 @@ def parse_args(mixed=None): del os.environ[name] else: os.environ[name] = value + + 
+@contextmanager +def change_working_directory(path): + old_dir = os.getcwd() + os.chdir(str(path)) + try: + yield + finally: + os.chdir(old_dir) diff --git a/bonobo/util/resolvers.py b/bonobo/util/resolvers.py index c4a1a90..60934d8 100644 --- a/bonobo/util/resolvers.py +++ b/bonobo/util/resolvers.py @@ -72,6 +72,7 @@ def _resolve_transformations(transformations): :return: tuple(object) """ registry = _ModulesRegistry() + transformations = transformations or [] for t in transformations: try: mod, attr = t.split(':', 1) diff --git a/bonobo/util/testing.py b/bonobo/util/testing.py index 6dd2c8a..9044715 100644 --- a/bonobo/util/testing.py +++ b/bonobo/util/testing.py @@ -1,6 +1,15 @@ -from contextlib import contextmanager +import functools +import io +import os +import runpy +import sys +from contextlib import contextmanager, redirect_stdout, redirect_stderr +from unittest.mock import patch -from bonobo import open_fs, Token +import pytest + +from bonobo import open_fs, Token, __main__, get_examples_path +from bonobo.commands import entrypoint from bonobo.execution.contexts.graph import GraphExecutionContext from bonobo.execution.contexts.node import NodeExecutionContext @@ -64,3 +73,68 @@ class BufferingGraphExecutionContext(BufferingContext, GraphExecutionContext): def create_node_execution_context_for(self, node): return self.NodeExecutionContextType(node, parent=self, buffer=self.buffer) + + +def runner(f): + @functools.wraps(f) + def wrapped_runner(*args, catch_errors=False): + with redirect_stdout(io.StringIO()) as stdout, redirect_stderr(io.StringIO()) as stderr: + try: + f(list(args)) + except BaseException as exc: + if not catch_errors: + raise + elif isinstance(catch_errors, BaseException) and not isinstance(exc, catch_errors): + raise + return stdout.getvalue(), stderr.getvalue(), exc + return stdout.getvalue(), stderr.getvalue() + + return wrapped_runner + + +@runner +def runner_entrypoint(args): + """ Run bonobo using the python command entrypoint 
directly (bonobo.commands.entrypoint). """ + return entrypoint(args) + + +@runner +def runner_module(args): + """ Run bonobo using the bonobo.__main__ file, which is equivalent as doing "python -m bonobo ...".""" + with patch.object(sys, 'argv', ['bonobo', *args]): + return runpy.run_path(__main__.__file__, run_name='__main__') + + +all_runners = pytest.mark.parametrize('runner', [runner_entrypoint, runner_module]) +all_environ_targets = pytest.mark.parametrize( + 'target', [ + (get_examples_path('environ.py'), ), + ( + '-m', + 'bonobo.examples.environ', + ), + ] +) + + +@all_runners +@all_environ_targets +class EnvironmentTestCase(): + def run_quiet(self, runner, *args): + return runner('run', '--quiet', *args) + + def run_environ(self, runner, *args, environ=None): + _environ = {'PATH': '/usr/bin'} + if environ: + _environ.update(environ) + + with patch.dict('os.environ', _environ, clear=True): + out, err = self.run_quiet(runner, *args) + assert 'SECRET' not in os.environ + assert 'PASSWORD' not in os.environ + if 'PATH' in _environ: + assert 'PATH' in os.environ + assert os.environ['PATH'] == _environ['PATH'] + + assert err == '' + return dict(map(lambda line: line.split(' ', 1), filter(None, out.split('\n')))) diff --git a/tests/commands/test_clibasics.py b/tests/commands/test_clibasics.py new file mode 100644 index 0000000..1fc292b --- /dev/null +++ b/tests/commands/test_clibasics.py @@ -0,0 +1,25 @@ +import pkg_resources + +from bonobo.util.testing import all_runners + + +def test_entrypoint(): + commands = {} + + for command in pkg_resources.iter_entry_points('bonobo.commands'): + commands[command.name] = command + + assert not { + 'convert', + 'init', + 'inspect', + 'run', + 'version', + }.difference(set(commands)) + + +@all_runners +def test_no_command(runner): + _, err, exc = runner(catch_errors=True) + assert type(exc) == SystemExit + assert 'error: the following arguments are required: command' in err \ No newline at end of file diff --git 
a/tests/commands/test_convert.py b/tests/commands/test_convert.py new file mode 100644 index 0000000..ea0c3c4 --- /dev/null +++ b/tests/commands/test_convert.py @@ -0,0 +1,13 @@ +from bonobo.util.environ import change_working_directory +from bonobo.util.testing import all_runners + + +@all_runners +def test_convert(runner, tmpdir): + csv_content = 'id;name\n1;Romain' + tmpdir.join('in.csv').write(csv_content) + + with change_working_directory(tmpdir): + runner('convert', 'in.csv', 'out.csv') + + assert tmpdir.join('out.csv').read().strip() == csv_content diff --git a/tests/commands/test_download.py b/tests/commands/test_download.py new file mode 100644 index 0000000..83b0ef4 --- /dev/null +++ b/tests/commands/test_download.py @@ -0,0 +1,44 @@ +import io +from unittest.mock import patch + +import pytest + +from bonobo.commands.download import EXAMPLES_BASE_URL +from bonobo.util.testing import all_runners + + +@all_runners +def test_download_works_for_examples(runner): + expected_bytes = b'hello world' + + class MockResponse(object): + def __init__(self): + self.status_code = 200 + + def iter_content(self, *args, **kwargs): + return [expected_bytes] + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + pass + + fout = io.BytesIO() + fout.close = lambda: None + + with patch('bonobo.commands.download._open_url') as mock_open_url, \ + patch('bonobo.commands.download.open') as mock_open: + mock_open_url.return_value = MockResponse() + mock_open.return_value = fout + runner('download', 'examples/datasets/coffeeshops.txt') + expected_url = EXAMPLES_BASE_URL + 'datasets/coffeeshops.txt' + mock_open_url.assert_called_once_with(expected_url) + + assert fout.getvalue() == expected_bytes + + +@all_runners +def test_download_fails_non_example(runner): + with pytest.raises(ValueError): + runner('download', 'something/entirely/different.txt') \ No newline at end of file diff --git a/tests/commands/test_init.py b/tests/commands/test_init.py new file mode 
100644 index 0000000..a551a9a --- /dev/null +++ b/tests/commands/test_init.py @@ -0,0 +1,15 @@ +import os + +from bonobo.util.testing import all_runners + + +@all_runners +def test_init_file(runner, tmpdir): + target = tmpdir.join('foo.py') + target_filename = str(target) + runner('init', target_filename) + assert os.path.exists(target_filename) + + out, err = runner('run', target_filename) + assert out.replace('\n', ' ').strip() == 'Hello World' + assert not err \ No newline at end of file diff --git a/tests/commands/test_run.py b/tests/commands/test_run.py new file mode 100644 index 0000000..69e4f94 --- /dev/null +++ b/tests/commands/test_run.py @@ -0,0 +1,48 @@ +import os +from unittest.mock import patch + +from bonobo import get_examples_path +from bonobo.util.testing import all_runners + + +@all_runners +def test_run(runner): + out, err = runner('run', '--quiet', get_examples_path('types/strings.py')) + out = out.split('\n') + assert out[0].startswith('Foo ') + assert out[1].startswith('Bar ') + assert out[2].startswith('Baz ') + + +@all_runners +def test_run_module(runner): + out, err = runner('run', '--quiet', '-m', 'bonobo.examples.types.strings') + out = out.split('\n') + assert out[0].startswith('Foo ') + assert out[1].startswith('Bar ') + assert out[2].startswith('Baz ') + + +@all_runners +def test_run_path(runner): + out, err = runner('run', '--quiet', get_examples_path('types')) + out = out.split('\n') + assert out[0].startswith('Foo ') + assert out[1].startswith('Bar ') + assert out[2].startswith('Baz ') + + +@all_runners +def test_install_requirements_for_dir(runner): + dirname = get_examples_path('types') + with patch('bonobo.commands.run._install_requirements') as install_mock: + runner('run', '--install', dirname) + install_mock.assert_called_once_with(os.path.join(dirname, 'requirements.txt')) + + +@all_runners +def test_install_requirements_for_file(runner): + dirname = get_examples_path('types') + with 
patch('bonobo.commands.run._install_requirements') as install_mock: + runner('run', '--install', os.path.join(dirname, 'strings.py')) + install_mock.assert_called_once_with(os.path.join(dirname, 'requirements.txt')) diff --git a/tests/commands/test_run_environ.py b/tests/commands/test_run_environ.py new file mode 100644 index 0000000..1d966be --- /dev/null +++ b/tests/commands/test_run_environ.py @@ -0,0 +1,109 @@ +import pytest + +from bonobo.util.testing import EnvironmentTestCase + + +@pytest.fixture +def env1(tmpdir): + env_file = tmpdir.join('.env_one') + env_file.write('\n'.join(( + 'SECRET=unknown', + 'PASSWORD=sweet', + 'PATH=first', + ))) + return str(env_file) + + +@pytest.fixture +def env2(tmpdir): + env_file = tmpdir.join('.env_two') + env_file.write('\n'.join(( + 'PASSWORD=bitter', + "PATH='second'", + ))) + return str(env_file) + + +class TestDefaultEnvFile(EnvironmentTestCase): + def test_run_with_default_env_file(self, runner, target, env1): + env = self.run_environ(runner, *target, '--default-env-file', env1) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'sweet' + assert env.get('PATH') == '/usr/bin' + + def test_run_with_multiple_default_env_files(self, runner, target, env1, env2): + env = self.run_environ(runner, *target, '--default-env-file', env1, '--default-env-file', env2) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'sweet' + assert env.get('PATH') == '/usr/bin' + + env = self.run_environ(runner, *target, '--default-env-file', env2, '--default-env-file', env1) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'bitter' + assert env.get('PATH') == '/usr/bin' + + +class TestEnvFile(EnvironmentTestCase): + def test_run_with_file(self, runner, target, env1): + env = self.run_environ(runner, *target, '--env-file', env1) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'sweet' + assert env.get('PATH') == 'first' + + def 
test_run_with_multiple_files(self, runner, target, env1, env2): + env = self.run_environ(runner, *target, '--env-file', env1, '--env-file', env2) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'bitter' + assert env.get('PATH') == 'second' + + env = self.run_environ(runner, *target, '--env-file', env2, '--env-file', env1) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'sweet' + assert env.get('PATH') == 'first' + + +class TestEnvFileCombinations(EnvironmentTestCase): + def test_run_with_both_env_files(self, runner, target, env1, env2): + env = self.run_environ(runner, *target, '--default-env-file', env1, '--env-file', env2) + assert env.get('SECRET') == 'unknown' + assert env.get('PASSWORD') == 'bitter' + assert env.get('PATH') == 'second' + + def test_run_with_both_env_files_then_overrides(self, runner, target, env1, env2): + env = self.run_environ( + runner, *target, '--default-env-file', env1, '--env-file', env2, '--env', 'PASSWORD=mine', '--env', + 'SECRET=s3cr3t' + ) + assert env.get('SECRET') == 's3cr3t' + assert env.get('PASSWORD') == 'mine' + assert env.get('PATH') == 'second' + + +class TestEnvVars(EnvironmentTestCase): + def test_run_no_env(self, runner, target): + env = self.run_environ(runner, *target, environ={'USER': 'romain'}) + assert env.get('USER') == 'romain' + + def test_run_env(self, runner, target): + env = self.run_environ(runner, *target, '--env', 'USER=serious', environ={'USER': 'romain'}) + assert env.get('USER') == 'serious' + + def test_run_env_mixed(self, runner, target): + env = self.run_environ(runner, *target, '--env', 'ONE=1', '--env', 'TWO="2"', environ={'USER': 'romain'}) + assert env.get('USER') == 'romain' + assert env.get('ONE') == '1' + assert env.get('TWO') == '2' + + def test_run_default_env(self, runner, target): + env = self.run_environ(runner, *target, '--default-env', 'USER=clown') + assert env.get('USER') == 'clown' + + env = self.run_environ(runner, *target, 
'--default-env', 'USER=clown', environ={'USER': 'romain'}) + assert env.get('USER') == 'romain' + + env = self.run_environ( + runner, *target, '--env', 'USER=serious', '--default-env', 'USER=clown', environ={ + 'USER': 'romain' + } + ) + assert env.get('USER') == 'serious' diff --git a/tests/commands/test_version.py b/tests/commands/test_version.py new file mode 100644 index 0000000..1ee893f --- /dev/null +++ b/tests/commands/test_version.py @@ -0,0 +1,20 @@ +from bonobo import __version__ +from bonobo.util.testing import all_runners + + +@all_runners +def test_version(runner): + out, err = runner('version') + out = out.strip() + assert out.startswith('bonobo ') + assert __version__ in out + + out, err = runner('version', '-q') + out = out.strip() + assert out.startswith('bonobo ') + assert __version__ in out + + out, err = runner('version', '-qq') + out = out.strip() + assert not out.startswith('bonobo ') + assert __version__ in out \ No newline at end of file diff --git a/tests/execution/contexts/test_node.py b/tests/execution/contexts/test_node.py index 648743b..ef29c6e 100644 --- a/tests/execution/contexts/test_node.py +++ b/tests/execution/contexts/test_node.py @@ -185,6 +185,7 @@ def test_node_tuple_dict(): assert output[0] == ('foo', 'bar', {'id': 1}) assert output[1] == ('foo', 'baz', {'id': 2}) + def test_node_lifecycle_natural(): func = MagicMock() @@ -203,6 +204,7 @@ def test_node_lifecycle_natural(): ctx.stop() assert all((ctx.started, ctx.stopped)) and not any((ctx.alive, ctx.killed)) + def test_node_lifecycle_with_kill(): func = MagicMock() @@ -223,7 +225,3 @@ def test_node_lifecycle_with_kill(): ctx.stop() assert all((ctx.started, ctx.killed, ctx.stopped)) and not ctx.alive - - - - diff --git a/tests/execution/test_events.py b/tests/execution/test_events.py index 8abeb57..6fbc405 100644 --- a/tests/execution/test_events.py +++ b/tests/execution/test_events.py @@ -10,6 +10,7 @@ def test_names(): event_name = getattr(events, name.upper()) assert 
event_name == '.'.join(('execution', name)) + def test_event_object(): # Same logic as above. c = Mock() diff --git a/tests/test_commands.py b/tests/test_commands.py deleted file mode 100644 index 255bae7..0000000 --- a/tests/test_commands.py +++ /dev/null @@ -1,319 +0,0 @@ -import functools -import io -import os -import runpy -import sys -from contextlib import redirect_stdout, redirect_stderr -from unittest.mock import patch - -import pkg_resources -import pytest - -from bonobo import __main__, __version__, get_examples_path -from bonobo.commands import entrypoint -from bonobo.commands.download import EXAMPLES_BASE_URL - - -def runner(f): - @functools.wraps(f) - def wrapped_runner(*args, catch_errors=False): - with redirect_stdout(io.StringIO()) as stdout, redirect_stderr(io.StringIO()) as stderr: - try: - f(list(args)) - except BaseException as exc: - if not catch_errors: - raise - elif isinstance(catch_errors, BaseException) and not isinstance(exc, catch_errors): - raise - return stdout.getvalue(), stderr.getvalue(), exc - return stdout.getvalue(), stderr.getvalue() - - return wrapped_runner - - -@runner -def runner_entrypoint(args): - """ Run bonobo using the python command entrypoint directly (bonobo.commands.entrypoint). 
""" - return entrypoint(args) - - -@runner -def runner_module(args): - """ Run bonobo using the bonobo.__main__ file, which is equivalent as doing "python -m bonobo ...".""" - with patch.object(sys, 'argv', ['bonobo', *args]): - return runpy.run_path(__main__.__file__, run_name='__main__') - - -all_runners = pytest.mark.parametrize('runner', [runner_entrypoint, runner_module]) - - -def test_entrypoint(): - commands = {} - - for command in pkg_resources.iter_entry_points('bonobo.commands'): - commands[command.name] = command - - assert not { - 'convert', - 'init', - 'inspect', - 'run', - 'version', - }.difference(set(commands)) - - -@all_runners -def test_no_command(runner): - _, err, exc = runner(catch_errors=True) - assert type(exc) == SystemExit - assert 'error: the following arguments are required: command' in err - - -@all_runners -def test_run(runner): - out, err = runner('run', '--quiet', get_examples_path('types/strings.py')) - out = out.split('\n') - assert out[0].startswith('Foo ') - assert out[1].startswith('Bar ') - assert out[2].startswith('Baz ') - - -@all_runners -def test_run_module(runner): - out, err = runner('run', '--quiet', '-m', 'bonobo.examples.types.strings') - out = out.split('\n') - assert out[0].startswith('Foo ') - assert out[1].startswith('Bar ') - assert out[2].startswith('Baz ') - - -@all_runners -def test_run_path(runner): - out, err = runner('run', '--quiet', get_examples_path('types')) - out = out.split('\n') - assert out[0].startswith('Foo ') - assert out[1].startswith('Bar ') - assert out[2].startswith('Baz ') - - -@all_runners -def test_install_requirements_for_dir(runner): - dirname = get_examples_path('types') - with patch('bonobo.commands.run._install_requirements') as install_mock: - runner('run', '--install', dirname) - install_mock.assert_called_once_with(os.path.join(dirname, 'requirements.txt')) - - -@all_runners -def test_install_requirements_for_file(runner): - dirname = get_examples_path('types') - with 
patch('bonobo.commands.run._install_requirements') as install_mock: - runner('run', '--install', os.path.join(dirname, 'strings.py')) - install_mock.assert_called_once_with(os.path.join(dirname, 'requirements.txt')) - - -@all_runners -def test_init_file(runner, tmpdir): - target = tmpdir.join('foo.py') - target_filename = str(target) - runner('init', target_filename) - assert os.path.exists(target_filename) - - out, err = runner('run', target_filename) - assert out.replace('\n', ' ').strip() == 'Hello World' - assert not err - - -@all_runners -def test_version(runner): - out, err = runner('version') - out = out.strip() - assert out.startswith('bonobo ') - assert __version__ in out - - out, err = runner('version', '-q') - out = out.strip() - assert out.startswith('bonobo ') - assert __version__ in out - - out, err = runner('version', '-qq') - out = out.strip() - assert not out.startswith('bonobo ') - assert __version__ in out - - -@all_runners -def test_download_works_for_examples(runner): - expected_bytes = b'hello world' - - class MockResponse(object): - def __init__(self): - self.status_code = 200 - - def iter_content(self, *args, **kwargs): - return [expected_bytes] - - def __enter__(self): - return self - - def __exit__(self, *args, **kwargs): - pass - - fout = io.BytesIO() - fout.close = lambda: None - - with patch('bonobo.commands.download._open_url') as mock_open_url, \ - patch('bonobo.commands.download.open') as mock_open: - mock_open_url.return_value = MockResponse() - mock_open.return_value = fout - runner('download', 'examples/datasets/coffeeshops.txt') - expected_url = EXAMPLES_BASE_URL + 'datasets/coffeeshops.txt' - mock_open_url.assert_called_once_with(expected_url) - - assert fout.getvalue() == expected_bytes - - -@all_runners -def test_download_fails_non_example(runner): - with pytest.raises(ValueError): - runner('download', 'something/entirely/different.txt') - - -@pytest.fixture -def env1(tmpdir): - env_file = tmpdir.join('.env_one') - 
env_file.write('\n'.join(( - 'SECRET=unknown', - 'PASSWORD=sweet', - 'PATH=first', - ))) - return str(env_file) - - -@pytest.fixture -def env2(tmpdir): - env_file = tmpdir.join('.env_two') - env_file.write('\n'.join(( - 'PASSWORD=bitter', - "PATH='second'", - ))) - return str(env_file) - - -all_environ_targets = pytest.mark.parametrize( - 'target', [ - (get_examples_path('environ.py'), ), - ( - '-m', - 'bonobo.examples.environ', - ), - ] -) - - -@all_runners -@all_environ_targets -class EnvironmentTestCase(): - def run_quiet(self, runner, *args): - return runner('run', '--quiet', *args) - - def run_environ(self, runner, *args, environ=None): - _environ = {'PATH': '/usr/bin'} - if environ: - _environ.update(environ) - - with patch.dict('os.environ', _environ, clear=True): - out, err = self.run_quiet(runner, *args) - assert 'SECRET' not in os.environ - assert 'PASSWORD' not in os.environ - if 'PATH' in _environ: - assert 'PATH' in os.environ - assert os.environ['PATH'] == _environ['PATH'] - - assert err == '' - return dict(map(lambda line: line.split(' ', 1), filter(None, out.split('\n')))) - - -class TestDefaultEnvFile(EnvironmentTestCase): - def test_run_with_default_env_file(self, runner, target, env1): - env = self.run_environ(runner, *target, '--default-env-file', env1) - assert env.get('SECRET') == 'unknown' - assert env.get('PASSWORD') == 'sweet' - assert env.get('PATH') == '/usr/bin' - - def test_run_with_multiple_default_env_files(self, runner, target, env1, env2): - env = self.run_environ(runner, *target, '--default-env-file', env1, '--default-env-file', env2) - assert env.get('SECRET') == 'unknown' - assert env.get('PASSWORD') == 'sweet' - assert env.get('PATH') == '/usr/bin' - - env = self.run_environ(runner, *target, '--default-env-file', env2, '--default-env-file', env1) - assert env.get('SECRET') == 'unknown' - assert env.get('PASSWORD') == 'bitter' - assert env.get('PATH') == '/usr/bin' - - -class TestEnvFile(EnvironmentTestCase): - def 
test_run_with_file(self, runner, target, env1): - env = self.run_environ(runner, *target, '--env-file', env1) - assert env.get('SECRET') == 'unknown' - assert env.get('PASSWORD') == 'sweet' - assert env.get('PATH') == 'first' - - def test_run_with_multiple_files(self, runner, target, env1, env2): - env = self.run_environ(runner, *target, '--env-file', env1, '--env-file', env2) - assert env.get('SECRET') == 'unknown' - assert env.get('PASSWORD') == 'bitter' - assert env.get('PATH') == 'second' - - env = self.run_environ(runner, *target, '--env-file', env2, '--env-file', env1) - assert env.get('SECRET') == 'unknown' - assert env.get('PASSWORD') == 'sweet' - assert env.get('PATH') == 'first' - - -class TestEnvFileCombinations(EnvironmentTestCase): - def test_run_with_both_env_files(self, runner, target, env1, env2): - env = self.run_environ(runner, *target, '--default-env-file', env1, '--env-file', env2) - assert env.get('SECRET') == 'unknown' - assert env.get('PASSWORD') == 'bitter' - assert env.get('PATH') == 'second' - - def test_run_with_both_env_files_then_overrides(self, runner, target, env1, env2): - env = self.run_environ( - runner, *target, '--default-env-file', env1, '--env-file', env2, '--env', 'PASSWORD=mine', '--env', - 'SECRET=s3cr3t' - ) - assert env.get('SECRET') == 's3cr3t' - assert env.get('PASSWORD') == 'mine' - assert env.get('PATH') == 'second' - - -class TestEnvVars(EnvironmentTestCase): - def test_run_no_env(self, runner, target): - env = self.run_environ(runner, *target, environ={'USER': 'romain'}) - assert env.get('USER') == 'romain' - - def test_run_env(self, runner, target): - env = self.run_environ(runner, *target, '--env', 'USER=serious', environ={'USER': 'romain'}) - assert env.get('USER') == 'serious' - - def test_run_env_mixed(self, runner, target): - env = self.run_environ(runner, *target, '--env', 'ONE=1', '--env', 'TWO="2"', environ={'USER': 'romain'}) - assert env.get('USER') == 'romain' - assert env.get('ONE') == '1' - assert 
env.get('TWO') == '2' - - def test_run_default_env(self, runner, target): - env = self.run_environ(runner, *target, '--default-env', 'USER=clown') - assert env.get('USER') == 'clown' - - env = self.run_environ(runner, *target, '--default-env', 'USER=clown', environ={'USER': 'romain'}) - assert env.get('USER') == 'romain' - - env = self.run_environ( - runner, *target, '--env', 'USER=serious', '--default-env', 'USER=clown', environ={ - 'USER': 'romain' - } - ) - assert env.get('USER') == 'serious' From 8439a535fec408d9db80a7c62ffdecaedb57e836 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 4 Nov 2017 15:01:04 +0100 Subject: [PATCH 32/37] Moves timer to statistics utilities. --- bonobo/util/statistics.py | 21 +++++++++++++++++++++ bonobo/util/time.py | 21 --------------------- 2 files changed, 21 insertions(+), 21 deletions(-) delete mode 100644 bonobo/util/time.py diff --git a/bonobo/util/statistics.py b/bonobo/util/statistics.py index 5d71a0f..2f9c5c2 100644 --- a/bonobo/util/statistics.py +++ b/bonobo/util/statistics.py @@ -13,6 +13,7 @@ # without warranties or conditions of any kind, either express or implied. # see the license for the specific language governing permissions and # limitations under the license. +import time class WithStatistics: @@ -29,3 +30,23 @@ class WithStatistics: def increment(self, name): self.statistics[name] += 1 + + +class Timer: + """ + Context manager used to time execution of stuff. 
+ """ + + def __enter__(self): + self.__start = time.time() + + def __exit__(self, type=None, value=None, traceback=None): + # Error handling here + self.__finish = time.time() + + @property + def duration(self): + return self.__finish - self.__start + + def __str__(self): + return str(int(self.duration * 1000) / 1000.0) + 's' diff --git a/bonobo/util/time.py b/bonobo/util/time.py deleted file mode 100644 index 14de016..0000000 --- a/bonobo/util/time.py +++ /dev/null @@ -1,21 +0,0 @@ -import time - - -class Timer: - """ - Context manager used to time execution of stuff. - """ - - def __enter__(self): - self.__start = time.time() - - def __exit__(self, type=None, value=None, traceback=None): - # Error handling here - self.__finish = time.time() - - @property - def duration(self): - return self.__finish - self.__start - - def __str__(self): - return str(int(self.duration * 1000) / 1000.0) + 's' From a901731fffd9b5196f113fdf93bfd00a72d9a146 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 4 Nov 2017 15:17:51 +0100 Subject: [PATCH 33/37] Switch to stable mondrian. --- Makefile | 2 +- Projectfile | 2 +- requirements.txt | 2 +- setup.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index ccd73ba..8ae38cd 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Generated by Medikit 0.4a11 on 2017-11-04. +# Generated by Medikit 0.4.1 on 2017-11-04. # All changes will be overriden. 
PACKAGE ?= bonobo diff --git a/Projectfile b/Projectfile index d0e800e..ca6a0f4 100644 --- a/Projectfile +++ b/Projectfile @@ -42,7 +42,7 @@ python.setup( python.add_requirements( 'fs >=2.0,<2.1', 'jinja2 >=2.9,<2.10', - 'mondrian ==0.4a1', + 'mondrian >=0.4,<0.5', 'packaging >=16,<17', 'psutil >=5.4,<6.0', 'requests >=2.0,<3.0', diff --git a/requirements.txt b/requirements.txt index dd9696d..3a5c70d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ fs==2.0.12 idna==2.6 jinja2==2.9.6 markupsafe==1.0 -mondrian==0.4a1 +mondrian==0.4.0 packaging==16.8 pbr==3.1.1 psutil==5.4.0 diff --git a/setup.py b/setup.py index d622505..d9eb59b 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ setup( packages=find_packages(exclude=['ez_setup', 'example', 'test']), include_package_data=True, install_requires=[ - 'colorama (>= 0.3)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'mondrian (== 0.4a1)', + 'colorama (>= 0.3)', 'fs (>= 2.0, < 2.1)', 'jinja2 (>= 2.9, < 2.10)', 'mondrian (>= 0.4, < 0.5)', 'packaging (>= 16, < 17)', 'psutil (>= 5.4, < 6.0)', 'requests (>= 2.0, < 3.0)', 'stevedore (>= 1.27, < 1.28)', 'whistle (>= 1.0, < 1.1)' ], From 674f9348e7d8c40c0a15e9d3a5c7f9041a6697a7 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 5 Nov 2017 14:45:30 +0100 Subject: [PATCH 34/37] Skip failing order test for python 3.5 (temporary). 
--- tests/commands/test_convert.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/commands/test_convert.py b/tests/commands/test_convert.py index ea0c3c4..ed6f9e2 100644 --- a/tests/commands/test_convert.py +++ b/tests/commands/test_convert.py @@ -1,7 +1,13 @@ +import sys + +import pytest + from bonobo.util.environ import change_working_directory from bonobo.util.testing import all_runners +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="python 3.5 does not preserve kwargs order and this cant pass for now") @all_runners def test_convert(runner, tmpdir): csv_content = 'id;name\n1;Romain' From 56c26ea26cd63ad83ea997cb6fbc75ac62ee522e Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 5 Nov 2017 14:54:01 +0100 Subject: [PATCH 35/37] Fix default logging level, adds options to default template. --- bonobo/commands/__init__.py | 1 + bonobo/commands/templates/default.py-tpl | 11 +++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index f42d5c6..a482b53 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -16,6 +16,7 @@ def entrypoint(args=None): mondrian.setup(excepthook=True) logger = logging.getLogger() + logger.setLevel(settings.LOGGING_LEVEL.get()) parser = argparse.ArgumentParser() parser.add_argument('--debug', '-D', action='store_true') diff --git a/bonobo/commands/templates/default.py-tpl b/bonobo/commands/templates/default.py-tpl index c72efb5..eaea053 100644 --- a/bonobo/commands/templates/default.py-tpl +++ b/bonobo/commands/templates/default.py-tpl @@ -19,7 +19,7 @@ def load(*args): print(*args) -def get_graph(): +def get_graph(**options): """ This function builds the graph that needs to be executed. 
@@ -32,7 +32,7 @@ def get_graph(): return graph -def get_services(): +def get_services(**options): """ This function builds the services dictionary, which is a simple dict of names-to-implementation used by bonobo for runtime injection. @@ -48,5 +48,8 @@ def get_services(): # The __main__ block actually execute the graph. if __name__ == '__main__': parser = bonobo.get_argument_parser() - with bonobo.parse_args(parser): - bonobo.run(get_graph(), services=get_services()) + with bonobo.parse_args(parser) as options: + bonobo.run( + get_graph(**options), + services=get_services(**options) + ) From eb393331cdf4136af2c1955f532350528ba43103 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 5 Nov 2017 14:59:25 +0100 Subject: [PATCH 36/37] Adds a "bare" template, containing the very minimum you want to have in 90% of cases. --- bonobo/commands/init.py | 2 +- bonobo/commands/templates/bare.py-tpl | 15 +++++++++++++++ tests/commands/test_init.py | 16 +++++++++++++++- 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 bonobo/commands/templates/bare.py-tpl diff --git a/bonobo/commands/init.py b/bonobo/commands/init.py index 8c50b16..6d4b217 100644 --- a/bonobo/commands/init.py +++ b/bonobo/commands/init.py @@ -6,7 +6,7 @@ from bonobo.commands import BaseCommand class InitCommand(BaseCommand): - TEMPLATES = {'default'} + TEMPLATES = {'bare', 'default'} TEMPLATES_PATH = os.path.join(os.path.dirname(__file__), 'templates') def add_arguments(self, parser): diff --git a/bonobo/commands/templates/bare.py-tpl b/bonobo/commands/templates/bare.py-tpl new file mode 100644 index 0000000..1ca3019 --- /dev/null +++ b/bonobo/commands/templates/bare.py-tpl @@ -0,0 +1,15 @@ +import bonobo + + +def get_graph(**options): + graph = bonobo.Graph() + return graph + + +def get_services(**options): + return {} + + +if __name__ == '__main__': + with bonobo.parse_args() as options: + bonobo.run(get_graph(**options), services=get_services(**options)) diff --git 
a/tests/commands/test_init.py b/tests/commands/test_init.py index a551a9a..626f5e8 100644 --- a/tests/commands/test_init.py +++ b/tests/commands/test_init.py @@ -1,5 +1,8 @@ import os +import pytest + +from bonobo.commands.init import InitCommand from bonobo.util.testing import all_runners @@ -12,4 +15,15 @@ def test_init_file(runner, tmpdir): out, err = runner('run', target_filename) assert out.replace('\n', ' ').strip() == 'Hello World' - assert not err \ No newline at end of file + assert not err + + +@all_runners +@pytest.mark.parametrize('template', InitCommand.TEMPLATES) +def test_init_file_templates(runner, template, tmpdir): + target = tmpdir.join('foo.py') + target_filename = str(target) + runner('init', target_filename) + assert os.path.exists(target_filename) + out, err = runner('run', target_filename) + assert not err From 8f3c4252b4f5ea28b0f6924fa3e17bd7362a2619 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 5 Nov 2017 19:41:27 +0100 Subject: [PATCH 37/37] Working on the new version of the tutorial. Only Step1 implemented. 
--- bonobo/_api.py | 23 ++- docs/_static/custom.css | 18 ++- docs/_templates/base.html | 11 +- docs/conf.py | 9 ++ docs/tutorial/1-init.rst | 258 ++++++++++++++++++++++++++++++++++ docs/tutorial/2-jobs.rst | 12 ++ docs/tutorial/3-files.rst | 12 ++ docs/tutorial/4-services.rst | 210 +++++++++++++++++++++++++++ docs/tutorial/5-packaging.rst | 11 ++ docs/tutorial/django.rst | 3 + docs/tutorial/index.rst | 54 ++++--- docs/tutorial/notebooks.rst | 4 + docs/tutorial/sqlalchemy.rst | 4 + 13 files changed, 586 insertions(+), 43 deletions(-) create mode 100644 docs/tutorial/1-init.rst create mode 100644 docs/tutorial/2-jobs.rst create mode 100644 docs/tutorial/3-files.rst create mode 100644 docs/tutorial/4-services.rst create mode 100644 docs/tutorial/5-packaging.rst create mode 100644 docs/tutorial/django.rst create mode 100644 docs/tutorial/notebooks.rst create mode 100644 docs/tutorial/sqlalchemy.rst diff --git a/bonobo/_api.py b/bonobo/_api.py index 816dfda..af92868 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -10,16 +10,33 @@ __all__ = [] def register_api(x, __all__=__all__): + """Register a function as being part of Bonobo's API, then returns the original function.""" __all__.append(get_name(x)) return x +def register_graph_api(x, __all__=__all__): + """ + Register a function as being part of Bonobo's API, after checking that its signature contains the right parameters + to work correctly, then returns the original function. + """ + from inspect import signature + parameters = list(signature(x).parameters) + required_parameters = {'plugins', 'services', 'strategy'} + assert parameters[0] == 'graph', 'First parameter of a graph api function must be "graph".' 
+ assert required_parameters.intersection( + parameters) == required_parameters, 'Graph api functions must define the following parameters: ' + ', '.join( + sorted(required_parameters)) + + return register_api(x, __all__=__all__) + + def register_api_group(*args): for attr in args: register_api(attr) -@register_api +@register_graph_api def run(graph, *, plugins=None, services=None, strategy=None): """ Main entry point of bonobo. It takes a graph and creates all the necessary plumbery around to execute it. @@ -82,8 +99,8 @@ def _inspect_as_graph(graph): _inspect_formats = {'graph': _inspect_as_graph} -@register_api -def inspect(graph, *, format): +@register_graph_api +def inspect(graph, *, plugins=None, services=None, strategy=None, format): if not format in _inspect_formats: raise NotImplementedError( 'Output format {} not implemented. Choices are: {}.'.format( diff --git a/docs/_static/custom.css b/docs/_static/custom.css index f658da9..fa608d1 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,3 +1,19 @@ svg { border: 2px solid green -} \ No newline at end of file +} + +div.related { + width: 940px; + margin: 30px auto 0 auto; +} + +@media screen and (max-width: 875px) { + div.related { + visibility: hidden; + display: none; + } +} + +.brand { + font-family: 'Ubuntu', 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif; +} diff --git a/docs/_templates/base.html b/docs/_templates/base.html index f8ad58a..27ca438 100644 --- a/docs/_templates/base.html +++ b/docs/_templates/base.html @@ -4,17 +4,8 @@ {%- block extrahead %} {{ super() }} + {% endblock %} {%- block footer %} diff --git a/docs/conf.py b/docs/conf.py index 93895a8..07d0424 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -186,3 +186,12 @@ epub_exclude_files = ['search.html'] # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} + +rst_epilog = """ +.. 
|bonobo| replace:: **Bonobo** + +.. |longversion| replace:: v.{version} + +""".format( + version = version, +) diff --git a/docs/tutorial/1-init.rst b/docs/tutorial/1-init.rst new file mode 100644 index 0000000..780d34d --- /dev/null +++ b/docs/tutorial/1-init.rst @@ -0,0 +1,258 @@ +Part 1: Let's get started! +========================== + +To get started with |bonobo|, you need to install it in a working python 3.5+ environment (you should use a +`virtualenv `_). + +.. code-block:: shell-session + + $ pip install bonobo + +Check that the installation worked, and that you're using a version that matches this tutorial (written for bonobo +|longversion|). + +.. code-block:: shell-session + + $ bonobo version + +See :doc:`/install` for more options. + + +Create an ETL job +::::::::::::::::: + +Since Bonobo 0.6, it's easy to bootstrap a simple ETL job using just one file. + +We'll start here, and the later stages of the tutorial will guide you toward refactoring this to a python package. + +.. code-block:: shell-session + + $ bonobo init tutorial.py + +This will create a simple job in a `tutorial.py` file. Let's run it: + +.. code-block:: shell-session + + $ python tutorial.py + Hello + World + - extract in=1 out=2 [done] + - transform in=2 out=2 [done] + - load in=2 [done] + +If you have a similar result, then congratulations! You just ran your first |bonobo| ETL job. + + +Inspect your graph +:::::::::::::::::: + +The basic building blocks of |bonobo| are **transformations** and **graphs**. + +**Transformations** are simple python callables (like functions) that handle a transformation step for a line of data. + +**Graphs** are a set of transformations, with directional links between them to define the data-flow that will happen +at runtime. + +To inspect the graph of your first transformation (you must install graphviz first to do so), run: + +.. 
code-block:: shell-session + + $ bonobo inspect --graph tutorial.py | dot -Tpng -o tutorial.png + +Open the generated `tutorial.png` file to have a quick look at the graph. + +.. graphviz:: + + digraph { + rankdir = LR; + "BEGIN" [shape="point"]; + "BEGIN" -> {0 [label="extract"]}; + {0 [label="extract"]} -> {1 [label="transform"]}; + {1 [label="transform"]} -> {2 [label="load"]}; + } + +You can easily understand here the structure of your graph. For such a simple graph, it's pretty much useless, but as +you'll write more complex transformations, it will be helpful. + + +Read the Code +::::::::::::: + +Before we write our own job, let's look at the code we have in `tutorial.py`. + + +Import +------ + +.. code-block:: python + + import bonobo + + +The highest level APIs of |bonobo| are all contained within the top level **bonobo** namespace. + +If you're a beginner with the library, stick to using only those APIs (they also are the most stable APIs). + +If you're an advanced user (and you'll be one quite soon), you can safely use second level APIs. + +The third level APIs are considered private, and you should not use them unless you're hacking on |bonobo| directly. + + +Extract +------- + +.. code-block:: python + + def extract(): + yield 'hello' + yield 'world' + +This is a first transformation, written as a python generator, that will send some strings, one after the other, to its +output. + +Transformations that take no input and yields a variable number of outputs are usually called **extractors**. You'll +encounter a few different types, either purely generating the data (like here), using an external service (a +database, for example) or using some filesystem (which is considered an external service too). + +Extractors do not need to have its input connected to anything, and will be called exactly once when the graph is +executed. + + +Transform +--------- + +.. 
code-block:: python + + def transform(*args): + yield tuple( + map(str.title, args) + ) + +This is a second transformation. It will get called a bunch of times, once for each input row it gets, and apply some +logic on the input to generate the output. + +This is the most **generic** case. For each input row, you can generate zero, one or many +lines of output. + + +Load +---- + +.. code-block:: python + + def load(*args): + print(*args) + +This is the third and last transformation in our "hello world" example. It will apply some logic to each row, and have +absolutely no output. + +Transformations that take input and yield nothing are also called **loaders**. Like extractors, you'll encounter +different types, to work with various external systems. + +Please note that as a convenience and because the cost is marginal, most builtin `loaders` will send their +inputs to their output, so you can easily chain more than one loader, or apply more transformations after a given +loader was applied. + + +Graph Factory +------------- + +.. code-block:: python + + def get_graph(**options): + graph = bonobo.Graph() + graph.add_chain(extract, transform, load) + return graph + +All our transformations were defined above, but nothing ties them together, for now. + +This "graph factory" function is in charge of the creation and configuration of a :class:`bonobo.Graph` instance, that +will be executed later. + +By no means is |bonobo| limited to simple graphs like this one. You can add as many chains as you want, and each chain +can contain as many nodes as you want. + + +Services Factory +---------------- + +.. code-block:: python + + def get_services(**options): + return {} + +This is the "services factory", that we'll use later to connect to external systems. Let's skip this one, for now. + +(we'll dive into this topic in :doc:`4-services`) + + +Main Block +---------- + +.. 
code-block:: python + + if __name__ == '__main__': + parser = bonobo.get_argument_parser() + with bonobo.parse_args(parser) as options: + bonobo.run( + get_graph(**options), + services=get_services(**options) + ) + +Here, the real thing happens. + +Without diving into too much detail for now, using the :func:`bonobo.parse_args` context manager will allow our job to +be configurable, later, and although we don't really need it right now, it does no harm either. + +Reading the output +:::::::::::::::::: + +Let's run this job once again: + +.. code-block:: shell-session + + $ python tutorial.py + Hello + World + - extract in=1 out=2 [done] + - transform in=2 out=2 [done] + - load in=2 [done] + +The console output contains two things. + +* First, it contains the real output of your job (what was :func:`print`-ed to `sys.stdout`). +* Second, it displays the execution status (on `sys.stderr`). Each line contains a "status" character, the node name, + numbers and a human readable status. This status will evolve in real time, and lets you follow a job's progress + while it's running. + + * Status character: + + * “ ” means that the node was not yet started. + * “`-`” means that the node finished its execution. + * “`+`” means that the node is currently running. + * “`!`” means that the node had problems running. + + * Numerical statistics: + + * “`in=...`” shows the input lines count, also known as the number of calls to your transformation. + * “`out=...`” shows the output lines count. + * “`read=...`” shows the count of reads applied to an external system, if the transformation supports it. + * “`write=...`” shows the count of writes applied to an external system, if the transformation supports it. + * “`err=...`” shows the count of exceptions that happened while running the transformation. Note that an exception will abort + a call, but the execution will move to the next row. + + +Moving forward +:::::::::::::: + +That's all for this first step.
+ +You now know: + +* How to create a new job file. +* How to inspect the content of a job file. +* What should go in a job file. +* How to execute a job file. +* How to read the console output. + +**Next: :doc:`2-jobs`** diff --git a/docs/tutorial/2-jobs.rst b/docs/tutorial/2-jobs.rst new file mode 100644 index 0000000..c3a6c8b --- /dev/null +++ b/docs/tutorial/2-jobs.rst @@ -0,0 +1,12 @@ +Part 2: Writing ETL Jobs +======================== + + +Moving forward +:::::::::::::: + +You now know: + +* How to ... + +**Next: :doc:`3-files`** diff --git a/docs/tutorial/3-files.rst b/docs/tutorial/3-files.rst new file mode 100644 index 0000000..adcc334 --- /dev/null +++ b/docs/tutorial/3-files.rst @@ -0,0 +1,12 @@ +Part 3: Working with Files +========================== + + +Moving forward +:::::::::::::: + +You now know: + +* How to ... + +**Next: :doc:`4-services`** diff --git a/docs/tutorial/4-services.rst b/docs/tutorial/4-services.rst new file mode 100644 index 0000000..e39f15b --- /dev/null +++ b/docs/tutorial/4-services.rst @@ -0,0 +1,210 @@ +Part 4: Services and Configurables +================================== + +.. note:: + + This section lacks completeness, sorry for that (but you can still read it!). + +In the last section, we used a few new tools. + +Class-based transformations and configurables +::::::::::::::::::::::::::::::::::::::::::::: + +Bonobo is a bit dumb. If something is callable, it considers it can be used as a transformation, and it's up to the +user to provide callables that logically fit in a graph. + +You can use plain python objects with a `__call__()` method, and it will just work. + +As a lot of transformations need common machinery, there are a few tools to quickly build transformations, most of +them requiring your class to subclass :class:`bonobo.config.Configurable`. + +Configurables allow you to use the following features: + +* You can add **Options** (using the :class:`bonobo.config.Option` descriptor).
Options can be positional, or keyword + based, can have a default value and will be consumed from the constructor arguments. + + .. code-block:: python + + from bonobo.config import Configurable, Option + + class PrefixIt(Configurable): + prefix = Option(str, positional=True, default='>>>') + + def call(self, row): + return self.prefix + ' ' + row + + prefixer = PrefixIt('$') + +* You can add **Services** (using the :class:`bonobo.config.Service` descriptor). Services are a subclass of + :class:`bonobo.config.Option`, sharing the same basics, but specialized in the definition of "named services" that + will be resolved at runtime (a.k.a for which we will provide an implementation at runtime). We'll dive more into that + in the next section + + .. code-block:: python + + from bonobo.config import Configurable, Option, Service + + class HttpGet(Configurable): + url = Option(default='https://jsonplaceholder.typicode.com/users') + http = Service('http.client') + + def call(self, http): + resp = http.get(self.url) + + for row in resp.json(): + yield row + + http_get = HttpGet() + + +* You can add **Methods** (using the :class:`bonobo.config.Method` descriptor). :class:`bonobo.config.Method` is a + subclass of :class:`bonobo.config.Option` that allows to pass callable parameters, either to the class constructor, + or using the class as a decorator. + + .. code-block:: python + + from bonobo.config import Configurable, Method + + class Applier(Configurable): + apply = Method() + + def call(self, row): + return self.apply(row) + + @Applier + def Prefixer(self, row): + return 'Hello, ' + row + + prefixer = Prefixer() + +* You can add **ContextProcessors**, which are an advanced feature we won't introduce here. If you're familiar with + pytest, you can think of them as pytest fixtures, execution wise. + +Services +:::::::: + +The motivation behind services is mostly separation of concerns, testability and deployability. 
+ +Usually, your transformations will depend on services (like a filesystem, an http client, a database, a rest api, ...). +Those services can very well be hardcoded in the transformations, but there are two main drawbacks: + +* You won't be able to change the implementation depending on the current environment (development laptop versus + production servers, bug-hunting session versus execution, etc.) +* You won't be able to test your transformations without testing the associated services. + +To overcome those caveats of hardcoding things, we define Services in the configurable, which are basically +string-options of the service names, and we provide an implementation at the last moment possible. + +There are two ways of providing implementations: + +* Either file-wide, by providing a `get_services()` function that returns a dict of named implementations (we did so + with filesystems in the previous step, :doc:`tut02`) +* Or directory-wide, by providing a `get_services()` function in a specially named `_services.py` file. + +The first is simpler if you only have one transformation graph in one file, the second allows you to group coherent +transformations together in a directory and share the implementations. + +Let's see how to use it, starting from the previous service example: + +.. code-block:: python + + from bonobo.config import Configurable, Option, Service + + class HttpGet(Configurable): + url = Option(default='https://jsonplaceholder.typicode.com/users') + http = Service('http.client') + + def call(self, http): + resp = http.get(self.url) + + for row in resp.json(): + yield row + +We defined an "http.client" service, that obviously should have a `get()` method, returning responses that have a +`json()` method. + +Let's provide two implementations for that. The first one will be using `requests <https://requests.readthedocs.io/>`_, +that coincidentally satisfies the described interface: + +.. 
code-block:: python + + import bonobo + import requests + + def get_services(): + return { + 'http.client': requests + } + + graph = bonobo.Graph( + HttpGet(), + print, + ) + +If you run this code, you should see some mock data returned by the webservice we called (assuming it's up and you can +reach it). + +Now, the second implementation will replace that with a mock, used for testing purposes: + +.. code-block:: python + + class HttpResponseStub: + def json(self): + return [ + {'id': 1, 'name': 'Leanne Graham', 'username': 'Bret', 'email': 'Sincere@april.biz', 'address': {'street': 'Kulas Light', 'suite': 'Apt. 556', 'city': 'Gwenborough', 'zipcode': '92998-3874', 'geo': {'lat': '-37.3159', 'lng': '81.1496'}}, 'phone': '1-770-736-8031 x56442', 'website': 'hildegard.org', 'company': {'name': 'Romaguera-Crona', 'catchPhrase': 'Multi-layered client-server neural-net', 'bs': 'harness real-time e-markets'}}, + {'id': 2, 'name': 'Ervin Howell', 'username': 'Antonette', 'email': 'Shanna@melissa.tv', 'address': {'street': 'Victor Plains', 'suite': 'Suite 879', 'city': 'Wisokyburgh', 'zipcode': '90566-7771', 'geo': {'lat': '-43.9509', 'lng': '-34.4618'}}, 'phone': '010-692-6593 x09125', 'website': 'anastasia.net', 'company': {'name': 'Deckow-Crist', 'catchPhrase': 'Proactive didactic contingency', 'bs': 'synergize scalable supply-chains'}}, + ] + + class HttpStub: + def get(self, url): + return HttpResponseStub() + + def get_services(): + return { + 'http.client': HttpStub() + } + + graph = bonobo.Graph( + HttpGet(), + print, + ) + +The `Graph` definition staying the exact same, you can easily substitute the `_services.py` file depending on your +environment (the way you're doing this is out of bonobo scope and heavily depends on your usual way of managing +configuration files on different platforms). 
+ +Starting with bonobo 0.5 (not yet released), you will be able to use service injections with function-based +transformations too, using the `bonobo.config.requires` decorator to mark a dependency. + +.. code-block:: python + + from bonobo.config import requires + + @requires('http.client') + def http_get(http): + resp = http.get('https://jsonplaceholder.typicode.com/users') + + for row in resp.json(): + yield row + + +Read more +::::::::: + +* :doc:`/guide/services` +* :doc:`/reference/api_config` + +Next +:::: + +:doc:`tut04`. + + +Moving forward +:::::::::::::: + +You now know: + +* How to ... + +**Next: :doc:`5-packaging`** diff --git a/docs/tutorial/5-packaging.rst b/docs/tutorial/5-packaging.rst new file mode 100644 index 0000000..bf4537b --- /dev/null +++ b/docs/tutorial/5-packaging.rst @@ -0,0 +1,11 @@ +Part 5: Projects and Packaging +============================== + + +Moving forward +:::::::::::::: + +You now know: + +* How to ... + diff --git a/docs/tutorial/django.rst b/docs/tutorial/django.rst new file mode 100644 index 0000000..1be4f52 --- /dev/null +++ b/docs/tutorial/django.rst @@ -0,0 +1,3 @@ +Working with Django +=================== + diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst index 4ba99c2..111c543 100644 --- a/docs/tutorial/index.rst +++ b/docs/tutorial/index.rst @@ -17,47 +17,43 @@ Bonobo uses simple python and should be quick and easy to learn. Tutorial :::::::: -.. note:: +.. toctree:: + :maxdepth: 1 - Good documentation is not easy to write. We do our best to make it better and better. + 1-init + 2-jobs + 3-files + 4-services + 5-packaging - Although all content here should be accurate, you may feel a lack of completeness, for which we plead guilty and - apologize. - - If you're stuck, please come and ask on our `slack channel `_, we'll figure - something out. - - If you're not stuck but had trouble understanding something, please consider contributing to the docs (via GitHub - pull requests). +More +:::: .. 
toctree:: - :maxdepth: 2 - - tut01 - tut02 - tut03 - tut04 + :maxdepth: 1 + django + notebooks + sqlalchemy What's next? :::::::::::: -Read a few examples -------------------- +* :doc:`The Bonobo Guide <../guide/index>` +* :doc:`Extensions <../extension/index>` -* :doc:`../reference/examples` -Read about best development practices -------------------------------------- +We're there! +:::::::::::: -* :doc:`../guide/index` -* :doc:`../guide/purity` +Good documentation is not easy to write. -Read about integrating external tools with bonobo -------------------------------------------------- +Although all content here should be accurate, you may feel a lack of completeness, for which we plead guilty and +apologize. -* :doc:`../extension/docker`: run transformation graphs in isolated containers. -* :doc:`../extension/jupyter`: run transformations within jupyter notebooks. -* :doc:`../extension/selenium`: crawl the web using a real browser and work with the gathered data. -* :doc:`../extension/sqlalchemy`: everything you need to interract with SQL databases. +If you're stuck, please come to the `Bonobo Slack Channel `_ and we'll figure it +out. + +If you're not stuck but had trouble understanding something, please consider contributing to the docs (using GitHub +pull requests). diff --git a/docs/tutorial/notebooks.rst b/docs/tutorial/notebooks.rst new file mode 100644 index 0000000..ed59121 --- /dev/null +++ b/docs/tutorial/notebooks.rst @@ -0,0 +1,4 @@ +Working with Jupyter Notebooks +============================== + + diff --git a/docs/tutorial/sqlalchemy.rst b/docs/tutorial/sqlalchemy.rst new file mode 100644 index 0000000..359fbd5 --- /dev/null +++ b/docs/tutorial/sqlalchemy.rst @@ -0,0 +1,4 @@ +Working with SQL Databases +========================== + +