From 650b49a41a41012b583fc0188dc911645195a25d Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 22 Oct 2017 09:24:34 +0200 Subject: [PATCH 01/14] [django, google] Implements basic extensions for django and google oauth systems. Using those extensions means you have the correct dependencies installed, and that you know about the external system. Django: just provide an ETLCommand class that contains all the shortcuts to write django management commands based on Bonobo. Google: shortcuts to create the necessary objects for oauth flow, with local caching of credentials. Both those extensions are not stable and will evolve. --- bonobo/events.py | 3 +++ bonobo/execution/base.py | 12 +-------- bonobo/execution/graph.py | 9 +++++++ bonobo/execution/plugin.py | 6 ++--- bonobo/ext/console.py | 45 ++++++++++++++++++++++----------- bonobo/ext/django.py | 47 +++++++++++++++++++++++++++++++++++ bonobo/ext/google.py | 43 ++++++++++++++++++++++++++++++++ bonobo/plugins.py | 28 +-------------------- bonobo/strategies/executor.py | 1 + docs/conf.py | 5 ++-- 10 files changed, 141 insertions(+), 58 deletions(-) create mode 100644 bonobo/events.py create mode 100644 bonobo/ext/django.py create mode 100644 bonobo/ext/google.py diff --git a/bonobo/events.py b/bonobo/events.py new file mode 100644 index 0000000..9a0cbba --- /dev/null +++ b/bonobo/events.py @@ -0,0 +1,3 @@ +ON_START = 'bonobo.on_start' +ON_TICK = 'bonobo.on_tick' +ON_STOP = 'bonobo.on_stop' diff --git a/bonobo/execution/base.py b/bonobo/execution/base.py index 81ac74e..b9bce36 100644 --- a/bonobo/execution/base.py +++ b/bonobo/execution/base.py @@ -4,8 +4,7 @@ from time import sleep from bonobo.config import create_container from bonobo.config.processors import ContextCurrifier -from bonobo.plugins import get_enhancers -from bonobo.util import inspect_node, isconfigurabletype +from bonobo.util import isconfigurabletype from bonobo.util.errors import print_error from bonobo.util.objects import Wrapper, get_name @@ -56,9 +55,6 @@ class LoopingExecutionContext(Wrapper): self._started, self._stopped = False, False self._stack = None - # XXX enhancers - self._enhancers = get_enhancers(self.wrapped) - def __enter__(self): self.start() return self @@ -79,15 +75,9 @@ class LoopingExecutionContext(Wrapper): raise TypeError( 'The Configurable should be fully instanciated by now, unfortunately I got a PartiallyConfigured object...' ) - # XXX enhance that, maybe giving hints on what's missing. - # print(inspect_node(self.wrapped)) self._stack.setup(self) - for enhancer in self._enhancers: - with unrecoverable(self.handle_error): - enhancer.start(self) - def loop(self): """Generic loop. A bit boring. """ while self.alive: diff --git a/bonobo/execution/graph.py b/bonobo/execution/graph.py index 77e01fa..33e77bc 100644 --- a/bonobo/execution/graph.py +++ b/bonobo/execution/graph.py @@ -5,6 +5,7 @@ from bonobo.config import create_container from bonobo.constants import BEGIN, END from bonobo.execution.node import NodeExecutionContext from bonobo.execution.plugin import PluginExecutionContext +from whistle import EventDispatcher class GraphExecutionContext: @@ -23,6 +24,14 @@ class GraphExecutionContext: def alive(self): return any(node.alive for node in self.nodes) + @property + def dispatcher(self): + try: + return self._dispatcher + except AttributeError: + self._dispatcher = EventDispatcher() + return self._dispatcher + def __init__(self, graph, plugins=None, services=None): self.graph = graph self.nodes = [self.create_node_execution_context_for(node) for node in self.graph] diff --git a/bonobo/execution/plugin.py b/bonobo/execution/plugin.py index a207f23..3379fc0 100644 --- a/bonobo/execution/plugin.py +++ b/bonobo/execution/plugin.py @@ -13,14 +13,14 @@ class PluginExecutionContext(LoopingExecutionContext): super().start() with recoverable(self.handle_error): - self.wrapped.initialize() + self.wrapped.on_start() def shutdown(self): if self.started: with recoverable(self.handle_error): - self.wrapped.finalize() + self.wrapped.on_stop() self.alive = False def step(self): with recoverable(self.handle_error): - self.wrapped.run() + self.wrapped.on_tick() diff --git a/bonobo/ext/console.py b/bonobo/ext/console.py index 1053e04..02bcafa 100644 --- a/bonobo/ext/console.py +++ b/bonobo/ext/console.py @@ -1,6 +1,6 @@ import io import sys -from contextlib import redirect_stdout +from contextlib import redirect_stdout, redirect_stderr from colorama import Style, Fore, init @@ -50,35 +50,50 @@ class ConsoleOutputPlugin(Plugin): """ - def initialize(self): + # Standard outputs descriptors backup here, also used to override if needed. + _stdout = sys.stdout + _stderr = sys.stderr + + # When the plugin is started, we'll set the real value of this. + isatty = False + + # Whether we're on windows, or a real operating system. + iswindows = (sys.platform == 'win32') + + def on_start(self): self.prefix = '' self.counter = 0 self._append_cache = '' - self.isatty = sys.stdout.isatty() - self.iswindows = (sys.platform == 'win32') - self._stdout = sys.stdout + self.isatty = self._stdout.isatty() + self.stdout = IOBuffer() self.redirect_stdout = redirect_stdout(self._stdout if self.iswindows else self.stdout) self.redirect_stdout.__enter__() - def run(self): + self.stderr = IOBuffer() + self.redirect_stderr = redirect_stderr(self._stderr if self.iswindows else self.stderr) + self.redirect_stderr.__enter__() + + def on_tick(self): if self.isatty and not self.iswindows: self._write(self.context.parent, rewind=True) else: pass # not a tty, or windows, so we'll ignore stats output - def finalize(self): + def on_stop(self): self._write(self.context.parent, rewind=False) + self.redirect_stderr.__exit__(None, None, None) self.redirect_stdout.__exit__(None, None, None) def write(self, context, prefix='', rewind=True, append=None): t_cnt = len(context) if not self.iswindows: - buffered = self.stdout.switch() - for line in buffered.split('\n')[:-1]: - print(line + CLEAR_EOL, file=sys.stderr) + for line in self.stdout.switch().split('\n')[:-1]: + print(line + CLEAR_EOL, file=self._stdout) + for line in self.stderr.switch().split('\n')[:-1]: + print(line + CLEAR_EOL, file=self._stderr) alive_color = Style.BRIGHT dead_color = Style.BRIGHT + Fore.BLACK @@ -117,7 +132,7 @@ class ConsoleOutputPlugin(Plugin): ' ', ) ) - print(prefix + _line + '\033[0K', file=sys.stderr) + print(prefix + _line + CLEAR_EOL, file=self._stderr) if append: # todo handle multiline @@ -128,13 +143,13 @@ class ConsoleOutputPlugin(Plugin): CLEAR_EOL ) ), - file=sys.stderr + file=self._stderr ) t_cnt += 1 if rewind: - print(CLEAR_EOL, file=sys.stderr) - print(MOVE_CURSOR_UP(t_cnt + 2), file=sys.stderr) + print(CLEAR_EOL, file=self._stderr) + print(MOVE_CURSOR_UP(t_cnt + 2), file=self._stderr) def _write(self, graph_context, rewind): if settings.PROFILE.get(): @@ -154,4 +169,4 @@ class ConsoleOutputPlugin(Plugin): def memory_usage(): import os, psutil process = psutil.Process(os.getpid()) - return process.memory_info()[0] / float(2**20) + return process.memory_info()[0] / float(2 ** 20) diff --git a/bonobo/ext/django.py b/bonobo/ext/django.py new file mode 100644 index 0000000..1bd3fff --- /dev/null +++ b/bonobo/ext/django.py @@ -0,0 +1,47 @@ +from colorama import Fore, Back, Style +from django.core.management.base import BaseCommand, OutputWrapper +from logging import getLogger + +import bonobo +import bonobo.util +from bonobo.commands.run import get_default_services +from bonobo.ext.console import ConsoleOutputPlugin +from bonobo.util.term import CLEAR_EOL + +class ETLCommand(BaseCommand): + GraphType = bonobo.Graph + + def get_graph(self, *args, **options): + def not_implemented(): + raise NotImplementedError('You must implement {}.get_graph() method.'.format(self)) + + return self.GraphType(not_implemented) + + def get_services(self): + return get_default_services(type(self).__file__) + + @property + def logger(self): + try: + return self._logger + except AttributeError: + self._logger = getLogger(type(self).__module__) + return self._logger + + def info(self, *args, **kwargs): + self.logger.info(*args, **kwargs) + + def handle(self, *args, **options): + _stdout_backup, _stderr_backup = self.stdout, self.stderr + + self.stdout = OutputWrapper(ConsoleOutputPlugin._stdout, ending=CLEAR_EOL + '\n') + self.stderr = OutputWrapper(ConsoleOutputPlugin._stderr, ending=CLEAR_EOL + '\n') + self.stderr.style_func = lambda x: Fore.LIGHTRED_EX + Back.RED + '!' + Style.RESET_ALL + ' ' + x + + result = bonobo.run( + self.get_graph(*args, **options), + services=self.get_services(), + ) + self.stdout = _stdout_backup + + return '\nReturn Value: ' + str(result) diff --git a/bonobo/ext/google.py b/bonobo/ext/google.py new file mode 100644 index 0000000..920af9d --- /dev/null +++ b/bonobo/ext/google.py @@ -0,0 +1,43 @@ +import os + +import httplib2 +from apiclient import discovery +from oauth2client import client, tools +from oauth2client.file import Storage +from oauth2client.tools import argparser + +HOME_DIR = os.path.expanduser('~') +GOOGLE_SCOPES = ('https://www.googleapis.com/auth/spreadsheets',) +GOOGLE_SECRETS = os.path.join(HOME_DIR, '.cache/secrets/client_secrets.json') + + +def get_credentials(): + """Gets valid user credentials from storage. + + If nothing has been stored, or if the stored credentials are invalid, + the OAuth2 flow is completed to obtain the new credentials. + + Returns: + Credentials, the obtained credential. + """ + credential_dir = os.path.join(HOME_DIR, '.cache', __package__, 'credentials') + if not os.path.exists(credential_dir): + os.makedirs(credential_dir) + credential_path = os.path.join(credential_dir, 'googleapis.json') + + store = Storage(credential_path) + credentials = store.get() + if not credentials or credentials.invalid: + flow = client.flow_from_clientsecrets(GOOGLE_SECRETS, GOOGLE_SCOPES) + flow.user_agent = 'Bonobo ETL (https://www.bonobo-project.org/)' + flags = argparser.parse_args(['--noauth_local_webserver']) + credentials = tools.run_flow(flow, store, flags) + print('Storing credentials to ' + credential_path) + return credentials + + +def get_google_spreadsheets_api_client(): + credentials = get_credentials() + http = credentials.authorize(httplib2.Http()) + discoveryUrl = 'https://sheets.googleapis.com/$discovery/rest?version=v4' + return discovery.build('sheets', 'v4', http=http, discoveryServiceUrl=discoveryUrl, cache_discovery=False) diff --git a/bonobo/plugins.py b/bonobo/plugins.py index 4fa1e18..7a0f5d1 100644 --- a/bonobo/plugins.py +++ b/bonobo/plugins.py @@ -1,7 +1,3 @@ -from bonobo.config import Configurable -from bonobo.util.objects import get_attribute_or_create - - class Plugin: """ A plugin is an extension to the core behavior of bonobo. If you're writing transformations, you should not need @@ -11,30 +7,8 @@ class Plugin: respectively permits an interactive output on an ANSI console and a rich output in a jupyter notebook. Warning: THE PLUGIN API IS PRE-ALPHA AND WILL EVOLVE BEFORE 1.0, DO NOT RELY ON IT BEING STABLE! - + """ def __init__(self, context): self.context = context - - def initialize(self): - pass - - def run(self): - pass - - def finalize(self): - pass - - -def get_enhancers(obj): - try: - return get_attribute_or_create(obj, '__enhancers__', list()) - except AttributeError: - return list() - - -class NodeEnhancer(Configurable): - def __matmul__(self, other): - get_enhancers(other).append(self) - return other diff --git a/bonobo/strategies/executor.py b/bonobo/strategies/executor.py index 3bfabc6..57e2524 100644 --- a/bonobo/strategies/executor.py +++ b/bonobo/strategies/executor.py @@ -6,6 +6,7 @@ from bonobo.constants import BEGIN, END from bonobo.strategies.base import Strategy from bonobo.structs.bags import Bag from bonobo.util.errors import print_error +from whistle import EventDispatcher class ExecutorStrategy(Strategy): diff --git a/docs/conf.py b/docs/conf.py index afbbe83..93895a8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,8 +1,9 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import sys +import datetime import os +import sys sys.path.insert(0, os.path.abspath('..')) sys.path.insert(0, os.path.abspath('_themes')) @@ -36,8 +37,8 @@ master_doc = 'index' # General information about the project. project = 'Bonobo' -copyright = '2012-2017, Romain Dorgueil' author = 'Romain Dorgueil' +copyright = '2012-{}, {}'.format(datetime.datetime.now().year, author) # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the From 42c1fee6f112b83809e87cb34b228cffc403e64c Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 22 Oct 2017 16:21:44 +0200 Subject: [PATCH 02/14] [core] (..., dict) means Bag(..., **dict) --- bonobo/execution/node.py | 2 ++ bonobo/ext/django.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index 2aa626c..28a20b3 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -159,6 +159,8 @@ def _resolve(input_bag, output): return Bag(**output) if istuple(output): + if len(output) > 1 and isdict(output[-1]): + return Bag(*output[0:-1], **output[-1]) return Bag(*output) # Either we use arg0 format, either it's "just" a value. diff --git a/bonobo/ext/django.py b/bonobo/ext/django.py index 1bd3fff..232fd5a 100644 --- a/bonobo/ext/django.py +++ b/bonobo/ext/django.py @@ -1,6 +1,7 @@ +from logging import getLogger + from colorama import Fore, Back, Style from django.core.management.base import BaseCommand, OutputWrapper -from logging import getLogger import bonobo import bonobo.util From 0c58d21b12211146f26b0aa4f508d05de4010068 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 22 Oct 2017 16:22:28 +0200 Subject: [PATCH 03/14] [django, misc] adds create_or_update to djangos ETLCommand class, adds getitem/setitem/contains dunders to ValueHolder. --- bonobo/ext/django.py | 25 +++++++++++++++++++++++++ bonobo/util/objects.py | 17 +++++++++++------ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/bonobo/ext/django.py b/bonobo/ext/django.py index 232fd5a..8d73428 100644 --- a/bonobo/ext/django.py +++ b/bonobo/ext/django.py @@ -9,9 +9,34 @@ from bonobo.commands.run import get_default_services from bonobo.ext.console import ConsoleOutputPlugin from bonobo.util.term import CLEAR_EOL + class ETLCommand(BaseCommand): GraphType = bonobo.Graph + def create_or_update(self, model, *, defaults=None, save=True, **kwargs): + """ + Create or update a django model instance. + + :param model: + :param defaults: + :param kwargs: + :return: object, created, updated + + """ + obj, created = model._default_manager.get_or_create(defaults=defaults, **kwargs) + + updated = False + if not created: + for k, v in defaults.items(): + if getattr(obj, k) != v: + setattr(obj, k, v) + updated = True + + if updated and save: + obj.save() + + return obj, created, updated + def get_graph(self, *args, **options): def not_implemented(): raise NotImplementedError('You must implement {}.get_graph() method.'.format(self)) diff --git a/bonobo/util/objects.py b/bonobo/util/objects.py index 34fc6e7..acae2ad 100644 --- a/bonobo/util/objects.py +++ b/bonobo/util/objects.py @@ -1,7 +1,3 @@ -import functools -from functools import partial - - def get_name(mixed): try: return mixed.__name__ @@ -146,10 +142,10 @@ class ValueHolder: return divmod(other, self._value) def __pow__(self, other): - return self._value**other + return self._value ** other def __rpow__(self, other): - return other**self._value + return other ** self._value def __ipow__(self, other): self._value **= other @@ -220,6 +216,15 @@ class ValueHolder: def __len__(self): return len(self._value) + def __contains__(self, item): + return item in self._value + + def __getitem__(self, item): + return self._value[item] + + def __setitem__(self, key, value): + self._value[key] = value + def get_attribute_or_create(obj, attr, default): try: From 321bb83aa4e2174f9db28049c824176ed473750a Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 22 Oct 2017 16:57:46 +0200 Subject: [PATCH 04/14] Documentation for new behaviour. --- docs/guide/transformations.rst | 94 ++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/docs/guide/transformations.rst b/docs/guide/transformations.rst index e108a44..5b6b954 100644 --- a/docs/guide/transformations.rst +++ b/docs/guide/transformations.rst @@ -32,6 +32,100 @@ Iterable Something we can iterate on, in python, so basically anything you'd be able to use in a `for` loop. +Concepts +:::::::: + +Whatever kind of transformation you want to use, there are a few common concepts you should know about. + +Input +----- + +All input is retrieved via the call arguments. Each line of input means one call to the callable provided. Arguments +will be, in order: + +* Injected dependencies (database, http, filesystem, ...) +* Position based arguments +* Keyword based arguments + +You'll see below how to pass each of those. + +Output +------ + +Each callable can return/yield different things (all examples will use yield, but if there is only one output per input +line, you can also return your output row and expect the exact same behaviour). + +Let's see the rules (first to match wins). + +1. A flag, eventually followed by something else, marks a special behaviour. If it supports it, the remaining part of + the output line will be interpreted using the same rules, and some flags can be combined. + + **NOT_MODIFIED** + + **NOT_MODIFIED** tells bonobo to use the input row unmodified as the output. + + *CANNOT be combined* + + Example: + + .. code-block:: python + + from bonobo import NOT_MODIFIED + + def output_will_be_same_as_input(*args, **kwargs): + yield NOT_MODIFIED + + **APPEND** + + **APPEND** tells bonobo to append this output to the input (positional arguments will equal `input_args + output_args`, + keyword arguments will equal `{**input_kwargs, **output_kwargs}`). + + *CAN be combined, but not with itself* + + .. code-block:: python + + from bonobo import APPEND + + def output_will_be_appended_to_input(*args, **kwargs): + yield APPEND, 'foo', 'bar', {'eat_at': 'joe'} + + **LOOPBACK** + + **LOOPBACK** tells bonobo that this output must be looped back into our own input queue, allowing to create the stream + processing version of recursive algorithms. + + *CAN be combined, but not with itself* + + .. code-block:: python + + from bonobo import LOOPBACK + + def output_will_be_sent_to_self(*args, **kwargs): + yield LOOPBACK, 'Hello, I am the future "you".' + + **CHANNEL(...)** + + **CHANNEL(...)** tells bonobo that this output does not use the default channel and is routed through another path. + This is something you should probably not use unless your data flow design is complex, and if you're not certain + about it, it probably means that it is not the feature you're looking for. + + *CAN be combined, but not with itself* + + .. code-block:: python + + from bonobo import CHANNEL + + def output_will_be_sent_to_self(*args, **kwargs): + yield CHANNEL("errors"), 'That is not cool.' + +2. Once all flags are "consumed", the remaining part is interpreted. + + * If it is a :class:`bonobo.Bag` instance, then it's used directly. + * If it is a :class:`dict` then a kwargs-only :class:`bonobo.Bag` will be created. + * If it is a :class:`tuple` then an args-only :class:`bonobo.Bag` will be created, unless its last argument is a + :class:`dict` in which case a args+kwargs :class:`bonobo.Bag` will be created. + * If it's something else, it will be used to create a one-arg-only :class:`bonobo.Bag`. + Function based transformations :::::::::::::::::::::::::::::: From 80006ba24dde40df616a47cefa85e185550dc914 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 22 Oct 2017 17:23:19 +0200 Subject: [PATCH 05/14] [tests] fix uncaptured output in test_commands --- tests/test_commands.py | 147 ++++++++++++++++++++--------------------- 1 file changed, 71 insertions(+), 76 deletions(-) diff --git a/tests/test_commands.py b/tests/test_commands.py index 1b09a45..605892b 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -1,6 +1,9 @@ +import functools +import io import os import runpy import sys +from contextlib import redirect_stdout, redirect_stderr from unittest.mock import patch import pkg_resources @@ -10,12 +13,27 @@ from bonobo import __main__, __version__, get_examples_path from bonobo.commands import entrypoint -def runner_entrypoint(*args): +def runner(f): + @functools.wraps(f) + def wrapped_runner(*args): + with redirect_stdout(io.StringIO()) as stdout, redirect_stderr(io.StringIO()) as stderr: + try: + f(list(args)) + except BaseException as exc: + return stdout.getvalue(), stderr.getvalue(), exc + return stdout.getvalue(), stderr.getvalue() + + return wrapped_runner + + +@runner +def runner_entrypoint(args): """ Run bonobo using the python command entrypoint directly (bonobo.commands.entrypoint). """ - return entrypoint(list(args)) + return entrypoint(args) -def runner_module(*args): +@runner +def runner_module(args): """ Run bonobo using the bonobo.__main__ file, which is equivalent as doing "python -m bonobo ...".""" with patch.object(sys, 'argv', ['bonobo', *args]): return runpy.run_path(__main__.__file__, run_name='__main__') @@ -40,17 +58,15 @@ def test_entrypoint(): @all_runners -def test_no_command(runner, capsys): - with pytest.raises(SystemExit): - runner() - _, err = capsys.readouterr() +def test_no_command(runner): + _, err, exc = runner() + assert type(exc) == SystemExit assert 'error: the following arguments are required: command' in err @all_runners -def test_run(runner, capsys): - runner('run', '--quiet', get_examples_path('types/strings.py')) - out, err = capsys.readouterr() +def test_run(runner): + out, err = runner('run', '--quiet', get_examples_path('types/strings.py')) out = out.split('\n') assert out[0].startswith('Foo ') assert out[1].startswith('Bar ') @@ -58,9 +74,8 @@ def test_run(runner, capsys): @all_runners -def test_run_module(runner, capsys): - runner('run', '--quiet', '-m', 'bonobo.examples.types.strings') - out, err = capsys.readouterr() +def test_run_module(runner): + out, err = runner('run', '--quiet', '-m', 'bonobo.examples.types.strings') out = out.split('\n') assert out[0].startswith('Foo ') assert out[1].startswith('Bar ') @@ -68,9 +83,8 @@ def test_run_module(runner, capsys): @all_runners -def test_run_path(runner, capsys): - runner('run', '--quiet', get_examples_path('types')) - out, err = capsys.readouterr() +def test_run_path(runner): + out, err = runner('run', '--quiet', get_examples_path('types')) out = out.split('\n') assert out[0].startswith('Foo ') assert out[1].startswith('Bar ') @@ -94,9 +108,8 @@ def test_install_requirements_for_file(runner): @all_runners -def test_version(runner, capsys): - runner('version') - out, err = capsys.readouterr() +def test_version(runner): + out, err = runner('version') out = out.strip() assert out.startswith('bonobo ') assert __version__ in out @@ -104,48 +117,44 @@ def test_version(runner, capsys): @all_runners class TestDefaultEnvFile(object): - def test_run_file_with_default_env_file(self, runner, capsys): - runner( + def test_run_file_with_default_env_file(self, runner): + out, err = runner( 'run', '--quiet', '--default-env-file', '.env_one', get_examples_path('environment/env_files/get_passed_env_file.py') ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == '321' assert out[1] == 'sweetpassword' assert out[2] != 'marzo' - def test_run_file_with_multiple_default_env_files(self, runner, capsys): - runner( + def test_run_file_with_multiple_default_env_files(self, runner): + out, err = runner( 'run', '--quiet', '--default-env-file', '.env_one', '--default-env-file', '.env_two', get_examples_path('environment/env_files/get_passed_env_file.py') ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == '321' assert out[1] == 'sweetpassword' assert out[2] != 'marzo' - def test_run_module_with_default_env_file(self, runner, capsys): - runner( + def test_run_module_with_default_env_file(self, runner): + out, err = runner( 'run', '--quiet', '-m', 'bonobo.examples.environment.env_files.get_passed_env_file', '--default-env-file', '.env_one' ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == '321' assert out[1] == 'sweetpassword' assert out[2] != 'marzo' - def test_run_module_with_multiple_default_env_files(self, runner, capsys): - runner( + def test_run_module_with_multiple_default_env_files(self, runner): + out, err = runner( 'run', '--quiet', '-m', 'bonobo.examples.environment.env_files.get_passed_env_file', '--default-env-file', '.env_one', '--default-env-file', '.env_two', ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == '321' assert out[1] == 'sweetpassword' @@ -154,49 +163,45 @@ class TestDefaultEnvFile(object): @all_runners class TestEnvFile(object): - def test_run_file_with_file(self, runner, capsys): - runner( + def test_run_file_with_file(self, runner): + out, err = runner( 'run', '--quiet', get_examples_path('environment/env_files/get_passed_env_file.py'), '--env-file', '.env_one', ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == '321' assert out[1] == 'sweetpassword' assert out[2] == 'marzo' - def test_run_file_with_multiple_files(self, runner, capsys): - runner( + def test_run_file_with_multiple_files(self, runner): + out, err = runner( 'run', '--quiet', get_examples_path('environment/env_files/get_passed_env_file.py'), '--env-file', '.env_one', '--env-file', '.env_two', ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == '321' assert out[1] == 'not_sweet_password' assert out[2] == 'abril' - def test_run_module_with_file(self, runner, capsys): - runner( + def test_run_module_with_file(self, runner): + out, err = runner( 'run', '--quiet', '-m', 'bonobo.examples.environment.env_files.get_passed_env_file', '--env-file', '.env_one', ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == '321' assert out[1] == 'sweetpassword' assert out[2] == 'marzo' - def test_run_module_with_multiple_files(self, runner, capsys): - runner( + def test_run_module_with_multiple_files(self, runner): + out, err = runner( 'run', '--quiet', '-m', 'bonobo.examples.environment.env_files.get_passed_env_file', '--env-file', '.env_one', '--env-file', '.env_two', ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == '321' assert out[1] == 'not_sweet_password' @@ -204,28 +209,26 @@ class TestEnvFile(object): @all_runners -class TestEnvFileCombinations(object): - def test_run_file_with_default_env_file_and_env_file(self, runner, capsys): - runner( +class TestEnvFileCombinations: + def test_run_file_with_default_env_file_and_env_file(self, runner): + out, err = runner( 'run', '--quiet', get_examples_path('environment/env_files/get_passed_env_file.py'), '--default-env-file', '.env_one', '--env-file', '.env_two', ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == '321' assert out[1] == 'not_sweet_password' assert out[2] == 'abril' - def test_run_file_with_default_env_file_and_env_file_and_env_vars(self, runner, capsys): - runner( + def test_run_file_with_default_env_file_and_env_file_and_env_vars(self, runner): + out, err = runner( 'run', '--quiet', get_examples_path('environment/env_files/get_passed_env_file.py'), '--default-env-file', '.env_one', '--env-file', '.env_two', '--env', 'TEST_USER_PASSWORD=SWEETpassWORD', '--env', 'MY_SECRET=444', ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == '444' assert out[1] == 'SWEETpassWORD' @@ -233,54 +236,50 @@ class TestEnvFileCombinations(object): @all_runners -class TestDefaultEnvVars(object): - def test_run_file_with_default_env_var(self, runner, capsys): - runner( +class TestDefaultEnvVars: + def test_run_file_with_default_env_var(self, runner): + out, err = runner( 'run', '--quiet', get_examples_path('environment/env_vars/get_passed_env.py'), '--default-env', 'USER=clowncity', '--env', 'USER=ted' ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == 'user' assert out[1] == 'number' assert out[2] == 'string' assert out[3] != 'clowncity' - def test_run_file_with_default_env_vars(self, runner, capsys): - runner( + def test_run_file_with_default_env_vars(self, runner): + out, err = runner( 'run', '--quiet', get_examples_path('environment/env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--default-env', "ENV_TEST_STRING='my_test_string'" ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == 'cwandrews' assert out[1] == '123' assert out[2] == 'my_test_string' - def test_run_module_with_default_env_var(self, runner, capsys): - runner( + def test_run_module_with_default_env_var(self, runner): + out, err = runner( 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', 'ENV_TEST_NUMBER=123', '--default-env', 'ENV_TEST_STRING=string' ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == 'cwandrews' assert out[1] == '123' assert out[2] != 'string' - def test_run_module_with_default_env_vars(self, runner, capsys): - runner( + def test_run_module_with_default_env_vars(self, runner): + out, err = runner( 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--default-env', "ENV_TEST_STRING='string'" ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == 'cwandrews' assert out[1] == '123' @@ -288,52 +287,48 @@ class TestDefaultEnvVars(object): @all_runners -class TestEnvVars(object): - def test_run_file_with_env_var(self, runner, capsys): - runner( +class TestEnvVars: + def test_run_file_with_env_var(self, runner): + out, err = runner( 'run', '--quiet', get_examples_path('environment/env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123' ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] != 'test_user' assert out[1] == '123' assert out[2] == 'my_test_string' - def test_run_file_with_env_vars(self, runner, capsys): - runner( + def test_run_file_with_env_vars(self, runner): + out, err = runner( 'run', '--quiet', get_examples_path('environment/env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == 'cwandrews' assert out[1] == '123' assert out[2] == 'my_test_string' - def test_run_module_with_env_var(self, runner, capsys): - runner( + def test_run_module_with_env_var(self, runner): + out, err = runner( 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', 'ENV_TEST_NUMBER=123' ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == 'cwandrews' assert out[1] == '123' assert out[2] == 'my_test_string' - def test_run_module_with_env_vars(self, runner, capsys): - runner( + def test_run_module_with_env_vars(self, runner): + out, err = runner( 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" ) - out, err = capsys.readouterr() out = out.split('\n') assert out[0] == 'cwandrews' assert out[1] == '123' From bc01b5d404ed7a79ee6eb6b20cae2c5bf84353eb Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 22 Oct 2017 17:37:49 +0200 Subject: [PATCH 06/14] [core] simplification of result interpretation. --- .../env_files/get_passed_env_file.py | 4 +++- .../environment/env_vars/get_passed_env.py | 6 ++++- bonobo/execution/node.py | 22 ++++++++---------- bonobo/util/iterators.py | 9 -------- tests/execution/test_node.py | 23 +++++++++++++++++++ 5 files changed, 41 insertions(+), 23 deletions(-) diff --git a/bonobo/examples/environment/env_files/get_passed_env_file.py b/bonobo/examples/environment/env_files/get_passed_env_file.py index bb83e67..bb45540 100644 --- a/bonobo/examples/environment/env_files/get_passed_env_file.py +++ b/bonobo/examples/environment/env_files/get_passed_env_file.py @@ -8,7 +8,9 @@ def extract(): test_user_password = os.getenv('TEST_USER_PASSWORD') path = os.getenv('PATH') - return my_secret, test_user_password, path + yield my_secret + yield test_user_password + yield path def load(s: str): diff --git a/bonobo/examples/environment/env_vars/get_passed_env.py b/bonobo/examples/environment/env_vars/get_passed_env.py index f236ba7..e0c6c45 100644 --- a/bonobo/examples/environment/env_vars/get_passed_env.py +++ b/bonobo/examples/environment/env_vars/get_passed_env.py @@ -8,7 +8,11 @@ def extract(): env_test_number = os.getenv('ENV_TEST_NUMBER', 'number') env_test_string = os.getenv('ENV_TEST_STRING', 'string') env_user = os.getenv('USER') - return env_test_user, env_test_number, env_test_string, env_user + + yield env_test_user + yield env_test_number + yield env_test_string + yield env_user def load(s: str): diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index 28a20b3..c943ba0 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -1,6 +1,7 @@ import traceback from queue import Empty from time import sleep +from types import GeneratorType from bonobo import settings from bonobo.constants import INHERIT_INPUT, NOT_MODIFIED, BEGIN, END @@ -10,7 +11,6 @@ from bonobo.structs.bags import Bag from bonobo.structs.inputs import Input from bonobo.util import get_name, iserrorbag, isloopbackbag, isdict, istuple from bonobo.util.compat import deprecated_alias -from bonobo.util.iterators import iter_if_not_sequence from bonobo.util.statistics import WithStatistics @@ -120,23 +120,21 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): def handle_results(self, input_bag, results): # self._exec_time += timer.duration # Put data onto output channels - try: - results = iter_if_not_sequence(results) - except TypeError: # not an iterator - if results: - self.send(_resolve(input_bag, results)) - else: - # case with no result, an execution went through anyway, use for stats. - # self._exec_count += 1 - pass - else: - while True: # iterator + + if isinstance(results, GeneratorType): + while True: try: result = next(results) except StopIteration: break else: self.send(_resolve(input_bag, result)) + elif results: + self.send(_resolve(input_bag, results)) + else: + # case with no result, an execution went through anyway, use for stats. + # self._exec_count += 1 + pass def _resolve(input_bag, output): diff --git a/bonobo/util/iterators.py b/bonobo/util/iterators.py index ee45614..1ed09ac 100644 --- a/bonobo/util/iterators.py +++ b/bonobo/util/iterators.py @@ -37,12 +37,3 @@ def tuplize(generator): return tuplized -def iter_if_not_sequence(mixed): - if isinstance(mixed, ( - dict, - list, - str, - bytes, - )): - raise TypeError(type(mixed).__name__) - return iter(mixed) diff --git a/tests/execution/test_node.py b/tests/execution/test_node.py index 23748d4..7870323 100644 --- a/tests/execution/test_node.py +++ b/tests/execution/test_node.py @@ -102,3 +102,26 @@ def test_node_dict_chained(): assert len(output) == 2 assert output[0] == {'id': 1, 'name': 'FOO'} assert output[1] == {'id': 2, 'name': 'BAR'} + +def test_node_tuple(): + def f(): + return 'foo', 'bar' + + with BufferingNodeExecutionContext(f) as context: + context.write_sync(Bag()) + output = context.get_buffer() + + assert len(output) == 1 + assert output[0] == ('foo', 'bar') + + def g(): + yield 'foo', 'bar' + yield 'foo', 'baz' + + with BufferingNodeExecutionContext(g) as context: + context.write_sync(Bag()) + output = context.get_buffer() + + assert len(output) == 2 + assert output[0] == ('foo', 'bar') + assert output[1] == ('foo', 'baz') From 28fe41c0bdd0d7d8f4784da540650d5c192575b0 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 22 Oct 2017 18:00:16 +0200 Subject: [PATCH 07/14] [core] Testing and fixing new args/kwargs behaviour. --- bonobo/structs/bags.py | 27 ++++++++-------- tests/execution/test_node.py | 63 ++++++++++++++++++++++++++++++++---- tests/structs/test_bags.py | 17 +++++++++- 3 files changed, 87 insertions(+), 20 deletions(-) diff --git a/bonobo/structs/bags.py b/bonobo/structs/bags.py index 20db9fa..22b507d 100644 --- a/bonobo/structs/bags.py +++ b/bonobo/structs/bags.py @@ -96,7 +96,7 @@ class Bag: @classmethod def inherit(cls, *args, **kwargs): - return cls(*args, _flags=(INHERIT_INPUT, ), **kwargs) + return cls(*args, _flags=(INHERIT_INPUT,), **kwargs) def __eq__(self, other): # XXX there are overlapping cases, but this is very handy for now. Let's think about it later. @@ -105,23 +105,24 @@ class Bag: if isinstance(other, Bag) and other.args == self.args and other.kwargs == self.kwargs: return True - # tuple of (tuple, dict) - if isinstance(other, tuple) and len(other) == 2 and other[0] == self.args and other[1] == self.kwargs: - return True + # tuple + if isinstance(other, tuple): + # self == () + if not len(other): + return not self.args and not self.kwargs - # tuple (aka args) - if isinstance(other, tuple) and other == self.args: - return True + if isinstance(other[-1], dict): + # self == (*args, {**kwargs}) ? + return other[:-1] == self.args and other[-1] == self.kwargs + + # self == (*args) ? + return other == self.args and not self.kwargs # dict (aka kwargs) if isinstance(other, dict) and not self.args and other == self.kwargs: return True - # arg0 - if len(self.args) == 1 and not len(self.kwargs) and self.args[0] == other: - return True - - return False + return len(self.args) == 1 and not self.kwargs and self.args[0] == other def __repr__(self): return '<{} ({})>'.format( @@ -135,7 +136,7 @@ class Bag: class LoopbackBag(Bag): - default_flags = (LOOPBACK, ) + default_flags = (LOOPBACK,) class ErrorBag(Bag): diff --git a/tests/execution/test_node.py b/tests/execution/test_node.py index 7870323..cb0f9c3 100644 --- a/tests/execution/test_node.py +++ b/tests/execution/test_node.py @@ -12,7 +12,7 @@ def test_node_string(): output = context.get_buffer() assert len(output) == 1 - assert output[0] == (('foo', ), {}) + assert output[0] == 'foo' def g(): yield 'foo' @@ -23,8 +23,8 @@ def test_node_string(): output = context.get_buffer() assert len(output) == 2 - assert output[0] == (('foo', ), {}) - assert output[1] == (('bar', ), {}) + assert output[0] == 'foo' + assert output[1] == 'bar' def test_node_bytes(): @@ -36,7 +36,7 @@ def test_node_bytes(): output = context.get_buffer() assert len(output) == 1 - assert output[0] == ((b'foo', ), {}) + assert output[0] == b'foo' def g(): yield b'foo' @@ -47,8 +47,8 @@ def test_node_bytes(): output = context.get_buffer() assert len(output) == 2 - assert output[0] == ((b'foo', ), {}) - assert output[1] == ((b'bar', ), {}) + assert output[0] == b'foo' + assert output[1] == b'bar' def test_node_dict(): @@ -125,3 +125,54 @@ def test_node_tuple(): assert len(output) == 2 assert output[0] == ('foo', 'bar') assert output[1] == ('foo', 'baz') + +def test_node_tuple_chained(): + strategy = NaiveStrategy(GraphExecutionContextType=BufferingGraphExecutionContext) + + def uppercase(*args): + return tuple(map(str.upper, args)) + + def f(): + return 'foo', 'bar' + + graph = Graph(f, uppercase) + context = strategy.execute(graph) + output = context.get_buffer() + + assert len(output) == 1 + assert output[0] == ('FOO', 'BAR') + + def g(): + yield 'foo', 'bar' + yield 'foo', 'baz' + + graph = Graph(g, uppercase) + context = strategy.execute(graph) + output = context.get_buffer() + + assert len(output) == 2 + assert output[0] == ('FOO', 'BAR') + assert output[1] == ('FOO', 'BAZ') + +def test_node_tuple_dict(): + def f(): + return 'foo', 'bar', {'id': 1} + + with BufferingNodeExecutionContext(f) as context: + context.write_sync(Bag()) + output = context.get_buffer() + + assert len(output) == 1 + assert output[0] == ('foo', 'bar', {'id': 1}) + + def g(): + yield 'foo', 'bar', {'id': 1} + yield 'foo', 'baz', {'id': 2} + + with BufferingNodeExecutionContext(g) as context: + context.write_sync(Bag()) + output = context.get_buffer() + + assert len(output) == 2 + assert output[0] == ('foo', 'bar', {'id': 1}) + assert output[1] == ('foo', 'baz', {'id': 2}) diff --git a/tests/structs/test_bags.py b/tests/structs/test_bags.py index d52a6c6..53d3ae7 100644 --- a/tests/structs/test_bags.py +++ b/tests/structs/test_bags.py @@ -92,13 +92,28 @@ def test_pickle(): assert unpickled == bag -def test_eq_operator(): +def test_eq_operator_bag(): assert Bag('foo') == Bag('foo') assert Bag('foo') != Bag('bar') assert Bag('foo') is not Bag('foo') assert Bag('foo') != Token('foo') assert Token('foo') != Bag('foo') +def test_eq_operator_tuple_mixed(): + assert Bag('foo', bar='baz') == ('foo', {'bar': 'baz'}) + assert Bag('foo') == ('foo', {}) + assert Bag() == ({}, ) + +def test_eq_operator_tuple_not_mixed(): + assert Bag('foo', 'bar') == ('foo', 'bar') + assert Bag('foo') == ('foo', ) + assert Bag() == () + +def test_eq_operator_dict(): + assert Bag(foo='bar') == {'foo': 'bar'} + assert Bag(foo='bar', corp='acme') == {'foo': 'bar', 'corp': 'acme', } + assert Bag(foo='bar', corp='acme') == {'corp': 'acme', 'foo': 'bar', } + assert Bag() == {} def test_repr(): bag = Bag('a', a=1) From 3c453f0be74dc52d4d8d8a4c64fbc0c30e72f3a6 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 22 Oct 2017 18:05:39 +0200 Subject: [PATCH 08/14] Code formating. --- Makefile | 4 +- bonobo/commands/run.py | 28 ++++++++- bonobo/ext/console.py | 2 +- bonobo/ext/google.py | 2 +- bonobo/structs/bags.py | 4 +- bonobo/util/iterators.py | 2 - bonobo/util/objects.py | 4 +- requirements-docker.txt | 2 +- tests/execution/test_node.py | 3 + tests/structs/test_bags.py | 18 +++++- tests/test_commands.py | 107 ++++++++++++++++++++--------------- 11 files changed, 115 insertions(+), 61 deletions(-) diff --git a/Makefile b/Makefile index fdab6c6..1ea5ea5 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -# This file has been auto-generated by Medikit. All changes will be lost. -# Updated on 2017-10-21. +# Generated by Medikit 0.4a5 on 2017-10-22. +# All changes will be overriden. PACKAGE ?= bonobo PYTHON ?= $(shell which python) diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index ad19230..2359f99 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -46,7 +46,17 @@ def _install_requirements(requirements): importlib.reload(site) -def read(filename, module, install=False, quiet=False, verbose=False, default_env_file=None, default_env=None, env_file=None, env=None): +def read( + filename, + module, + install=False, + quiet=False, + verbose=False, + default_env_file=None, + default_env=None, + env_file=None, + env=None +): import runpy from bonobo import Graph, settings @@ -129,8 +139,20 @@ def set_env_var(e, override=False): os.environ.setdefault(ename, evalue) -def execute(filename, module, install=False, quiet=False, verbose=False, default_env_file=None, default_env=None, env_file=None, env=None): - graph, plugins, services = read(filename, module, install, quiet, verbose, default_env_file, default_env, env_file, env) +def execute( + filename, + module, + install=False, + quiet=False, + verbose=False, + default_env_file=None, + default_env=None, + env_file=None, + env=None +): + graph, plugins, services = read( + filename, module, install, quiet, verbose, default_env_file, default_env, env_file, env + ) return bonobo.run(graph, plugins=plugins, services=services) diff --git a/bonobo/ext/console.py b/bonobo/ext/console.py index 02bcafa..0e6abb3 100644 --- a/bonobo/ext/console.py +++ b/bonobo/ext/console.py @@ -169,4 +169,4 @@ class ConsoleOutputPlugin(Plugin): def memory_usage(): import os, psutil process = psutil.Process(os.getpid()) - return process.memory_info()[0] / float(2 ** 20) + return process.memory_info()[0] / float(2**20) diff --git a/bonobo/ext/google.py b/bonobo/ext/google.py index 920af9d..e8e66b5 100644 --- a/bonobo/ext/google.py +++ b/bonobo/ext/google.py @@ -7,7 +7,7 @@ from oauth2client.file import Storage from oauth2client.tools import argparser HOME_DIR = os.path.expanduser('~') -GOOGLE_SCOPES = ('https://www.googleapis.com/auth/spreadsheets',) +GOOGLE_SCOPES = ('https://www.googleapis.com/auth/spreadsheets', ) GOOGLE_SECRETS = os.path.join(HOME_DIR, '.cache/secrets/client_secrets.json') diff --git a/bonobo/structs/bags.py b/bonobo/structs/bags.py index 22b507d..47194d2 100644 --- a/bonobo/structs/bags.py +++ b/bonobo/structs/bags.py @@ -96,7 +96,7 @@ class Bag: @classmethod def inherit(cls, *args, **kwargs): - return cls(*args, _flags=(INHERIT_INPUT,), **kwargs) + return cls(*args, _flags=(INHERIT_INPUT, ), **kwargs) def __eq__(self, other): # XXX there are overlapping cases, but this is very handy for now. Let's think about it later. @@ -136,7 +136,7 @@ class Bag: class LoopbackBag(Bag): - default_flags = (LOOPBACK,) + default_flags = (LOOPBACK, ) class ErrorBag(Bag): diff --git a/bonobo/util/iterators.py b/bonobo/util/iterators.py index 1ed09ac..5bc8fb2 100644 --- a/bonobo/util/iterators.py +++ b/bonobo/util/iterators.py @@ -35,5 +35,3 @@ def tuplize(generator): return tuple(generator(*args, **kwargs)) return tuplized - - diff --git a/bonobo/util/objects.py b/bonobo/util/objects.py index acae2ad..e4dd29e 100644 --- a/bonobo/util/objects.py +++ b/bonobo/util/objects.py @@ -142,10 +142,10 @@ class ValueHolder: return divmod(other, self._value) def __pow__(self, other): - return self._value ** other + return self._value**other def __rpow__(self, other): - return other ** self._value + return other**self._value def __ipow__(self, other): self._value **= other diff --git a/requirements-docker.txt b/requirements-docker.txt index 9e68208..54bac73 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -1,6 +1,6 @@ -e .[docker] appdirs==1.4.3 -bonobo-docker==0.2.12 +bonobo-docker==0.5.0 certifi==2017.7.27.1 chardet==3.0.4 colorama==0.3.9 diff --git a/tests/execution/test_node.py b/tests/execution/test_node.py index cb0f9c3..fef385c 100644 --- a/tests/execution/test_node.py +++ b/tests/execution/test_node.py @@ -103,6 +103,7 @@ def test_node_dict_chained(): assert output[0] == {'id': 1, 'name': 'FOO'} assert output[1] == {'id': 2, 'name': 'BAR'} + def test_node_tuple(): def f(): return 'foo', 'bar' @@ -126,6 +127,7 @@ def test_node_tuple(): assert output[0] == ('foo', 'bar') assert output[1] == ('foo', 'baz') + def test_node_tuple_chained(): strategy = NaiveStrategy(GraphExecutionContextType=BufferingGraphExecutionContext) @@ -154,6 +156,7 @@ def test_node_tuple_chained(): assert output[0] == ('FOO', 'BAR') assert output[1] == ('FOO', 'BAZ') + def test_node_tuple_dict(): def f(): return 'foo', 'bar', {'id': 1} diff --git a/tests/structs/test_bags.py b/tests/structs/test_bags.py index 53d3ae7..1de04ce 100644 --- a/tests/structs/test_bags.py +++ b/tests/structs/test_bags.py @@ -99,22 +99,36 @@ def test_eq_operator_bag(): assert Bag('foo') != Token('foo') assert Token('foo') != Bag('foo') + def test_eq_operator_tuple_mixed(): assert Bag('foo', bar='baz') == ('foo', {'bar': 'baz'}) assert Bag('foo') == ('foo', {}) assert Bag() == ({}, ) + def test_eq_operator_tuple_not_mixed(): assert Bag('foo', 'bar') == ('foo', 'bar') assert Bag('foo') == ('foo', ) assert Bag() == () + def test_eq_operator_dict(): assert Bag(foo='bar') == {'foo': 'bar'} - assert Bag(foo='bar', corp='acme') == {'foo': 'bar', 'corp': 'acme', } - assert Bag(foo='bar', corp='acme') == {'corp': 'acme', 'foo': 'bar', } + assert Bag( + foo='bar', corp='acme' + ) == { + 'foo': 'bar', + 'corp': 'acme', + } + assert Bag( + foo='bar', corp='acme' + ) == { + 'corp': 'acme', + 'foo': 'bar', + } assert Bag() == {} + def test_repr(): bag = Bag('a', a=1) assert repr(bag) == "" diff --git a/tests/test_commands.py b/tests/test_commands.py index 605892b..e467bb3 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -129,8 +129,7 @@ class TestDefaultEnvFile(object): def test_run_file_with_multiple_default_env_files(self, runner): out, err = runner( - 'run', '--quiet', '--default-env-file', '.env_one', - '--default-env-file', '.env_two', + 'run', '--quiet', '--default-env-file', '.env_one', '--default-env-file', '.env_two', get_examples_path('environment/env_files/get_passed_env_file.py') ) out = out.split('\n') @@ -140,9 +139,8 @@ class TestDefaultEnvFile(object): def test_run_module_with_default_env_file(self, runner): out, err = runner( - 'run', '--quiet', '-m', - 'bonobo.examples.environment.env_files.get_passed_env_file', - '--default-env-file', '.env_one' + 'run', '--quiet', '-m', 'bonobo.examples.environment.env_files.get_passed_env_file', '--default-env-file', + '.env_one' ) out = out.split('\n') assert out[0] == '321' @@ -151,9 +149,14 @@ class TestDefaultEnvFile(object): def test_run_module_with_multiple_default_env_files(self, runner): out, err = runner( - 'run', '--quiet', '-m', + 'run', + '--quiet', + '-m', 'bonobo.examples.environment.env_files.get_passed_env_file', - '--default-env-file', '.env_one', '--default-env-file', '.env_two', + '--default-env-file', + '.env_one', + '--default-env-file', + '.env_two', ) out = out.split('\n') assert out[0] == '321' @@ -165,9 +168,11 @@ class TestDefaultEnvFile(object): class TestEnvFile(object): def test_run_file_with_file(self, runner): out, err = runner( - 'run', '--quiet', + 'run', + '--quiet', get_examples_path('environment/env_files/get_passed_env_file.py'), - '--env-file', '.env_one', + '--env-file', + '.env_one', ) out = out.split('\n') assert out[0] == '321' @@ -176,9 +181,13 @@ class TestEnvFile(object): def test_run_file_with_multiple_files(self, runner): out, err = runner( - 'run', '--quiet', + 'run', + '--quiet', get_examples_path('environment/env_files/get_passed_env_file.py'), - '--env-file', '.env_one', '--env-file', '.env_two', + '--env-file', + '.env_one', + '--env-file', + '.env_two', ) out = out.split('\n') assert out[0] == '321' @@ -187,9 +196,12 @@ class TestEnvFile(object): def test_run_module_with_file(self, runner): out, err = runner( - 'run', '--quiet', '-m', + 'run', + '--quiet', + '-m', 'bonobo.examples.environment.env_files.get_passed_env_file', - '--env-file', '.env_one', + '--env-file', + '.env_one', ) out = out.split('\n') assert out[0] == '321' @@ -198,9 +210,14 @@ class TestEnvFile(object): def test_run_module_with_multiple_files(self, runner): out, err = runner( - 'run', '--quiet', '-m', + 'run', + '--quiet', + '-m', 'bonobo.examples.environment.env_files.get_passed_env_file', - '--env-file', '.env_one', '--env-file', '.env_two', + '--env-file', + '.env_one', + '--env-file', + '.env_two', ) out = out.split('\n') assert out[0] == '321' @@ -212,9 +229,13 @@ class TestEnvFile(object): class TestEnvFileCombinations: def test_run_file_with_default_env_file_and_env_file(self, runner): out, err = runner( - 'run', '--quiet', + 'run', + '--quiet', get_examples_path('environment/env_files/get_passed_env_file.py'), - '--default-env-file', '.env_one', '--env-file', '.env_two', + '--default-env-file', + '.env_one', + '--env-file', + '.env_two', ) out = out.split('\n') assert out[0] == '321' @@ -223,10 +244,16 @@ class TestEnvFileCombinations: def test_run_file_with_default_env_file_and_env_file_and_env_vars(self, runner): out, err = runner( - 'run', '--quiet', + 'run', + '--quiet', get_examples_path('environment/env_files/get_passed_env_file.py'), - '--default-env-file', '.env_one', '--env-file', '.env_two', - '--env', 'TEST_USER_PASSWORD=SWEETpassWORD', '--env', + '--default-env-file', + '.env_one', + '--env-file', + '.env_two', + '--env', + 'TEST_USER_PASSWORD=SWEETpassWORD', + '--env', 'MY_SECRET=444', ) out = out.split('\n') @@ -240,8 +267,8 @@ class TestDefaultEnvVars: def test_run_file_with_default_env_var(self, runner): out, err = runner( 'run', '--quiet', - get_examples_path('environment/env_vars/get_passed_env.py'), - '--default-env', 'USER=clowncity', '--env', 'USER=ted' + get_examples_path('environment/env_vars/get_passed_env.py'), '--default-env', 'USER=clowncity', '--env', + 'USER=ted' ) out = out.split('\n') assert out[0] == 'user' @@ -252,9 +279,8 @@ class TestDefaultEnvVars: def test_run_file_with_default_env_vars(self, runner): out, err = runner( 'run', '--quiet', - get_examples_path('environment/env_vars/get_passed_env.py'), - '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', - '--default-env', "ENV_TEST_STRING='my_test_string'" + get_examples_path('environment/env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123', '--env', + 'ENV_TEST_USER=cwandrews', '--default-env', "ENV_TEST_STRING='my_test_string'" ) out = out.split('\n') assert out[0] == 'cwandrews' @@ -263,10 +289,8 @@ class TestDefaultEnvVars: def test_run_module_with_default_env_var(self, runner): out, err = runner( - 'run', '--quiet', '-m', - 'bonobo.examples.environment.env_vars.get_passed_env', - '--env', 'ENV_TEST_NUMBER=123', - '--default-env', 'ENV_TEST_STRING=string' + 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', + 'ENV_TEST_NUMBER=123', '--default-env', 'ENV_TEST_STRING=string' ) out = out.split('\n') assert out[0] == 'cwandrews' @@ -275,10 +299,8 @@ class TestDefaultEnvVars: def test_run_module_with_default_env_vars(self, runner): out, err = runner( - 'run', '--quiet', '-m', - 'bonobo.examples.environment.env_vars.get_passed_env', - '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', - '--default-env', "ENV_TEST_STRING='string'" + 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', + 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--default-env', "ENV_TEST_STRING='string'" ) out = out.split('\n') assert out[0] == 'cwandrews' @@ -291,8 +313,7 @@ class TestEnvVars: def test_run_file_with_env_var(self, runner): out, err = runner( 'run', '--quiet', - get_examples_path('environment/env_vars/get_passed_env.py'), - '--env', 'ENV_TEST_NUMBER=123' + get_examples_path('environment/env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123' ) out = out.split('\n') assert out[0] != 'test_user' @@ -302,9 +323,8 @@ class TestEnvVars: def test_run_file_with_env_vars(self, runner): out, err = runner( 'run', '--quiet', - get_examples_path('environment/env_vars/get_passed_env.py'), - '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', - '--env', "ENV_TEST_STRING='my_test_string'" + get_examples_path('environment/env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123', '--env', + 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" ) out = out.split('\n') assert out[0] == 'cwandrews' @@ -313,9 +333,8 @@ class TestEnvVars: def test_run_module_with_env_var(self, runner): out, err = runner( - 'run', '--quiet', '-m', - 'bonobo.examples.environment.env_vars.get_passed_env', - '--env', 'ENV_TEST_NUMBER=123' + 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', + 'ENV_TEST_NUMBER=123' ) out = out.split('\n') assert out[0] == 'cwandrews' @@ -324,10 +343,8 @@ class TestEnvVars: def test_run_module_with_env_vars(self, runner): out, err = runner( - 'run', '--quiet', '-m', - 'bonobo.examples.environment.env_vars.get_passed_env', - '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', - '--env', "ENV_TEST_STRING='my_test_string'" + 'run', '--quiet', '-m', 'bonobo.examples.environment.env_vars.get_passed_env', '--env', + 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" ) out = out.split('\n') assert out[0] == 'cwandrews' From 01a652cd05545596b2553dca37de9c6ef5a056d0 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 22 Oct 2017 18:08:03 +0200 Subject: [PATCH 09/14] Remove dispatcher as it is not a dependency, for now, and as such breaks the continuous integration. --- bonobo/execution/graph.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/bonobo/execution/graph.py b/bonobo/execution/graph.py index 33e77bc..77e01fa 100644 --- a/bonobo/execution/graph.py +++ b/bonobo/execution/graph.py @@ -5,7 +5,6 @@ from bonobo.config import create_container from bonobo.constants import BEGIN, END from bonobo.execution.node import NodeExecutionContext from bonobo.execution.plugin import PluginExecutionContext -from whistle import EventDispatcher class GraphExecutionContext: @@ -24,14 +23,6 @@ class GraphExecutionContext: def alive(self): return any(node.alive for node in self.nodes) - @property - def dispatcher(self): - try: - return self._dispatcher - except AttributeError: - self._dispatcher = EventDispatcher() - return self._dispatcher - def __init__(self, graph, plugins=None, services=None): self.graph = graph self.nodes = [self.create_node_execution_context_for(node) for node in self.graph] From f18889830ba3c6d92fc1975da7274326b6da81ee Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 22 Oct 2017 18:11:11 +0200 Subject: [PATCH 10/14] Remove dispatcher as it is not a dependency, for now, and as such breaks the continuous integration (yes, again.). --- bonobo/strategies/executor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bonobo/strategies/executor.py b/bonobo/strategies/executor.py index 57e2524..3bfabc6 100644 --- a/bonobo/strategies/executor.py +++ b/bonobo/strategies/executor.py @@ -6,7 +6,6 @@ from bonobo.constants import BEGIN, END from bonobo.strategies.base import Strategy from bonobo.structs.bags import Bag from bonobo.util.errors import print_error -from whistle import EventDispatcher class ExecutorStrategy(Strategy): From 9a54f7b4aa6358bf8e8a127738735044a5152d88 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 23 Oct 2017 21:18:02 +0200 Subject: [PATCH 11/14] [core] Still refactoring the core behaviour of bags, starting to be much simpler. --- bonobo/commands/convert.py | 113 ++++++------------------------ bonobo/commands/util/__init__.py | 0 bonobo/commands/util/arguments.py | 26 ------- bonobo/execution/node.py | 25 ++----- bonobo/ext/django.py | 2 +- bonobo/ext/opendatasoft.py | 20 +++--- bonobo/nodes/factory.py | 49 +++---------- bonobo/registry.py | 90 ++++++++++++++++++++++++ bonobo/structs/bags.py | 49 ++++++++++++- bonobo/util/__init__.py | 3 +- bonobo/util/collections.py | 44 +++++++++++- bonobo/util/iterators.py | 37 ---------- bonobo/util/resolvers.py | 61 ++++++++++++++++ tests/__init__.py | 0 tests/io/test_csv.py | 4 +- tests/io/test_json.py | 6 +- tests/io/test_pickle.py | 4 +- tests/nodes/factory.py | 66 +++++++++++++++++ tests/structs/test_bags.py | 30 +++++++- tests/util/test_collections.py | 30 ++++++++ tests/util/test_iterators.py | 22 ------ tests/util/test_resolvers.py | 18 +++++ 22 files changed, 437 insertions(+), 262 deletions(-) delete mode 100644 bonobo/commands/util/__init__.py delete mode 100644 bonobo/commands/util/arguments.py create mode 100644 bonobo/registry.py delete mode 100644 bonobo/util/iterators.py create mode 100644 bonobo/util/resolvers.py delete mode 100644 tests/__init__.py create mode 100644 tests/nodes/factory.py create mode 100644 tests/util/test_collections.py delete mode 100644 tests/util/test_iterators.py create mode 100644 tests/util/test_resolvers.py diff --git a/bonobo/commands/convert.py b/bonobo/commands/convert.py index 48caaa3..e9039fd 100644 --- a/bonobo/commands/convert.py +++ b/bonobo/commands/convert.py @@ -1,105 +1,34 @@ -import mimetypes -import os - import bonobo -from bonobo.commands.util.arguments import parse_variable_argument -from bonobo.util import require -from bonobo.util.iterators import tuplize -from bonobo.util.python import WorkingDirectoryModulesRegistry - -SHORTCUTS = { - 'csv': 'text/csv', - 'json': 'application/json', - 'pickle': 'pickle', - 'plain': 'text/plain', - 'text': 'text/plain', - 'txt': 'text/plain', -} - -REGISTRY = { - 'application/json': (bonobo.JsonReader, bonobo.JsonWriter), - 'pickle': (bonobo.PickleReader, bonobo.PickleWriter), - 'text/csv': (bonobo.CsvReader, bonobo.CsvWriter), - 'text/plain': (bonobo.FileReader, bonobo.FileWriter), -} - -READER = 'reader' -WRITER = 'writer' - - -def resolve_factory(name, filename, factory_type, options=None): - """ - Try to resolve which transformation factory to use for this filename. User eventually provided a name, which has - priority, otherwise we try to detect it using the mimetype detection on filename. - - """ - if name is None: - name = mimetypes.guess_type(filename)[0] - - if name in SHORTCUTS: - name = SHORTCUTS[name] - - if name is None: - _, _ext = os.path.splitext(filename) - if _ext: - _ext = _ext[1:] - if _ext in SHORTCUTS: - name = SHORTCUTS[_ext] - - if options: - options = dict(map(parse_variable_argument, options)) - else: - options = dict() - - if not name in REGISTRY: - raise RuntimeError( - 'Could not resolve {factory_type} factory for {filename} ({name}). Try providing it explicitely using -{opt} .'. - format(name=name, filename=filename, factory_type=factory_type, opt=factory_type[0]) - ) - - if factory_type == READER: - return REGISTRY[name][0], options - elif factory_type == WRITER: - return REGISTRY[name][1], options - else: - raise ValueError('Invalid factory type.') - - -@tuplize -def resolve_filters(filters): - registry = WorkingDirectoryModulesRegistry() - for f in filters: - try: - mod, attr = f.split(':', 1) - yield getattr(registry.require(mod), attr) - except ValueError: - yield getattr(bonobo, f) +from bonobo.registry import READER, WRITER, default_registry +from bonobo.util.resolvers import _resolve_transformations, _resolve_options def execute( - input, - output, + input_filename, + output_filename, reader=None, reader_option=None, writer=None, writer_option=None, option=None, - filter=None, + transformation=None, ): - reader_factory, reader_option = resolve_factory(reader, input, READER, (option or []) + (reader_option or [])) + reader_factory = default_registry.get_reader_factory_for(input_filename, format=reader) + reader_options = _resolve_options((option or []) + (reader_option or [])) - if output == '-': - writer_factory, writer_option = bonobo.PrettyPrinter, {} + if output_filename == '-': + writer_factory = bonobo.PrettyPrinter else: - writer_factory, writer_option = resolve_factory(writer, output, WRITER, (option or []) + (writer_option or [])) + writer_factory = default_registry.get_writer_factory_for(output_filename, format=writer) + writer_options = _resolve_options((option or []) + (writer_option or [])) - filters = resolve_filters(filter) + transformations = _resolve_transformations(transformation) graph = bonobo.Graph() graph.add_chain( - reader_factory(input, **reader_option), - *filters, - writer_factory(output, **writer_option), + reader_factory(input_filename, **reader_options), + *transformations, + writer_factory(output_filename, **writer_options), ) return bonobo.run( @@ -110,8 +39,8 @@ def execute( def register(parser): - parser.add_argument('input', help='Input filename.') - parser.add_argument('output', help='Output filename.') + parser.add_argument('input-filename', help='Input filename.') + parser.add_argument('output-filename', help='Output filename.') parser.add_argument( '--' + READER, '-r', @@ -124,11 +53,11 @@ def register(parser): 'Choose the writer factory if it cannot be detected from extension, or if detection is wrong (use - for console pretty print).' ) parser.add_argument( - '--filter', - '-f', - dest='filter', + '--transformation', + '-t', + dest='transformation', action='append', - help='Add a filter between input and output', + help='Add a transformation between input and output (can be used multiple times, order is preserved).', ) parser.add_argument( '--option', diff --git a/bonobo/commands/util/__init__.py b/bonobo/commands/util/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bonobo/commands/util/arguments.py b/bonobo/commands/util/arguments.py deleted file mode 100644 index 435c6f5..0000000 --- a/bonobo/commands/util/arguments.py +++ /dev/null @@ -1,26 +0,0 @@ -import json - - -def parse_variable_argument(arg): - try: - key, val = arg.split('=', 1) - except ValueError: - return arg, True - - try: - val = json.loads(val) - except json.JSONDecodeError: - pass - - return key, val - - -def test_parse_variable_argument(): - assert parse_variable_argument('foo=bar') == ('foo', 'bar') - assert parse_variable_argument('foo="bar"') == ('foo', 'bar') - assert parse_variable_argument('sep=";"') == ('sep', ';') - assert parse_variable_argument('foo') == ('foo', True) - - -if __name__ == '__main__': - test_parse_var() diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index c943ba0..445c2f6 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -3,13 +3,13 @@ from queue import Empty from time import sleep from types import GeneratorType -from bonobo import settings -from bonobo.constants import INHERIT_INPUT, NOT_MODIFIED, BEGIN, END +from bonobo.constants import NOT_MODIFIED, BEGIN, END from bonobo.errors import InactiveReadableError, UnrecoverableError from bonobo.execution.base import LoopingExecutionContext from bonobo.structs.bags import Bag from bonobo.structs.inputs import Input -from bonobo.util import get_name, iserrorbag, isloopbackbag, isdict, istuple +from bonobo.structs.tokens import Token +from bonobo.util import get_name, iserrorbag, isloopbackbag from bonobo.util.compat import deprecated_alias from bonobo.util.statistics import WithStatistics @@ -49,7 +49,7 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): :param mixed value: message """ for message in messages: - self.input.put(message) + self.input.put(message if isinstance(message, (Bag, Token)) else Bag(message)) def write_sync(self, *messages): self.write(BEGIN, *messages, END) @@ -145,21 +145,4 @@ def _resolve(input_bag, output): if iserrorbag(output): return output - # If it does not look like a bag, let's create one for easier manipulation - if hasattr(output, 'apply'): # XXX TODO use isbag() ? - # Already a bag? Check if we need to set parent. - if INHERIT_INPUT in output.flags: - output.set_parent(input_bag) - return output - - # If we're using kwargs ioformat, then a dict means kwargs. - if settings.IOFORMAT == settings.IOFORMAT_KWARGS and isdict(output): - return Bag(**output) - - if istuple(output): - if len(output) > 1 and isdict(output[-1]): - return Bag(*output[0:-1], **output[-1]) - return Bag(*output) - - # Either we use arg0 format, either it's "just" a value. return Bag(output) diff --git a/bonobo/ext/django.py b/bonobo/ext/django.py index 8d73428..06f31a7 100644 --- a/bonobo/ext/django.py +++ b/bonobo/ext/django.py @@ -44,7 +44,7 @@ class ETLCommand(BaseCommand): return self.GraphType(not_implemented) def get_services(self): - return get_default_services(type(self).__file__) + return {} @property def logger(self): diff --git a/bonobo/ext/opendatasoft.py b/bonobo/ext/opendatasoft.py index 2dc54c0..5144e59 100644 --- a/bonobo/ext/opendatasoft.py +++ b/bonobo/ext/opendatasoft.py @@ -14,14 +14,14 @@ def path_str(path): class OpenDataSoftAPI(Configurable): dataset = Option(str, positional=True) - endpoint = Option(str, default='{scheme}://{netloc}{path}') - scheme = Option(str, default='https') - netloc = Option(str, default='data.opendatasoft.com') - path = Option(path_str, default='/api/records/1.0/search/') - rows = Option(int, default=500) + endpoint = Option(str, required=False, default='{scheme}://{netloc}{path}') + scheme = Option(str, required=False, default='https') + netloc = Option(str, required=False, default='data.opendatasoft.com') + path = Option(path_str, required=False, default='/api/records/1.0/search/') + rows = Option(int, required=False, default=500) limit = Option(int, required=False) - timezone = Option(str, default='Europe/Paris') - kwargs = Option(dict, default=dict) + timezone = Option(str, required=False, default='Europe/Paris') + kwargs = Option(dict, required=False, default=dict) @ContextProcessor def compute_path(self, context): @@ -44,7 +44,11 @@ class OpenDataSoftAPI(Configurable): break for row in records: - yield {**row.get('fields', {}), 'geometry': row.get('geometry', {})} + yield { + **row.get('fields', {}), + 'geometry': row.get('geometry', {}), + 'recordid': row.get('recordid'), + } start += self.rows diff --git a/bonobo/nodes/factory.py b/bonobo/nodes/factory.py index 2a1c30b..bd77e03 100644 --- a/bonobo/nodes/factory.py +++ b/bonobo/nodes/factory.py @@ -75,24 +75,24 @@ class Cursor(): self.item = item @operation('dict') - def dict(self, x): + def as_dict(self, x): return x if isinstance(x, dict) else dict(x) @operation('int') - def int(self): - pass + def as_int(self, x): + return x if isinstance(x, int) else int(x) @operation('str') - def str(self, x): + def as_str(self, x): return x if isinstance(x, str) else str(x) @operation('list') - def list(self): - pass + def as_list(self, x): + return x if isinstance(x, list) else list(x) @operation('tuple') - def tuple(self): - pass + def as_tuple(self, x): + return x if isinstance(x, tuple) else tuple(x) def __getattr__(self, item): """ @@ -147,7 +147,7 @@ class Factory(Configurable): def __init__(self, *args, **kwargs): warnings.warn( - __file__ + + type(self).__name__ + ' is experimental, API may change in the future, use it as a preview only and knowing the risks.', FutureWarning ) @@ -180,40 +180,9 @@ class Factory(Configurable): raise RuntimeError('Houston, we have a problem...') def __call__(self, *args, **kwargs): - print('factory call on', args, kwargs) for operation in self.operations: args, kwargs = operation.apply(*args, **kwargs) - print(' ... after', operation, 'got', args, kwargs) return Bag(*args, **kwargs) def __getitem__(self, item): return CURSOR_TYPES[self.default_cursor_type](self, item) - - -if __name__ == '__main__': - f = Factory() - - f[0].dict().map_keys({'foo': 'F00'}) - f['foo'].str().upper() - - print('operations:', f.operations) - print(f({'foo': 'bisou'}, foo='blah')) -''' -specs: - -- rename keys of an input dict (in args, or kwargs) using a translation map. - - -f = Factory() - -f[0] -f['xxx'] = - -f[0].dict().get('foo.bar').move_to('foo.baz').apply(str.upper) -f[0].get('foo.*').items().map(str.lower) - -f['foo'].keys_map({ - 'a': 'b' -}) - -''' diff --git a/bonobo/registry.py b/bonobo/registry.py new file mode 100644 index 0000000..be8d47b --- /dev/null +++ b/bonobo/registry.py @@ -0,0 +1,90 @@ +import mimetypes + +import os + +from bonobo import JsonReader, CsvReader, PickleReader, FileReader, FileWriter, PickleWriter, CsvWriter, JsonWriter + +FILETYPE_CSV = 'text/csv' +FILETYPE_JSON = 'application/json' +FILETYPE_PICKLE = 'pickle' +FILETYPE_PLAIN = 'text/plain' + +READER = 'reader' +WRITER = 'writer' + + +class Registry: + ALIASES = { + 'csv': FILETYPE_CSV, + 'json': FILETYPE_JSON, + 'pickle': FILETYPE_PICKLE, + 'plain': FILETYPE_PLAIN, + 'text': FILETYPE_PLAIN, + 'txt': FILETYPE_PLAIN, + } + + FACTORIES = { + READER: { + FILETYPE_JSON: JsonReader, + FILETYPE_CSV: CsvReader, + FILETYPE_PICKLE: PickleReader, + FILETYPE_PLAIN: FileReader, + }, + WRITER: { + FILETYPE_JSON: JsonWriter, + FILETYPE_CSV: CsvWriter, + FILETYPE_PICKLE: PickleWriter, + FILETYPE_PLAIN: FileWriter, + }, + } + + def get_factory_for(self, kind, name, *, format=None): + if not kind in self.FACTORIES: + raise KeyError('Unknown factory kind {!r}.'.format(kind)) + + if format is None and name is None: + raise RuntimeError('Cannot guess factory without at least a filename or a format.') + + # Guess mimetype if possible + if format is None: + format = mimetypes.guess_type(name)[0] + + # Guess from extension if possible + if format is None: + _, _ext = os.path.splitext(name) + if _ext: + format = _ext[1:] + + # Apply aliases + if format in self.ALIASES: + format = self.ALIASES[format] + + if format is None or not format in self.FACTORIES[kind]: + raise RuntimeError( + 'Could not resolve {kind} factory for {name} ({format}).'.format(kind=kind, name=name, format=format) + ) + + return self.FACTORIES[kind][format] + + def get_reader_factory_for(self, name, *, format=None): + """ + Returns a callable to build a reader for the provided filename, eventually forcing a format. + + :param name: filename + :param format: format + :return: type + """ + return self.get_factory_for(READER, name, format=format) + + def get_writer_factory_for(self, name, *, format=None): + """ + Returns a callable to build a writer for the provided filename, eventually forcing a format. + + :param name: filename + :param format: format + :return: type + """ + return self.get_factory_for(WRITER, name, format=format) + + +default_registry = Registry() diff --git a/bonobo/structs/bags.py b/bonobo/structs/bags.py index 47194d2..8683175 100644 --- a/bonobo/structs/bags.py +++ b/bonobo/structs/bags.py @@ -1,5 +1,6 @@ import itertools +from bonobo.structs.tokens import Token from bonobo.constants import INHERIT_INPUT, LOOPBACK __all__ = [ @@ -35,11 +36,55 @@ class Bag: default_flags = () + def __new__(cls, *args, _flags=None, _parent=None, **kwargs): + # Handle the special case where we call Bag's constructor with only one bag or token as argument. + if len(args) == 1 and len(kwargs) == 0: + if isinstance(args[0], Bag): + raise ValueError('Bag cannot be instanciated with a bag (for now ...).') + + if isinstance(args[0], Token): + return args[0] + + # Otherwise, type will handle that for us. + return super().__new__(cls) + def __init__(self, *args, _flags=None, _parent=None, **kwargs): self._flags = type(self).default_flags + (_flags or ()) self._parent = _parent - self._args = args - self._kwargs = kwargs + + if len(args) == 1 and len(kwargs) == 0: + # If we only have one argument, that may be because we're using the shorthand syntax. + mixed = args[0] + + if isinstance(mixed, Bag): + # Just duplicate the bag. + self._args = mixed.args + self._kwargs = mixed.kwargs + elif isinstance(mixed, tuple): + if not len(mixed): + # Empty bag. + self._args = () + self._kwargs = {} + elif isinstance(mixed[-1], dict): + # Args + Kwargs + self._args = mixed[:-1] + self._kwargs = mixed[-1] + else: + # Args only + self._args = mixed + self._kwargs = {} + elif isinstance(mixed, dict): + # Kwargs only + self._args = () + self._kwargs = mixed + else: + self._args = args + self._kwargs = {} + + else: + # Otherwise, lets get args/kwargs from the constructor. + self._args = args + self._kwargs = kwargs @property def args(self): diff --git a/bonobo/util/__init__.py b/bonobo/util/__init__.py index e2eebe1..4ef136e 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -1,5 +1,4 @@ -from bonobo.util.collections import sortedlist -from bonobo.util.iterators import ensure_tuple +from bonobo.util.collections import sortedlist, ensure_tuple from bonobo.util.compat import deprecated, deprecated_alias from bonobo.util.inspect import ( inspect_node, diff --git a/bonobo/util/collections.py b/bonobo/util/collections.py index b97630a..d53a7da 100644 --- a/bonobo/util/collections.py +++ b/bonobo/util/collections.py @@ -1,6 +1,48 @@ import bisect +import functools class sortedlist(list): def insort(self, x): - bisect.insort(self, x) \ No newline at end of file + bisect.insort(self, x) + + +def ensure_tuple(tuple_or_mixed): + """ + If it's not a tuple, let's make a tuple of one item. + Otherwise, not changed. + + :param tuple_or_mixed: + :return: tuple + + """ + if isinstance(tuple_or_mixed, tuple): + return tuple_or_mixed + return (tuple_or_mixed, ) + + +def tuplize(generator): + """ Takes a generator and make it a tuple-returning function. As a side + effect, it can also decorate any iterator-returning function to force + return value to be a tuple. + + >>> tuplized_lambda = tuplize(lambda: [1, 2, 3]) + >>> tuplized_lambda() + (1, 2, 3) + + >>> @tuplize + ... def my_generator(): + ... yield 1 + ... yield 2 + ... yield 3 + ... + >>> my_generator() + (1, 2, 3) + + """ + + @functools.wraps(generator) + def tuplized(*args, **kwargs): + return tuple(generator(*args, **kwargs)) + + return tuplized diff --git a/bonobo/util/iterators.py b/bonobo/util/iterators.py deleted file mode 100644 index 5bc8fb2..0000000 --- a/bonobo/util/iterators.py +++ /dev/null @@ -1,37 +0,0 @@ -""" Iterator utilities. """ -import functools - - -def force_iterator(mixed): - """Sudo make me an iterator. - - Deprecated? - - :param mixed: - :return: Iterator, baby. - """ - if isinstance(mixed, str): - return [mixed] - try: - return iter(mixed) - except TypeError: - return [mixed] if mixed else [] - - -def ensure_tuple(tuple_or_mixed): - if isinstance(tuple_or_mixed, tuple): - return tuple_or_mixed - return (tuple_or_mixed, ) - - -def tuplize(generator): - """ Takes a generator and make it a tuple-returning function. As a side - effect, it can also decorate any iterator-returning function to force - return value to be a tuple. - """ - - @functools.wraps(generator) - def tuplized(*args, **kwargs): - return tuple(generator(*args, **kwargs)) - - return tuplized diff --git a/bonobo/util/resolvers.py b/bonobo/util/resolvers.py new file mode 100644 index 0000000..0590fc7 --- /dev/null +++ b/bonobo/util/resolvers.py @@ -0,0 +1,61 @@ +""" +This package is considered private, and should only be used within bonobo. + +""" + +import json + +import bonobo +from bonobo.util.collections import tuplize +from bonobo.util.python import WorkingDirectoryModulesRegistry + + +def _parse_option(option): + """ + Parse a 'key=val' option string into a python (key, val) pair + + :param option: str + :return: tuple + """ + try: + key, val = option.split('=', 1) + except ValueError: + return option, True + + try: + val = json.loads(val) + except json.JSONDecodeError: + pass + + return key, val + + +def _resolve_options(options=None): + """ + Resolve a collection of option strings (eventually coming from command line) into a python dictionary. + + :param options: tuple[str] + :return: dict + """ + if options: + return dict(map(_parse_option, options)) + return dict() + + +@tuplize +def _resolve_transformations(transformations): + """ + Resolve a collection of strings into the matching python objects, defaulting to bonobo namespace if no package is provided. + + Syntax for each string is path.to.package:attribute + + :param transformations: tuple(str) + :return: tuple(object) + """ + registry = WorkingDirectoryModulesRegistry() + for t in transformations: + try: + mod, attr = t.split(':', 1) + yield getattr(registry.require(mod), attr) + except ValueError: + yield getattr(bonobo, t) diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/io/test_csv.py b/tests/io/test_csv.py index 1de6f89..9048bef 100644 --- a/tests/io/test_csv.py +++ b/tests/io/test_csv.py @@ -28,9 +28,7 @@ def test_write_csv_to_file_kwargs(tmpdir, add_kwargs): fs, filename, services = csv_tester.get_services_for_writer(tmpdir) with NodeExecutionContext(CsvWriter(filename, **add_kwargs), services=services) as context: - context.write(BEGIN, Bag(**{'foo': 'bar'}), Bag(**{'foo': 'baz', 'ignore': 'this'}), END) - context.step() - context.step() + context.write_sync({'foo': 'bar'}, {'foo': 'baz', 'ignore': 'this'}) with fs.open(filename) as fp: assert fp.read() == 'foo\nbar\nbaz\n' diff --git a/tests/io/test_json.py b/tests/io/test_json.py index bbeb17f..c0124cd 100644 --- a/tests/io/test_json.py +++ b/tests/io/test_json.py @@ -1,7 +1,6 @@ import pytest -from bonobo import Bag, JsonReader, JsonWriter, settings -from bonobo.constants import BEGIN, END +from bonobo import JsonReader, JsonWriter, settings from bonobo.execution.node import NodeExecutionContext from bonobo.util.testing import FilesystemTester @@ -29,8 +28,7 @@ def test_write_json_kwargs(tmpdir, add_kwargs): fs, filename, services = json_tester.get_services_for_writer(tmpdir) with NodeExecutionContext(JsonWriter(filename, **add_kwargs), services=services) as context: - context.write(BEGIN, Bag(**{'foo': 'bar'}), END) - context.step() + context.write_sync({'foo': 'bar'}) with fs.open(filename) as fp: assert fp.read() == '[{"foo": "bar"}]' diff --git a/tests/io/test_pickle.py b/tests/io/test_pickle.py index eca3493..1f95309 100644 --- a/tests/io/test_pickle.py +++ b/tests/io/test_pickle.py @@ -14,7 +14,7 @@ def test_write_pickled_dict_to_file(tmpdir): fs, filename, services = pickle_tester.get_services_for_writer(tmpdir) with NodeExecutionContext(PickleWriter(filename), services=services) as context: - context.write_sync(Bag({'foo': 'bar'}), Bag({'foo': 'baz', 'ignore': 'this'})) + context.write_sync(Bag(({'foo': 'bar'}, {})), Bag(({'foo': 'baz', 'ignore': 'this'}, {}))) with fs.open(filename, 'rb') as fp: assert pickle.loads(fp.read()) == {'foo': 'bar'} @@ -27,7 +27,7 @@ def test_read_pickled_list_from_file(tmpdir): fs, filename, services = pickle_tester.get_services_for_reader(tmpdir) with BufferingNodeExecutionContext(PickleReader(filename), services=services) as context: - context.write_sync(Bag()) + context.write_sync(()) output = context.get_buffer() assert len(output) == 2 diff --git a/tests/nodes/factory.py b/tests/nodes/factory.py new file mode 100644 index 0000000..8443cb6 --- /dev/null +++ b/tests/nodes/factory.py @@ -0,0 +1,66 @@ +from unittest import TestCase + +import pytest + +from bonobo import Bag +from bonobo.nodes.factory import Factory +from bonobo.util.testing import BufferingNodeExecutionContext + + +@pytest.mark.filterwarnings('ignore:Factory') +class FactoryTypeTest(TestCase): + def execute_node(self, node, *rows): + with BufferingNodeExecutionContext(node) as context: + context.write_sync(*map(Bag, rows)) + return context.get_buffer() + + def test_args_as_str(self): + f = Factory() + f[0].as_str().upper() + + output = self.execute_node(f, 'foo', 'bar', 'baz') + + assert len(output) == 3 + assert output[0] == 'FOO' + assert output[1] == 'BAR' + assert output[2] == 'BAZ' + + def test_kwargs_as_str(self): + f = Factory() + f['foo'].as_str().upper() + + output = self.execute_node(f, {'foo': 'bar'}, {'foo': 'baz'}) + assert len(output) == 2 + assert output[0] == {'foo': 'BAR'} + assert output[1] == {'foo': 'BAZ'} + + +""" +draft below. + +if __name__ == '__main__': + f = Factory() + + f[0].dict().map_keys({'foo': 'F00'}) + + print('operations:', f.operations) + print(f({'foo': 'bisou'}, foo='blah')) + +specs: + +- rename keys of an input dict (in args, or kwargs) using a translation map. + + +f = Factory() + +f[0] +f['xxx'] = + +f[0].dict().get('foo.bar').move_to('foo.baz').apply(str.upper) +f[0].get('foo.*').items().map(str.lower) + +f['foo'].keys_map({ + 'a': 'b' +}) + +""" diff --git a/tests/structs/test_bags.py b/tests/structs/test_bags.py index 1de04ce..b5517e3 100644 --- a/tests/structs/test_bags.py +++ b/tests/structs/test_bags.py @@ -1,8 +1,10 @@ import pickle from unittest.mock import Mock +import pytest + from bonobo import Bag -from bonobo.constants import INHERIT_INPUT +from bonobo.constants import INHERIT_INPUT, BEGIN from bonobo.structs import Token args = ( @@ -31,6 +33,32 @@ def test_basic(): my_callable2.assert_called_once_with(*args, **kwargs) +def test_constructor_empty(): + a, b = Bag(), Bag() + assert a == b + assert a.args is () + assert a.kwargs == {} + + +@pytest.mark.parametrize(('arg_in', 'arg_out'), ( + ((), ()), + ({}, ()), + (('a', 'b', 'c'), None), +)) +def test_constructor_shorthand(arg_in, arg_out): + if arg_out is None: + arg_out = arg_in + assert Bag(arg_in) == arg_out + + +def test_constructor_kwargs_only(): + assert Bag(foo='bar') == {'foo': 'bar'} + + +def test_constructor_identity(): + assert Bag(BEGIN) is BEGIN + + def test_inherit(): bag = Bag('a', a=1) bag2 = Bag.inherit('b', b=2, _parent=bag) diff --git a/tests/util/test_collections.py b/tests/util/test_collections.py new file mode 100644 index 0000000..3a1e517 --- /dev/null +++ b/tests/util/test_collections.py @@ -0,0 +1,30 @@ +from bonobo.util import sortedlist, ensure_tuple +from bonobo.util.collections import tuplize + + +def test_sortedlist(): + l = sortedlist() + l.insort(2) + l.insort(1) + l.insort(3) + l.insort(2) + assert l == [1, 2, 2, 3] + + +def test_ensure_tuple(): + assert ensure_tuple('a') == ('a', ) + assert ensure_tuple(('a', )) == ('a', ) + assert ensure_tuple(()) is () + + +def test_tuplize(): + tuplized_lambda = tuplize(lambda: [1, 2, 3]) + assert tuplized_lambda() == (1, 2, 3) + + @tuplize + def some_generator(): + yield 'c' + yield 'b' + yield 'a' + + assert some_generator() == ('c', 'b', 'a') diff --git a/tests/util/test_iterators.py b/tests/util/test_iterators.py deleted file mode 100644 index 3d0249e..0000000 --- a/tests/util/test_iterators.py +++ /dev/null @@ -1,22 +0,0 @@ -import types - -from bonobo.util.iterators import force_iterator - - -def test_force_iterator_with_string(): - assert force_iterator('foo') == ['foo'] - - -def test_force_iterator_with_none(): - assert force_iterator(None) == [] - - -def test_force_iterator_with_generator(): - def generator(): - yield 'aaa' - yield 'bbb' - yield 'ccc' - - iterator = force_iterator(generator()) - assert isinstance(iterator, types.GeneratorType) - assert list(iterator) == ['aaa', 'bbb', 'ccc'] diff --git a/tests/util/test_resolvers.py b/tests/util/test_resolvers.py new file mode 100644 index 0000000..0de3003 --- /dev/null +++ b/tests/util/test_resolvers.py @@ -0,0 +1,18 @@ +import bonobo +from bonobo.util.resolvers import _parse_option, _resolve_options, _resolve_transformations + + +def test_parse_option(): + assert _parse_option('foo=bar') == ('foo', 'bar') + assert _parse_option('foo="bar"') == ('foo', 'bar') + assert _parse_option('sep=";"') == ('sep', ';') + assert _parse_option('foo') == ('foo', True) + + +def test_resolve_options(): + assert _resolve_options(('foo=bar', 'bar="baz"')) == {'foo': 'bar', 'bar': 'baz'} + assert _resolve_options() == {} + + +def test_resolve_transformations(): + assert _resolve_transformations(('PrettyPrinter', )) == (bonobo.PrettyPrinter, ) From ece764b95cfeefb9e2284192f07f3c7092de8026 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 23 Oct 2017 21:28:49 +0200 Subject: [PATCH 12/14] [tests] rename factory test and move bag detecting so any bag is returned as is as an output. --- bonobo/execution/node.py | 4 ++-- tests/nodes/{factory.py => test_factory.py} | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename tests/nodes/{factory.py => test_factory.py} (96%) diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index 445c2f6..6c52e7d 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -9,7 +9,7 @@ from bonobo.execution.base import LoopingExecutionContext from bonobo.structs.bags import Bag from bonobo.structs.inputs import Input from bonobo.structs.tokens import Token -from bonobo.util import get_name, iserrorbag, isloopbackbag +from bonobo.util import get_name, iserrorbag, isloopbackbag, isbag from bonobo.util.compat import deprecated_alias from bonobo.util.statistics import WithStatistics @@ -142,7 +142,7 @@ def _resolve(input_bag, output): if output is NOT_MODIFIED: return input_bag - if iserrorbag(output): + if isbag(output): return output return Bag(output) diff --git a/tests/nodes/factory.py b/tests/nodes/test_factory.py similarity index 96% rename from tests/nodes/factory.py rename to tests/nodes/test_factory.py index 8443cb6..781ba57 100644 --- a/tests/nodes/factory.py +++ b/tests/nodes/test_factory.py @@ -11,7 +11,7 @@ from bonobo.util.testing import BufferingNodeExecutionContext class FactoryTypeTest(TestCase): def execute_node(self, node, *rows): with BufferingNodeExecutionContext(node) as context: - context.write_sync(*map(Bag, rows)) + context.write_sync(*rows) return context.get_buffer() def test_args_as_str(self): From dd28e08f0f8de5d446052497f3b750e90cbe703d Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 23 Oct 2017 21:56:13 +0200 Subject: [PATCH 13/14] [nodes] Removing draft quality factory from bonobo main package, will live in separate personnal package until it is good enough to live here. --- bonobo/examples/nodes/factory.py | 18 --- bonobo/nodes/factory.py | 188 ------------------------------- tests/nodes/test_factory.py | 66 ----------- 3 files changed, 272 deletions(-) delete mode 100644 bonobo/examples/nodes/factory.py delete mode 100644 bonobo/nodes/factory.py delete mode 100644 tests/nodes/test_factory.py diff --git a/bonobo/examples/nodes/factory.py b/bonobo/examples/nodes/factory.py deleted file mode 100644 index c1f3818..0000000 --- a/bonobo/examples/nodes/factory.py +++ /dev/null @@ -1,18 +0,0 @@ -import bonobo -from bonobo.commands.run import get_default_services -from bonobo.nodes.factory import Factory -from bonobo.nodes.io.json import JsonDictItemsReader - -normalize = Factory() -normalize[0].str().title() -normalize.move(0, 'title') -normalize.move(0, 'address') - -graph = bonobo.Graph( - JsonDictItemsReader('datasets/coffeeshops.json'), - normalize, - bonobo.PrettyPrinter(), -) - -if __name__ == '__main__': - bonobo.run(graph, services=get_default_services(__file__)) diff --git a/bonobo/nodes/factory.py b/bonobo/nodes/factory.py deleted file mode 100644 index bd77e03..0000000 --- a/bonobo/nodes/factory.py +++ /dev/null @@ -1,188 +0,0 @@ -import functools -import warnings -from functools import partial - -from bonobo import Bag -from bonobo.config import Configurable, Method - -_isarg = lambda item: type(item) is int -_iskwarg = lambda item: type(item) is str - - -class Operation(): - def __init__(self, item, callable): - self.item = item - self.callable = callable - - def __repr__(self): - return ''.format(self.callable.__name__, self.item) - - def apply(self, *args, **kwargs): - if _isarg(self.item): - return (*args[0:self.item], self.callable(args[self.item]), *args[self.item + 1:]), kwargs - if _iskwarg(self.item): - return args, {**kwargs, self.item: self.callable(kwargs.get(self.item))} - raise RuntimeError('Houston, we have a problem...') - - -class FactoryOperation(): - def __init__(self, factory, callable): - self.factory = factory - self.callable = callable - - def __repr__(self): - return ''.format(self.callable.__name__) - - def apply(self, *args, **kwargs): - return self.callable(*args, **kwargs) - - -CURSOR_TYPES = {} - - -def operation(mixed): - def decorator(m, ctype=mixed): - def lazy_operation(self, *args, **kwargs): - @functools.wraps(m) - def actual_operation(x): - return m(self, x, *args, **kwargs) - - self.factory.operations.append(Operation(self.item, actual_operation)) - return CURSOR_TYPES[ctype](self.factory, self.item) if ctype else self - - return lazy_operation - - return decorator if isinstance(mixed, str) else decorator(mixed, ctype=None) - - -def factory_operation(m): - def lazy_operation(self, *config): - @functools.wraps(m) - def actual_operation(*args, **kwargs): - return m(self, *config, *args, **kwargs) - - self.operations.append(FactoryOperation(self, actual_operation)) - return self - - return lazy_operation - - -class Cursor(): - _type = None - - def __init__(self, factory, item): - self.factory = factory - self.item = item - - @operation('dict') - def as_dict(self, x): - return x if isinstance(x, dict) else dict(x) - - @operation('int') - def as_int(self, x): - return x if isinstance(x, int) else int(x) - - @operation('str') - def as_str(self, x): - return x if isinstance(x, str) else str(x) - - @operation('list') - def as_list(self, x): - return x if isinstance(x, list) else list(x) - - @operation('tuple') - def as_tuple(self, x): - return x if isinstance(x, tuple) else tuple(x) - - def __getattr__(self, item): - """ - Fallback to type methods if they exist, for example StrCursor.upper will use str.upper if not overriden, etc. - - :param item: - """ - if self._type and item in self._type.__dict__: - method = self._type.__dict__[item] - - @operation - @functools.wraps(method) - def _operation(self, x, *args, **kwargs): - return method(x, *args, **kwargs) - - setattr(self, item, partial(_operation, self)) - return getattr(self, item) - - raise AttributeError('Unknown operation {}.{}().'.format( - type(self).__name__, - item, - )) - - -CURSOR_TYPES['default'] = Cursor - - -class DictCursor(Cursor): - _type = dict - - @operation('default') - def get(self, x, path): - return x.get(path) - - @operation - def map_keys(self, x, mapping): - return {mapping.get(k): v for k, v in x.items()} - - -CURSOR_TYPES['dict'] = DictCursor - - -class StringCursor(Cursor): - _type = str - - -CURSOR_TYPES['str'] = StringCursor - - -class Factory(Configurable): - initialize = Method(required=False) - - def __init__(self, *args, **kwargs): - warnings.warn( - type(self).__name__ + - ' is experimental, API may change in the future, use it as a preview only and knowing the risks.', - FutureWarning - ) - super(Factory, self).__init__(*args, **kwargs) - self.default_cursor_type = 'default' - self.operations = [] - - if self.initialize is not None: - self.initialize(self) - - @factory_operation - def move(self, _from, _to, *args, **kwargs): - if _from == _to: - return args, kwargs - - if _isarg(_from): - value = args[_from] - args = args[:_from] + args[_from + 1:] - elif _iskwarg(_from): - value = kwargs[_from] - kwargs = {k: v for k, v in kwargs if k != _from} - else: - raise RuntimeError('Houston, we have a problem...') - - if _isarg(_to): - return (*args[:_to], value, *args[_to + 1:]), kwargs - elif _iskwarg(_to): - return args, {**kwargs, _to: value} - else: - raise RuntimeError('Houston, we have a problem...') - - def __call__(self, *args, **kwargs): - for operation in self.operations: - args, kwargs = operation.apply(*args, **kwargs) - return Bag(*args, **kwargs) - - def __getitem__(self, item): - return CURSOR_TYPES[self.default_cursor_type](self, item) diff --git a/tests/nodes/test_factory.py b/tests/nodes/test_factory.py deleted file mode 100644 index 781ba57..0000000 --- a/tests/nodes/test_factory.py +++ /dev/null @@ -1,66 +0,0 @@ -from unittest import TestCase - -import pytest - -from bonobo import Bag -from bonobo.nodes.factory import Factory -from bonobo.util.testing import BufferingNodeExecutionContext - - -@pytest.mark.filterwarnings('ignore:Factory') -class FactoryTypeTest(TestCase): - def execute_node(self, node, *rows): - with BufferingNodeExecutionContext(node) as context: - context.write_sync(*rows) - return context.get_buffer() - - def test_args_as_str(self): - f = Factory() - f[0].as_str().upper() - - output = self.execute_node(f, 'foo', 'bar', 'baz') - - assert len(output) == 3 - assert output[0] == 'FOO' - assert output[1] == 'BAR' - assert output[2] == 'BAZ' - - def test_kwargs_as_str(self): - f = Factory() - f['foo'].as_str().upper() - - output = self.execute_node(f, {'foo': 'bar'}, {'foo': 'baz'}) - assert len(output) == 2 - assert output[0] == {'foo': 'BAR'} - assert output[1] == {'foo': 'BAZ'} - - -""" -draft below. - -if __name__ == '__main__': - f = Factory() - - f[0].dict().map_keys({'foo': 'F00'}) - - print('operations:', f.operations) - print(f({'foo': 'bisou'}, foo='blah')) - -specs: - -- rename keys of an input dict (in args, or kwargs) using a translation map. - - -f = Factory() - -f[0] -f['xxx'] = - -f[0].dict().get('foo.bar').move_to('foo.baz').apply(str.upper) -f[0].get('foo.*').items().map(str.lower) - -f['foo'].keys_map({ - 'a': 'b' -}) - -""" From a79c17c3e923214a315bc321fda0596e98b49bbd Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 23 Oct 2017 22:15:19 +0200 Subject: [PATCH 14/14] [tests] bonobo.util.objects --- tests/util/test_objects.py | 69 +++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/tests/util/test_objects.py b/tests/util/test_objects.py index c6e30b2..9a3696e 100644 --- a/tests/util/test_objects.py +++ b/tests/util/test_objects.py @@ -2,7 +2,7 @@ import operator import pytest -from bonobo.util.objects import Wrapper, get_name, ValueHolder +from bonobo.util.objects import Wrapper, get_name, ValueHolder, get_attribute_or_create from bonobo.util.testing import optional_contextmanager @@ -59,6 +59,73 @@ def test_valueholder(): assert repr(x) == repr(y) == repr(43) +def test_valueholder_notequal(): + x = ValueHolder(42) + assert x != 41 + assert not (x != 42) + + +@pytest.mark.parametrize('rlo,rhi', [ + (1, 2), + ('a', 'b'), +]) +def test_valueholder_ordering(rlo, rhi): + vlo, vhi = ValueHolder(rlo), ValueHolder(rhi) + + for lo in (rlo, vlo): + for hi in (rhi, vhi): + assert lo < hi + assert hi > lo + assert lo <= lo + assert not (lo < lo) + assert lo >= lo + + +def test_valueholder_negpos(): + neg, zero, pos = ValueHolder(-1), ValueHolder(0), ValueHolder(1) + + assert -neg == pos + assert -pos == neg + assert -zero == zero + assert +pos == pos + assert +neg == neg + + +def test_valueholders_containers(): + x = ValueHolder({1, 2, 3, 5, 8, 13}) + + assert 5 in x + assert 42 not in x + + y = ValueHolder({'foo': 'bar', 'corp': 'acme'}) + + assert 'foo' in y + assert y['foo'] == 'bar' + with pytest.raises(KeyError): + y['no'] + y['no'] = 'oh, wait' + assert 'no' in y + assert 'oh, wait' == y['no'] + + +def test_get_attribute_or_create(): + class X: + pass + + x = X() + + with pytest.raises(AttributeError): + x.foo + + foo = get_attribute_or_create(x, 'foo', 'bar') + assert foo == 'bar' + assert x.foo == 'bar' + + foo = get_attribute_or_create(x, 'foo', 'baz') + assert foo == 'bar' + assert x.foo == 'bar' + + unsupported_operations = { int: {operator.matmul}, str: {