From 3059f112f1c8d3c7f909c2ac32d531f208629095 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 21 May 2017 20:42:54 +0200 Subject: [PATCH 01/71] WIP: factory for transformations --- bonobo/examples/datasets/fablabs.py | 12 +- bonobo/examples/nodes/_services.py | 5 + bonobo/examples/{types => nodes}/bags.py | 0 bonobo/examples/{types => nodes}/dicts.py | 0 bonobo/examples/nodes/factory.py | 33 ++++ bonobo/examples/{types => nodes}/strings.py | 0 bonobo/examples/types/__init__.py | 7 - bonobo/nodes/basics.py | 4 +- bonobo/nodes/factory.py | 208 ++++++++++++++++++++ bonobo/nodes/io/json.py | 9 + 10 files changed, 261 insertions(+), 17 deletions(-) create mode 100644 bonobo/examples/nodes/_services.py rename bonobo/examples/{types => nodes}/bags.py (100%) rename bonobo/examples/{types => nodes}/dicts.py (100%) create mode 100644 bonobo/examples/nodes/factory.py rename bonobo/examples/{types => nodes}/strings.py (100%) delete mode 100644 bonobo/examples/types/__init__.py create mode 100644 bonobo/nodes/factory.py diff --git a/bonobo/examples/datasets/fablabs.py b/bonobo/examples/datasets/fablabs.py index be95fe1..333cb17 100644 --- a/bonobo/examples/datasets/fablabs.py +++ b/bonobo/examples/datasets/fablabs.py @@ -48,10 +48,6 @@ def normalize(row): return result -def filter_france(row): - if row.get('country') == 'France': - yield row - def display(row): print(Style.BRIGHT, row.get('name'), Style.RESET_ALL, sep='') @@ -73,15 +69,15 @@ def display(row): print( ' - {}address{}: {address}'. - format(Fore.BLUE, Style.RESET_ALL, address=', '.join(address)) + format(Fore.BLUE, Style.RESET_ALL, address=', '.join(address)) ) print( ' - {}links{}: {links}'. - format(Fore.BLUE, Style.RESET_ALL, links=', '.join(row['links'])) + format(Fore.BLUE, Style.RESET_ALL, links=', '.join(row['links'])) ) print( ' - {}geometry{}: {geometry}'. 
- format(Fore.BLUE, Style.RESET_ALL, **row) + format(Fore.BLUE, Style.RESET_ALL, **row) ) print( ' - {}source{}: {source}'.format( @@ -95,7 +91,7 @@ graph = bonobo.Graph( dataset=API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris' ), normalize, - filter_france, + bonobo.Filter(filter=lambda row: row.get('country') == 'France'), bonobo.Tee(display), bonobo.JsonWriter(path='fablabs.txt'), ) diff --git a/bonobo/examples/nodes/_services.py b/bonobo/examples/nodes/_services.py new file mode 100644 index 0000000..337bf6b --- /dev/null +++ b/bonobo/examples/nodes/_services.py @@ -0,0 +1,5 @@ +from bonobo import get_examples_path, open_fs + + +def get_services(): + return {'fs': open_fs(get_examples_path())} diff --git a/bonobo/examples/types/bags.py b/bonobo/examples/nodes/bags.py similarity index 100% rename from bonobo/examples/types/bags.py rename to bonobo/examples/nodes/bags.py diff --git a/bonobo/examples/types/dicts.py b/bonobo/examples/nodes/dicts.py similarity index 100% rename from bonobo/examples/types/dicts.py rename to bonobo/examples/nodes/dicts.py diff --git a/bonobo/examples/nodes/factory.py b/bonobo/examples/nodes/factory.py new file mode 100644 index 0000000..d4702c6 --- /dev/null +++ b/bonobo/examples/nodes/factory.py @@ -0,0 +1,33 @@ +from functools import partial + +import itertools + +import bonobo +from bonobo.commands.run import get_default_services +from bonobo.config import Configurable +from bonobo.nodes.factory import Factory +from bonobo.nodes.io.json import JsonDictReader + +@Factory +def Normalize(self): + self[0].str().title() + self.move(0, 'title') + self.move(0, 'address') + + + + +class PrettyPrinter(Configurable): + def call(self, *args, **kwargs): + for i, (item, value) in enumerate(itertools.chain(enumerate(args), kwargs.items())): + print(' ' if i else '• ', item, '=', value) + + +graph = bonobo.Graph( + JsonDictReader('datasets/coffeeshops.json'), + Normalize(), + PrettyPrinter(), +) + +if __name__ == '__main__': + 
bonobo.run(graph, services=get_default_services(__file__)) diff --git a/bonobo/examples/types/strings.py b/bonobo/examples/nodes/strings.py similarity index 100% rename from bonobo/examples/types/strings.py rename to bonobo/examples/nodes/strings.py diff --git a/bonobo/examples/types/__init__.py b/bonobo/examples/types/__init__.py deleted file mode 100644 index a2c0ceb..0000000 --- a/bonobo/examples/types/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from . import bags, dicts, strings - -__all__ = [ - 'bags', - 'dicts', - 'strings', -] \ No newline at end of file diff --git a/bonobo/nodes/basics.py b/bonobo/nodes/basics.py index 195cd8e..094e8e6 100644 --- a/bonobo/nodes/basics.py +++ b/bonobo/nodes/basics.py @@ -3,8 +3,7 @@ from pprint import pprint as _pprint from colorama import Fore, Style -from bonobo.config import Configurable, Option -from bonobo.config.processors import ContextProcessor +from bonobo.config import Configurable, ContextProcessor, Option from bonobo.structs.bags import Bag from bonobo.util.objects import ValueHolder from bonobo.util.term import CLEAR_EOL @@ -74,6 +73,7 @@ pprint = Tee(_pprint) def PrettyPrint(title_keys=('title', 'name', 'id'), print_values=True, sort=True): from bonobo.constants import NOT_MODIFIED + from colorama import Fore, Style def _pprint(*args, **kwargs): nonlocal title_keys, sort, print_values diff --git a/bonobo/nodes/factory.py b/bonobo/nodes/factory.py new file mode 100644 index 0000000..af3f778 --- /dev/null +++ b/bonobo/nodes/factory.py @@ -0,0 +1,208 @@ +import functools +from functools import partial + +from bonobo import Bag +from bonobo.config import Configurable, Method + +_isarg = lambda item: type(item) is int +_iskwarg = lambda item: type(item) is str + + +class Operation(): + def __init__(self, item, callable): + self.item = item + self.callable = callable + + def __repr__(self): + return ''.format(self.callable.__name__, self.item) + + def apply(self, *args, **kwargs): + if _isarg(self.item): + return 
(*args[0:self.item], self.callable(args[self.item]), *args[self.item + 1:]), kwargs + if _iskwarg(self.item): + return args, {**kwargs, self.item: self.callable(kwargs.get(self.item))} + raise RuntimeError('Houston, we have a problem...') + + +class FactoryOperation(): + def __init__(self, factory, callable): + self.factory = factory + self.callable = callable + + def __repr__(self): + return ''.format(self.callable.__name__) + + def apply(self, *args, **kwargs): + return self.callable(*args, **kwargs) + + +CURSOR_TYPES = {} + + +def operation(mixed): + def decorator(m, ctype=mixed): + def lazy_operation(self, *args, **kwargs): + @functools.wraps(m) + def actual_operation(x): + return m(self, x, *args, **kwargs) + + self.factory.operations.append(Operation(self.item, actual_operation)) + return CURSOR_TYPES[ctype](self.factory, self.item) if ctype else self + + return lazy_operation + + return decorator if isinstance(mixed, str) else decorator(mixed, ctype=None) + + +def factory_operation(m): + def lazy_operation(self, *config): + @functools.wraps(m) + def actual_operation(*args, **kwargs): + return m(self, *config, *args, **kwargs) + + self.operations.append(FactoryOperation(self, actual_operation)) + return self + + return lazy_operation + + +class Cursor(): + _type = None + + def __init__(self, factory, item): + self.factory = factory + self.item = item + + @operation('dict') + def dict(self, x): + return x if isinstance(x, dict) else dict(x) + + @operation('int') + def int(self): + pass + + @operation('str') + def str(self, x): + return x if isinstance(x, str) else str(x) + + @operation('list') + def list(self): + pass + + @operation('tuple') + def tuple(self): + pass + + def __getattr__(self, item): + """ + Fallback to type methods if they exist, for example StrCursor.upper will use str.upper if not overriden, etc. 
+ + :param item: + """ + if self._type and item in self._type.__dict__: + method = self._type.__dict__[item] + + @operation + @functools.wraps(method) + def _operation(self, x, *args, **kwargs): + return method(x, *args, **kwargs) + + setattr(self, item, partial(_operation, self)) + return getattr(self, item) + + raise AttributeError('Unknown operation {}.{}().'.format(type(self).__name__, item, )) + + +CURSOR_TYPES['default'] = Cursor + + +class DictCursor(Cursor): + _type = dict + + @operation('default') + def get(self, x, path): + return x.get(path) + + @operation + def map_keys(self, x, mapping): + return {mapping.get(k): v for k, v in x.items()} + + +CURSOR_TYPES['dict'] = DictCursor + + +class StringCursor(Cursor): + _type = str + + +CURSOR_TYPES['str'] = StringCursor + + +class Factory(Configurable): + setup = Method() + + def __init__(self): + self.default_cursor_type = 'default' + self.operations = [] + self.setup() + + @factory_operation + def move(self, _from, _to, *args, **kwargs): + if _from == _to: + return args, kwargs + + if _isarg(_from): + value = args[_from] + args = args[:_from] + args[_from + 1:] + elif _iskwarg(_from): + value = kwargs[_from] + kwargs = {k: v for k, v in kwargs if k != _from} + else: + raise RuntimeError('Houston, we have a problem...') + + if _isarg(_to): + return (*args[:_to], value, *args[_to + 1:]), kwargs + elif _iskwarg(_to): + return args, {**kwargs, _to: value} + else: + raise RuntimeError('Houston, we have a problem...') + + def __call__(self, *args, **kwargs): + # print('factory call on', args, kwargs) + for operation in self.operations: + args, kwargs = operation.apply(*args, **kwargs) + # print(' ... 
after', operation, 'got', args, kwargs) + return Bag(*args, **kwargs) + + def __getitem__(self, item): + return CURSOR_TYPES[self.default_cursor_type](self, item) + + +if __name__ == '__main__': + f = Factory() + + f[0].dict().map_keys({'foo': 'F00'}) + f['foo'].str().upper() + + print('operations:', f.operations) + print(f({'foo': 'bisou'}, foo='blah')) + +''' +specs: + +- rename keys of an input dict (in args, or kwargs) using a translation map. + + +f = Factory() + +f[0] +f['xxx'] = + +f[0].dict().get('foo.bar').move_to('foo.baz').apply(str.upper) +f[0].get('foo.*').items().map(str.lower) + +f['foo'].keys_map({ + 'a': 'b' +}) + +''' diff --git a/bonobo/nodes/io/json.py b/bonobo/nodes/io/json.py index fdb49b8..91c8f34 100644 --- a/bonobo/nodes/io/json.py +++ b/bonobo/nodes/io/json.py @@ -1,5 +1,7 @@ import json +from itertools import starmap +from bonobo.structs.bags import Bag from bonobo.config.processors import ContextProcessor from .file import FileWriter, FileReader @@ -21,6 +23,13 @@ class JsonReader(JsonHandler, FileReader): yield line +class JsonDictReader(JsonReader): + """ not api, don't use or expect breakage. 
""" + + def read(self, fs, file): + yield from starmap(Bag, self.loader(file).items()) + + class JsonWriter(JsonHandler, FileWriter): @ContextProcessor def envelope(self, context, fs, file, lineno): From be844c3ed788a19b04aeb2a8fec79ad36854556a Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Thu, 25 May 2017 16:41:01 +0200 Subject: [PATCH 02/71] WIP GRAPHVIZ --- Makefile | 2 +- Projectfile | 1 + bin/imgcat | 112 +++++++++++++++++++++++++++++++++++++++ bin/test_graph | 1 + bonobo/commands/graph.py | 22 ++++++++ bonobo/commands/run.py | 27 +++++----- bonobo/constants.py | 2 + bonobo/structs/graphs.py | 2 + bonobo/util/graphviz.py | 9 ++++ setup.py | 1 + 10 files changed, 165 insertions(+), 14 deletions(-) create mode 100755 bin/imgcat create mode 100644 bin/test_graph create mode 100644 bonobo/commands/graph.py create mode 100644 bonobo/util/graphviz.py diff --git a/Makefile b/Makefile index 6db0f7c..6c5443d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # This file has been auto-generated. # All changes will be lost, see Projectfile. # -# Updated at 2017-05-03 18:02:59.359160 +# Updated at 2017-05-08 11:34:30.472553 PACKAGE ?= bonobo PYTHON ?= $(shell which python) diff --git a/Projectfile b/Projectfile index 47844ee..95e5afa 100644 --- a/Projectfile +++ b/Projectfile @@ -60,6 +60,7 @@ entry_points = { ], 'bonobo.commands': [ 'init = bonobo.commands.init:register', + 'graph = bonobo.commands.graph:register', 'run = bonobo.commands.run:register', 'version = bonobo.commands.version:register', ], diff --git a/bin/imgcat b/bin/imgcat new file mode 100755 index 0000000..001d2b8 --- /dev/null +++ b/bin/imgcat @@ -0,0 +1,112 @@ +#!/bin/bash + +# tmux requires unrecognized OSC sequences to be wrapped with DCS tmux; +# ST, and for all ESCs in to be replaced with ESC ESC. It +# only accepts ESC backslash for ST. 
+function print_osc() { + if [[ $TERM == screen* ]] ; then + printf "\033Ptmux;\033\033]" + else + printf "\033]" + fi +} + +# More of the tmux workaround described above. +function print_st() { + if [[ $TERM == screen* ]] ; then + printf "\a\033\\" + else + printf "\a" + fi +} + +# print_image filename inline base64contents print_filename +# filename: Filename to convey to client +# inline: 0 or 1 +# base64contents: Base64-encoded contents +# print_filename: If non-empty, print the filename +# before outputting the image +function print_image() { + print_osc + printf '1337;File=' + if [[ -n "$1" ]]; then + printf 'name='`printf "%s" "$1" | base64`";" + fi + + VERSION=$(base64 --version 2>&1) + if [[ "$VERSION" =~ fourmilab ]]; then + BASE64ARG=-d + elif [[ "$VERSION" =~ GNU ]]; then + BASE64ARG=-di + else + BASE64ARG=-D + fi + + printf "%s" "$3" | base64 $BASE64ARG | wc -c | awk '{printf "size=%d",$1}' + printf ";inline=$2" + printf ":" + printf "%s" "$3" + print_st + printf '\n' + if [[ -n "$4" ]]; then + echo $1 + fi +} + +function error() { + echo "ERROR: $*" 1>&2 +} + +function show_help() { + echo "Usage: imgcat [-p] filename ..." 1>& 2 + echo " or: cat filename | imgcat" 1>& 2 +} + +## Main + +if [ -t 0 ]; then + has_stdin=f +else + has_stdin=t +fi + +# Show help if no arguments and no stdin. +if [ $has_stdin = f -a $# -eq 0 ]; then + show_help + exit +fi + +# Look for command line flags. 
+while [ $# -gt 0 ]; do + case "$1" in + -h|--h|--help) + show_help + exit + ;; + -p|--p|--print) + print_filename=1 + ;; + -*) + error "Unknown option flag: $1" + show_help + exit 1 + ;; + *) + if [ -r "$1" ] ; then + has_stdin=f + print_image "$1" 1 "$(base64 < "$1")" "$print_filename" + else + error "imgcat: $1: No such file or directory" + exit 2 + fi + ;; + esac + shift +done + +# Read and print stdin +if [ $has_stdin = t ]; then + print_image "" 1 "$(cat | base64)" "" +fi + +exit 0 diff --git a/bin/test_graph b/bin/test_graph new file mode 100644 index 0000000..1e5fd85 --- /dev/null +++ b/bin/test_graph @@ -0,0 +1 @@ +bonobo graph bonobo/examples/tutorials/tut02_03_writeasmap.py | dot -otest.png -Tpng && bin/imgcat test.png diff --git a/bonobo/commands/graph.py b/bonobo/commands/graph.py new file mode 100644 index 0000000..b8bbdf9 --- /dev/null +++ b/bonobo/commands/graph.py @@ -0,0 +1,22 @@ +import json + +from bonobo.util.objects import get_name +from bonobo.commands.run import read_file +from bonobo.constants import BEGIN + + +def execute(file): + graph, plugins, services = read_file(file) + + print('digraph {') + print(' rankdir = LR;') + print(' "BEGIN" [shape="point"];') + for i in graph.outputs_of(BEGIN): + print(' "BEGIN" -> ' + json.dumps(get_name(graph.nodes[i])) + ';') + print('}') + + +def register(parser): + import argparse + parser.add_argument('file', type=argparse.FileType()) + return execute diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index b7872e2..60123c8 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -1,11 +1,7 @@ -import argparse - import os import bonobo - -DEFAULT_SERVICES_FILENAME = '_services.py' -DEFAULT_SERVICES_ATTR = 'get_services' +from bonobo.constants import DEFAULT_SERVICES_ATTR, DEFAULT_SERVICES_FILENAME def get_default_services(filename, services=None): @@ -29,7 +25,7 @@ def get_default_services(filename, services=None): return services or {} -def execute(file, quiet=False): +def 
read_file(file): with file: code = compile(file.read(), file.name, 'exec') @@ -56,19 +52,24 @@ def execute(file, quiet=False): ).format(len(graphs)) graph = list(graphs.values())[0] + plugins = [] + services = get_default_services( + file.name, context.get(DEFAULT_SERVICES_ATTR)() if DEFAULT_SERVICES_ATTR in context else None + ) + + return graph, plugins, services + + +def execute(file, quiet=False): + graph, plugins, services = read_file(file) # todo if console and not quiet, then add the console plugin # todo when better console plugin, add it if console and just disable display - return bonobo.run( - graph, - plugins=[], - services=get_default_services( - file.name, context.get(DEFAULT_SERVICES_ATTR)() if DEFAULT_SERVICES_ATTR in context else None - ) - ) + return bonobo.run(graph, plugins=plugins, services=services) def register(parser): + import argparse parser.add_argument('file', type=argparse.FileType()) parser.add_argument('--quiet', action='store_true') return execute diff --git a/bonobo/constants.py b/bonobo/constants.py index d567229..4187197 100644 --- a/bonobo/constants.py +++ b/bonobo/constants.py @@ -4,3 +4,5 @@ BEGIN = Token('Begin') END = Token('End') INHERIT_INPUT = Token('InheritInput') NOT_MODIFIED = Token('NotModified') +DEFAULT_SERVICES_FILENAME = '_services.py' +DEFAULT_SERVICES_ATTR = 'get_services' \ No newline at end of file diff --git a/bonobo/structs/graphs.py b/bonobo/structs/graphs.py index d2d755e..c68186b 100644 --- a/bonobo/structs/graphs.py +++ b/bonobo/structs/graphs.py @@ -29,3 +29,5 @@ class Graph: def __len__(self): return len(self.nodes) + + diff --git a/bonobo/util/graphviz.py b/bonobo/util/graphviz.py new file mode 100644 index 0000000..fa88974 --- /dev/null +++ b/bonobo/util/graphviz.py @@ -0,0 +1,9 @@ + +def render_as_dot(graph): + """ + + :param bonobo.Graph graph: + :return: str + """ + + pass \ No newline at end of file diff --git a/setup.py b/setup.py index 4cd8d82..1153f35 100644 --- a/setup.py +++ b/setup.py @@ 
-65,6 +65,7 @@ setup( entry_points={ 'bonobo.commands': [ 'init = bonobo.commands.init:register', + 'graph = bonobo.commands.graph:register', 'run = bonobo.commands.run:register', 'version = bonobo.commands.version:register' ], From 5062221e7866e707cd2c42073f93d929c2059ef5 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 5 Jul 2017 11:15:03 +0200 Subject: [PATCH 03/71] [config] Refactoring of configurables, allowing partially configured objects. Configurables did not allow more than one "method" option, and mixed scenarios (options+methods+...) were sometimes flaky, forcing the user to know what order was the right one. Now, all options work the same, sharing the same "order" namespace. Backward incompatible change: Options are now required by default, unless a default is provided. Also adds a few candies for debugging/testing, found in the bonobo.util.inspect module. --- bonobo/_api.py | 3 +- bonobo/config/__init__.py | 2 +- bonobo/config/configurables.py | 200 +++++++++++++++++++-------- bonobo/config/options.py | 60 +++++--- bonobo/config/processors.py | 10 +- bonobo/config/services.py | 4 +- bonobo/ext/opendatasoft.py | 4 +- bonobo/nodes/__init__.py | 11 +- bonobo/nodes/basics.py | 14 +- bonobo/nodes/io/csv.py | 2 +- bonobo/nodes/io/pickle.py | 2 +- bonobo/nodes/throttle.py | 55 ++++++++ bonobo/settings.py | 2 +- bonobo/util/collections.py | 6 + bonobo/util/inspect.py | 114 +++++++++++++++ tests/config/test_configurables.py | 57 +++++++- tests/config/test_methods.py | 80 ++++++++--- tests/config/test_methods_partial.py | 66 +++++++++ tests/test_basics.py | 1 + 19 files changed, 573 insertions(+), 120 deletions(-) create mode 100644 bonobo/nodes/throttle.py create mode 100644 bonobo/util/collections.py create mode 100644 bonobo/util/inspect.py create mode 100644 tests/config/test_methods_partial.py diff --git a/bonobo/_api.py b/bonobo/_api.py index cf28a33..89b6d4c 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,6 +1,6 @@ from bonobo.structs 
import Bag, Graph, Token from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ - PrettyPrinter, PickleWriter, PickleReader, Tee, count, identity, noop, pprint + PrettyPrinter, PickleWriter, PickleReader, RateLimited, Tee, count, identity, noop, pprint from bonobo.strategies import create_strategy from bonobo.util.objects import get_name @@ -104,6 +104,7 @@ register_api_group( PrettyPrinter, PickleReader, PickleWriter, + RateLimited, Tee, count, identity, diff --git a/bonobo/config/__init__.py b/bonobo/config/__init__.py index 08be544..6a4247e 100644 --- a/bonobo/config/__init__.py +++ b/bonobo/config/__init__.py @@ -3,7 +3,7 @@ from bonobo.config.options import Method, Option from bonobo.config.processors import ContextProcessor from bonobo.config.services import Container, Exclusive, Service, requires -# bonobo.config public programming interface +# Bonobo's Config API __all__ = [ 'Configurable', 'Container', diff --git a/bonobo/config/configurables.py b/bonobo/config/configurables.py index 43cb8c2..01db9e0 100644 --- a/bonobo/config/configurables.py +++ b/bonobo/config/configurables.py @@ -1,12 +1,14 @@ -from bonobo.config.options import Method, Option -from bonobo.config.processors import ContextProcessor -from bonobo.errors import ConfigurationError, AbstractError +from bonobo.util.inspect import isoption, iscontextprocessor +from bonobo.errors import AbstractError +from bonobo.util.collections import sortedlist __all__ = [ 'Configurable', 'Option', ] +get_creation_counter = lambda v: v._creation_counter + class ConfigurableMeta(type): """ @@ -15,36 +17,77 @@ class ConfigurableMeta(type): def __init__(cls, what, bases=None, dict=None): super().__init__(what, bases, dict) - cls.__options__ = {} - cls.__positional_options__ = [] - cls.__processors__ = [] - cls.__wrappable__ = None + + cls.__processors = sortedlist() + cls.__methods = sortedlist() + cls.__options = sortedlist() + cls.__names = set() + + # 
cls.__kwoptions = [] for typ in cls.__mro__: - for name, value in typ.__dict__.items(): - if isinstance(value, Option): - if isinstance(value, ContextProcessor): - cls.__processors__.append(value) - else: - if not value.name: - value.name = name + for name, value in filter(lambda x: isoption(x[1]), typ.__dict__.items()): + if iscontextprocessor(value): + cls.__processors.insort((value._creation_counter, value)) + continue - if isinstance(value, Method): - if cls.__wrappable__: - raise ConfigurationError( - 'Cannot define more than one "Method" option in a configurable. That may change in the future.' - ) - cls.__wrappable__ = name + if not value.name: + value.name = name - if not name in cls.__options__: - cls.__options__[name] = value + if not name in cls.__names: + cls.__names.add(name) + cls.__options.insort((not value.positional, value._creation_counter, name, value)) - if value.positional: - cls.__positional_options__.append(name) + @property + def __options__(cls): + return ((name, option) for _, _, name, option in cls.__options) - # This can be done before, more efficiently. Not so bad neither as this is only done at type() creation time - # (aka class Xxx(...) time) and there should not be hundreds of processors. Still not very elegant. 
- cls.__processors__ = sorted(cls.__processors__, key=lambda v: v._creation_counter) + @property + def __options_dict__(cls): + return dict(cls.__options__) + + @property + def __processors__(cls): + return (processor for _, processor in cls.__processors) + + def __repr__(self): + return ' '.join(('= position + 1 else None + position += 1 + + return self.__options_values + + def __getattr__(self, item): + _dict = self.func.__options_dict__ + if item in _dict: + return _dict[item].__get__(self, self.func) + return getattr(self.func, item) class Configurable(metaclass=ConfigurableMeta): @@ -54,61 +97,108 @@ class Configurable(metaclass=ConfigurableMeta): """ - def __new__(cls, *args, **kwargs): - if cls.__wrappable__ and len(args) == 1 and hasattr(args[0], '__call__'): - return type(args[0].__name__, (cls, ), {cls.__wrappable__: args[0]}) + def __new__(cls, *args, _final=False, **kwargs): + """ + Custom instance builder. If not all options are fulfilled, will return a :class:`PartiallyConfigured` instance + which is just a :class:`functools.partial` object that behaves like a :class:`Configurable` instance. - return super(Configurable, cls).__new__(cls) - - def __init__(self, *args, **kwargs): - super().__init__() - - # initialize option's value dictionary, used by descriptor implementation (see Option). - self.__options_values__ = {} + The special `_final` argument can be used to force final instance to be created, or an error raised if options + are missing. + :param args: + :param _final: bool + :param kwargs: + :return: Configurable or PartiallyConfigured + """ + options = tuple(cls.__options__) # compute missing options, given the kwargs. missing = set() - for name, option in type(self).__options__.items(): + for name, option in options: if option.required and not option.name in kwargs: missing.add(name) # transform positional arguments in keyword arguments if possible. 
position = 0 - for positional_option in self.__positional_options__: - if len(args) <= position: - break - kwargs[positional_option] = args[position] - position += 1 - if positional_option in missing: - missing.remove(positional_option) + for name, option in options: + if not option.positional: + break # option orders make all positional options first, job done. - # complain if there are still missing options. - if len(missing): - raise TypeError( - '{}() missing {} required option{}: {}.'.format( - type(self).__name__, - len(missing), 's' if len(missing) > 1 else '', ', '.join(map(repr, sorted(missing))) - ) - ) + if not isoption(getattr(cls, name)): + missing.remove(name) + continue + + if len(args) <= position: + break # no more positional arguments given. + + position += 1 + if name in missing: + missing.remove(name) # complain if there is more options than possible. - extraneous = set(kwargs.keys()) - set(type(self).__options__.keys()) + extraneous = set(kwargs.keys()) - (set(next(zip(*options))) if len(options) else set()) if len(extraneous): raise TypeError( '{}() got {} unexpected option{}: {}.'.format( - type(self).__name__, + cls.__name__, len(extraneous), 's' if len(extraneous) > 1 else '', ', '.join(map(repr, sorted(extraneous))) ) ) + # missing options? we'll return a partial instance to finish the work later, unless we're required to be + # "final". + if len(missing): + if _final: + raise TypeError( + '{}() missing {} required option{}: {}.'.format( + cls.__name__, + len(missing), 's' if len(missing) > 1 else '', ', '.join(map(repr, sorted(missing))) + ) + ) + return PartiallyConfigured(cls, *args, **kwargs) + + return super(Configurable, cls).__new__(cls) + + def __init__(self, *args, **kwargs): + # initialize option's value dictionary, used by descriptor implementation (see Option). + self._options_values = { + **kwargs + } + # set option values. 
for name, value in kwargs.items(): setattr(self, name, value) + position = 0 + for name, option in self.__options__: + if not option.positional: + break # option orders make all positional options first + + # value was overriden? Skip. + maybe_value = getattr(type(self), name) + if not isoption(maybe_value): + continue + + if len(args) <= position: + break + + if name in self._options_values: + raise ValueError('Already got a value for option {}'.format(name)) + + setattr(self, name, args[position]) + position += 1 + def __call__(self, *args, **kwargs): """ You can implement a configurable callable behaviour by implemenenting the call(...) method. Of course, it is also backward compatible with legacy __call__ override. """ return self.call(*args, **kwargs) + @property + def __options__(self): + return type(self).__options__ + + @property + def __processors__(self): + return type(self).__processors__ + def call(self, *args, **kwargs): raise AbstractError('Not implemented.') diff --git a/bonobo/config/options.py b/bonobo/config/options.py index 51f4a20..82604fb 100644 --- a/bonobo/config/options.py +++ b/bonobo/config/options.py @@ -1,3 +1,6 @@ +from bonobo.util.inspect import istype + + class Option: """ An Option is a descriptor for Configurable's parameters. @@ -14,7 +17,9 @@ class Option: If an option is required, an error will be raised if no value is provided (at runtime). If it is not, option will have the default value if user does not override it at runtime. - (default: False) + Ignored if a default is provided, meaning that the option cannot be required. + + (default: True) .. 
attribute:: positional @@ -48,10 +53,10 @@ class Option: _creation_counter = 0 - def __init__(self, type=None, *, required=False, positional=False, default=None): + def __init__(self, type=None, *, required=True, positional=False, default=None): self.name = None self.type = type - self.required = required + self.required = required if default is None else False self.positional = positional self.default = default @@ -60,12 +65,27 @@ class Option: Option._creation_counter += 1 def __get__(self, inst, typ): - if not self.name in inst.__options_values__: - inst.__options_values__[self.name] = self.get_default() - return inst.__options_values__[self.name] + # XXX If we call this on the type, then either return overriden value or ... ??? + if inst is None: + return vars(type).get(self.name, self) + + if not self.name in inst._options_values: + inst._options_values[self.name] = self.get_default() + + return inst._options_values[self.name] def __set__(self, inst, value): - inst.__options_values__[self.name] = self.clean(value) + inst._options_values[self.name] = self.clean(value) + + def __repr__(self): + return '<{positional}{typename} {type}{name} default={default!r}{required}>'.format( + typename=type(self).__name__, + type='({})'.format(self.type) if istype(self.type) else '', + name=self.name, + positional='*' if self.positional else '**', + default=self.default, + required=' (required)' if self.required else '', + ) def clean(self, value): return self.type(value) if self.type else value @@ -105,20 +125,18 @@ class Method(Option): """ - def __init__(self): - super().__init__(None, required=False) - - def __get__(self, inst, typ): - if not self.name in inst.__options_values__: - inst.__options_values__[self.name] = getattr(inst, self.name) - return inst.__options_values__[self.name] + def __init__(self, *, required=True, positional=True): + super().__init__(None, required=required, positional=positional) def __set__(self, inst, value): - if isinstance(value, str): - 
raise ValueError('should be callable') - inst.__options_values__[self.name] = self.type(value) if self.type else value - - def clean(self, value): if not hasattr(value, '__call__'): - raise ValueError('{} value must be callable.'.format(type(self).__name__)) - return value + raise TypeError('Option of type {!r} is expecting a callable value, got {!r} object (which is not).'.format( + type(self).__name__, type(value).__name__)) + inst._options_values[self.name] = self.type(value) if self.type else value + + def __call__(self, *args, **kwargs): + # only here to trick IDEs into thinking this is callable. + raise NotImplementedError('You cannot call the descriptor') + + + diff --git a/bonobo/config/processors.py b/bonobo/config/processors.py index d441b6e..27f8703 100644 --- a/bonobo/config/processors.py +++ b/bonobo/config/processors.py @@ -74,8 +74,7 @@ class ContextCurrifier: def __init__(self, wrapped, *initial_context): self.wrapped = wrapped self.context = tuple(initial_context) - self._stack = [] - self._stack_values = [] + self._stack, self._stack_values = None, None def __iter__(self): yield from self.wrapped @@ -86,8 +85,10 @@ class ContextCurrifier: return self.wrapped(*self.context, *args, **kwargs) def setup(self, *context): - if len(self._stack): + if self._stack is not None: raise RuntimeError('Cannot setup context currification twice.') + + self._stack, self._stack_values = list(), list() for processor in resolve_processors(self.wrapped): _processed = processor(self.wrapped, *context, *self.context) _append_to_context = next(_processed) @@ -97,7 +98,7 @@ class ContextCurrifier: self._stack.append(_processed) def teardown(self): - while len(self._stack): + while self._stack: processor = self._stack.pop() try: # todo yield from ? how to ? @@ -108,6 +109,7 @@ class ContextCurrifier: else: # No error ? We should have had StopIteration ... 
raise RuntimeError('Context processors should not yield more than once.') + self._stack, self._stack_values = None, None @contextmanager def as_contextmanager(self, *context): diff --git a/bonobo/config/services.py b/bonobo/config/services.py index d792175..1fe066d 100644 --- a/bonobo/config/services.py +++ b/bonobo/config/services.py @@ -53,7 +53,7 @@ class Service(Option): super().__init__(str, required=False, default=name) def __set__(self, inst, value): - inst.__options_values__[self.name] = validate_service_name(value) + inst._options_values[self.name] = validate_service_name(value) def resolve(self, inst, services): try: @@ -75,7 +75,7 @@ class Container(dict): def args_for(self, mixed): try: - options = mixed.__options__ + options = dict(mixed.__options__) except AttributeError: options = {} diff --git a/bonobo/ext/opendatasoft.py b/bonobo/ext/opendatasoft.py index 4be3134..2dc54c0 100644 --- a/bonobo/ext/opendatasoft.py +++ b/bonobo/ext/opendatasoft.py @@ -13,13 +13,13 @@ def path_str(path): class OpenDataSoftAPI(Configurable): - dataset = Option(str, required=True) + dataset = Option(str, positional=True) endpoint = Option(str, default='{scheme}://{netloc}{path}') scheme = Option(str, default='https') netloc = Option(str, default='data.opendatasoft.com') path = Option(path_str, default='/api/records/1.0/search/') rows = Option(int, default=500) - limit = Option(int, default=None) + limit = Option(int, required=False) timezone = Option(str, default='Europe/Paris') kwargs = Option(dict, default=dict) diff --git a/bonobo/nodes/__init__.py b/bonobo/nodes/__init__.py index c25b580..2cdd1e9 100644 --- a/bonobo/nodes/__init__.py +++ b/bonobo/nodes/__init__.py @@ -1,9 +1,8 @@ -from bonobo.nodes.io import __all__ as _all_io -from bonobo.nodes.io import * - -from bonobo.nodes.basics import __all__ as _all_basics from bonobo.nodes.basics import * - +from bonobo.nodes.basics import __all__ as _all_basics from bonobo.nodes.filter import Filter +from bonobo.nodes.io 
import * +from bonobo.nodes.io import __all__ as _all_io +from bonobo.nodes.throttle import RateLimited -__all__ = _all_basics + _all_io + ['Filter'] +__all__ = _all_basics + _all_io + ['Filter', 'RateLimited'] diff --git a/bonobo/nodes/basics.py b/bonobo/nodes/basics.py index c21757a..c1ead61 100644 --- a/bonobo/nodes/basics.py +++ b/bonobo/nodes/basics.py @@ -1,16 +1,16 @@ import functools -from pprint import pprint as _pprint - import itertools + from colorama import Fore, Style from bonobo import settings from bonobo.config import Configurable, Option from bonobo.config.processors import ContextProcessor +from bonobo.constants import NOT_MODIFIED from bonobo.structs.bags import Bag +from bonobo.util.compat import deprecated from bonobo.util.objects import ValueHolder from bonobo.util.term import CLEAR_EOL -from bonobo.constants import NOT_MODIFIED __all__ = [ 'identity', @@ -87,8 +87,12 @@ class PrettyPrinter(Configurable): ) -pprint = PrettyPrinter() -pprint.__name__ = 'pprint' +_pprint = PrettyPrinter() + + +@deprecated +def pprint(*args, **kwargs): + return _pprint(*args, **kwargs) def PrettyPrint(title_keys=('title', 'name', 'id'), print_values=True, sort=True): diff --git a/bonobo/nodes/io/csv.py b/bonobo/nodes/io/csv.py index ae68bd0..75fffe8 100644 --- a/bonobo/nodes/io/csv.py +++ b/bonobo/nodes/io/csv.py @@ -26,7 +26,7 @@ class CsvHandler(FileHandler): """ delimiter = Option(str, default=';') quotechar = Option(str, default='"') - headers = Option(tuple) + headers = Option(tuple, required=False) class CsvReader(IOFormatEnabled, FileReader, CsvHandler): diff --git a/bonobo/nodes/io/pickle.py b/bonobo/nodes/io/pickle.py index e94f94a..d9da55f 100644 --- a/bonobo/nodes/io/pickle.py +++ b/bonobo/nodes/io/pickle.py @@ -17,7 +17,7 @@ class PickleHandler(FileHandler): """ - item_names = Option(tuple) + item_names = Option(tuple, required=False) class PickleReader(IOFormatEnabled, FileReader, PickleHandler): diff --git a/bonobo/nodes/throttle.py 
b/bonobo/nodes/throttle.py new file mode 100644 index 0000000..2f08cd3 --- /dev/null +++ b/bonobo/nodes/throttle.py @@ -0,0 +1,55 @@ +import threading +import time + +from bonobo.config import Configurable, ContextProcessor, Method, Option + + +class RateLimitBucket(threading.Thread): + daemon = True + + @property + def stopped(self): + return self._stop_event.is_set() + + def __init__(self, initial=1, period=1, amount=1): + super(RateLimitBucket, self).__init__() + self.semaphore = threading.BoundedSemaphore(initial) + self.amount = amount + self.period = period + + self._stop_event = threading.Event() + + def stop(self): + self._stop_event.set() + + def run(self): + while not self.stopped: + time.sleep(self.period) + for _ in range(self.amount): + self.semaphore.release() + + def wait(self): + return self.semaphore.acquire() + + +class RateLimited(Configurable): + handler = Method() + + initial = Option(int, positional=True, default=1) + period = Option(int, positional=True, default=1) + amount = Option(int, positional=True, default=1) + + @ContextProcessor + def bucket(self, context): + print(context) + bucket = RateLimitBucket(self.initial, self.period, self.amount) + bucket.start() + print(bucket) + yield bucket + bucket.stop() + bucket.join() + + def call(self, bucket, *args, **kwargs): + print(bucket, args, kwargs) + bucket.wait() + return self.handler(*args, **kwargs) diff --git a/bonobo/settings.py b/bonobo/settings.py index e0e5289..8e8a780 100644 --- a/bonobo/settings.py +++ b/bonobo/settings.py @@ -27,7 +27,7 @@ class Setting: self.validator = None def __repr__(self): - return ''.format(self.name, self.value) + return ''.format(self.name, self.get()) def set(self, value): if self.validator and not self.validator(value): diff --git a/bonobo/util/collections.py b/bonobo/util/collections.py new file mode 100644 index 0000000..b97630a --- /dev/null +++ b/bonobo/util/collections.py @@ -0,0 +1,6 @@ +import bisect + + +class sortedlist(list): + def
insort(self, x): + bisect.insort(self, x) \ No newline at end of file diff --git a/bonobo/util/inspect.py b/bonobo/util/inspect.py new file mode 100644 index 0000000..72fcc7e --- /dev/null +++ b/bonobo/util/inspect.py @@ -0,0 +1,114 @@ +from collections import namedtuple + + +def isconfigurabletype(mixed): + """ + Check if the given argument is an instance of :class:`bonobo.config.ConfigurableMeta`, meaning it has all the + plumbing necessary to build :class:`bonobo.config.Configurable`-like instances. + + :param mixed: + :return: bool + """ + from bonobo.config.configurables import ConfigurableMeta + return isinstance(mixed, ConfigurableMeta) + + +def isconfigurable(mixed): + """ + Check if the given argument is an instance of :class:`bonobo.config.Configurable`. + + :param mixed: + :return: bool + """ + from bonobo.config.configurables import Configurable + return isinstance(mixed, Configurable) + + +def isoption(mixed): + """ + Check if the given argument is an instance of :class:`bonobo.config.Option`. + + :param mixed: + :return: bool + """ + + from bonobo.config.options import Option + return isinstance(mixed, Option) + + +def ismethod(mixed): + """ + Check if the given argument is an instance of :class:`bonobo.config.Method`. + + :param mixed: + :return: bool + """ + from bonobo.config.options import Method + return isinstance(mixed, Method) + + +def iscontextprocessor(x): + """ + Check if the given argument is an instance of :class:`bonobo.config.ContextProcessor`. + + :param x: + :return: bool + """ + from bonobo.config.processors import ContextProcessor + return isinstance(x, ContextProcessor) + + +def istype(mixed): + """ + Check if the given argument is a type object.
+ + :param mixed: + :return: bool + """ + return isinstance(mixed, type) + + +ConfigurableInspection = namedtuple('ConfigurableInspection', + [ + 'type', + 'instance', + 'options', + 'processors', + 'partial', + ]) + +ConfigurableInspection.__enter__ = lambda self: self +ConfigurableInspection.__exit__ = lambda *exc_details: None + + +def inspect_node(mixed, *, _partial=None): + """ + If the given argument is somehow a :class:`bonobo.config.Configurable` object (either a subclass, an instance, or + a partially configured instance), then it will return a :class:`ConfigurableInspection` namedtuple, used to inspect + the configurable metadata (options). If you want to get the option values, you don't need this, it is only useful + to perform introspection on a configurable. + + If it does not look like a configurable, it will raise a :class:`TypeError`. + + :param mixed: + :return: ConfigurableInspection + + :raise: TypeError + """ + if isconfigurabletype(mixed): + inst, typ = None, mixed + elif isconfigurable(mixed): + inst, typ = mixed, type(mixed) + elif hasattr(mixed, 'func'): + return inspect_node(mixed.func, _partial=(mixed.args, mixed.keywords)) + else: + raise TypeError( + 'Not a Configurable, nor a Configurable instance and not even a partially configured Configurable.
Check your inputs.') + + return ConfigurableInspection( + typ, + inst, + list(typ.__options__), + list(typ.__processors__), + _partial, + ) diff --git a/tests/config/test_configurables.py b/tests/config/test_configurables.py index 178c188..f1c5387 100644 --- a/tests/config/test_configurables.py +++ b/tests/config/test_configurables.py @@ -2,12 +2,17 @@ import pytest from bonobo.config.configurables import Configurable from bonobo.config.options import Option +from bonobo.util.inspect import inspect_node + + +class NoOptConfigurable(Configurable): + pass class MyConfigurable(Configurable): - required_str = Option(str, required=True) + required_str = Option(str) default_str = Option(str, default='foo') - integer = Option(int) + integer = Option(int, required=False) class MyHarderConfigurable(MyConfigurable): @@ -25,14 +30,20 @@ class MyConfigurableUsingPositionalOptions(MyConfigurable): def test_missing_required_option_error(): + with inspect_node(MyConfigurable()) as ni: + assert ni.partial + with pytest.raises(TypeError) as exc: - MyConfigurable() + MyConfigurable(_final=True) assert exc.match('missing 1 required option:') def test_missing_required_options_error(): + with inspect_node(MyHarderConfigurable()) as ni: + assert ni.partial + with pytest.raises(TypeError) as exc: - MyHarderConfigurable() + MyHarderConfigurable(_final=True) assert exc.match('missing 2 required options:') @@ -50,6 +61,10 @@ def test_extraneous_options_error(): def test_defaults(): o = MyConfigurable(required_str='hello') + + with inspect_node(o) as ni: + assert not ni.partial + assert o.required_str == 'hello' assert o.default_str == 'foo' assert o.integer == None @@ -57,6 +72,10 @@ def test_defaults(): def test_str_type_factory(): o = MyConfigurable(required_str=42) + + with inspect_node(o) as ni: + assert not ni.partial + assert o.required_str == '42' assert o.default_str == 'foo' assert o.integer == None @@ -64,6 +83,10 @@ def test_str_type_factory(): def test_int_type_factory(): o = 
MyConfigurable(required_str='yo', default_str='bar', integer='42') + + with inspect_node(o) as ni: + assert not ni.partial + assert o.required_str == 'yo' assert o.default_str == 'bar' assert o.integer == 42 @@ -71,6 +94,10 @@ def test_int_type_factory(): def test_bool_type_factory(): o = MyHarderConfigurable(required_str='yes', also_required='True') + + with inspect_node(o) as ni: + assert not ni.partial + assert o.required_str == 'yes' assert o.default_str == 'foo' assert o.integer == None @@ -79,6 +106,10 @@ def test_bool_type_factory(): def test_option_resolution_order(): o = MyBetterConfigurable() + + with inspect_node(o) as ni: + assert not ni.partial + assert o.required_str == 'kaboom' assert o.default_str == 'foo' assert o.integer == None @@ -86,3 +117,21 @@ def test_option_resolution_order(): def test_option_positional(): o = MyConfigurableUsingPositionalOptions('1', '2', '3', required_str='hello') + + with inspect_node(o) as ni: + assert not ni.partial + + assert o.first == '1' + assert o.second == '2' + assert o.third == '3' + assert o.required_str == 'hello' + assert o.default_str == 'foo' + assert o.integer is None + + +def test_no_opt_configurable(): + o = NoOptConfigurable() + + with inspect_node(o) as ni: + assert not ni.partial + diff --git a/tests/config/test_methods.py b/tests/config/test_methods.py index 3a5f6a3..a4e4ebb 100644 --- a/tests/config/test_methods.py +++ b/tests/config/test_methods.py @@ -1,7 +1,5 @@ -import pytest - from bonobo.config import Configurable, Method, Option -from bonobo.errors import ConfigurationError +from bonobo.util.inspect import inspect_node class MethodBasedConfigurable(Configurable): @@ -13,22 +11,56 @@ class MethodBasedConfigurable(Configurable): self.handler(*args, **kwargs) -def test_one_wrapper_only(): - with pytest.raises(ConfigurationError): +def test_multiple_wrapper_suppored(): + class TwoMethods(Configurable): + h1 = Method(required=True) + h2 = Method(required=True) - class TwoMethods(Configurable): - 
h1 = Method() - h2 = Method() + with inspect_node(TwoMethods) as ci: + assert ci.type == TwoMethods + assert not ci.instance + assert len(ci.options) == 2 + assert not len(ci.processors) + assert not ci.partial + + @TwoMethods + def OneMethod(): + pass + + with inspect_node(OneMethod) as ci: + assert ci.type == TwoMethods + assert not ci.instance + assert len(ci.options) == 2 + assert not len(ci.processors) + assert ci.partial + + @OneMethod + def transformation(): + pass + + with inspect_node(transformation) as ci: + assert ci.type == TwoMethods + assert ci.instance + assert len(ci.options) == 2 + assert not len(ci.processors) + assert not ci.partial def test_define_with_decorator(): calls = [] - @MethodBasedConfigurable - def Concrete(self, *args, **kwargs): - calls.append((args, kwargs, )) + def my_handler(*args, **kwargs): + calls.append((args, kwargs,)) + + Concrete = MethodBasedConfigurable(my_handler) assert callable(Concrete.handler) + assert Concrete.handler == my_handler + + with inspect_node(Concrete) as ci: + assert ci.type == MethodBasedConfigurable + assert ci.partial + t = Concrete('foo', bar='baz') assert callable(t.handler) @@ -37,13 +69,29 @@ def test_define_with_decorator(): assert len(calls) == 1 +def test_late_binding_method_decoration(): + calls = [] + + @MethodBasedConfigurable(foo='foo') + def Concrete(*args, **kwargs): + calls.append((args, kwargs,)) + + assert callable(Concrete.handler) + t = Concrete(bar='baz') + + assert callable(t.handler) + assert len(calls) == 0 + t() + assert len(calls) == 1 + + def test_define_with_argument(): calls = [] def concrete_handler(*args, **kwargs): - calls.append((args, kwargs, )) + calls.append((args, kwargs,)) - t = MethodBasedConfigurable('foo', bar='baz', handler=concrete_handler) + t = MethodBasedConfigurable(concrete_handler, 'foo', bar='baz') assert callable(t.handler) assert len(calls) == 0 t() @@ -55,7 +103,7 @@ def test_define_with_inheritance(): class Inheriting(MethodBasedConfigurable): def 
handler(self, *args, **kwargs): - calls.append((args, kwargs, )) + calls.append((args, kwargs,)) t = Inheriting('foo', bar='baz') assert callable(t.handler) @@ -71,8 +119,8 @@ def test_inheritance_then_decorate(): pass @Inheriting - def Concrete(self, *args, **kwargs): - calls.append((args, kwargs, )) + def Concrete(*args, **kwargs): + calls.append((args, kwargs,)) assert callable(Concrete.handler) t = Concrete('foo', bar='baz') diff --git a/tests/config/test_methods_partial.py b/tests/config/test_methods_partial.py new file mode 100644 index 0000000..fdb1111 --- /dev/null +++ b/tests/config/test_methods_partial.py @@ -0,0 +1,66 @@ +from unittest.mock import MagicMock + +from bonobo.config import Configurable, ContextProcessor, Method, Option +from bonobo.util.inspect import inspect_node + + +class Bobby(Configurable): + handler = Method() + handler2 = Method() + foo = Option(positional=True) + bar = Option(required=False) + + @ContextProcessor + def think(self, context): + yield 'different' + + def call(self, think, *args, **kwargs): + self.handler('1', *args, **kwargs) + self.handler2('2', *args, **kwargs) + + +def test_partial(): + C = Bobby + + # inspect the configurable class + with inspect_node(C) as ci: + assert ci.type == Bobby + assert not ci.instance + assert len(ci.options) == 4 + assert len(ci.processors) == 1 + assert not ci.partial + + # instanciate a partial instance ... + f1 = MagicMock() + C = C(f1) + + with inspect_node(C) as ci: + assert ci.type == Bobby + assert not ci.instance + assert len(ci.options) == 4 + assert len(ci.processors) == 1 + assert ci.partial + assert ci.partial[0] == (f1,) + assert not len(ci.partial[1]) + + # instanciate a more complete partial instance ... 
+ f2 = MagicMock() + C = C(f2) + + with inspect_node(C) as ci: + assert ci.type == Bobby + assert not ci.instance + assert len(ci.options) == 4 + assert len(ci.processors) == 1 + assert ci.partial + assert ci.partial[0] == (f1, f2,) + assert not len(ci.partial[1]) + + c = C('foo') + + with inspect_node(c) as ci: + assert ci.type == Bobby + assert ci.instance + assert len(ci.options) == 4 + assert len(ci.processors) == 1 + assert not ci.partial diff --git a/tests/test_basics.py b/tests/test_basics.py index 283e3d7..5230b0b 100644 --- a/tests/test_basics.py +++ b/tests/test_basics.py @@ -5,6 +5,7 @@ import pytest import bonobo from bonobo.config.processors import ContextCurrifier from bonobo.constants import NOT_MODIFIED +from bonobo.util.inspect import inspect_node def test_count(): From 2ff19c18a68a69b5e9228739906635d1d1a9ce5e Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 5 Jul 2017 11:34:34 +0200 Subject: [PATCH 04/71] [errors] Implements unrecoverable errors, that will raise and stop the transformation. Used when an invalid ioformat is used. 
--- bonobo/errors.py | 14 ++ bonobo/examples/datasets/coffeeshops.json | 250 +++++++++++----------- bonobo/examples/datasets/coffeeshops.txt | 248 ++++++++++----------- bonobo/execution/node.py | 5 +- bonobo/nodes/io/base.py | 13 +- 5 files changed, 274 insertions(+), 256 deletions(-) diff --git a/bonobo/errors.py b/bonobo/errors.py index 564950d..8510a50 100644 --- a/bonobo/errors.py +++ b/bonobo/errors.py @@ -60,3 +60,17 @@ class ConfigurationError(Exception): class MissingServiceImplementationError(KeyError): pass + + +class UnrecoverableError(Exception): + """Flag for errors that must interrupt the workflow, either because they will happen for sure on each node run, or + because you know that your transformation has no point continuing running after a bad event.""" + +class UnrecoverableValueError(UnrecoverableError, ValueError): + pass + +class UnrecoverableRuntimeError(UnrecoverableError, RuntimeError): + pass + +class UnrecoverableNotImplementedError(UnrecoverableError, NotImplementedError): + pass diff --git a/bonobo/examples/datasets/coffeeshops.json b/bonobo/examples/datasets/coffeeshops.json index 60e89b1..391b5e8 100644 --- a/bonobo/examples/datasets/coffeeshops.json +++ b/bonobo/examples/datasets/coffeeshops.json @@ -1,182 +1,182 @@ -{"Le Reynou": "2 bis quai de la m\u00e9gisserie, 75001 Paris, France", -"les montparnos": "65 boulevard Pasteur, 75015 Paris, France", -"Le Saint Jean": "23 rue des abbesses, 75018 Paris, France", -"Le Felteu": "1 rue Pecquay, 75004 Paris, France", +{"les montparnos": "65 boulevard Pasteur, 75015 Paris, France", +"Coffee Chope": "344Vrue Vaugirard, 75015 Paris, France", +"Caf\u00e9 Lea": "5 rue Claude Bernard, 75005 Paris, France", +"Le Bellerive": "71 quai de Seine, 75019 Paris, France", +"Le drapeau de la fidelit\u00e9": "21 rue Copreaux, 75015 Paris, France", "O q de poule": "53 rue du ruisseau, 75018 Paris, France", +"Le caf\u00e9 des amis": "125 rue Blomet, 75015 Paris, France", "Le chantereine": "51 Rue Victoire,
75009 Paris, France", "Le M\u00fcller": "11 rue Feutrier, 75018 Paris, France", -"La Caravane": "Rue de la Fontaine au Roi, 75011 Paris, France", -"Le Pas Sage": "1 Passage du Grand Cerf, 75002 Paris, France", -"La Renaissance": "112 Rue Championnet, 75018 Paris, France", "Ext\u00e9rieur Quai": "5, rue d'Alsace, 75010 Paris, France", -"Le Sully": "6 Bd henri IV, 75004 Paris, France", -"Le drapeau de la fidelit\u00e9": "21 rue Copreaux, 75015 Paris, France", -"Le caf\u00e9 des amis": "125 rue Blomet, 75015 Paris, France", -"Le Kleemend's": "34 avenue Pierre Mend\u00e8s-France, 75013 Paris, France", -"Assaporare Dix sur Dix": "75, avenue Ledru-Rollin, 75012 Paris, France", -"Caf\u00e9 Pierre": "202 rue du faubourg st antoine, 75012 Paris, France", -"Le Caf\u00e9 Livres": "10 rue Saint Martin, 75004 Paris, France", -"Le Chaumontois": "12 rue Armand Carrel, 75018 Paris, France", -"Le Square": "31 rue Saint-Dominique, 75007 Paris, France", -"Les Arcades": "61 rue de Ponthieu, 75008 Paris, France", +"La Bauloise": "36 rue du hameau, 75015 Paris, France", +"Le Dellac": "14 rue Rougemont, 75009 Paris, France", "Le Bosquet": "46 avenue Bosquet, 75007 Paris, France", +"Le Sully": "6 Bd henri IV, 75004 Paris, France", +"Le Felteu": "1 rue Pecquay, 75004 Paris, France", "Le bistrot de Ma\u00eblle et Augustin": "42 rue coquill\u00e8re, 75001 Paris, France", "D\u00e9d\u00e9 la frite": "52 rue Notre-Dame des Victoires, 75002 Paris, France", "Cardinal Saint-Germain": "11 boulevard Saint-Germain, 75005 Paris, France", +"Le Reynou": "2 bis quai de la m\u00e9gisserie, 75001 Paris, France", +"Aux cadrans": "21 ter boulevard Diderot, 75012 Paris, France", +"Le Saint Jean": "23 rue des abbesses, 75018 Paris, France", +"La Renaissance": "112 Rue Championnet, 75018 Paris, France", +"Le Square": "31 rue Saint-Dominique, 75007 Paris, France", +"Les Arcades": "61 rue de Ponthieu, 75008 Paris, France", +"Le Kleemend's": "34 avenue Pierre Mend\u00e8s-France, 75013 Paris, France", +"Assaporare 
Dix sur Dix": "75, avenue Ledru-Rollin, 75012 Paris, France", +"Caf\u00e9 Pierre": "202 rue du faubourg st antoine, 75012 Paris, France", "Caf\u00e9 antoine": "17 rue Jean de la Fontaine, 75016 Paris, France", "Au cerceau d'or": "129 boulevard sebastopol, 75002 Paris, France", -"Aux cadrans": "21 ter boulevard Diderot, 75012 Paris, France", -"Caf\u00e9 Lea": "5 rue Claude Bernard, 75005 Paris, France", -"Le Bellerive": "71 quai de Seine, 75019 Paris, France", -"La Bauloise": "36 rue du hameau, 75015 Paris, France", -"Le Dellac": "14 rue Rougemont, 75009 Paris, France", +"La Caravane": "Rue de la Fontaine au Roi, 75011 Paris, France", +"Le Pas Sage": "1 Passage du Grand Cerf, 75002 Paris, France", +"Le Caf\u00e9 Livres": "10 rue Saint Martin, 75004 Paris, France", +"Le Chaumontois": "12 rue Armand Carrel, 75018 Paris, France", +"Drole d'endroit pour une rencontre": "58 rue de Montorgueil, 75002 Paris, France", +"Le pari's caf\u00e9": "104 rue caulaincourt, 75018 Paris, France", +"Le Poulailler": "60 rue saint-sabin, 75011 Paris, France", +"Chai 33": "33 Cour Saint Emilion, 75012 Paris, France", +"L'Assassin": "99 rue Jean-Pierre Timbaud, 75011 Paris, France", +"l'Usine": "1 rue d'Avron, 75020 Paris, France", +"La Bricole": "52 rue Liebniz, 75018 Paris, France", +"le ronsard": "place maubert, 75005 Paris, France", +"Face Bar": "82 rue des archives, 75003 Paris, France", +"American Kitchen": "49 rue bichat, 75010 Paris, France", +"La Marine": "55 bis quai de valmy, 75010 Paris, France", +"Le Bloc": "21 avenue Brochant, 75017 Paris, France", +"La Recoleta au Manoir": "229 avenue Gambetta, 75020 Paris, France", +"Le Pareloup": "80 Rue Saint-Charles, 75015 Paris, France", +"La Brasserie Gait\u00e9": "3 rue de la Gait\u00e9, 75014 Paris, France", +"Caf\u00e9 Zen": "46 rue Victoire, 75009 Paris, France", +"O'Breizh": "27 rue de Penthi\u00e8vre, 75008 Paris, France", +"Le Petit Choiseul": "23 rue saint augustin, 75002 Paris, France", +"Invitez vous chez nous": "7 rue 
Ep\u00e9e de Bois, 75005 Paris, France", +"La Cordonnerie": "142 Rue Saint-Denis 75002 Paris, 75002 Paris, France", +"Le Supercoin": "3, rue Baudelique, 75018 Paris, France", +"Populettes": "86 bis rue Riquet, 75018 Paris, France", +"Au bon coin": "49 rue des Cloys, 75018 Paris, France", +"Le Couvent": "69 rue Broca, 75013 Paris, France", +"La Br\u00fblerie des Ternes": "111 rue mouffetard, 75005 Paris, France", +"L'\u00c9cir": "59 Boulevard Saint-Jacques, 75014 Paris, France", +"Le Chat bossu": "126, rue du Faubourg Saint Antoine, 75012 Paris, France", +"Denfert caf\u00e9": "58 boulvevard Saint Jacques, 75014 Paris, France", +"Le Caf\u00e9 frapp\u00e9": "95 rue Montmartre, 75002 Paris, France", +"La Perle": "78 rue vieille du temple, 75003 Paris, France", +"Le Descartes": "1 rue Thouin, 75005 Paris, France", +"Bagels & Coffee Corner": "Place de Clichy, 75017 Paris, France", +"Le petit club": "55 rue de la tombe Issoire, 75014 Paris, France", +"Le Plein soleil": "90 avenue Parmentier, 75011 Paris, France", +"Le Relais Haussmann": "146, boulevard Haussmann, 75008 Paris, France", +"Le Malar": "88 rue Saint-Dominique, 75007 Paris, France", +"Au panini de la place": "47 rue Belgrand, 75020 Paris, France", +"Le Village": "182 rue de Courcelles, 75017 Paris, France", +"Pause Caf\u00e9": "41 rue de Charonne, 75011 Paris, France", +"Le Pure caf\u00e9": "14 rue Jean Mac\u00e9, 75011 Paris, France", +"Extra old caf\u00e9": "307 fg saint Antoine, 75011 Paris, France", +"Chez Fafa": "44 rue Vinaigriers, 75010 Paris, France", +"En attendant l'or": "3 rue Faidherbe, 75011 Paris, France", "Br\u00fblerie San Jos\u00e9": "30 rue des Petits-Champs, 75002 Paris, France", "Caf\u00e9 de la Mairie (du VIII)": "rue de Lisbonne, 75008 Paris, France", -"Le General Beuret": "9 Place du General Beuret, 75015 Paris, France", -"Le Cap Bourbon": "1 rue Louis le Grand, 75002 Paris, France", -"En attendant l'or": "3 rue Faidherbe, 75011 Paris, France", "Caf\u00e9 Martin": "2 place Martin Nadaud, 
75001 Paris, France", "Etienne": "14 rue Turbigo, Paris, 75001 Paris, France", "L'ing\u00e9nu": "184 bd Voltaire, 75011 Paris, France", -"Le Biz": "18 rue Favart, 75002 Paris, France", "L'Olive": "8 rue L'Olive, 75018 Paris, France", -"Le pari's caf\u00e9": "104 rue caulaincourt, 75018 Paris, France", -"Le Poulailler": "60 rue saint-sabin, 75011 Paris, France", -"La Marine": "55 bis quai de valmy, 75010 Paris, France", -"American Kitchen": "49 rue bichat, 75010 Paris, France", -"Chai 33": "33 Cour Saint Emilion, 75012 Paris, France", -"Face Bar": "82 rue des archives, 75003 Paris, France", -"Le Bloc": "21 avenue Brochant, 75017 Paris, France", -"La Bricole": "52 rue Liebniz, 75018 Paris, France", -"le ronsard": "place maubert, 75005 Paris, France", -"l'Usine": "1 rue d'Avron, 75020 Paris, France", -"La Cordonnerie": "142 Rue Saint-Denis 75002 Paris, 75002 Paris, France", -"Invitez vous chez nous": "7 rue Ep\u00e9e de Bois, 75005 Paris, France", -"Le sully": "13 rue du Faubourg Saint Denis, 75010 Paris, France", -"Le Ragueneau": "202 rue Saint-Honor\u00e9, 75001 Paris, France", +"Le Biz": "18 rue Favart, 75002 Paris, France", +"Le Cap Bourbon": "1 rue Louis le Grand, 75002 Paris, France", +"Le General Beuret": "9 Place du General Beuret, 75015 Paris, France", "Le Germinal": "95 avenue Emile Zola, 75015 Paris, France", +"Le Ragueneau": "202 rue Saint-Honor\u00e9, 75001 Paris, France", "Le refuge": "72 rue lamarck, 75018 Paris, France", -"Drole d'endroit pour une rencontre": "58 rue de Montorgueil, 75002 Paris, France", -"Le Petit Choiseul": "23 rue saint augustin, 75002 Paris, France", -"O'Breizh": "27 rue de Penthi\u00e8vre, 75008 Paris, France", -"Le Supercoin": "3, rue Baudelique, 75018 Paris, France", -"Populettes": "86 bis rue Riquet, 75018 Paris, France", -"La Recoleta au Manoir": "229 avenue Gambetta, 75020 Paris, France", -"L'Assassin": "99 rue Jean-Pierre Timbaud, 75011 Paris, France", -"Le Pareloup": "80 Rue Saint-Charles, 75015 Paris, France", -"Caf\u00e9 
Zen": "46 rue Victoire, 75009 Paris, France", -"La Brasserie Gait\u00e9": "3 rue de la Gait\u00e9, 75014 Paris, France", -"Au bon coin": "49 rue des Cloys, 75018 Paris, France", -"La Br\u00fblerie des Ternes": "111 rue mouffetard, 75005 Paris, France", -"Le Chat bossu": "126, rue du Faubourg Saint Antoine, 75012 Paris, France", -"Denfert caf\u00e9": "58 boulvevard Saint Jacques, 75014 Paris, France", -"Le Couvent": "69 rue Broca, 75013 Paris, France", -"Bagels & Coffee Corner": "Place de Clichy, 75017 Paris, France", -"La Perle": "78 rue vieille du temple, 75003 Paris, France", -"Le Caf\u00e9 frapp\u00e9": "95 rue Montmartre, 75002 Paris, France", -"L'\u00c9cir": "59 Boulevard Saint-Jacques, 75014 Paris, France", -"Le Descartes": "1 rue Thouin, 75005 Paris, France", -"Le petit club": "55 rue de la tombe Issoire, 75014 Paris, France", -"Le Relais Haussmann": "146, boulevard Haussmann, 75008 Paris, France", -"Au panini de la place": "47 rue Belgrand, 75020 Paris, France", -"Extra old caf\u00e9": "307 fg saint Antoine, 75011 Paris, France", -"Le Plein soleil": "90 avenue Parmentier, 75011 Paris, France", -"Le Pure caf\u00e9": "14 rue Jean Mac\u00e9, 75011 Paris, France", -"Le Village": "182 rue de Courcelles, 75017 Paris, France", -"Le Malar": "88 rue Saint-Dominique, 75007 Paris, France", -"Pause Caf\u00e9": "41 rue de Charonne, 75011 Paris, France", -"Chez Fafa": "44 rue Vinaigriers, 75010 Paris, France", -"Caf\u00e9 dans l'aerogare Air France Invalides": "2 rue Robert Esnault Pelterie, 75007 Paris, France", -"Le relais de la victoire": "73 rue de la Victoire, 75009 Paris, France", -"Caprice caf\u00e9": "12 avenue Jean Moulin, 75014 Paris, France", -"Caves populaires": "22 rue des Dames, 75017 Paris, France", -"Cafe de grenelle": "188 rue de Grenelle, 75007 Paris, France", -"Chez Prune": "36 rue Beaurepaire, 75010 Paris, France", -"L'anjou": "1 rue de Montholon, 75009 Paris, France", -"Le Brio": "216, rue Marcadet, 75018 Paris, France", -"Tamm Bara": "7 rue Clisson, 
75013 Paris, France", -"La chaumi\u00e8re gourmande": "Route de la Muette \u00e0 Neuilly", -"Club hippique du Jardin d\u2019Acclimatation": "75016 Paris, France", -"Les P\u00e8res Populaires": "46 rue de Buzenval, 75020 Paris, France", -"Epicerie Musicale": "55bis quai de Valmy, 75010 Paris, France", -"Le Centenaire": "104 rue amelot, 75011 Paris, France", -"Le Zazabar": "116 Rue de M\u00e9nilmontant, 75020 Paris, France", -"Ragueneau": "202 rue Saint Honor\u00e9, 75001 Paris, France", -"L'In\u00e9vitable": "22 rue Linn\u00e9, 75005 Paris, France", +"Le sully": "13 rue du Faubourg Saint Denis, 75010 Paris, France", "Le Dunois": "77 rue Dunois, 75013 Paris, France", "La Montagne Sans Genevi\u00e8ve": "13 Rue du Pot de Fer, 75005 Paris, France", "Le Caminito": "48 rue du Dessous des Berges, 75013 Paris, France", "Le petit Bretonneau": "Le petit Bretonneau - \u00e0 l'int\u00e9rieur de l'H\u00f4pital, 75018 Paris, France", +"La chaumi\u00e8re gourmande": "Route de la Muette \u00e0 Neuilly", +"Club hippique du Jardin d\u2019Acclimatation": "75016 Paris, France", "Le bal du pirate": "60 rue des bergers, 75015 Paris, France", +"Le Zazabar": "116 Rue de M\u00e9nilmontant, 75020 Paris, France", "L'antre d'eux": "16 rue DE MEZIERES, 75006 Paris, France", "l'orillon bar": "35 rue de l'orillon, 75011 Paris, France", "zic zinc": "95 rue claude decaen, 75012 Paris, France", +"Les P\u00e8res Populaires": "46 rue de Buzenval, 75020 Paris, France", +"Epicerie Musicale": "55bis quai de Valmy, 75010 Paris, France", +"Le relais de la victoire": "73 rue de la Victoire, 75009 Paris, France", +"Le Centenaire": "104 rue amelot, 75011 Paris, France", +"Cafe de grenelle": "188 rue de Grenelle, 75007 Paris, France", +"Ragueneau": "202 rue Saint Honor\u00e9, 75001 Paris, France", "Caf\u00e9 Pistache": "9 rue des petits champs, 75001 Paris, France", "La Cagnotte": "13 Rue Jean-Baptiste Dumay, 75020 Paris, France", -"bistrot les timbr\u00e9s": "14 rue d'alleray, 75015 Paris, France", "Le Killy 
Jen": "28 bis boulevard Diderot, 75012 Paris, France", "Caf\u00e9 beauveau": "9 rue de Miromesnil, 75008 Paris, France", "le 1 cinq": "172 rue de vaugirard, 75015 Paris, France", -"Au Vin Des Rues": "21 rue Boulard, 75014 Paris, France", "Les Artisans": "106 rue Lecourbe, 75015 Paris, France", "Peperoni": "83 avenue de Wagram, 75001 Paris, France", -"Le BB (Bouchon des Batignolles)": "2 rue Lemercier, 75017 Paris, France", -"La Libert\u00e9": "196 rue du faubourg saint-antoine, 75012 Paris, France", -"Chez Rutabaga": "16 rue des Petits Champs, 75002 Paris, France", -"La cantoche de Paname": "40 Boulevard Beaumarchais, 75011 Paris, France", -"Le Saint Ren\u00e9": "148 Boulevard de Charonne, 75020 Paris, France", -"La Brocante": "10 rue Rossini, 75009 Paris, France", -"Caf\u00e9 Clochette": "16 avenue Richerand, 75010 Paris, France", -"L'europ\u00e9en": "21 Bis Boulevard Diderot, 75012 Paris, France", -"NoMa": "39 rue Notre Dame de Nazareth, 75003 Paris, France", -"O'Paris": "1 Rue des Envierges, 75020 Paris, France", +"Le Brio": "216, rue Marcadet, 75018 Paris, France", +"Tamm Bara": "7 rue Clisson, 75013 Paris, France", +"Caf\u00e9 dans l'aerogare Air France Invalides": "2 rue Robert Esnault Pelterie, 75007 Paris, France", +"bistrot les timbr\u00e9s": "14 rue d'alleray, 75015 Paris, France", +"Caprice caf\u00e9": "12 avenue Jean Moulin, 75014 Paris, France", +"Caves populaires": "22 rue des Dames, 75017 Paris, France", +"Au Vin Des Rues": "21 rue Boulard, 75014 Paris, France", +"Chez Prune": "36 rue Beaurepaire, 75010 Paris, France", +"L'In\u00e9vitable": "22 rue Linn\u00e9, 75005 Paris, France", +"L'anjou": "1 rue de Montholon, 75009 Paris, France", "Botak cafe": "1 rue Paul albert, 75018 Paris, France", -"La cantine de Zo\u00e9": "136 rue du Faubourg poissonni\u00e8re, 75010 Paris, France", -"Les caves populaires": "22 rue des Dames, 75017 Paris, France", -"Le Plomb du cantal": "3 rue Ga\u00eet\u00e9, 75014 Paris, France", -"Trois pi\u00e8ces cuisine": "101 rue 
des dames, 75017 Paris, France", -"Le Zinc": "61 avenue de la Motte Picquet, 75015 Paris, France", -"L'avant comptoir": "3 carrefour de l'Od\u00e9on, 75006 Paris, France", -"Les Vendangeurs": "6/8 rue Stanislas, 75006 Paris, France", -"Chez Luna": "108 rue de M\u00e9nilmontant, 75020 Paris, France", -"Le bar Fleuri": "1 rue du Plateau, 75019 Paris, France", "Bistrot Saint-Antoine": "58 rue du Fbg Saint-Antoine, 75012 Paris, France", "Chez Oscar": "11/13 boulevard Beaumarchais, 75004 Paris, France", "Le Piquet": "48 avenue de la Motte Picquet, 75015 Paris, France", +"L'avant comptoir": "3 carrefour de l'Od\u00e9on, 75006 Paris, France", "le chateau d'eau": "67 rue du Ch\u00e2teau d'eau, 75010 Paris, France", +"Les Vendangeurs": "6/8 rue Stanislas, 75006 Paris, France", "maison du vin": "52 rue des plantes, 75014 Paris, France", "Le Tournebride": "104 rue Mouffetard, 75005 Paris, France", "Le Fronton": "63 rue de Ponthieu, 75008 Paris, France", +"Le BB (Bouchon des Batignolles)": "2 rue Lemercier, 75017 Paris, France", +"La cantine de Zo\u00e9": "136 rue du Faubourg poissonni\u00e8re, 75010 Paris, France", +"Chez Rutabaga": "16 rue des Petits Champs, 75002 Paris, France", +"Les caves populaires": "22 rue des Dames, 75017 Paris, France", +"Le Plomb du cantal": "3 rue Ga\u00eet\u00e9, 75014 Paris, France", +"Trois pi\u00e8ces cuisine": "101 rue des dames, 75017 Paris, France", +"La Brocante": "10 rue Rossini, 75009 Paris, France", +"Le Zinc": "61 avenue de la Motte Picquet, 75015 Paris, France", +"Chez Luna": "108 rue de M\u00e9nilmontant, 75020 Paris, France", +"Le bar Fleuri": "1 rue du Plateau, 75019 Paris, France", +"La Libert\u00e9": "196 rue du faubourg saint-antoine, 75012 Paris, France", +"La cantoche de Paname": "40 Boulevard Beaumarchais, 75011 Paris, France", +"Le Saint Ren\u00e9": "148 Boulevard de Charonne, 75020 Paris, France", +"Caf\u00e9 Clochette": "16 avenue Richerand, 75010 Paris, France", +"L'europ\u00e9en": "21 Bis Boulevard Diderot, 75012 Paris, 
France", +"NoMa": "39 rue Notre Dame de Nazareth, 75003 Paris, France", "le lutece": "380 rue de vaugirard, 75015 Paris, France", +"O'Paris": "1 Rue des Envierges, 75020 Paris, France", "Rivolux": "16 rue de Rivoli, 75004 Paris, France", "Brasiloja": "16 rue Ganneron, 75018 Paris, France", -"Le caf\u00e9 Monde et M\u00e9dias": "Place de la R\u00e9publique, 75003 Paris, France", -"L'entrep\u00f4t": "157 rue Bercy 75012 Paris, 75012 Paris, France", -"Coffee Chope": "344Vrue Vaugirard, 75015 Paris, France", -"l'El\u00e9phant du nil": "125 Rue Saint-Antoine, 75004 Paris, France", -"Le Parc Vaugirard": "358 rue de Vaugirard, 75015 Paris, France", -"Pari's Caf\u00e9": "174 avenue de Clichy, 75017 Paris, France", -"Le Comptoir": "354 bis rue Vaugirard, 75015 Paris, France", -"Caf\u00e9 Varenne": "36 rue de Varenne, 75007 Paris, France", -"Melting Pot": "3 rue de Lagny, 75020 Paris, France", -"le Zango": "58 rue Daguerre, 75014 Paris, France", -"Chez Miamophile": "6 rue M\u00e9lingue, 75019 Paris, France", "Institut des Cultures d'Islam": "19-23 rue L\u00e9on, 75018 Paris, France", "Canopy Caf\u00e9 associatif": "19 rue Pajol, 75018 Paris, France", -"Caf\u00e9 rallye tournelles": "11 Quai de la Tournelle, 75005 Paris, France", "Petits Freres des Pauvres": "47 rue de Batignolles, 75017 Paris, France", -"Brasserie le Morvan": "61 rue du ch\u00e2teau d'eau, 75010 Paris, France", +"Le Lucernaire": "53 rue Notre-Dame des Champs, 75006 Paris, France", "L'Angle": "28 rue de Ponthieu, 75008 Paris, France", +"Le Caf\u00e9 d'avant": "35 rue Claude Bernard, 75005 Paris, France", "Caf\u00e9 Dupont": "198 rue de la Convention, 75015 Paris, France", +"Le S\u00e9vign\u00e9": "15 rue du Parc Royal, 75003 Paris, France", "L'Entracte": "place de l'opera, 75002 Paris, France", "Panem": "18 rue de Crussol, 75011 Paris, France", "Au pays de Vannes": "34 bis rue de Wattignies, 75012 Paris, France", -"Le Lucernaire": "53 rue Notre-Dame des Champs, 75006 Paris, France", -"Le Caf\u00e9 d'avant": 
"35 rue Claude Bernard, 75005 Paris, France", +"l'El\u00e9phant du nil": "125 Rue Saint-Antoine, 75004 Paris, France", "L'\u00e2ge d'or": "26 rue du Docteur Magnan, 75013 Paris, France", -"Le S\u00e9vign\u00e9": "15 rue du Parc Royal, 75003 Paris, France", +"Le Comptoir": "354 bis rue Vaugirard, 75015 Paris, France", "L'horizon": "93, rue de la Roquette, 75011 Paris, France", "L'empreinte": "54, avenue Daumesnil, 75012 Paris, France", "Caf\u00e9 Victor": "10 boulevard Victor, 75015 Paris, France", +"Caf\u00e9 Varenne": "36 rue de Varenne, 75007 Paris, France", "Le Brigadier": "12 rue Blanche, 75009 Paris, France", -"Waikiki": "10 rue d\"Ulm, 75005 Paris, France"} \ No newline at end of file +"Waikiki": "10 rue d\"Ulm, 75005 Paris, France", +"Le Parc Vaugirard": "358 rue de Vaugirard, 75015 Paris, France", +"Pari's Caf\u00e9": "174 avenue de Clichy, 75017 Paris, France", +"Melting Pot": "3 rue de Lagny, 75020 Paris, France", +"le Zango": "58 rue Daguerre, 75014 Paris, France", +"Chez Miamophile": "6 rue M\u00e9lingue, 75019 Paris, France", +"Le caf\u00e9 Monde et M\u00e9dias": "Place de la R\u00e9publique, 75003 Paris, France", +"Caf\u00e9 rallye tournelles": "11 Quai de la Tournelle, 75005 Paris, France", +"Brasserie le Morvan": "61 rue du ch\u00e2teau d'eau, 75010 Paris, France", +"L'entrep\u00f4t": "157 rue Bercy 75012 Paris, 75012 Paris, France"} \ No newline at end of file diff --git a/bonobo/examples/datasets/coffeeshops.txt b/bonobo/examples/datasets/coffeeshops.txt index 5fe1ef6..b87eacb 100644 --- a/bonobo/examples/datasets/coffeeshops.txt +++ b/bonobo/examples/datasets/coffeeshops.txt @@ -1,182 +1,182 @@ -Le Reynou, 2 bis quai de la mégisserie, 75001 Paris, France les montparnos, 65 boulevard Pasteur, 75015 Paris, France -Le Saint Jean, 23 rue des abbesses, 75018 Paris, France -Le Felteu, 1 rue Pecquay, 75004 Paris, France +Coffee Chope, 344Vrue Vaugirard, 75015 Paris, France +Café Lea, 5 rue Claude Bernard, 75005 Paris, France +Le Bellerive, 71 quai de 
Seine, 75019 Paris, France +Le drapeau de la fidelité, 21 rue Copreaux, 75015 Paris, France O q de poule, 53 rue du ruisseau, 75018 Paris, France +Le café des amis, 125 rue Blomet, 75015 Paris, France Le chantereine, 51 Rue Victoire, 75009 Paris, France Le Müller, 11 rue Feutrier, 75018 Paris, France -La Caravane, Rue de la Fontaine au Roi, 75011 Paris, France -Le Pas Sage, 1 Passage du Grand Cerf, 75002 Paris, France -La Renaissance, 112 Rue Championnet, 75018 Paris, France Extérieur Quai, 5, rue d'Alsace, 75010 Paris, France -Le Sully, 6 Bd henri IV, 75004 Paris, France -Le drapeau de la fidelité, 21 rue Copreaux, 75015 Paris, France -Le café des amis, 125 rue Blomet, 75015 Paris, France -Le Kleemend's, 34 avenue Pierre Mendès-France, 75013 Paris, France -Assaporare Dix sur Dix, 75, avenue Ledru-Rollin, 75012 Paris, France -Café Pierre, 202 rue du faubourg st antoine, 75012 Paris, France -Le Café Livres, 10 rue Saint Martin, 75004 Paris, France -Le Chaumontois, 12 rue Armand Carrel, 75018 Paris, France -Le Square, 31 rue Saint-Dominique, 75007 Paris, France -Les Arcades, 61 rue de Ponthieu, 75008 Paris, France +La Bauloise, 36 rue du hameau, 75015 Paris, France +Le Dellac, 14 rue Rougemont, 75009 Paris, France Le Bosquet, 46 avenue Bosquet, 75007 Paris, France +Le Sully, 6 Bd henri IV, 75004 Paris, France +Le Felteu, 1 rue Pecquay, 75004 Paris, France Le bistrot de Maëlle et Augustin, 42 rue coquillère, 75001 Paris, France Dédé la frite, 52 rue Notre-Dame des Victoires, 75002 Paris, France Cardinal Saint-Germain, 11 boulevard Saint-Germain, 75005 Paris, France +Le Reynou, 2 bis quai de la mégisserie, 75001 Paris, France +Aux cadrans, 21 ter boulevard Diderot, 75012 Paris, France +Le Saint Jean, 23 rue des abbesses, 75018 Paris, France +La Renaissance, 112 Rue Championnet, 75018 Paris, France +Le Square, 31 rue Saint-Dominique, 75007 Paris, France +Les Arcades, 61 rue de Ponthieu, 75008 Paris, France +Le Kleemend's, 34 avenue Pierre Mendès-France, 75013 Paris, 
France +Assaporare Dix sur Dix, 75, avenue Ledru-Rollin, 75012 Paris, France +Café Pierre, 202 rue du faubourg st antoine, 75012 Paris, France Café antoine, 17 rue Jean de la Fontaine, 75016 Paris, France Au cerceau d'or, 129 boulevard sebastopol, 75002 Paris, France -Aux cadrans, 21 ter boulevard Diderot, 75012 Paris, France -Café Lea, 5 rue Claude Bernard, 75005 Paris, France -Le Bellerive, 71 quai de Seine, 75019 Paris, France -La Bauloise, 36 rue du hameau, 75015 Paris, France -Le Dellac, 14 rue Rougemont, 75009 Paris, France +La Caravane, Rue de la Fontaine au Roi, 75011 Paris, France +Le Pas Sage, 1 Passage du Grand Cerf, 75002 Paris, France +Le Café Livres, 10 rue Saint Martin, 75004 Paris, France +Le Chaumontois, 12 rue Armand Carrel, 75018 Paris, France +Drole d'endroit pour une rencontre, 58 rue de Montorgueil, 75002 Paris, France +Le pari's café, 104 rue caulaincourt, 75018 Paris, France +Le Poulailler, 60 rue saint-sabin, 75011 Paris, France +Chai 33, 33 Cour Saint Emilion, 75012 Paris, France +L'Assassin, 99 rue Jean-Pierre Timbaud, 75011 Paris, France +l'Usine, 1 rue d'Avron, 75020 Paris, France +La Bricole, 52 rue Liebniz, 75018 Paris, France +le ronsard, place maubert, 75005 Paris, France +Face Bar, 82 rue des archives, 75003 Paris, France +American Kitchen, 49 rue bichat, 75010 Paris, France +La Marine, 55 bis quai de valmy, 75010 Paris, France +Le Bloc, 21 avenue Brochant, 75017 Paris, France +La Recoleta au Manoir, 229 avenue Gambetta, 75020 Paris, France +Le Pareloup, 80 Rue Saint-Charles, 75015 Paris, France +La Brasserie Gaité, 3 rue de la Gaité, 75014 Paris, France +Café Zen, 46 rue Victoire, 75009 Paris, France +O'Breizh, 27 rue de Penthièvre, 75008 Paris, France +Le Petit Choiseul, 23 rue saint augustin, 75002 Paris, France +Invitez vous chez nous, 7 rue Epée de Bois, 75005 Paris, France +La Cordonnerie, 142 Rue Saint-Denis 75002 Paris, 75002 Paris, France +Le Supercoin, 3, rue Baudelique, 75018 Paris, France +Populettes, 86 bis rue Riquet, 
75018 Paris, France +Au bon coin, 49 rue des Cloys, 75018 Paris, France +Le Couvent, 69 rue Broca, 75013 Paris, France +La Brûlerie des Ternes, 111 rue mouffetard, 75005 Paris, France +L'Écir, 59 Boulevard Saint-Jacques, 75014 Paris, France +Le Chat bossu, 126, rue du Faubourg Saint Antoine, 75012 Paris, France +Denfert café, 58 boulvevard Saint Jacques, 75014 Paris, France +Le Café frappé, 95 rue Montmartre, 75002 Paris, France +La Perle, 78 rue vieille du temple, 75003 Paris, France +Le Descartes, 1 rue Thouin, 75005 Paris, France +Bagels & Coffee Corner, Place de Clichy, 75017 Paris, France +Le petit club, 55 rue de la tombe Issoire, 75014 Paris, France +Le Plein soleil, 90 avenue Parmentier, 75011 Paris, France +Le Relais Haussmann, 146, boulevard Haussmann, 75008 Paris, France +Le Malar, 88 rue Saint-Dominique, 75007 Paris, France +Au panini de la place, 47 rue Belgrand, 75020 Paris, France +Le Village, 182 rue de Courcelles, 75017 Paris, France +Pause Café, 41 rue de Charonne, 75011 Paris, France +Le Pure café, 14 rue Jean Macé, 75011 Paris, France +Extra old café, 307 fg saint Antoine, 75011 Paris, France +Chez Fafa, 44 rue Vinaigriers, 75010 Paris, France +En attendant l'or, 3 rue Faidherbe, 75011 Paris, France Brûlerie San José, 30 rue des Petits-Champs, 75002 Paris, France Café de la Mairie (du VIII), rue de Lisbonne, 75008 Paris, France -Le General Beuret, 9 Place du General Beuret, 75015 Paris, France -Le Cap Bourbon, 1 rue Louis le Grand, 75002 Paris, France -En attendant l'or, 3 rue Faidherbe, 75011 Paris, France Café Martin, 2 place Martin Nadaud, 75001 Paris, France Etienne, 14 rue Turbigo, Paris, 75001 Paris, France L'ingénu, 184 bd Voltaire, 75011 Paris, France -Le Biz, 18 rue Favart, 75002 Paris, France L'Olive, 8 rue L'Olive, 75018 Paris, France -Le pari's café, 104 rue caulaincourt, 75018 Paris, France -Le Poulailler, 60 rue saint-sabin, 75011 Paris, France -La Marine, 55 bis quai de valmy, 75010 Paris, France -American Kitchen, 49 rue bichat, 
75010 Paris, France -Chai 33, 33 Cour Saint Emilion, 75012 Paris, France -Face Bar, 82 rue des archives, 75003 Paris, France -Le Bloc, 21 avenue Brochant, 75017 Paris, France -La Bricole, 52 rue Liebniz, 75018 Paris, France -le ronsard, place maubert, 75005 Paris, France -l'Usine, 1 rue d'Avron, 75020 Paris, France -La Cordonnerie, 142 Rue Saint-Denis 75002 Paris, 75002 Paris, France -Invitez vous chez nous, 7 rue Epée de Bois, 75005 Paris, France -Le sully, 13 rue du Faubourg Saint Denis, 75010 Paris, France -Le Ragueneau, 202 rue Saint-Honoré, 75001 Paris, France +Le Biz, 18 rue Favart, 75002 Paris, France +Le Cap Bourbon, 1 rue Louis le Grand, 75002 Paris, France +Le General Beuret, 9 Place du General Beuret, 75015 Paris, France Le Germinal, 95 avenue Emile Zola, 75015 Paris, France +Le Ragueneau, 202 rue Saint-Honoré, 75001 Paris, France Le refuge, 72 rue lamarck, 75018 Paris, France -Drole d'endroit pour une rencontre, 58 rue de Montorgueil, 75002 Paris, France -Le Petit Choiseul, 23 rue saint augustin, 75002 Paris, France -O'Breizh, 27 rue de Penthièvre, 75008 Paris, France -Le Supercoin, 3, rue Baudelique, 75018 Paris, France -Populettes, 86 bis rue Riquet, 75018 Paris, France -La Recoleta au Manoir, 229 avenue Gambetta, 75020 Paris, France -L'Assassin, 99 rue Jean-Pierre Timbaud, 75011 Paris, France -Le Pareloup, 80 Rue Saint-Charles, 75015 Paris, France -Café Zen, 46 rue Victoire, 75009 Paris, France -La Brasserie Gaité, 3 rue de la Gaité, 75014 Paris, France -Au bon coin, 49 rue des Cloys, 75018 Paris, France -La Brûlerie des Ternes, 111 rue mouffetard, 75005 Paris, France -Le Chat bossu, 126, rue du Faubourg Saint Antoine, 75012 Paris, France -Denfert café, 58 boulvevard Saint Jacques, 75014 Paris, France -Le Couvent, 69 rue Broca, 75013 Paris, France -Bagels & Coffee Corner, Place de Clichy, 75017 Paris, France -La Perle, 78 rue vieille du temple, 75003 Paris, France -Le Café frappé, 95 rue Montmartre, 75002 Paris, France -L'Écir, 59 Boulevard 
Saint-Jacques, 75014 Paris, France -Le Descartes, 1 rue Thouin, 75005 Paris, France -Le petit club, 55 rue de la tombe Issoire, 75014 Paris, France -Le Relais Haussmann, 146, boulevard Haussmann, 75008 Paris, France -Au panini de la place, 47 rue Belgrand, 75020 Paris, France -Extra old café, 307 fg saint Antoine, 75011 Paris, France -Le Plein soleil, 90 avenue Parmentier, 75011 Paris, France -Le Pure café, 14 rue Jean Macé, 75011 Paris, France -Le Village, 182 rue de Courcelles, 75017 Paris, France -Le Malar, 88 rue Saint-Dominique, 75007 Paris, France -Pause Café, 41 rue de Charonne, 75011 Paris, France -Chez Fafa, 44 rue Vinaigriers, 75010 Paris, France -Café dans l'aerogare Air France Invalides, 2 rue Robert Esnault Pelterie, 75007 Paris, France -Le relais de la victoire, 73 rue de la Victoire, 75009 Paris, France -Caprice café, 12 avenue Jean Moulin, 75014 Paris, France -Caves populaires, 22 rue des Dames, 75017 Paris, France -Cafe de grenelle, 188 rue de Grenelle, 75007 Paris, France -Chez Prune, 36 rue Beaurepaire, 75010 Paris, France -L'anjou, 1 rue de Montholon, 75009 Paris, France -Le Brio, 216, rue Marcadet, 75018 Paris, France -Tamm Bara, 7 rue Clisson, 75013 Paris, France -La chaumière gourmande, Route de la Muette à Neuilly -Club hippique du Jardin d’Acclimatation, 75016 Paris, France -Les Pères Populaires, 46 rue de Buzenval, 75020 Paris, France -Epicerie Musicale, 55bis quai de Valmy, 75010 Paris, France -Le Centenaire, 104 rue amelot, 75011 Paris, France -Le Zazabar, 116 Rue de Ménilmontant, 75020 Paris, France -Ragueneau, 202 rue Saint Honoré, 75001 Paris, France -L'Inévitable, 22 rue Linné, 75005 Paris, France +Le sully, 13 rue du Faubourg Saint Denis, 75010 Paris, France Le Dunois, 77 rue Dunois, 75013 Paris, France La Montagne Sans Geneviève, 13 Rue du Pot de Fer, 75005 Paris, France Le Caminito, 48 rue du Dessous des Berges, 75013 Paris, France Le petit Bretonneau, Le petit Bretonneau - à l'intérieur de l'Hôpital, 75018 Paris, France +La 
chaumière gourmande, Route de la Muette à Neuilly +Club hippique du Jardin d’Acclimatation, 75016 Paris, France Le bal du pirate, 60 rue des bergers, 75015 Paris, France +Le Zazabar, 116 Rue de Ménilmontant, 75020 Paris, France L'antre d'eux, 16 rue DE MEZIERES, 75006 Paris, France l'orillon bar, 35 rue de l'orillon, 75011 Paris, France zic zinc, 95 rue claude decaen, 75012 Paris, France +Les Pères Populaires, 46 rue de Buzenval, 75020 Paris, France +Epicerie Musicale, 55bis quai de Valmy, 75010 Paris, France +Le relais de la victoire, 73 rue de la Victoire, 75009 Paris, France +Le Centenaire, 104 rue amelot, 75011 Paris, France +Cafe de grenelle, 188 rue de Grenelle, 75007 Paris, France +Ragueneau, 202 rue Saint Honoré, 75001 Paris, France Café Pistache, 9 rue des petits champs, 75001 Paris, France La Cagnotte, 13 Rue Jean-Baptiste Dumay, 75020 Paris, France -bistrot les timbrés, 14 rue d'alleray, 75015 Paris, France Le Killy Jen, 28 bis boulevard Diderot, 75012 Paris, France Café beauveau, 9 rue de Miromesnil, 75008 Paris, France le 1 cinq, 172 rue de vaugirard, 75015 Paris, France -Au Vin Des Rues, 21 rue Boulard, 75014 Paris, France Les Artisans, 106 rue Lecourbe, 75015 Paris, France Peperoni, 83 avenue de Wagram, 75001 Paris, France -Le BB (Bouchon des Batignolles), 2 rue Lemercier, 75017 Paris, France -La Liberté, 196 rue du faubourg saint-antoine, 75012 Paris, France -Chez Rutabaga, 16 rue des Petits Champs, 75002 Paris, France -La cantoche de Paname, 40 Boulevard Beaumarchais, 75011 Paris, France -Le Saint René, 148 Boulevard de Charonne, 75020 Paris, France -La Brocante, 10 rue Rossini, 75009 Paris, France -Café Clochette, 16 avenue Richerand, 75010 Paris, France -L'européen, 21 Bis Boulevard Diderot, 75012 Paris, France -NoMa, 39 rue Notre Dame de Nazareth, 75003 Paris, France -O'Paris, 1 Rue des Envierges, 75020 Paris, France +Le Brio, 216, rue Marcadet, 75018 Paris, France +Tamm Bara, 7 rue Clisson, 75013 Paris, France +Café dans l'aerogare Air France 
Invalides, 2 rue Robert Esnault Pelterie, 75007 Paris, France +bistrot les timbrés, 14 rue d'alleray, 75015 Paris, France +Caprice café, 12 avenue Jean Moulin, 75014 Paris, France +Caves populaires, 22 rue des Dames, 75017 Paris, France +Au Vin Des Rues, 21 rue Boulard, 75014 Paris, France +Chez Prune, 36 rue Beaurepaire, 75010 Paris, France +L'Inévitable, 22 rue Linné, 75005 Paris, France +L'anjou, 1 rue de Montholon, 75009 Paris, France Botak cafe, 1 rue Paul albert, 75018 Paris, France -La cantine de Zoé, 136 rue du Faubourg poissonnière, 75010 Paris, France -Les caves populaires, 22 rue des Dames, 75017 Paris, France -Le Plomb du cantal, 3 rue Gaîté, 75014 Paris, France -Trois pièces cuisine, 101 rue des dames, 75017 Paris, France -Le Zinc, 61 avenue de la Motte Picquet, 75015 Paris, France -L'avant comptoir, 3 carrefour de l'Odéon, 75006 Paris, France -Les Vendangeurs, 6/8 rue Stanislas, 75006 Paris, France -Chez Luna, 108 rue de Ménilmontant, 75020 Paris, France -Le bar Fleuri, 1 rue du Plateau, 75019 Paris, France Bistrot Saint-Antoine, 58 rue du Fbg Saint-Antoine, 75012 Paris, France Chez Oscar, 11/13 boulevard Beaumarchais, 75004 Paris, France Le Piquet, 48 avenue de la Motte Picquet, 75015 Paris, France +L'avant comptoir, 3 carrefour de l'Odéon, 75006 Paris, France le chateau d'eau, 67 rue du Château d'eau, 75010 Paris, France +Les Vendangeurs, 6/8 rue Stanislas, 75006 Paris, France maison du vin, 52 rue des plantes, 75014 Paris, France Le Tournebride, 104 rue Mouffetard, 75005 Paris, France Le Fronton, 63 rue de Ponthieu, 75008 Paris, France +Le BB (Bouchon des Batignolles), 2 rue Lemercier, 75017 Paris, France +La cantine de Zoé, 136 rue du Faubourg poissonnière, 75010 Paris, France +Chez Rutabaga, 16 rue des Petits Champs, 75002 Paris, France +Les caves populaires, 22 rue des Dames, 75017 Paris, France +Le Plomb du cantal, 3 rue Gaîté, 75014 Paris, France +Trois pièces cuisine, 101 rue des dames, 75017 Paris, France +La Brocante, 10 rue Rossini, 75009 
Paris, France +Le Zinc, 61 avenue de la Motte Picquet, 75015 Paris, France +Chez Luna, 108 rue de Ménilmontant, 75020 Paris, France +Le bar Fleuri, 1 rue du Plateau, 75019 Paris, France +La Liberté, 196 rue du faubourg saint-antoine, 75012 Paris, France +La cantoche de Paname, 40 Boulevard Beaumarchais, 75011 Paris, France +Le Saint René, 148 Boulevard de Charonne, 75020 Paris, France +Café Clochette, 16 avenue Richerand, 75010 Paris, France +L'européen, 21 Bis Boulevard Diderot, 75012 Paris, France +NoMa, 39 rue Notre Dame de Nazareth, 75003 Paris, France le lutece, 380 rue de vaugirard, 75015 Paris, France +O'Paris, 1 Rue des Envierges, 75020 Paris, France Rivolux, 16 rue de Rivoli, 75004 Paris, France Brasiloja, 16 rue Ganneron, 75018 Paris, France -Le café Monde et Médias, Place de la République, 75003 Paris, France -L'entrepôt, 157 rue Bercy 75012 Paris, 75012 Paris, France -Coffee Chope, 344Vrue Vaugirard, 75015 Paris, France -l'Eléphant du nil, 125 Rue Saint-Antoine, 75004 Paris, France -Le Parc Vaugirard, 358 rue de Vaugirard, 75015 Paris, France -Pari's Café, 174 avenue de Clichy, 75017 Paris, France -Le Comptoir, 354 bis rue Vaugirard, 75015 Paris, France -Café Varenne, 36 rue de Varenne, 75007 Paris, France -Melting Pot, 3 rue de Lagny, 75020 Paris, France -le Zango, 58 rue Daguerre, 75014 Paris, France -Chez Miamophile, 6 rue Mélingue, 75019 Paris, France Institut des Cultures d'Islam, 19-23 rue Léon, 75018 Paris, France Canopy Café associatif, 19 rue Pajol, 75018 Paris, France -Café rallye tournelles, 11 Quai de la Tournelle, 75005 Paris, France Petits Freres des Pauvres, 47 rue de Batignolles, 75017 Paris, France -Brasserie le Morvan, 61 rue du château d'eau, 75010 Paris, France +Le Lucernaire, 53 rue Notre-Dame des Champs, 75006 Paris, France L'Angle, 28 rue de Ponthieu, 75008 Paris, France +Le Café d'avant, 35 rue Claude Bernard, 75005 Paris, France Café Dupont, 198 rue de la Convention, 75015 Paris, France +Le Sévigné, 15 rue du Parc Royal, 75003 
Paris, France L'Entracte, place de l'opera, 75002 Paris, France Panem, 18 rue de Crussol, 75011 Paris, France Au pays de Vannes, 34 bis rue de Wattignies, 75012 Paris, France -Le Lucernaire, 53 rue Notre-Dame des Champs, 75006 Paris, France -Le Café d'avant, 35 rue Claude Bernard, 75005 Paris, France +l'Eléphant du nil, 125 Rue Saint-Antoine, 75004 Paris, France L'âge d'or, 26 rue du Docteur Magnan, 75013 Paris, France -Le Sévigné, 15 rue du Parc Royal, 75003 Paris, France +Le Comptoir, 354 bis rue Vaugirard, 75015 Paris, France L'horizon, 93, rue de la Roquette, 75011 Paris, France L'empreinte, 54, avenue Daumesnil, 75012 Paris, France Café Victor, 10 boulevard Victor, 75015 Paris, France +Café Varenne, 36 rue de Varenne, 75007 Paris, France Le Brigadier, 12 rue Blanche, 75009 Paris, France -Waikiki, 10 rue d"Ulm, 75005 Paris, France \ No newline at end of file +Waikiki, 10 rue d"Ulm, 75005 Paris, France +Le Parc Vaugirard, 358 rue de Vaugirard, 75015 Paris, France +Pari's Café, 174 avenue de Clichy, 75017 Paris, France +Melting Pot, 3 rue de Lagny, 75020 Paris, France +le Zango, 58 rue Daguerre, 75014 Paris, France +Chez Miamophile, 6 rue Mélingue, 75019 Paris, France +Le café Monde et Médias, Place de la République, 75003 Paris, France +Café rallye tournelles, 11 Quai de la Tournelle, 75005 Paris, France +Brasserie le Morvan, 61 rue du château d'eau, 75010 Paris, France +L'entrepôt, 157 rue Bercy 75012 Paris, 75012 Paris, France \ No newline at end of file diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index 635068e..4edb75e 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -3,7 +3,7 @@ from queue import Empty from time import sleep from bonobo.constants import INHERIT_INPUT, NOT_MODIFIED -from bonobo.errors import InactiveReadableError +from bonobo.errors import InactiveReadableError, UnrecoverableError from bonobo.execution.base import LoopingExecutionContext from bonobo.structs.bags import Bag from bonobo.structs.inputs 
import Input @@ -93,6 +93,9 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): except Empty: sleep(self.PERIOD) continue + except UnrecoverableError as exc: + self.handle_error(exc, traceback.format_exc()) + break except Exception as exc: # pylint: disable=broad-except self.handle_error(exc, traceback.format_exc()) diff --git a/bonobo/nodes/io/base.py b/bonobo/nodes/io/base.py index d9b3212..58088d0 100644 --- a/bonobo/nodes/io/base.py +++ b/bonobo/nodes/io/base.py @@ -1,5 +1,6 @@ from bonobo import settings from bonobo.config import Configurable, ContextProcessor, Option, Service +from bonobo.errors import UnrecoverableValueError, UnrecoverableNotImplementedError from bonobo.structs.bags import Bag @@ -9,21 +10,21 @@ class IOFormatEnabled(Configurable): def get_input(self, *args, **kwargs): if self.ioformat == settings.IOFORMAT_ARG0: if len(args) != 1 or len(kwargs): - raise ValueError( + raise UnrecoverableValueError( 'Wrong input formating: IOFORMAT=ARG0 implies one arg and no kwargs, got args={!r} and kwargs={!r}.'. - format(args, kwargs) + format(args, kwargs) ) return args[0] if self.ioformat == settings.IOFORMAT_KWARGS: if len(args) or not len(kwargs): - raise ValueError( + raise UnrecoverableValueError( 'Wrong input formating: IOFORMAT=KWARGS ioformat implies no arg, got args={!r} and kwargs={!r}.'. 
- format(args, kwargs) + format(args, kwargs) ) return kwargs - raise NotImplementedError('Unsupported format.') + raise UnrecoverableNotImplementedError('Unsupported format.') def get_output(self, row): if self.ioformat == settings.IOFORMAT_ARG0: @@ -32,7 +33,7 @@ class IOFormatEnabled(Configurable): if self.ioformat == settings.IOFORMAT_KWARGS: return Bag(**row) - raise NotImplementedError('Unsupported format.') + raise UnrecoverableNotImplementedError('Unsupported format.') class FileHandler(Configurable): From 0bcdbd70ab948c03480f04f7f99a931dfd703c2d Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 5 Jul 2017 11:52:47 +0200 Subject: [PATCH 05/71] [nodes] Removes old pretty printers (bonobo.pprint, bonobo.PrettyPrint) in favor of simpler bonobo.PrettyPrinter implementation. /!\ BC break /!\ --- bonobo/_api.py | 3 +-- bonobo/nodes/basics.py | 49 ------------------------------------------ 2 files changed, 1 insertion(+), 51 deletions(-) diff --git a/bonobo/_api.py b/bonobo/_api.py index 89b6d4c..6cf328c 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,6 +1,6 @@ from bonobo.structs import Bag, Graph, Token from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ - PrettyPrinter, PickleWriter, PickleReader, RateLimited, Tee, count, identity, noop, pprint + PrettyPrinter, PickleWriter, PickleReader, RateLimited, Tee, count, identity, noop from bonobo.strategies import create_strategy from bonobo.util.objects import get_name @@ -109,7 +109,6 @@ register_api_group( count, identity, noop, - pprint, ) diff --git a/bonobo/nodes/basics.py b/bonobo/nodes/basics.py index c1ead61..85b2114 100644 --- a/bonobo/nodes/basics.py +++ b/bonobo/nodes/basics.py @@ -1,14 +1,11 @@ import functools import itertools -from colorama import Fore, Style - from bonobo import settings from bonobo.config import Configurable, Option from bonobo.config.processors import ContextProcessor from bonobo.constants import NOT_MODIFIED 
from bonobo.structs.bags import Bag -from bonobo.util.compat import deprecated from bonobo.util.objects import ValueHolder from bonobo.util.term import CLEAR_EOL @@ -17,7 +14,6 @@ __all__ = [ 'Limit', 'Tee', 'count', - 'pprint', 'PrettyPrinter', 'noop', ] @@ -87,51 +83,6 @@ class PrettyPrinter(Configurable): ) -_pprint = PrettyPrinter() - - -@deprecated -def pprint(*args, **kwargs): - return _pprint(*args, **kwargs) - - -def PrettyPrint(title_keys=('title', 'name', 'id'), print_values=True, sort=True): - from bonobo.constants import NOT_MODIFIED - - def _pprint(*args, **kwargs): - nonlocal title_keys, sort, print_values - - row = args[0] - for key in title_keys: - if key in row: - print(Style.BRIGHT, row.get(key), Style.RESET_ALL, sep='') - break - - if print_values: - for k in sorted(row) if sort else row: - print( - ' • ', - Fore.BLUE, - k, - Style.RESET_ALL, - ' : ', - Fore.BLACK, - '(', - type(row[k]).__name__, - ')', - Style.RESET_ALL, - ' ', - repr(row[k]), - CLEAR_EOL, - ) - - yield NOT_MODIFIED - - _pprint.__name__ = 'pprint' - - return _pprint - - def noop(*args, **kwargs): # pylint: disable=unused-argument from bonobo.constants import NOT_MODIFIED return NOT_MODIFIED From 6ef25deac9a624f115459e3fbc7cdfb2320d1b2a Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 5 Jul 2017 12:01:37 +0200 Subject: [PATCH 06/71] [config] Adds test for requires() decorator. 
--- bonobo/errors.py | 11 +++++++---- tests/config/test_services.py | 22 +++++++++++++++++++++- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/bonobo/errors.py b/bonobo/errors.py index 8510a50..08b97d4 100644 --- a/bonobo/errors.py +++ b/bonobo/errors.py @@ -58,19 +58,22 @@ class ConfigurationError(Exception): pass -class MissingServiceImplementationError(KeyError): - pass - - class UnrecoverableError(Exception): """Flag for errors that must interrupt the workflow, either because they will happen for sure on each node run, or because you know that your transformation has no point continuing runnning after a bad event.""" + class UnrecoverableValueError(UnrecoverableError, ValueError): pass + class UnrecoverableRuntimeError(UnrecoverableError, RuntimeError): pass + class UnrecoverableNotImplementedError(UnrecoverableError, NotImplementedError): pass + + +class MissingServiceImplementationError(UnrecoverableError, KeyError): + pass diff --git a/tests/config/test_services.py b/tests/config/test_services.py index b762dbe..ff81e82 100644 --- a/tests/config/test_services.py +++ b/tests/config/test_services.py @@ -3,7 +3,7 @@ import time import pytest -from bonobo.config import Configurable, Container, Exclusive, Service +from bonobo.config import Configurable, Container, Exclusive, Service, requires from bonobo.config.services import validate_service_name @@ -94,3 +94,23 @@ def test_exclusive(): 'hello', '0 0', '0 1', '0 2', '0 3', '0 4', '1 0', '1 1', '1 2', '1 3', '1 4', '2 0', '2 1', '2 2', '2 3', '2 4', '3 0', '3 1', '3 2', '3 3', '3 4', '4 0', '4 1', '4 2', '4 3', '4 4' ] + + +def test_requires(): + vcr = VCR() + + services = Container( + output=vcr.append + ) + + @requires('output') + def append(out, x): + out(x) + + svcargs = services.args_for(append) + assert len(svcargs) == 1 + assert svcargs[0] == vcr.append + + + + From 9801c75720ceb5715e805c26d4be07c734155cac Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 5 Jul 2017 12:41:14 +0200 
Subject: [PATCH 07/71] [settings] Better impl. of Setting class, tests for it and refactor hardcoded settings to use it. --- bonobo/_api.py | 2 +- bonobo/commands/__init__.py | 6 ++-- bonobo/commands/run.py | 4 +-- bonobo/ext/console.py | 4 +-- bonobo/logging.py | 2 +- bonobo/nodes/basics.py | 2 +- bonobo/settings.py | 53 ++++++++++++++++++++++++------- tests/test_settings.py | 63 +++++++++++++++++++++++++++++++++++++ 8 files changed, 115 insertions(+), 21 deletions(-) create mode 100644 tests/test_settings.py diff --git a/bonobo/_api.py b/bonobo/_api.py index 6cf328c..ab890c6 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -45,7 +45,7 @@ def run(graph, strategy=None, plugins=None, services=None): from bonobo import settings settings.check() - if not settings.QUIET: # pragma: no cover + if not settings.QUIET.get(): # pragma: no cover if _is_interactive_console(): from bonobo.ext.console import ConsoleOutputPlugin if ConsoleOutputPlugin not in plugins: diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index 59e6dfb..4e183a3 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -27,9 +27,9 @@ def entrypoint(args=None): args = parser.parse_args(args).__dict__ if args.pop('debug', False): - settings.DEBUG = True - settings.LOGGING_LEVEL = logging.DEBUG - logging.set_level(settings.LOGGING_LEVEL) + settings.DEBUG.set(True) + settings.LOGGING_LEVEL.set(logging.DEBUG) + logging.set_level(settings.LOGGING_LEVEL.get()) logger.debug('Command: ' + args['command'] + ' Arguments: ' + repr(args)) commands[args.pop('command')](**args) diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index 7f29d3f..6de6bf6 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -31,10 +31,10 @@ def execute(filename, module, install=False, quiet=False, verbose=False): from bonobo import Graph, run, settings if quiet: - settings.QUIET = True + settings.QUIET.set(True) if verbose: - settings.DEBUG = True + 
settings.DEBUG.set(True) if filename: if os.path.isdir(filename): diff --git a/bonobo/ext/console.py b/bonobo/ext/console.py index f30fae0..acf464b 100644 --- a/bonobo/ext/console.py +++ b/bonobo/ext/console.py @@ -65,7 +65,7 @@ class ConsoleOutputPlugin(Plugin): for i in context.graph.topologically_sorted_indexes: node = context[i] - name_suffix = '({})'.format(i) if settings.DEBUG else '' + name_suffix = '({})'.format(i) if settings.DEBUG.get() else '' if node.alive: _line = ''.join( ( @@ -100,7 +100,7 @@ class ConsoleOutputPlugin(Plugin): print(MOVE_CURSOR_UP(t_cnt + 2), file=sys.stderr) def _write(self, graph_context, rewind): - if settings.PROFILE: + if settings.PROFILE.get(): if self.counter % 10 and self._append_cache: append = self._append_cache else: diff --git a/bonobo/logging.py b/bonobo/logging.py index 17bdeb7..3784600 100644 --- a/bonobo/logging.py +++ b/bonobo/logging.py @@ -75,4 +75,4 @@ def get_logger(name='bonobo'): getLogger = get_logger # Setup formating and level. -setup(level=settings.LOGGING_LEVEL) +setup(level=settings.LOGGING_LEVEL.get()) diff --git a/bonobo/nodes/basics.py b/bonobo/nodes/basics.py index 85b2114..164eeb1 100644 --- a/bonobo/nodes/basics.py +++ b/bonobo/nodes/basics.py @@ -69,7 +69,7 @@ def _count_counter(self, context): class PrettyPrinter(Configurable): def call(self, *args, **kwargs): - formater = self._format_quiet if settings.QUIET else self._format_console + formater = self._format_quiet if settings.QUIET.get() else self._format_console for i, (item, value) in enumerate(itertools.chain(enumerate(args), kwargs.items())): print(formater(i, item, value)) diff --git a/bonobo/settings.py b/bonobo/settings.py index 8e8a780..e5edd83 100644 --- a/bonobo/settings.py +++ b/bonobo/settings.py @@ -5,6 +5,10 @@ from bonobo.errors import ValidationError def to_bool(s): + if s is None: + return False + if type(s) is bool: + return s if len(s): if s.lower() in ('f', 'false', 'n', 'no', '0'): return False @@ -13,7 +17,18 @@ def 
to_bool(s): class Setting: - def __init__(self, name, default=None, validator=None): + __all__ = {} + + @classmethod + def clear_all(cls): + for setting in Setting.__all__.values(): + setting.clear() + + def __new__(cls, name, *args, **kwargs): + Setting.__all__[name] = super().__new__(cls) + return Setting.__all__[name] + + def __init__(self, name, default=None, validator=None, formatter=None): self.name = name if default: @@ -21,15 +36,14 @@ class Setting: else: self.default = lambda: None - if validator: - self.validator = validator - else: - self.validator = None + self.validator = validator + self.formatter = formatter def __repr__(self): return ''.format(self.name, self.get()) def set(self, value): + value = self.formatter(value) if self.formatter else value if self.validator and not self.validator(value): raise ValidationError('Invalid value {!r} for setting {}.'.format(value, self.name)) self.value = value @@ -38,21 +52,35 @@ class Setting: try: return self.value except AttributeError: - self.value = self.default() + value = os.environ.get(self.name, None) + if value is None: + value = self.default() + self.set(value) return self.value + def clear(self): + try: + del self.value + except AttributeError: + pass + # Debug/verbose mode. -DEBUG = to_bool(os.environ.get('DEBUG', 'f')) +DEBUG = Setting('DEBUG', formatter=to_bool, default=False) # Profile mode. -PROFILE = to_bool(os.environ.get('PROFILE', 'f')) +PROFILE = Setting('PROFILE', formatter=to_bool, default=False) # Quiet mode. -QUIET = to_bool(os.environ.get('QUIET', 'f')) +QUIET = Setting('QUIET', formatter=to_bool, default=False) # Logging level. 
-LOGGING_LEVEL = logging.DEBUG if DEBUG else logging.INFO +LOGGING_LEVEL = Setting( + 'LOGGING_LEVEL', + formatter=logging._checkLevel, + validator=logging._checkLevel, + default=lambda: logging.DEBUG if DEBUG.get() else logging.INFO +) # Input/Output format for transformations IOFORMAT_ARG0 = 'arg0' @@ -67,5 +95,8 @@ IOFORMAT = Setting('IOFORMAT', default=IOFORMAT_KWARGS, validator=IOFORMATS.__co def check(): - if DEBUG and QUIET: + if DEBUG.get() and QUIET.get(): raise RuntimeError('I cannot be verbose and quiet at the same time.') + + +clear_all = Setting.clear_all diff --git a/tests/test_settings.py b/tests/test_settings.py new file mode 100644 index 0000000..c8313c5 --- /dev/null +++ b/tests/test_settings.py @@ -0,0 +1,63 @@ +import logging +from os import environ +from unittest.mock import patch + +import pytest + +from bonobo import settings + +TEST_SETTING = 'TEST_SETTING' + + +def test_to_bool(): + assert not settings.to_bool('') + assert not settings.to_bool('FALSE') + assert not settings.to_bool('NO') + assert not settings.to_bool('0') + + assert settings.to_bool('yup') + assert settings.to_bool('True') + assert settings.to_bool('yes') + assert settings.to_bool('1') + + +def test_setting(): + s = settings.Setting(TEST_SETTING) + assert s.get() is None + + with patch.dict(environ, {TEST_SETTING: 'hello'}): + assert s.get() is None + s.clear() + assert s.get() == 'hello' + + s = settings.Setting(TEST_SETTING, default='nope') + assert s.get() is 'nope' + + with patch.dict(environ, {TEST_SETTING: 'hello'}): + assert s.get() == 'nope' + s.clear() + assert s.get() == 'hello' + + +def test_default_settings(): + settings.clear_all() + + assert settings.DEBUG.get() == False + assert settings.PROFILE.get() == False + assert settings.QUIET.get() == False + assert settings.LOGGING_LEVEL.get() == logging._checkLevel('INFO') + + with patch.dict(environ, {'DEBUG': 't'}): + settings.clear_all() + assert settings.LOGGING_LEVEL.get() == logging._checkLevel('DEBUG') + + 
settings.clear_all() + + +def test_check(): + settings.check() + with patch.dict(environ, {'DEBUG': 't', 'PROFILE': 't', 'QUIET': 't'}): + settings.clear_all() + with pytest.raises(RuntimeError): + settings.check() + settings.clear_all() From 8de6f50523d74ff08d81a0764d19259393c705ae Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 5 Jul 2017 13:08:53 +0200 Subject: [PATCH 08/71] [examples] Fix examples, fix termination bug with unrecoverable errors. --- bonobo/examples/datasets/fablabs.py | 8 ++++---- bonobo/examples/nodes/filter.py | 7 +++++-- bonobo/examples/nodes/slow.py | 3 +++ bonobo/examples/tutorials/tut02e02_write.py | 2 +- bonobo/execution/node.py | 1 + bonobo/structs/inputs.py | 18 +++++++++++------- 6 files changed, 25 insertions(+), 14 deletions(-) diff --git a/bonobo/examples/datasets/fablabs.py b/bonobo/examples/datasets/fablabs.py index be95fe1..33ed91c 100644 --- a/bonobo/examples/datasets/fablabs.py +++ b/bonobo/examples/datasets/fablabs.py @@ -73,15 +73,15 @@ def display(row): print( ' - {}address{}: {address}'. - format(Fore.BLUE, Style.RESET_ALL, address=', '.join(address)) + format(Fore.BLUE, Style.RESET_ALL, address=', '.join(address)) ) print( ' - {}links{}: {links}'. - format(Fore.BLUE, Style.RESET_ALL, links=', '.join(row['links'])) + format(Fore.BLUE, Style.RESET_ALL, links=', '.join(row['links'])) ) print( ' - {}geometry{}: {geometry}'. 
- format(Fore.BLUE, Style.RESET_ALL, **row) + format(Fore.BLUE, Style.RESET_ALL, **row) ) print( ' - {}source{}: {source}'.format( @@ -96,8 +96,8 @@ graph = bonobo.Graph( ), normalize, filter_france, + bonobo.JsonWriter(path='fablabs.txt', ioformat='arg0'), bonobo.Tee(display), - bonobo.JsonWriter(path='fablabs.txt'), ) if __name__ == '__main__': diff --git a/bonobo/examples/nodes/filter.py b/bonobo/examples/nodes/filter.py index bf390e9..4f7219a 100644 --- a/bonobo/examples/nodes/filter.py +++ b/bonobo/examples/nodes/filter.py @@ -9,13 +9,16 @@ class OddOnlyFilter(Filter): @Filter -def MultiplesOfThreeOnlyFilter(self, i): +def multiples_of_three(i): return not (i % 3) graph = bonobo.Graph( lambda: tuple(range(50)), OddOnlyFilter(), - MultiplesOfThreeOnlyFilter(), + multiples_of_three, print, ) + +if __name__ == '__main__': + bonobo.run(graph) diff --git a/bonobo/examples/nodes/slow.py b/bonobo/examples/nodes/slow.py index b9623af..ecaaf44 100644 --- a/bonobo/examples/nodes/slow.py +++ b/bonobo/examples/nodes/slow.py @@ -14,3 +14,6 @@ graph = bonobo.Graph( pause, print, ) + +if __name__ == '__main__': + bonobo.run(graph) diff --git a/bonobo/examples/tutorials/tut02e02_write.py b/bonobo/examples/tutorials/tut02e02_write.py index 1d41ac2..664bca6 100644 --- a/bonobo/examples/tutorials/tut02e02_write.py +++ b/bonobo/examples/tutorials/tut02e02_write.py @@ -8,7 +8,7 @@ def split_one(line): graph = bonobo.Graph( bonobo.FileReader('coffeeshops.txt'), split_one, - bonobo.JsonWriter('coffeeshops.json'), + bonobo.JsonWriter('coffeeshops.json', ioformat='arg0'), ) if __name__ == '__main__': diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index 4edb75e..45691a6 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -95,6 +95,7 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): continue except UnrecoverableError as exc: self.handle_error(exc, traceback.format_exc()) + self.input.shutdown() break except Exception as exc: # 
pylint: disable=broad-except self.handle_error(exc, traceback.format_exc()) diff --git a/bonobo/structs/inputs.py b/bonobo/structs/inputs.py index cf9a6ec..7cfe12f 100644 --- a/bonobo/structs/inputs.py +++ b/bonobo/structs/inputs.py @@ -77,6 +77,12 @@ class Input(Queue, Readable, Writable): return Queue.put(self, data, block, timeout) + def _decrement_runlevel(self): + if self._runlevel == 1: + self.on_finalize() + self._runlevel -= 1 + self.on_end() + def get(self, block=True, timeout=None): if not self.alive: raise InactiveReadableError('Cannot get() on an inactive {}.'.format(Readable.__name__)) @@ -84,13 +90,7 @@ class Input(Queue, Readable, Writable): data = Queue.get(self, block, timeout) if data == END: - if self._runlevel == 1: - self.on_finalize() - - self._runlevel -= 1 - - # callback - self.on_end() + self._decrement_runlevel() if not self.alive: raise InactiveReadableError( @@ -100,6 +100,10 @@ class Input(Queue, Readable, Writable): return data + def shutdown(self): + while self._runlevel >= 1: + self._decrement_runlevel() + def empty(self): self.mutex.acquire() while self._qsize() and self.queue[0] == END: From 4a2c7280d6f8b54f55fa3402dd992a14bed04ddb Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 5 Jul 2017 13:09:46 +0200 Subject: [PATCH 09/71] [misc] Fixes formatting. 
--- bonobo/config/configurables.py | 7 +++---- bonobo/config/options.py | 9 ++++----- bonobo/examples/datasets/fablabs.py | 6 +++--- bonobo/nodes/io/base.py | 4 ++-- bonobo/util/inspect.py | 20 +++++++++++--------- tests/config/test_configurables.py | 1 - tests/config/test_methods.py | 10 +++++----- tests/config/test_methods_partial.py | 4 ++-- tests/config/test_services.py | 8 +------- 9 files changed, 31 insertions(+), 38 deletions(-) diff --git a/bonobo/config/configurables.py b/bonobo/config/configurables.py index 01db9e0..7b40303 100644 --- a/bonobo/config/configurables.py +++ b/bonobo/config/configurables.py @@ -51,7 +51,7 @@ class ConfigurableMeta(type): return (processor for _, processor in cls.__processors) def __repr__(self): - return ' '.join((' Date: Thu, 6 Jul 2017 11:29:55 +0200 Subject: [PATCH 10/71] [core] Adds a .copy() method to graph structure. --- bonobo/structs/graphs.py | 11 +++++++++++ tests/structs/test_graphs.py | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/bonobo/structs/graphs.py b/bonobo/structs/graphs.py index ccafb6b..fe7c1df 100644 --- a/bonobo/structs/graphs.py +++ b/bonobo/structs/graphs.py @@ -1,3 +1,5 @@ +from copy import copy + from bonobo.constants import BEGIN @@ -62,6 +64,15 @@ class Graph: return self + def copy(self): + g = Graph() + + g.edges = copy(self.edges) + g.named = copy(self.named) + g.nodes = copy(self.nodes) + + return g + @property def topologically_sorted_indexes(self): """Iterate in topological order, based on networkx's topological_sort() function. 
diff --git a/tests/structs/test_graphs.py b/tests/structs/test_graphs.py index af1a6df..7f3a58d 100644 --- a/tests/structs/test_graphs.py +++ b/tests/structs/test_graphs.py @@ -71,3 +71,23 @@ def test_graph_topological_sort(): assert g.topologically_sorted_indexes.index(3) < g.topologically_sorted_indexes.index(4) assert g[3] == sentinel.b1 assert g[4] == sentinel.b2 + + +def test_copy(): + g1 = Graph() + g2 = g1.copy() + + assert g1 is not g2 + + assert len(g1) == 0 + assert len(g2) == 0 + + g1.add_chain([]) + + assert len(g1) == 1 + assert len(g2) == 0 + + g2.add_chain([], identity) + + assert len(g1) == 1 + assert len(g2) == 2 From 71386ea30c54119f63dbb700698408d7fec3c2ee Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Thu, 6 Jul 2017 12:46:19 +0200 Subject: [PATCH 11/71] [doc] sqla: move logger usage to service, fix service name. --- docs/tutorial/tut04.rst | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/docs/tutorial/tut04.rst b/docs/tutorial/tut04.rst index 69e1846..6cd7675 100644 --- a/docs/tutorial/tut04.rst +++ b/docs/tutorial/tut04.rst @@ -39,17 +39,21 @@ Open your `_services.py` file and replace the code: .. code-block:: python - import bonobo - import dotenv - + import bonobo, dotenv, logging, os from bonobo_sqlalchemy.util import create_postgresql_engine dotenv.load_dotenv(dotenv.find_dotenv()) + logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) def get_services(): return { - 'fs': bonobo.open_fs(), - 'db': create_postgresql_engine(name='tutorial') + 'fs': bonobo.open_examples_fs('datasets'), + 'fs.output': bonobo.open_fs(), + 'sqlalchemy.engine': create_postgresql_engine(**{ + 'name': 'tutorial', + 'user': 'tutorial', + 'pass': 'tutorial', + }) } The `create_postgresql_engine` is a tiny function building the DSN from reasonable defaults, that you can override @@ -125,15 +129,15 @@ Now, let's use a little trick and add this section to `pgdb.py`: .. 
code-block:: python - import logging, sys - - from bonobo.commands.run import get_default_services + import sys from sqlalchemy import Table, Column, String, Integer, MetaData def main(): + from bonobo.commands.run import get_default_services services = get_default_services(__file__) - - if len(sys.argv) == 2 and sys.argv[1] == 'reset': + if len(sys.argv) == 1: + return bonobo.run(graph, services=services) + elif len(sys.argv) == 2 and sys.argv[1] == 'reset': engine = services.get('sqlalchemy.engine') metadata = MetaData() @@ -145,11 +149,10 @@ Now, let's use a little trick and add this section to `pgdb.py`: Column('address', String(255)), ) - logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) metadata.drop_all(engine) metadata.create_all(engine) else: - return bonobo.run(graph, services=services) + raise NotImplementedError('I do not understand.') if __name__ == '__main__': main() From 53d6ac5887d70b74a726f10e808941ad1d151c45 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 11 Jul 2017 16:25:32 +0200 Subject: [PATCH 12/71] [nodes] Adds arg0_to_kwargs and kwargs_to_arg0 transformations. 
--- bonobo/nodes/basics.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/bonobo/nodes/basics.py b/bonobo/nodes/basics.py index 164eeb1..c0434ed 100644 --- a/bonobo/nodes/basics.py +++ b/bonobo/nodes/basics.py @@ -10,11 +10,13 @@ from bonobo.util.objects import ValueHolder from bonobo.util.term import CLEAR_EOL __all__ = [ - 'identity', 'Limit', - 'Tee', - 'count', 'PrettyPrinter', + 'Tee', + 'arg0_to_kwargs', + 'count', + 'identity', + 'kwargs_to_arg0', 'noop', ] @@ -86,3 +88,11 @@ class PrettyPrinter(Configurable): def noop(*args, **kwargs): # pylint: disable=unused-argument from bonobo.constants import NOT_MODIFIED return NOT_MODIFIED + + +def arg0_to_kwargs(row): + return Bag(**row) + + +def kwargs_to_arg0(**row): + return Bag(row) From f2a9a45fd134715c929dc69d5ec25f77152768a3 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 15 Jul 2017 10:14:30 +0200 Subject: [PATCH 13/71] [nodes] Adds arg0_to_kwargs and kwargs_to_arg0 transformations. --- bonobo/_api.py | 6 ++++-- bonobo/nodes/basics.py | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/bonobo/_api.py b/bonobo/_api.py index ab890c6..6b2a72d 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,6 +1,6 @@ from bonobo.structs import Bag, Graph, Token from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ - PrettyPrinter, PickleWriter, PickleReader, RateLimited, Tee, count, identity, noop + PrettyPrinter, PickleWriter, PickleReader, RateLimited, Tee, count, identity, noop, arg0_to_kwargs, kwargs_to_arg0 from bonobo.strategies import create_strategy from bonobo.util.objects import get_name @@ -101,13 +101,15 @@ register_api_group( JsonReader, JsonWriter, Limit, - PrettyPrinter, PickleReader, PickleWriter, + PrettyPrinter, RateLimited, Tee, + arg0_to_kwargs, count, identity, + kwargs_to_arg0, noop, ) diff --git a/bonobo/nodes/basics.py b/bonobo/nodes/basics.py index 
c0434ed..ea05c29 100644 --- a/bonobo/nodes/basics.py +++ b/bonobo/nodes/basics.py @@ -91,8 +91,22 @@ def noop(*args, **kwargs): # pylint: disable=unused-argument def arg0_to_kwargs(row): + """ + Transform items in a stream from "arg0" format (each call only has one positional argument, which is a dict-like + object) to "kwargs" format (each call only has keyword arguments that represent a row). + + :param row: + :return: bonobo.Bag + """ return Bag(**row) def kwargs_to_arg0(**row): + """ + Transform items in a stream from "kwargs" format (each call only has keyword arguments that represent a row) to + "arg0" format (each call only has one positional argument, which is a dict-like object) . + + :param **row: + :return: bonobo.Bag + """ return Bag(row) From f4a018bfe22e12c02573c56fe927961ab79a5404 Mon Sep 17 00:00:00 2001 From: Alex Vykaliuk Date: Sat, 15 Jul 2017 12:07:08 +0200 Subject: [PATCH 14/71] Do not fail in ipykernel without ipywidgets. --- bonobo/_api.py | 13 ++++++++++--- bonobo/ext/jupyter/plugin.py | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/bonobo/_api.py b/bonobo/_api.py index ab890c6..37ff996 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,3 +1,5 @@ +import logging + from bonobo.structs import Bag, Graph, Token from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ PrettyPrinter, PickleWriter, PickleReader, RateLimited, Tee, count, identity, noop @@ -52,9 +54,14 @@ def run(graph, strategy=None, plugins=None, services=None): plugins.append(ConsoleOutputPlugin) if _is_jupyter_notebook(): - from bonobo.ext.jupyter import JupyterOutputPlugin - if JupyterOutputPlugin not in plugins: - plugins.append(JupyterOutputPlugin) + try: + from bonobo.ext.jupyter import JupyterOutputPlugin + except ImportError: + logging.warning( + 'Failed to load jupyter widget. 
Easiest way is to install the optional "jupyter" ' 'dependencies with «pip install bonobo[jupyter]», but you can also install a specific ' 'version by yourself.') + else: + if JupyterOutputPlugin not in plugins: + plugins.append(JupyterOutputPlugin) return strategy.execute(graph, plugins=plugins, services=services) diff --git a/bonobo/ext/jupyter/plugin.py b/bonobo/ext/jupyter/plugin.py index a72141c..715b057 100644 --- a/bonobo/ext/jupyter/plugin.py +++ b/bonobo/ext/jupyter/plugin.py @@ -1,11 +1,11 @@ +import logging + from bonobo.ext.jupyter.widget import BonoboWidget from bonobo.plugins import Plugin try: import IPython.core.display except ImportError as e: - import logging - logging.exception( 'You must install Jupyter to use the bonobo Jupyter extension. Easiest way is to install the ' 'optional "jupyter" dependencies with «pip install bonobo[jupyter]», but you can also install a ' From 9c988027638205f6edbefd4d9dd3c9ebc0b7265a Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 15 Jul 2017 12:46:51 +0200 Subject: [PATCH 15/71] [misc] ordering of imports --- bonobo/_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bonobo/_api.py b/bonobo/_api.py index 70090b7..26df660 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -2,7 +2,7 @@ import logging from bonobo.structs import Bag, Graph, Token from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ - PrettyPrinter, PickleWriter, PickleReader, RateLimited, Tee, count, identity, noop, arg0_to_kwargs, kwargs_to_arg0 + PickleReader, PickleWriter, PrettyPrinter, RateLimited, Tee, arg0_to_kwargs, count, identity, kwargs_to_arg0, noop from bonobo.strategies import create_strategy from bonobo.util.objects import get_name From d13b8b28e572e6f4ac175151de44dd6c9eba6f63 Mon Sep 17 00:00:00 2001 From: Alex Vykaliuk Date: Sat, 15 Jul 2017 12:52:58 +0200 Subject: [PATCH 16/71] Add ability to install requirements with for a requirements.txt 
residing in the same dir --- bonobo/commands/run.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index 6de6bf6..fb93e77 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -26,6 +26,20 @@ def get_default_services(filename, services=None): return services or {} +def _install_requirements(requirements): + """Install requirements given a path to requirements.txt file.""" + import importlib + import pip + + pip.main(['install', '-r', requirements]) + # Some shenanigans to be sure everything is importable after this, especially .egg-link files which + # are referenced in *.pth files and apparently loaded by site.py at some magic bootstrap moment of the + # python interpreter. + pip.utils.pkg_resources = importlib.reload(pip.utils.pkg_resources) + import site + importlib.reload(site) + + def execute(filename, module, install=False, quiet=False, verbose=False): import runpy from bonobo import Graph, run, settings @@ -39,16 +53,8 @@ def execute(filename, module, install=False, quiet=False, verbose=False): if filename: if os.path.isdir(filename): if install: - import importlib - import pip requirements = os.path.join(filename, 'requirements.txt') - pip.main(['install', '-r', requirements]) - # Some shenanigans to be sure everything is importable after this, especially .egg-link files which - # are referenced in *.pth files and apparently loaded by site.py at some magic bootstrap moment of the - # python interpreter. 
- pip.utils.pkg_resources = importlib.reload(pip.utils.pkg_resources) - import site - importlib.reload(site) + _install_requirements(requirements) pathname = filename for filename in DEFAULT_GRAPH_FILENAMES: @@ -58,7 +64,8 @@ def execute(filename, module, install=False, quiet=False, verbose=False): if not os.path.exists(filename): raise IOError('Could not find entrypoint (candidates: {}).'.format(', '.join(DEFAULT_GRAPH_FILENAMES))) elif install: - raise RuntimeError('Cannot --install on a file (only available for dirs containing requirements.txt).') + requirements = os.path.join(os.path.dirname(filename), 'requirements.txt') + _install_requirements(requirements) context = runpy.run_path(filename, run_name='__bonobo__') elif module: context = runpy.run_module(module, run_name='__bonobo__') From 75c15ae1f8c2213da29ca2186d1a95fc0ddc03d3 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 15 Jul 2017 13:56:51 +0200 Subject: [PATCH 17/71] [tests] Adds runners descriptions. --- tests/test_commands.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_commands.py b/tests/test_commands.py index 280308d..593cfd6 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -10,10 +10,12 @@ from bonobo.commands import entrypoint def runner_entrypoint(*args): + """ Run bonobo using the python command entrypoint directly (bonobo.commands.entrypoint). 
""" return entrypoint(list(args)) def runner_module(*args): + """ Run bonobo using the bonobo.__main__ file, which is equivalent as doing "python -m bonobo ...".""" with patch.object(sys, 'argv', ['bonobo', *args]): return runpy.run_path(__main__.__file__, run_name='__main__') From 7aee728b8dd14aeed9f38866e529744017d9440e Mon Sep 17 00:00:00 2001 From: Alex Vykaliuk Date: Sat, 15 Jul 2017 14:24:44 +0200 Subject: [PATCH 18/71] Add tests for --install of run command --- tests/test_commands.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_commands.py b/tests/test_commands.py index 280308d..df59115 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -1,3 +1,4 @@ +import os import runpy import sys from unittest.mock import patch @@ -70,6 +71,24 @@ def test_run_path(runner, capsys): assert out[2].startswith('Baz ') +@all_runners +def test_install_requirements_for_dir(runner): + dirname = get_examples_path('types') + with patch('pip.main') as pip_mock: + runner('run', '--install', dirname) + pip_mock.assert_called_once_with( + ['install', '-r', os.path.join(dirname, 'requirements.txt')]) + + +@all_runners +def test_install_requirements_for_file(runner): + dirname = get_examples_path('types') + with patch('pip.main') as pip_mock: + runner('run', '--install', os.path.join(dirname, 'strings.py')) + pip_mock.assert_called_once_with( + ['install', '-r', os.path.join(dirname, 'requirements.txt')]) + + @all_runners def test_version(runner, capsys): runner('version') From a8ed0e432220ae4e8b35653f23a69ba816da5180 Mon Sep 17 00:00:00 2001 From: Alex Vykaliuk Date: Sat, 15 Jul 2017 14:52:22 +0200 Subject: [PATCH 19/71] Move patch one level up because importlib brakes all the CI tools. 
--- tests/test_commands.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_commands.py b/tests/test_commands.py index df59115..e2d7b7b 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -74,19 +74,19 @@ def test_run_path(runner, capsys): @all_runners def test_install_requirements_for_dir(runner): dirname = get_examples_path('types') - with patch('pip.main') as pip_mock: + with patch('bonobo.commands.run._install_requirements') as install_mock: runner('run', '--install', dirname) - pip_mock.assert_called_once_with( - ['install', '-r', os.path.join(dirname, 'requirements.txt')]) + install_mock.assert_called_once_with( + os.path.join(dirname, 'requirements.txt')) @all_runners def test_install_requirements_for_file(runner): dirname = get_examples_path('types') - with patch('pip.main') as pip_mock: + with patch('bonobo.commands.run._install_requirements') as install_mock: runner('run', '--install', os.path.join(dirname, 'strings.py')) - pip_mock.assert_called_once_with( - ['install', '-r', os.path.join(dirname, 'requirements.txt')]) + install_mock.assert_called_once_with( + os.path.join(dirname, 'requirements.txt')) @all_runners From 575462ca4cf9f5345939026ce5571bdc7e8277ad Mon Sep 17 00:00:00 2001 From: Vitalii Vokhmin Date: Sat, 15 Jul 2017 15:35:01 +0200 Subject: [PATCH 20/71] Check if PluginExecutionContext was started before shutting it down. If a `PluginExecutionContext().shutdown()` is called _before_ `PluginExecutionContext().start()` was called, this leads to an `AttributeError` exception since finalizer tries to access to attributes which were never defined. 
--- bonobo/execution/plugin.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bonobo/execution/plugin.py b/bonobo/execution/plugin.py index d928f4a..a207f23 100644 --- a/bonobo/execution/plugin.py +++ b/bonobo/execution/plugin.py @@ -16,8 +16,9 @@ class PluginExecutionContext(LoopingExecutionContext): self.wrapped.initialize() def shutdown(self): - with recoverable(self.handle_error): - self.wrapped.finalize() + if self.started: + with recoverable(self.handle_error): + self.wrapped.finalize() self.alive = False def step(self): From abde68108b9beaec07826afb5295e5b0a52c90d5 Mon Sep 17 00:00:00 2001 From: Parthiv20 Date: Sat, 15 Jul 2017 17:27:34 +0200 Subject: [PATCH 21/71] better windows console output --- bonobo/_api.py | 14 ++++++++------ bonobo/ext/console.py | 20 +++++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/bonobo/_api.py b/bonobo/_api.py index ab890c6..adeaf8b 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -4,6 +4,8 @@ from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, J from bonobo.strategies import create_strategy from bonobo.util.objects import get_name + + __all__ = [] @@ -21,17 +23,17 @@ def register_api_group(*args): def run(graph, strategy=None, plugins=None, services=None): """ Main entry point of bonobo. It takes a graph and creates all the necessary plumbery around to execute it. - + The only necessary argument is a :class:`Graph` instance, containing the logic you actually want to execute. - + By default, this graph will be executed using the "threadpool" strategy: each graph node will be wrapped in a thread, and executed in a loop until there is no more input to this node. - + You can provide plugins factory objects in the plugins list, this function will add the necessary plugins for interactive console execution and jupyter notebook execution if it detects correctly that it runs in this context. 
- + You'll probably want to provide a services dictionary mapping service names to service instances. - + :param Graph graph: The :class:`Graph` to execute. :param str strategy: The :class:`bonobo.strategies.base.Strategy` to use. :param list plugins: The list of plugins to enhance execution. @@ -71,7 +73,7 @@ register_api(create_strategy) def open_fs(fs_url=None, *args, **kwargs): """ Wraps :func:`fs.open_fs` function with a few candies. - + :param str fs_url: A filesystem URL :param parse_result: A parsed filesystem URL. :type parse_result: :class:`ParseResult` diff --git a/bonobo/ext/console.py b/bonobo/ext/console.py index acf464b..146b991 100644 --- a/bonobo/ext/console.py +++ b/bonobo/ext/console.py @@ -2,7 +2,9 @@ import io import sys from contextlib import redirect_stdout -from colorama import Style, Fore +from colorama import Style, Fore, init +init(wrap=True) + from bonobo import settings from bonobo.plugins import Plugin @@ -23,7 +25,6 @@ class IOBuffer(): finally: previous.close() - class ConsoleOutputPlugin(Plugin): """ Outputs status information to the connected stdout. 
Can be a TTY, with or without support for colors/cursor @@ -43,11 +44,11 @@ class ConsoleOutputPlugin(Plugin): self._stdout = sys.stdout self.stdout = IOBuffer() - self.redirect_stdout = redirect_stdout(self.stdout) + self.redirect_stdout = redirect_stdout(self.stdout if sys.platform != 'win32' else self._stdout) self.redirect_stdout.__enter__() def run(self): - if self.isatty: + if self.isatty and sys.platform != 'win32': self._write(self.context.parent, rewind=True) else: pass # not a tty @@ -60,8 +61,13 @@ class ConsoleOutputPlugin(Plugin): t_cnt = len(context) buffered = self.stdout.switch() - for line in buffered.split('\n')[:-1]: - print(line + CLEAR_EOL, file=sys.stderr) + + if sys.platform == 'win32': + for line in buffered.split('\n')[:-1]: + print(line, file=sys.stderr) + else: + for line in buffered.split('\n')[:-1]: + print(line + CLEAR_EOL, file=sys.stderr) for i in context.graph.topologically_sorted_indexes: node = context[i] @@ -76,7 +82,7 @@ class ConsoleOutputPlugin(Plugin): else: _line = ''.join( ( - ' ', Fore.BLACK, '-', ' ', node.name, name_suffix, ' ', node.get_statistics_as_string(), + ' ', Style.BRIGHT+Fore.BLACK, '-', ' ', node.name, name_suffix, ' ', node.get_statistics_as_string(), Style.RESET_ALL, ' ', ) ) From 937c61bd762d0e3828e735f591966017f015d27a Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 16 Jul 2017 10:20:25 +0200 Subject: [PATCH 22/71] Update dependencies --- Makefile | 2 +- requirements-dev.txt | 4 ++-- requirements-docker.txt | 4 ++-- requirements-jupyter.txt | 4 ++-- requirements.txt | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 10094af..f5d0f8b 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # This file has been auto-generated. # All changes will be lost, see Projectfile. 
# -# Updated at 2017-07-04 10:50:55.775681 +# Updated at 2017-07-16 10:20:05.825842 PACKAGE ?= bonobo PYTHON ?= $(shell which python) diff --git a/requirements-dev.txt b/requirements-dev.txt index 55ba71c..69d64d8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -21,8 +21,8 @@ pygments==2.2.0 pytest-cov==2.5.1 pytest-sugar==0.8.0 pytest-timeout==1.2.0 -pytest==3.1.2 -python-dateutil==2.6.0 +pytest==3.1.3 +python-dateutil==2.6.1 pytz==2017.2 requests==2.18.1 six==1.10.0 diff --git a/requirements-docker.txt b/requirements-docker.txt index 0a39353..77ea242 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -1,6 +1,6 @@ -e .[docker] appdirs==1.4.3 -bonobo-docker==0.2.9 +bonobo-docker==0.2.10 certifi==2017.4.17 chardet==3.0.4 colorama==0.3.9 @@ -15,6 +15,6 @@ pyparsing==2.2.0 pytz==2017.2 requests==2.18.1 six==1.10.0 -stevedore==1.23.0 +stevedore==1.24.0 urllib3==1.21.1 websocket-client==0.44.0 diff --git a/requirements-jupyter.txt b/requirements-jupyter.txt index d6a6fdb..2542040 100644 --- a/requirements-jupyter.txt +++ b/requirements-jupyter.txt @@ -1,7 +1,7 @@ -e .[jupyter] appnope==0.1.0 bleach==2.0.0 -decorator==4.0.11 +decorator==4.1.1 entrypoints==0.2.3 html5lib==0.999999999 ipykernel==4.6.1 @@ -26,7 +26,7 @@ pickleshare==0.7.4 prompt-toolkit==1.0.14 ptyprocess==0.5.2 pygments==2.2.0 -python-dateutil==2.6.0 +python-dateutil==2.6.1 pyzmq==16.0.2 qtconsole==4.3.0 simplegeneric==0.8.1 diff --git a/requirements.txt b/requirements.txt index 093a6a1..5ddbb01 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,5 +12,5 @@ pyparsing==2.2.0 pytz==2017.2 requests==2.18.1 six==1.10.0 -stevedore==1.23.0 +stevedore==1.24.0 urllib3==1.21.1 From 258bd6235da7618e7bf470a1b116505ead5bda26 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 16 Jul 2017 11:19:06 +0200 Subject: [PATCH 23/71] [logging] Removes logging colors on windows for now as the codes are mis-interpreted. 
--- bonobo/logging.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/bonobo/logging.py b/bonobo/logging.py index 3784600..1884511 100644 --- a/bonobo/logging.py +++ b/bonobo/logging.py @@ -8,6 +8,8 @@ from colorama import Fore, Style from bonobo import settings from bonobo.util.term import CLEAR_EOL +iswindows = (sys.platform == 'win32') + def get_format(): yield '{b}[%(fg)s%(levelname)s{b}][{w}' @@ -18,9 +20,9 @@ def get_format(): colors = { - 'b': Fore.BLACK, - 'w': Fore.LIGHTBLACK_EX, - 'r': Style.RESET_ALL, + 'b': '' if iswindows else Fore.BLACK, + 'w': '' if iswindows else Fore.LIGHTBLACK_EX, + 'r': '' if iswindows else Style.RESET_ALL, } format = (''.join(get_format())).format(**colors) @@ -28,7 +30,9 @@ format = (''.join(get_format())).format(**colors) class Filter(logging.Filter): def filter(self, record): record.spent = record.relativeCreated // 1000 - if record.levelname == 'DEBG': + if iswindows: + record.fg = '' + elif record.levelname == 'DEBG': record.fg = Fore.LIGHTBLACK_EX elif record.levelname == 'INFO': record.fg = Fore.LIGHTWHITE_EX @@ -46,7 +50,10 @@ class Filter(logging.Filter): class Formatter(logging.Formatter): def formatException(self, ei): tb = super().formatException(ei) - return textwrap.indent(tb, Fore.BLACK + ' | ' + Fore.WHITE) + if iswindows: + return textwrap.indent(tb, ' | ') + else: + return textwrap.indent(tb, Fore.BLACK + ' | ' + Fore.WHITE) def setup(level): From 423a75d554396512ac58ee37989a91835d8720f3 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 16 Jul 2017 11:26:48 +0200 Subject: [PATCH 24/71] [logging] Removes kill-until-eol character on windows platform. 
--- bonobo/logging.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bonobo/logging.py b/bonobo/logging.py index 1884511..071fcd3 100644 --- a/bonobo/logging.py +++ b/bonobo/logging.py @@ -16,7 +16,8 @@ def get_format(): yield '{b}][{w}'.join(('%(spent)04d', '%(name)s')) yield '{b}]' yield ' %(fg)s%(message)s{r}' - yield CLEAR_EOL + if not iswindows: + yield CLEAR_EOL colors = { From 3e961776e3b8a7b0b4c0fe1c9be27c1cfb24efbf Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 16 Jul 2017 18:45:49 +0200 Subject: [PATCH 25/71] [cli] First draft implementation of "convert" command, which builds a simple graph of reader+writer and executes it. --- Makefile | 2 +- Projectfile | 1 + bonobo/commands/convert.py | 61 ++++++++++++++++++++++++++++++++++++++ setup.py | 4 +-- 4 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 bonobo/commands/convert.py diff --git a/Makefile b/Makefile index 10094af..fcdc7e2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # This file has been auto-generated. # All changes will be lost, see Projectfile. 
# -# Updated at 2017-07-04 10:50:55.775681 +# Updated at 2017-07-16 18:44:56.169119 PACKAGE ?= bonobo PYTHON ?= $(shell which python) diff --git a/Projectfile b/Projectfile index 6fe6c2b..117dd76 100644 --- a/Projectfile +++ b/Projectfile @@ -29,6 +29,7 @@ python.setup( 'bonobo = bonobo.commands:entrypoint', ], 'bonobo.commands': [ + 'convert = bonobo.commands.convert:register', 'init = bonobo.commands.init:register', 'run = bonobo.commands.run:register', 'version = bonobo.commands.version:register', diff --git a/bonobo/commands/convert.py b/bonobo/commands/convert.py new file mode 100644 index 0000000..c29f31c --- /dev/null +++ b/bonobo/commands/convert.py @@ -0,0 +1,61 @@ +import mimetypes + +import bonobo + +SHORTCUTS = { + 'plain': 'text/plain', + 'txt': 'text/plain', + 'text': 'text/plain', + 'csv': 'text/csv', + 'json': 'application/json', +} + +REGISTRY = { + 'text/plain': (bonobo.FileReader, bonobo.FileWriter), + 'text/csv': (bonobo.CsvReader, bonobo.CsvWriter), + 'application/json': (bonobo.JsonReader, bonobo.JsonWriter), +} + + +def resolve_factory(name, filename, factory_type): + """ + Try to resolve which transformation factory to use for this filename. User eventually provided a name, which has + priority, otherwise we try to detect it using the mimetype detection on filename. + + """ + if name is None: + name = mimetypes.guess_type(filename)[0] + + if name in SHORTCUTS: + name = SHORTCUTS[name] + + if not name in REGISTRY: + raise RuntimeError('Could not resolve {factory_type} factory for {filename} ({name}). 
Try providing it explicitely using -{opt} .'.format(name=name, filename=filename, factory_type=factory_type, opt=factory_type[0])) + + if factory_type == 'reader': + return REGISTRY[name][0] + elif factory_type == 'writer': + return REGISTRY[name][1] + else: + raise ValueError('Invalid factory type.') + +def execute(input, output, reader=None, reader_options=None, writer=None, writer_options=None, options=None): + reader = resolve_factory(reader, input, 'reader')(input) + writer = resolve_factory(writer, output, 'writer')(output) + + graph = bonobo.Graph() + graph.add_chain(reader, writer) + + return bonobo.run(graph, services={ + 'fs': bonobo.open_fs(), + }) + +def register(parser): + parser.add_argument('input') + parser.add_argument('output') + parser.add_argument('--reader', '-r') + parser.add_argument('--writer', '-w') + parser.add_argument('--reader-option', '-ro', dest='reader_options') + parser.add_argument('--writer-option', '-wo', dest='writer_options') + parser.add_argument('--option', '-o', dest='options') + return execute diff --git a/setup.py b/setup.py index 89c9ccd..a925811 100644 --- a/setup.py +++ b/setup.py @@ -67,8 +67,8 @@ setup( }, entry_points={ 'bonobo.commands': [ - 'init = bonobo.commands.init:register', 'run = bonobo.commands.run:register', - 'version = bonobo.commands.version:register' + 'convert = bonobo.commands.convert:register', 'init = bonobo.commands.init:register', + 'run = bonobo.commands.run:register', 'version = bonobo.commands.version:register' ], 'console_scripts': ['bonobo = bonobo.commands:entrypoint'] }, From c881ca106155a8777af6bd69aaf007680c7df376 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 16 Jul 2017 18:51:17 +0200 Subject: [PATCH 26/71] [cli] Convert: adds pickle format, comment out unused arguments, for now. 
--- bonobo/commands/convert.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bonobo/commands/convert.py b/bonobo/commands/convert.py index c29f31c..90bd57b 100644 --- a/bonobo/commands/convert.py +++ b/bonobo/commands/convert.py @@ -11,9 +11,10 @@ SHORTCUTS = { } REGISTRY = { - 'text/plain': (bonobo.FileReader, bonobo.FileWriter), - 'text/csv': (bonobo.CsvReader, bonobo.CsvWriter), 'application/json': (bonobo.JsonReader, bonobo.JsonWriter), + 'pickle': (bonobo.PickleReader, bonobo.PickleWriter), + 'text/csv': (bonobo.CsvReader, bonobo.CsvWriter), + 'text/plain': (bonobo.FileReader, bonobo.FileWriter), } @@ -55,7 +56,7 @@ def register(parser): parser.add_argument('output') parser.add_argument('--reader', '-r') parser.add_argument('--writer', '-w') - parser.add_argument('--reader-option', '-ro', dest='reader_options') - parser.add_argument('--writer-option', '-wo', dest='writer_options') - parser.add_argument('--option', '-o', dest='options') + # parser.add_argument('--reader-option', '-ro', dest='reader_options') + # parser.add_argument('--writer-option', '-wo', dest='writer_options') + # parser.add_argument('--option', '-o', dest='options') return execute From fcc24f4badba3843a27971ade82cbea136d50478 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sun, 16 Jul 2017 18:58:56 +0200 Subject: [PATCH 27/71] Restore removed examples that were used in tests. 
--- bonobo/examples/types/__init__.py | 7 +++++ bonobo/examples/types/bags.py | 41 +++++++++++++++++++++++++++++ bonobo/examples/types/dicts.py | 43 +++++++++++++++++++++++++++++++ bonobo/examples/types/strings.py | 39 ++++++++++++++++++++++++++++ 4 files changed, 130 insertions(+) create mode 100644 bonobo/examples/types/__init__.py create mode 100644 bonobo/examples/types/bags.py create mode 100644 bonobo/examples/types/dicts.py create mode 100644 bonobo/examples/types/strings.py diff --git a/bonobo/examples/types/__init__.py b/bonobo/examples/types/__init__.py new file mode 100644 index 0000000..a2c0ceb --- /dev/null +++ b/bonobo/examples/types/__init__.py @@ -0,0 +1,7 @@ +from . import bags, dicts, strings + +__all__ = [ + 'bags', + 'dicts', + 'strings', +] \ No newline at end of file diff --git a/bonobo/examples/types/bags.py b/bonobo/examples/types/bags.py new file mode 100644 index 0000000..2bfe5de --- /dev/null +++ b/bonobo/examples/types/bags.py @@ -0,0 +1,41 @@ +""" +Example on how to use :class:`bonobo.Bag` instances to pass flexible args/kwargs to the next callable. + +.. 
graphviz:: + + digraph { + rankdir = LR; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "extract()" -> "transform(...)" -> "load(...)"; + } + +""" + +from random import randint + +from bonobo import Bag, Graph + + +def extract(): + yield Bag(topic='foo') + yield Bag(topic='bar') + yield Bag(topic='baz') + + +def transform(topic: str): + return Bag.inherit(title=topic.title(), rand=randint(10, 99)) + + +def load(topic: str, title: str, rand: int): + print('{} ({}) wait={}'.format(title, topic, rand)) + + +graph = Graph() +graph.add_chain(extract, transform, load) + +if __name__ == '__main__': + from bonobo import run + + run(graph) diff --git a/bonobo/examples/types/dicts.py b/bonobo/examples/types/dicts.py new file mode 100644 index 0000000..fde4b08 --- /dev/null +++ b/bonobo/examples/types/dicts.py @@ -0,0 +1,43 @@ +""" +Example on how to use symple python dictionaries to communicate between transformations. + +.. graphviz:: + + digraph { + rankdir = LR; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "extract()" -> "transform(row: dict)" -> "load(row: dict)"; + } + +""" + +from random import randint + +from bonobo import Graph + + +def extract(): + yield {'topic': 'foo'} + yield {'topic': 'bar'} + yield {'topic': 'baz'} + + +def transform(row: dict): + return { + 'topic': row['topic'].title(), + 'randint': randint(10, 99), + } + + +def load(row: dict): + print(row) + + +graph = Graph(extract, transform, load) + +if __name__ == '__main__': + from bonobo import run + + run(graph) diff --git a/bonobo/examples/types/strings.py b/bonobo/examples/types/strings.py new file mode 100644 index 0000000..1903151 --- /dev/null +++ b/bonobo/examples/types/strings.py @@ -0,0 +1,39 @@ +""" +Example on how to use symple python strings to communicate between transformations. + +.. 
graphviz:: + + digraph { + rankdir = LR; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "extract()" -> "transform(s: str)" -> "load(s: str)"; + } + +""" +from random import randint + +from bonobo import Graph + + +def extract(): + yield 'foo' + yield 'bar' + yield 'baz' + + +def transform(s: str): + return '{} ({})'.format(s.title(), randint(10, 99)) + + +def load(s: str): + print(s) + + +graph = Graph(extract, transform, load) + +if __name__ == '__main__': + from bonobo import run + + run(graph) From 4ef4364f4285dbc932038e20045606466eda5705 Mon Sep 17 00:00:00 2001 From: Travis Cook Date: Fri, 21 Jul 2017 15:32:06 -0700 Subject: [PATCH 28/71] [docs] Fix grammar in purity.rst --- docs/guide/purity.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guide/purity.rst b/docs/guide/purity.rst index e01ff38..472c3f9 100644 --- a/docs/guide/purity.rst +++ b/docs/guide/purity.rst @@ -8,7 +8,7 @@ The major problem we have is that one message (underlying implementation: :class through more than one component, and at the same time. If you wanna be safe, you tend to :func:`copy.copy()` everything between two calls to two different components, but that's very expensive. -Instead of that, we chosed the oposite: copies are never made, and you should not modify in place the inputs of your +Instead, we chose the opposite: copies are never made, and you should not modify in place the inputs of your component before yielding them, and that mostly means that you want to recreate dicts and lists before yielding (or returning) them. Numeric values, strings and tuples being immutable in python, modifying a variable of one of those type will already return a different instance. 
From dda0a6388026a94226050f1fa11b670eb7bf7504 Mon Sep 17 00:00:00 2001 From: Travis Cook Date: Fri, 21 Jul 2017 15:38:06 -0700 Subject: [PATCH 29/71] [docs] Fix additional grammar issues at bottom of purity.rst --- docs/guide/purity.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/guide/purity.rst b/docs/guide/purity.rst index 472c3f9..bd20d4e 100644 --- a/docs/guide/purity.rst +++ b/docs/guide/purity.rst @@ -130,10 +130,10 @@ Now let's see how to do it correctly: I hear you think «Yeah, but if I create like millions of dicts ...». -Let's say we chosed the oposite way and copy the dict outside the transformation (in fact, `it's what we did in bonobo's +Let's say we chose the opposite way and copied the dict outside the transformation (in fact, `it's what we did in bonobo's ancestor `_). This means you will also create the -same number of dicts, the difference is that you won't even notice it. Also, it means that if you want to yield 1 million -times the same dict, going "pure" makes it efficient (you'll just yield the same object 1 million times) while going "copy +same number of dicts, the difference is that you won't even notice it. Also, it means that if you want to yield the same +dict 1 million times , going "pure" makes it efficient (you'll just yield the same object 1 million times) while going "copy crazy" will create 1 million objects. Using dicts like this will create a lot of dicts, but also free them as soon as all the future components that take this dict From 966628e15697d20205e1f5b615b48e5e1257df73 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 18 Sep 2017 17:16:18 +0200 Subject: [PATCH 30/71] Implements graphviz output after a graph inspection. 
--- bonobo/commands/graph.py | 22 ++++++++++++++++------ bonobo/commands/run.py | 8 +++++++- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/bonobo/commands/graph.py b/bonobo/commands/graph.py index b8bbdf9..7afa8de 100644 --- a/bonobo/commands/graph.py +++ b/bonobo/commands/graph.py @@ -1,22 +1,32 @@ import json +import itertools + from bonobo.util.objects import get_name -from bonobo.commands.run import read_file +from bonobo.commands.run import read, register_generic_run_arguments from bonobo.constants import BEGIN -def execute(file): - graph, plugins, services = read_file(file) +def execute(filename, module, install=False, quiet=False, verbose=False): + graph, plugins, services = read(filename, module, install, quiet, verbose) print('digraph {') print(' rankdir = LR;') print(' "BEGIN" [shape="point"];') + for i in graph.outputs_of(BEGIN): - print(' "BEGIN" -> ' + json.dumps(get_name(graph.nodes[i])) + ';') + print(' "BEGIN" -> ' + json.dumps(get_name(graph[i])) + ';') + + for ix in graph.topologically_sorted_indexes: + for iy in graph.outputs_of(ix): + print(' {} -> {};'.format( + json.dumps(get_name(graph[ix])), + json.dumps(get_name(graph[iy])) + )) + print('}') def register(parser): - import argparse - parser.add_argument('file', type=argparse.FileType()) + register_generic_run_arguments(parser) return execute diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index 58cd82d..604fc39 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -40,7 +40,7 @@ def _install_requirements(requirements): importlib.reload(site) -def execute(filename, module, install=False, quiet=False, verbose=False): +def read(filename, module, install=False, quiet=False, verbose=False): import runpy from bonobo import Graph, settings @@ -86,6 +86,12 @@ def execute(filename, module, install=False, quiet=False, verbose=False): filename, context.get(DEFAULT_SERVICES_ATTR)() if DEFAULT_SERVICES_ATTR in context else None ) + return graph, plugins, 
services + + +def execute(filename, module, install=False, quiet=False, verbose=False): + graph, plugins, services = read(filename, module, install, quiet, verbose) + return bonobo.run( graph, plugins=plugins, From 8b9dac50eca8d2737e377c225fdd5e4c4dc948c8 Mon Sep 17 00:00:00 2001 From: cwandrews Date: Mon, 18 Sep 2017 15:24:27 -0400 Subject: [PATCH 31/71] Added optional passing of one or multiple environment variables via --env flag to the bonobo cli. --- bonobo/commands/run.py | 12 +++++++++++- tests/test_commands.py | 27 +++++++++++++++++++++++++++ tests/util/get_passed_env.py | 22 ++++++++++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 tests/util/get_passed_env.py diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index fb93e77..c9bb39e 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -40,7 +40,10 @@ def _install_requirements(requirements): importlib.reload(site) -def execute(filename, module, install=False, quiet=False, verbose=False): +def execute(filename, module, install=False, quiet=False, verbose=False, + env=None): + import re + import runpy from bonobo import Graph, run, settings @@ -50,6 +53,12 @@ def execute(filename, module, install=False, quiet=False, verbose=False): if verbose: settings.DEBUG.set(True) + if env: + quote_killer = re.compile('["\']') + for e in env: + var_name, var_value = e.split('=') + os.environ[var_name] = quote_killer.sub('', var_value) + if filename: if os.path.isdir(filename): if install: @@ -106,4 +115,5 @@ def register(parser): verbosity_group.add_argument('--quiet', '-q', action='store_true') verbosity_group.add_argument('--verbose', '-v', action='store_true') parser.add_argument('--install', '-I', action='store_true') + parser.add_argument('--env', '-e', action='append') return execute diff --git a/tests/test_commands.py b/tests/test_commands.py index daf245f..cff9e38 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -3,6 +3,7 @@ import runpy 
import sys from unittest.mock import patch +import pathlib import pkg_resources import pytest @@ -96,3 +97,29 @@ def test_version(runner, capsys): out = out.strip() assert out.startswith('bonobo ') assert __version__ in out + + +@all_runners +def test_run_with_env(runner, capsys): + runner('run', '--quiet', + str(pathlib.Path(os.path.dirname(__file__), + 'util', 'get_passed_env.py')), + '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', + '--env', "ENV_TEST_STRING='my_test_string'") + out, err = capsys.readouterr() + out = out.split('\n') + assert out[0] == 'cwandrews' + assert out[1] == '123' + assert out[2] == 'my_test_string' + + +@all_runners +def test_run_module_with_env(runner, capsys): + runner('run', '--quiet', '-m', 'tests.util.get_passed_env', + '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', + '--env', "ENV_TEST_STRING='my_test_string'") + out, err = capsys.readouterr() + out = out.split('\n') + assert out[0] == 'cwandrews' + assert out[1] == '123' + assert out[2] == 'my_test_string' diff --git a/tests/util/get_passed_env.py b/tests/util/get_passed_env.py new file mode 100644 index 0000000..d9c4ba6 --- /dev/null +++ b/tests/util/get_passed_env.py @@ -0,0 +1,22 @@ +import os + +from bonobo import Graph + + +def extract(): + env_test_user = os.getenv('ENV_TEST_USER') + env_test_number = os.getenv('ENV_TEST_NUMBER') + env_test_string = os.getenv('ENV_TEST_STRING') + return env_test_user, env_test_number, env_test_string + + +def load(s: str): + print(s) + + +graph = Graph(extract, load) + +if __name__ == '__main__': + from bonobo import run + + run(graph) From 21514ad670f99da42b2967b89a6ece3d2e7e5ba1 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 30 Sep 2017 09:54:30 +0200 Subject: [PATCH 32/71] Update dependencies. 
--- Makefile | 6 +++--- requirements-dev.txt | 20 ++++++++++---------- requirements-docker.txt | 16 ++++++++-------- requirements-jupyter.txt | 31 ++++++++++++++++--------------- requirements.txt | 16 ++++++++-------- 5 files changed, 45 insertions(+), 44 deletions(-) diff --git a/Makefile b/Makefile index cb5f8bc..8175b3e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # This file has been auto-generated. # All changes will be lost, see Projectfile. # -# Updated at 2017-07-16 10:52:31.093416 +# Updated at 2017-09-30 09:50:47.806007 PACKAGE ?= bonobo PYTHON ?= $(shell which python) @@ -27,13 +27,13 @@ VERSION ?= $(shell git describe 2>/dev/null || echo dev) # Installs the local project dependencies. install: if [ -z "$(QUICK)" ]; then \ - $(PIP) install -U pip wheel $(PYTHON_PIP_INSTALL_OPTIONS) -r $(PYTHON_REQUIREMENTS_FILE) ; \ + $(PIP) install -U pip wheel $(PIP_INSTALL_OPTIONS) -r $(PYTHON_REQUIREMENTS_FILE) ; \ fi # Installs the local project dependencies, including development-only libraries. install-dev: if [ -z "$(QUICK)" ]; then \ - $(PIP) install -U pip wheel $(PYTHON_PIP_INSTALL_OPTIONS) -r $(PYTHON_REQUIREMENTS_DEV_FILE) ; \ + $(PIP) install -U pip wheel $(PIP_INSTALL_OPTIONS) -r $(PYTHON_REQUIREMENTS_DEV_FILE) ; \ fi # Cleans up the local mess. 
diff --git a/requirements-dev.txt b/requirements-dev.txt index 69d64d8..92123d5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,16 +1,16 @@ -e .[dev] alabaster==0.7.10 arrow==0.10.0 -babel==2.4.0 -binaryornot==0.4.3 -certifi==2017.4.17 +babel==2.5.1 +binaryornot==0.4.4 +certifi==2017.7.27.1 chardet==3.0.4 click==6.7 cookiecutter==1.5.1 coverage==4.4.1 -docutils==0.13.1 +docutils==0.14 future==0.16.0 -idna==2.5 +idna==2.6 imagesize==0.7.1 jinja2-time==0.2.0 jinja2==2.9.6 @@ -21,14 +21,14 @@ pygments==2.2.0 pytest-cov==2.5.1 pytest-sugar==0.8.0 pytest-timeout==1.2.0 -pytest==3.1.3 +pytest==3.2.2 python-dateutil==2.6.1 pytz==2017.2 -requests==2.18.1 -six==1.10.0 +requests==2.18.4 +six==1.11.0 snowballstemmer==1.2.1 -sphinx==1.6.3 +sphinx==1.6.4 sphinxcontrib-websupport==1.0.1 termcolor==1.1.0 -urllib3==1.21.1 +urllib3==1.22 whichcraft==0.4.1 diff --git a/requirements-docker.txt b/requirements-docker.txt index f5e74fc..870223d 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -1,20 +1,20 @@ -e .[docker] appdirs==1.4.3 bonobo-docker==0.2.11 -certifi==2017.4.17 +certifi==2017.7.27.1 chardet==3.0.4 colorama==0.3.9 docker-pycreds==0.2.1 docker==2.3.0 -fs==2.0.4 -idna==2.5 +fs==2.0.11 +idna==2.6 packaging==16.8 pbr==3.1.1 -psutil==5.2.2 +psutil==5.3.1 pyparsing==2.2.0 pytz==2017.2 -requests==2.18.1 -six==1.10.0 -stevedore==1.24.0 -urllib3==1.21.1 +requests==2.18.4 +six==1.11.0 +stevedore==1.27.0 +urllib3==1.22 websocket-client==0.44.0 diff --git a/requirements-jupyter.txt b/requirements-jupyter.txt index 2542040..94b10ea 100644 --- a/requirements-jupyter.txt +++ b/requirements-jupyter.txt @@ -1,40 +1,41 @@ -e .[jupyter] appnope==0.1.0 -bleach==2.0.0 -decorator==4.1.1 +bleach==2.1 +decorator==4.1.2 entrypoints==0.2.3 html5lib==0.999999999 ipykernel==4.6.1 ipython-genutils==0.2.0 -ipython==6.1.0 -ipywidgets==6.0.0 -jedi==0.10.2 +ipython==6.2.1 +ipywidgets==6.0.1 +jedi==0.11.0 jinja2==2.9.6 jsonschema==2.6.0 jupyter-client==5.1.0 
-jupyter-console==5.1.0 +jupyter-console==5.2.0 jupyter-core==4.3.0 jupyter==1.0.0 markupsafe==1.0 mistune==0.7.4 -nbconvert==5.2.1 -nbformat==4.3.0 -notebook==5.0.0 -pandocfilters==1.4.1 +nbconvert==5.3.1 +nbformat==4.4.0 +notebook==5.1.0 +pandocfilters==1.4.2 +parso==0.1.0 pexpect==4.2.1 pickleshare==0.7.4 -prompt-toolkit==1.0.14 +prompt-toolkit==1.0.15 ptyprocess==0.5.2 pygments==2.2.0 python-dateutil==2.6.1 pyzmq==16.0.2 -qtconsole==4.3.0 +qtconsole==4.3.1 simplegeneric==0.8.1 -six==1.10.0 +six==1.11.0 terminado==0.6 testpath==0.3.1 -tornado==4.5.1 +tornado==4.5.2 traitlets==4.3.2 wcwidth==0.1.7 webencodings==0.5.1 -widgetsnbextension==2.0.0 +widgetsnbextension==2.0.1 diff --git a/requirements.txt b/requirements.txt index 5ddbb01..61ab27d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,16 @@ -e . appdirs==1.4.3 -certifi==2017.4.17 +certifi==2017.7.27.1 chardet==3.0.4 colorama==0.3.9 -fs==2.0.4 -idna==2.5 +fs==2.0.11 +idna==2.6 packaging==16.8 pbr==3.1.1 -psutil==5.2.2 +psutil==5.3.1 pyparsing==2.2.0 pytz==2017.2 -requests==2.18.1 -six==1.10.0 -stevedore==1.24.0 -urllib3==1.21.1 +requests==2.18.4 +six==1.11.0 +stevedore==1.27.0 +urllib3==1.22 From 7ca3369f7141246823e0fe2dabb0d98e5d91af5a Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 30 Sep 2017 11:01:53 +0200 Subject: [PATCH 33/71] Rename "bonobo graph" to "bonobo inspect". For now, graphviz is default but there will probably be a human-readable default in the future, with graphviz source generation set if --graph (or -g) flag is passed. 
--- Makefile | 2 +- Projectfile | 2 +- bonobo/commands/graph.py | 32 -------------------------------- bonobo/commands/inspect.py | 33 +++++++++++++++++++++++++++++++++ bonobo/commands/run.py | 8 ++------ bonobo/util/graphviz.py | 3 +-- setup.py | 2 +- tests/test_commands.py | 17 +++++++++-------- 8 files changed, 48 insertions(+), 51 deletions(-) delete mode 100644 bonobo/commands/graph.py create mode 100644 bonobo/commands/inspect.py diff --git a/Makefile b/Makefile index 1e617cb..aac0c92 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # This file has been auto-generated. # All changes will be lost, see Projectfile. # -# Updated at 2017-09-30 10:24:51.699716 +# Updated at 2017-09-30 10:57:00.855477 PACKAGE ?= bonobo PYTHON ?= $(shell which python) diff --git a/Projectfile b/Projectfile index eea8cc5..8e848b3 100644 --- a/Projectfile +++ b/Projectfile @@ -30,7 +30,7 @@ python.setup( ], 'bonobo.commands': [ 'init = bonobo.commands.init:register', - 'graph = bonobo.commands.graph:register', + 'inspect = bonobo.commands.inspect:register', 'run = bonobo.commands.run:register', 'version = bonobo.commands.version:register', ], diff --git a/bonobo/commands/graph.py b/bonobo/commands/graph.py deleted file mode 100644 index 7afa8de..0000000 --- a/bonobo/commands/graph.py +++ /dev/null @@ -1,32 +0,0 @@ -import json - -import itertools - -from bonobo.util.objects import get_name -from bonobo.commands.run import read, register_generic_run_arguments -from bonobo.constants import BEGIN - - -def execute(filename, module, install=False, quiet=False, verbose=False): - graph, plugins, services = read(filename, module, install, quiet, verbose) - - print('digraph {') - print(' rankdir = LR;') - print(' "BEGIN" [shape="point"];') - - for i in graph.outputs_of(BEGIN): - print(' "BEGIN" -> ' + json.dumps(get_name(graph[i])) + ';') - - for ix in graph.topologically_sorted_indexes: - for iy in graph.outputs_of(ix): - print(' {} -> {};'.format( - json.dumps(get_name(graph[ix])), - 
json.dumps(get_name(graph[iy])) - )) - - print('}') - - -def register(parser): - register_generic_run_arguments(parser) - return execute diff --git a/bonobo/commands/inspect.py b/bonobo/commands/inspect.py new file mode 100644 index 0000000..83b770e --- /dev/null +++ b/bonobo/commands/inspect.py @@ -0,0 +1,33 @@ +import json + +from bonobo.commands.run import read, register_generic_run_arguments +from bonobo.constants import BEGIN +from bonobo.util.objects import get_name + +OUTPUT_GRAPHVIZ = 'graphviz' + +def execute(*, output, **kwargs): + graph, plugins, services = read(**kwargs) + + if output == OUTPUT_GRAPHVIZ: + print('digraph {') + print(' rankdir = LR;') + print(' "BEGIN" [shape="point"];') + + for i in graph.outputs_of(BEGIN): + print(' "BEGIN" -> ' + json.dumps(get_name(graph[i])) + ';') + + for ix in graph.topologically_sorted_indexes: + for iy in graph.outputs_of(ix): + print(' {} -> {};'.format(json.dumps(get_name(graph[ix])), json.dumps(get_name(graph[iy])))) + + print('}') + else: + raise NotImplementedError('Output type not implemented.') + + +def register(parser): + register_generic_run_arguments(parser) + parser.add_argument('--graph', '-g', dest='output', action='store_const', const=OUTPUT_GRAPHVIZ) + parser.set_defaults(output=OUTPUT_GRAPHVIZ) + return execute diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index 27a2329..2204a3b 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -3,7 +3,7 @@ import os import bonobo from bonobo.constants import DEFAULT_SERVICES_ATTR, DEFAULT_SERVICES_FILENAME -DEFAULT_GRAPH_FILENAMES = ('__main__.py', 'main.py',) +DEFAULT_GRAPH_FILENAMES = ('__main__.py', 'main.py', ) DEFAULT_GRAPH_ATTR = 'get_graph' @@ -99,11 +99,7 @@ def read(filename, module, install=False, quiet=False, verbose=False, env=None): def execute(filename, module, install=False, quiet=False, verbose=False, env=None): graph, plugins, services = read(filename, module, install, quiet, verbose, env) - return bonobo.run( - 
graph, - plugins=plugins, - services=services - ) + return bonobo.run(graph, plugins=plugins, services=services) def register_generic_run_arguments(parser, required=True): diff --git a/bonobo/util/graphviz.py b/bonobo/util/graphviz.py index fa88974..588e374 100644 --- a/bonobo/util/graphviz.py +++ b/bonobo/util/graphviz.py @@ -1,4 +1,3 @@ - def render_as_dot(graph): """ @@ -6,4 +5,4 @@ def render_as_dot(graph): :return: str """ - pass \ No newline at end of file + pass diff --git a/setup.py b/setup.py index 0abee00..08b84e0 100644 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ setup( }, entry_points={ 'bonobo.commands': [ - 'init = bonobo.commands.init:register', 'graph = bonobo.commands.graph:register', + 'init = bonobo.commands.init:register', 'inspect = bonobo.commands.inspect:register', 'run = bonobo.commands.run:register', 'version = bonobo.commands.version:register' ], 'console_scripts': ['bonobo = bonobo.commands:entrypoint'] diff --git a/tests/test_commands.py b/tests/test_commands.py index cff9e38..730bc0b 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -101,11 +101,11 @@ def test_version(runner, capsys): @all_runners def test_run_with_env(runner, capsys): - runner('run', '--quiet', - str(pathlib.Path(os.path.dirname(__file__), - 'util', 'get_passed_env.py')), - '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', - '--env', "ENV_TEST_STRING='my_test_string'") + runner( + 'run', '--quiet', + str(pathlib.Path(os.path.dirname(__file__), 'util', 'get_passed_env.py')), '--env', 'ENV_TEST_NUMBER=123', + '--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" + ) out, err = capsys.readouterr() out = out.split('\n') assert out[0] == 'cwandrews' @@ -115,9 +115,10 @@ def test_run_with_env(runner, capsys): @all_runners def test_run_module_with_env(runner, capsys): - runner('run', '--quiet', '-m', 'tests.util.get_passed_env', - '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', - '--env', 
"ENV_TEST_STRING='my_test_string'") + runner( + 'run', '--quiet', '-m', 'tests.util.get_passed_env', '--env', 'ENV_TEST_NUMBER=123', '--env', + 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" + ) out, err = capsys.readouterr() out = out.split('\n') assert out[0] == 'cwandrews' From b49ccaa7a77aad78a4560c8469c1e09d37a74abb Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 30 Sep 2017 11:26:22 +0200 Subject: [PATCH 34/71] Formating. --- bonobo/commands/inspect.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bonobo/commands/inspect.py b/bonobo/commands/inspect.py index 83b770e..bb82704 100644 --- a/bonobo/commands/inspect.py +++ b/bonobo/commands/inspect.py @@ -6,6 +6,7 @@ from bonobo.util.objects import get_name OUTPUT_GRAPHVIZ = 'graphviz' + def execute(*, output, **kwargs): graph, plugins, services = read(**kwargs) From c6cde93f4ee8afe0256b8b46e6f331375551921b Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 30 Sep 2017 11:38:36 +0200 Subject: [PATCH 35/71] [cli] small refactoring in bonobo convert to use shortcuts as extensions if nothing else matched. 
--- bonobo/commands/convert.py | 42 ++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/bonobo/commands/convert.py b/bonobo/commands/convert.py index 90bd57b..3c26724 100644 --- a/bonobo/commands/convert.py +++ b/bonobo/commands/convert.py @@ -1,13 +1,15 @@ import mimetypes +import os import bonobo SHORTCUTS = { - 'plain': 'text/plain', - 'txt': 'text/plain', - 'text': 'text/plain', 'csv': 'text/csv', 'json': 'application/json', + 'pickle': 'pickle', + 'plain': 'text/plain', + 'text': 'text/plain', + 'txt': 'text/plain', } REGISTRY = { @@ -17,6 +19,9 @@ REGISTRY = { 'text/plain': (bonobo.FileReader, bonobo.FileWriter), } +READER = 'reader' +WRITER = 'writer' + def resolve_factory(name, filename, factory_type): """ @@ -30,32 +35,43 @@ def resolve_factory(name, filename, factory_type): if name in SHORTCUTS: name = SHORTCUTS[name] - if not name in REGISTRY: - raise RuntimeError('Could not resolve {factory_type} factory for {filename} ({name}). Try providing it explicitely using -{opt} .'.format(name=name, filename=filename, factory_type=factory_type, opt=factory_type[0])) + if name is None: + _, _ext = os.path.splitext(filename) + if _ext: + _ext = _ext[1:] + if _ext in SHORTCUTS: + name = SHORTCUTS[_ext] - if factory_type == 'reader': + if not name in REGISTRY: + raise RuntimeError( + 'Could not resolve {factory_type} factory for {filename} ({name}). 
Try providing it explicitely using -{opt} .'.format( + name=name, filename=filename, factory_type=factory_type, opt=factory_type[0])) + + if factory_type == READER: return REGISTRY[name][0] - elif factory_type == 'writer': + elif factory_type == WRITER: return REGISTRY[name][1] else: raise ValueError('Invalid factory type.') + def execute(input, output, reader=None, reader_options=None, writer=None, writer_options=None, options=None): - reader = resolve_factory(reader, input, 'reader')(input) - writer = resolve_factory(writer, output, 'writer')(output) + reader = resolve_factory(reader, input, READER)(input) + writer = resolve_factory(writer, output, WRITER)(output) graph = bonobo.Graph() graph.add_chain(reader, writer) return bonobo.run(graph, services={ - 'fs': bonobo.open_fs(), - }) + 'fs': bonobo.open_fs(), + }) + def register(parser): parser.add_argument('input') parser.add_argument('output') - parser.add_argument('--reader', '-r') - parser.add_argument('--writer', '-w') + parser.add_argument('--' + READER, '-r') + parser.add_argument('--' + WRITER, '-w') # parser.add_argument('--reader-option', '-ro', dest='reader_options') # parser.add_argument('--writer-option', '-wo', dest='writer_options') # parser.add_argument('--option', '-o', dest='options') From 7fb572ec90548050dff47ae72a1cc1df70487cd7 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 30 Sep 2017 11:45:13 +0200 Subject: [PATCH 36/71] [doc] new commands. --- docs/reference/commands.rst | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/docs/reference/commands.rst b/docs/reference/commands.rst index dcd054a..674d549 100644 --- a/docs/reference/commands.rst +++ b/docs/reference/commands.rst @@ -1,6 +1,21 @@ Command-line ============ + +Bonobo Convert +:::::::::::::: + +Build a simple bonobo graph with one reader and one writer, then execute it, allowing to use bonobo in "no code" mode +for simple file format conversions. 
+ +Syntax: `bonobo convert [-r reader] input_filename [-w writer] output_filename` + +.. todo:: + + add a way to override default options of reader/writers, add a way to add "filters", for example this could be used + to read from csv and write to csv too (or other format) but adding a geocoder filter that would add some fields. + + Bonobo Init ::::::::::: @@ -8,7 +23,17 @@ Create an empty project, ready to use bonobo. Syntax: `bonobo init` -Requires `edgy.project`. +Requires `cookiecutter`. + + +Bonobo Inspect +:::::::::::::: + +Inspects a bonobo graph source files. For now, only support graphviz output. + +Syntax: `bonobo inspect [--graph|-g] filename` + +Requires graphviz if you want to generate an actual graph picture, although the command itself depends on nothing. Bonobo Run @@ -20,6 +45,7 @@ Syntax: `bonobo run [-c cmd | -m mod | file | -] [arg]` .. todo:: implement -m, check if -c is of any use and if yes, implement it too. Implement args, too. + Bonobo RunC ::::::::::: From 9b3777f88d6bbf44372684e6681b813f4cc48b1d Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 30 Sep 2017 11:48:17 +0200 Subject: [PATCH 37/71] [minor] update test script for graphs. 
--- bin/test_graph | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/test_graph b/bin/test_graph index 1e5fd85..29841f5 100644 --- a/bin/test_graph +++ b/bin/test_graph @@ -1 +1 @@ -bonobo graph bonobo/examples/tutorials/tut02_03_writeasmap.py | dot -otest.png -Tpng && bin/imgcat test.png +bonobo inspect --graph bonobo/examples/tutorials/tut02e03_writeasmap.py | dot -o test_output.png -T png && bin/imgcat test_output.png From b26dbc83cbfd1b35903920b6bdaaa7f93152e2d9 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 30 Sep 2017 11:50:34 +0200 Subject: [PATCH 38/71] minor cleanups --- bonobo/nodes/throttle.py | 3 --- bonobo/util/graphviz.py | 8 -------- 2 files changed, 11 deletions(-) delete mode 100644 bonobo/util/graphviz.py diff --git a/bonobo/nodes/throttle.py b/bonobo/nodes/throttle.py index 2f08cd3..58f5c09 100644 --- a/bonobo/nodes/throttle.py +++ b/bonobo/nodes/throttle.py @@ -41,15 +41,12 @@ class RateLimited(Configurable): @ContextProcessor def bucket(self, context): - print(context) bucket = RateLimitBucket(self.initial, self.amount, self.period) bucket.start() - print(bucket) yield bucket bucket.stop() bucket.join() def call(self, bucket, *args, **kwargs): - print(bucket, args, kwargs) bucket.wait() return self.handler(*args, **kwargs) diff --git a/bonobo/util/graphviz.py b/bonobo/util/graphviz.py deleted file mode 100644 index 588e374..0000000 --- a/bonobo/util/graphviz.py +++ /dev/null @@ -1,8 +0,0 @@ -def render_as_dot(graph): - """ - - :param bonobo.Graph graph: - :return: str - """ - - pass From ef938c99707168a28ed6b98945831de7fc30e86d Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 30 Sep 2017 11:58:37 +0200 Subject: [PATCH 39/71] Adding warnings to transformation factory, and code formatting.
--- bonobo/commands/convert.py | 13 ++++++++----- bonobo/examples/datasets/fablabs.py | 11 ++++++----- bonobo/examples/nodes/factory.py | 7 ++++--- bonobo/nodes/factory.py | 18 ++++++++++++------ 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/bonobo/commands/convert.py b/bonobo/commands/convert.py index 3c26724..17b98c2 100644 --- a/bonobo/commands/convert.py +++ b/bonobo/commands/convert.py @@ -44,8 +44,9 @@ def resolve_factory(name, filename, factory_type): if not name in REGISTRY: raise RuntimeError( - 'Could not resolve {factory_type} factory for {filename} ({name}). Try providing it explicitely using -{opt} .'.format( - name=name, filename=filename, factory_type=factory_type, opt=factory_type[0])) + 'Could not resolve {factory_type} factory for {filename} ({name}). Try providing it explicitely using -{opt} .'. + format(name=name, filename=filename, factory_type=factory_type, opt=factory_type[0]) + ) if factory_type == READER: return REGISTRY[name][0] @@ -62,9 +63,11 @@ def execute(input, output, reader=None, reader_options=None, writer=None, writer graph = bonobo.Graph() graph.add_chain(reader, writer) - return bonobo.run(graph, services={ - 'fs': bonobo.open_fs(), - }) + return bonobo.run( + graph, services={ + 'fs': bonobo.open_fs(), + } + ) def register(parser): diff --git a/bonobo/examples/datasets/fablabs.py b/bonobo/examples/datasets/fablabs.py index 1a21671..b87019f 100644 --- a/bonobo/examples/datasets/fablabs.py +++ b/bonobo/examples/datasets/fablabs.py @@ -48,7 +48,6 @@ def normalize(row): return result - def display(row): print(Style.BRIGHT, row.get('name'), Style.RESET_ALL, sep='') @@ -69,15 +68,15 @@ def display(row): print( ' - {}address{}: {address}'. - format(Fore.BLUE, Style.RESET_ALL, address=', '.join(address)) + format(Fore.BLUE, Style.RESET_ALL, address=', '.join(address)) ) print( ' - {}links{}: {links}'. 
- format(Fore.BLUE, Style.RESET_ALL, links=', '.join(row['links'])) + format(Fore.BLUE, Style.RESET_ALL, links=', '.join(row['links'])) ) print( ' - {}geometry{}: {geometry}'. - format(Fore.BLUE, Style.RESET_ALL, **row) + format(Fore.BLUE, Style.RESET_ALL, **row) ) print( ' - {}source{}: {source}'.format( @@ -87,7 +86,9 @@ def display(row): graph = bonobo.Graph( - OpenDataSoftAPI(dataset=API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris'), + OpenDataSoftAPI( + dataset=API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris' + ), normalize, bonobo.Filter(filter=lambda row: row.get('country') == 'France'), bonobo.JsonWriter(path='fablabs.txt', ioformat='arg0'), diff --git a/bonobo/examples/nodes/factory.py b/bonobo/examples/nodes/factory.py index d4702c6..d1aac89 100644 --- a/bonobo/examples/nodes/factory.py +++ b/bonobo/examples/nodes/factory.py @@ -8,6 +8,7 @@ from bonobo.config import Configurable from bonobo.nodes.factory import Factory from bonobo.nodes.io.json import JsonDictReader + @Factory def Normalize(self): self[0].str().title() @@ -15,11 +16,11 @@ def Normalize(self): self.move(0, 'address') - - class PrettyPrinter(Configurable): def call(self, *args, **kwargs): - for i, (item, value) in enumerate(itertools.chain(enumerate(args), kwargs.items())): + for i, ( + item, value + ) in enumerate(itertools.chain(enumerate(args), kwargs.items())): print(' ' if i else '• ', item, '=', value) diff --git a/bonobo/nodes/factory.py b/bonobo/nodes/factory.py index af3f778..736ca2b 100644 --- a/bonobo/nodes/factory.py +++ b/bonobo/nodes/factory.py @@ -1,8 +1,9 @@ import functools +import warnings from functools import partial from bonobo import Bag -from bonobo.config import Configurable, Method +from bonobo.config import Configurable _isarg = lambda item: type(item) is int _iskwarg = lambda item: type(item) is str @@ -110,7 +111,10 @@ class Cursor(): setattr(self, item, partial(_operation, self)) return getattr(self, item) - raise AttributeError('Unknown operation 
{}.{}().'.format(type(self).__name__, item, )) + raise AttributeError('Unknown operation {}.{}().'.format( + type(self).__name__, + item, + )) CURSOR_TYPES['default'] = Cursor @@ -139,12 +143,15 @@ CURSOR_TYPES['str'] = StringCursor class Factory(Configurable): - setup = Method() - def __init__(self): + warnings.warn( + __file__ + + ' is experimental, API may change in the future, use it as a preview only and knowing the risks.', + FutureWarning + ) + super(Factory, self).__init__() self.default_cursor_type = 'default' self.operations = [] - self.setup() @factory_operation def move(self, _from, _to, *args, **kwargs): @@ -186,7 +193,6 @@ if __name__ == '__main__': print('operations:', f.operations) print(f({'foo': 'bisou'}, foo='blah')) - ''' specs: From 0d53e135813d83aee73cc0e92eb51054aee66c93 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 30 Sep 2017 12:23:14 +0200 Subject: [PATCH 40/71] Making factory example work. Still work in progress. --- bonobo/examples/datasets/coffeeshops.txt | 214 +++++++++++------------ bonobo/examples/nodes/factory.py | 32 +--- bonobo/nodes/factory.py | 15 +- bonobo/nodes/io/json.py | 9 +- 4 files changed, 132 insertions(+), 138 deletions(-) diff --git a/bonobo/examples/datasets/coffeeshops.txt b/bonobo/examples/datasets/coffeeshops.txt index b87eacb..9e3c181 100644 --- a/bonobo/examples/datasets/coffeeshops.txt +++ b/bonobo/examples/datasets/coffeeshops.txt @@ -1,36 +1,37 @@ -les montparnos, 65 boulevard Pasteur, 75015 Paris, France -Coffee Chope, 344Vrue Vaugirard, 75015 Paris, France -Café Lea, 5 rue Claude Bernard, 75005 Paris, France -Le Bellerive, 71 quai de Seine, 75019 Paris, France -Le drapeau de la fidelité, 21 rue Copreaux, 75015 Paris, France +Extérieur Quai, 5, rue d'Alsace, 75010 Paris, France +Le Sully, 6 Bd henri IV, 75004 Paris, France O q de poule, 53 rue du ruisseau, 75018 Paris, France -Le café des amis, 125 rue Blomet, 75015 Paris, France +Le Pas Sage, 1 Passage du Grand Cerf, 75002 Paris, France +La 
Renaissance, 112 Rue Championnet, 75018 Paris, France +La Caravane, Rue de la Fontaine au Roi, 75011 Paris, France Le chantereine, 51 Rue Victoire, 75009 Paris, France Le Müller, 11 rue Feutrier, 75018 Paris, France -Extérieur Quai, 5, rue d'Alsace, 75010 Paris, France -La Bauloise, 36 rue du hameau, 75015 Paris, France -Le Dellac, 14 rue Rougemont, 75009 Paris, France -Le Bosquet, 46 avenue Bosquet, 75007 Paris, France -Le Sully, 6 Bd henri IV, 75004 Paris, France -Le Felteu, 1 rue Pecquay, 75004 Paris, France -Le bistrot de Maëlle et Augustin, 42 rue coquillère, 75001 Paris, France -Dédé la frite, 52 rue Notre-Dame des Victoires, 75002 Paris, France -Cardinal Saint-Germain, 11 boulevard Saint-Germain, 75005 Paris, France -Le Reynou, 2 bis quai de la mégisserie, 75001 Paris, France -Aux cadrans, 21 ter boulevard Diderot, 75012 Paris, France -Le Saint Jean, 23 rue des abbesses, 75018 Paris, France -La Renaissance, 112 Rue Championnet, 75018 Paris, France -Le Square, 31 rue Saint-Dominique, 75007 Paris, France -Les Arcades, 61 rue de Ponthieu, 75008 Paris, France -Le Kleemend's, 34 avenue Pierre Mendès-France, 75013 Paris, France -Assaporare Dix sur Dix, 75, avenue Ledru-Rollin, 75012 Paris, France -Café Pierre, 202 rue du faubourg st antoine, 75012 Paris, France -Café antoine, 17 rue Jean de la Fontaine, 75016 Paris, France -Au cerceau d'or, 129 boulevard sebastopol, 75002 Paris, France -La Caravane, Rue de la Fontaine au Roi, 75011 Paris, France -Le Pas Sage, 1 Passage du Grand Cerf, 75002 Paris, France +Le drapeau de la fidelité, 21 rue Copreaux, 75015 Paris, France +Le café des amis, 125 rue Blomet, 75015 Paris, France Le Café Livres, 10 rue Saint Martin, 75004 Paris, France +Le Bosquet, 46 avenue Bosquet, 75007 Paris, France Le Chaumontois, 12 rue Armand Carrel, 75018 Paris, France +Le Kleemend's, 34 avenue Pierre Mendès-France, 75013 Paris, France +Café Pierre, 202 rue du faubourg st antoine, 75012 Paris, France +Les Arcades, 61 rue de Ponthieu, 75008 Paris, 
France +Le Square, 31 rue Saint-Dominique, 75007 Paris, France +Assaporare Dix sur Dix, 75, avenue Ledru-Rollin, 75012 Paris, France +Au cerceau d'or, 129 boulevard sebastopol, 75002 Paris, France +Aux cadrans, 21 ter boulevard Diderot, 75012 Paris, France +Café antoine, 17 rue Jean de la Fontaine, 75016 Paris, France +Café de la Mairie (du VIII), rue de Lisbonne, 75008 Paris, France +Café Lea, 5 rue Claude Bernard, 75005 Paris, France +Cardinal Saint-Germain, 11 boulevard Saint-Germain, 75005 Paris, France +Dédé la frite, 52 rue Notre-Dame des Victoires, 75002 Paris, France +La Bauloise, 36 rue du hameau, 75015 Paris, France +Le Bellerive, 71 quai de Seine, 75019 Paris, France +Le bistrot de Maëlle et Augustin, 42 rue coquillère, 75001 Paris, France +Le Dellac, 14 rue Rougemont, 75009 Paris, France +Le Felteu, 1 rue Pecquay, 75004 Paris, France +Le Reynou, 2 bis quai de la mégisserie, 75001 Paris, France +Le Saint Jean, 23 rue des abbesses, 75018 Paris, France +les montparnos, 65 boulevard Pasteur, 75015 Paris, France +L'antre d'eux, 16 rue DE MEZIERES, 75006 Paris, France Drole d'endroit pour une rencontre, 58 rue de Montorgueil, 75002 Paris, France Le pari's café, 104 rue caulaincourt, 75018 Paris, France Le Poulailler, 60 rue saint-sabin, 75011 Paris, France @@ -62,7 +63,6 @@ Denfert café, 58 boulvevard Saint Jacques, 75014 Paris, France Le Café frappé, 95 rue Montmartre, 75002 Paris, France La Perle, 78 rue vieille du temple, 75003 Paris, France Le Descartes, 1 rue Thouin, 75005 Paris, France -Bagels & Coffee Corner, Place de Clichy, 75017 Paris, France Le petit club, 55 rue de la tombe Issoire, 75014 Paris, France Le Plein soleil, 90 avenue Parmentier, 75011 Paris, France Le Relais Haussmann, 146, boulevard Haussmann, 75008 Paris, France @@ -75,7 +75,6 @@ Extra old café, 307 fg saint Antoine, 75011 Paris, France Chez Fafa, 44 rue Vinaigriers, 75010 Paris, France En attendant l'or, 3 rue Faidherbe, 75011 Paris, France Brûlerie San José, 30 rue des 
Petits-Champs, 75002 Paris, France -Café de la Mairie (du VIII), rue de Lisbonne, 75008 Paris, France Café Martin, 2 place Martin Nadaud, 75001 Paris, France Etienne, 14 rue Turbigo, Paris, 75001 Paris, France L'ingénu, 184 bd Voltaire, 75011 Paris, France @@ -87,96 +86,97 @@ Le Germinal, 95 avenue Emile Zola, 75015 Paris, France Le Ragueneau, 202 rue Saint-Honoré, 75001 Paris, France Le refuge, 72 rue lamarck, 75018 Paris, France Le sully, 13 rue du Faubourg Saint Denis, 75010 Paris, France +Coffee Chope, 344Vrue Vaugirard, 75015 Paris, France +Le bal du pirate, 60 rue des bergers, 75015 Paris, France +zic zinc, 95 rue claude decaen, 75012 Paris, France +l'orillon bar, 35 rue de l'orillon, 75011 Paris, France +Le Zazabar, 116 Rue de Ménilmontant, 75020 Paris, France +L'Inévitable, 22 rue Linné, 75005 Paris, France Le Dunois, 77 rue Dunois, 75013 Paris, France -La Montagne Sans Geneviève, 13 Rue du Pot de Fer, 75005 Paris, France +Ragueneau, 202 rue Saint Honoré, 75001 Paris, France Le Caminito, 48 rue du Dessous des Berges, 75013 Paris, France +Epicerie Musicale, 55bis quai de Valmy, 75010 Paris, France Le petit Bretonneau, Le petit Bretonneau - à l'intérieur de l'Hôpital, 75018 Paris, France +Le Centenaire, 104 rue amelot, 75011 Paris, France +La Montagne Sans Geneviève, 13 Rue du Pot de Fer, 75005 Paris, France +Les Pères Populaires, 46 rue de Buzenval, 75020 Paris, France +Cafe de grenelle, 188 rue de Grenelle, 75007 Paris, France +Le relais de la victoire, 73 rue de la Victoire, 75009 Paris, France La chaumière gourmande, Route de la Muette à Neuilly Club hippique du Jardin d’Acclimatation, 75016 Paris, France -Le bal du pirate, 60 rue des bergers, 75015 Paris, France -Le Zazabar, 116 Rue de Ménilmontant, 75020 Paris, France -L'antre d'eux, 16 rue DE MEZIERES, 75006 Paris, France -l'orillon bar, 35 rue de l'orillon, 75011 Paris, France -zic zinc, 95 rue claude decaen, 75012 Paris, France -Les Pères Populaires, 46 rue de Buzenval, 75020 Paris, France -Epicerie 
Musicale, 55bis quai de Valmy, 75010 Paris, France -Le relais de la victoire, 73 rue de la Victoire, 75009 Paris, France -Le Centenaire, 104 rue amelot, 75011 Paris, France -Cafe de grenelle, 188 rue de Grenelle, 75007 Paris, France -Ragueneau, 202 rue Saint Honoré, 75001 Paris, France +Le Brio, 216, rue Marcadet, 75018 Paris, France +Caves populaires, 22 rue des Dames, 75017 Paris, France +Caprice café, 12 avenue Jean Moulin, 75014 Paris, France +Tamm Bara, 7 rue Clisson, 75013 Paris, France +L'anjou, 1 rue de Montholon, 75009 Paris, France +Café dans l'aerogare Air France Invalides, 2 rue Robert Esnault Pelterie, 75007 Paris, France +Chez Prune, 36 rue Beaurepaire, 75010 Paris, France +Au Vin Des Rues, 21 rue Boulard, 75014 Paris, France +bistrot les timbrés, 14 rue d'alleray, 75015 Paris, France +Café beauveau, 9 rue de Miromesnil, 75008 Paris, France Café Pistache, 9 rue des petits champs, 75001 Paris, France La Cagnotte, 13 Rue Jean-Baptiste Dumay, 75020 Paris, France -Le Killy Jen, 28 bis boulevard Diderot, 75012 Paris, France -Café beauveau, 9 rue de Miromesnil, 75008 Paris, France le 1 cinq, 172 rue de vaugirard, 75015 Paris, France +Le Killy Jen, 28 bis boulevard Diderot, 75012 Paris, France Les Artisans, 106 rue Lecourbe, 75015 Paris, France Peperoni, 83 avenue de Wagram, 75001 Paris, France -Le Brio, 216, rue Marcadet, 75018 Paris, France -Tamm Bara, 7 rue Clisson, 75013 Paris, France -Café dans l'aerogare Air France Invalides, 2 rue Robert Esnault Pelterie, 75007 Paris, France -bistrot les timbrés, 14 rue d'alleray, 75015 Paris, France -Caprice café, 12 avenue Jean Moulin, 75014 Paris, France -Caves populaires, 22 rue des Dames, 75017 Paris, France -Au Vin Des Rues, 21 rue Boulard, 75014 Paris, France -Chez Prune, 36 rue Beaurepaire, 75010 Paris, France -L'Inévitable, 22 rue Linné, 75005 Paris, France -L'anjou, 1 rue de Montholon, 75009 Paris, France -Botak cafe, 1 rue Paul albert, 75018 Paris, France -Bistrot Saint-Antoine, 58 rue du Fbg Saint-Antoine, 
75012 Paris, France -Chez Oscar, 11/13 boulevard Beaumarchais, 75004 Paris, France -Le Piquet, 48 avenue de la Motte Picquet, 75015 Paris, France -L'avant comptoir, 3 carrefour de l'Odéon, 75006 Paris, France -le chateau d'eau, 67 rue du Château d'eau, 75010 Paris, France -Les Vendangeurs, 6/8 rue Stanislas, 75006 Paris, France -maison du vin, 52 rue des plantes, 75014 Paris, France -Le Tournebride, 104 rue Mouffetard, 75005 Paris, France -Le Fronton, 63 rue de Ponthieu, 75008 Paris, France -Le BB (Bouchon des Batignolles), 2 rue Lemercier, 75017 Paris, France -La cantine de Zoé, 136 rue du Faubourg poissonnière, 75010 Paris, France -Chez Rutabaga, 16 rue des Petits Champs, 75002 Paris, France -Les caves populaires, 22 rue des Dames, 75017 Paris, France -Le Plomb du cantal, 3 rue Gaîté, 75014 Paris, France -Trois pièces cuisine, 101 rue des dames, 75017 Paris, France -La Brocante, 10 rue Rossini, 75009 Paris, France -Le Zinc, 61 avenue de la Motte Picquet, 75015 Paris, France -Chez Luna, 108 rue de Ménilmontant, 75020 Paris, France -Le bar Fleuri, 1 rue du Plateau, 75019 Paris, France -La Liberté, 196 rue du faubourg saint-antoine, 75012 Paris, France -La cantoche de Paname, 40 Boulevard Beaumarchais, 75011 Paris, France -Le Saint René, 148 Boulevard de Charonne, 75020 Paris, France -Café Clochette, 16 avenue Richerand, 75010 Paris, France +le lutece, 380 rue de vaugirard, 75015 Paris, France +Brasiloja, 16 rue Ganneron, 75018 Paris, France +Rivolux, 16 rue de Rivoli, 75004 Paris, France L'européen, 21 Bis Boulevard Diderot, 75012 Paris, France NoMa, 39 rue Notre Dame de Nazareth, 75003 Paris, France -le lutece, 380 rue de vaugirard, 75015 Paris, France O'Paris, 1 Rue des Envierges, 75020 Paris, France -Rivolux, 16 rue de Rivoli, 75004 Paris, France -Brasiloja, 16 rue Ganneron, 75018 Paris, France -Institut des Cultures d'Islam, 19-23 rue Léon, 75018 Paris, France -Canopy Café associatif, 19 rue Pajol, 75018 Paris, France -Petits Freres des Pauvres, 47 rue de 
Batignolles, 75017 Paris, France -Le Lucernaire, 53 rue Notre-Dame des Champs, 75006 Paris, France -L'Angle, 28 rue de Ponthieu, 75008 Paris, France -Le Café d'avant, 35 rue Claude Bernard, 75005 Paris, France -Café Dupont, 198 rue de la Convention, 75015 Paris, France -Le Sévigné, 15 rue du Parc Royal, 75003 Paris, France -L'Entracte, place de l'opera, 75002 Paris, France -Panem, 18 rue de Crussol, 75011 Paris, France -Au pays de Vannes, 34 bis rue de Wattignies, 75012 Paris, France -l'Eléphant du nil, 125 Rue Saint-Antoine, 75004 Paris, France -L'âge d'or, 26 rue du Docteur Magnan, 75013 Paris, France -Le Comptoir, 354 bis rue Vaugirard, 75015 Paris, France -L'horizon, 93, rue de la Roquette, 75011 Paris, France -L'empreinte, 54, avenue Daumesnil, 75012 Paris, France -Café Victor, 10 boulevard Victor, 75015 Paris, France -Café Varenne, 36 rue de Varenne, 75007 Paris, France -Le Brigadier, 12 rue Blanche, 75009 Paris, France -Waikiki, 10 rue d"Ulm, 75005 Paris, France -Le Parc Vaugirard, 358 rue de Vaugirard, 75015 Paris, France -Pari's Café, 174 avenue de Clichy, 75017 Paris, France -Melting Pot, 3 rue de Lagny, 75020 Paris, France -le Zango, 58 rue Daguerre, 75014 Paris, France -Chez Miamophile, 6 rue Mélingue, 75019 Paris, France +Café Clochette, 16 avenue Richerand, 75010 Paris, France +La cantoche de Paname, 40 Boulevard Beaumarchais, 75011 Paris, France +Le Saint René, 148 Boulevard de Charonne, 75020 Paris, France +La Liberté, 196 rue du faubourg saint-antoine, 75012 Paris, France +Chez Rutabaga, 16 rue des Petits Champs, 75002 Paris, France +Le BB (Bouchon des Batignolles), 2 rue Lemercier, 75017 Paris, France +La Brocante, 10 rue Rossini, 75009 Paris, France +Le Plomb du cantal, 3 rue Gaîté, 75014 Paris, France +Les caves populaires, 22 rue des Dames, 75017 Paris, France +Chez Luna, 108 rue de Ménilmontant, 75020 Paris, France +Le bar Fleuri, 1 rue du Plateau, 75019 Paris, France +Trois pièces cuisine, 101 rue des dames, 75017 Paris, France +Le Zinc, 61 
avenue de la Motte Picquet, 75015 Paris, France +La cantine de Zoé, 136 rue du Faubourg poissonnière, 75010 Paris, France +Les Vendangeurs, 6/8 rue Stanislas, 75006 Paris, France +L'avant comptoir, 3 carrefour de l'Odéon, 75006 Paris, France +Botak cafe, 1 rue Paul albert, 75018 Paris, France +le chateau d'eau, 67 rue du Château d'eau, 75010 Paris, France +Bistrot Saint-Antoine, 58 rue du Fbg Saint-Antoine, 75012 Paris, France +Chez Oscar, 11/13 boulevard Beaumarchais, 75004 Paris, France +Le Fronton, 63 rue de Ponthieu, 75008 Paris, France +Le Piquet, 48 avenue de la Motte Picquet, 75015 Paris, France +Le Tournebride, 104 rue Mouffetard, 75005 Paris, France +maison du vin, 52 rue des plantes, 75014 Paris, France +L'entrepôt, 157 rue Bercy 75012 Paris, 75012 Paris, France Le café Monde et Médias, Place de la République, 75003 Paris, France Café rallye tournelles, 11 Quai de la Tournelle, 75005 Paris, France Brasserie le Morvan, 61 rue du château d'eau, 75010 Paris, France -L'entrepôt, 157 rue Bercy 75012 Paris, 75012 Paris, France \ No newline at end of file +Chez Miamophile, 6 rue Mélingue, 75019 Paris, France +Panem, 18 rue de Crussol, 75011 Paris, France +Petits Freres des Pauvres, 47 rue de Batignolles, 75017 Paris, France +Café Dupont, 198 rue de la Convention, 75015 Paris, France +L'Angle, 28 rue de Ponthieu, 75008 Paris, France +Institut des Cultures d'Islam, 19-23 rue Léon, 75018 Paris, France +Canopy Café associatif, 19 rue Pajol, 75018 Paris, France +L'Entracte, place de l'opera, 75002 Paris, France +Le Sévigné, 15 rue du Parc Royal, 75003 Paris, France +Le Café d'avant, 35 rue Claude Bernard, 75005 Paris, France +Le Lucernaire, 53 rue Notre-Dame des Champs, 75006 Paris, France +Le Brigadier, 12 rue Blanche, 75009 Paris, France +L'âge d'or, 26 rue du Docteur Magnan, 75013 Paris, France +Bagels & Coffee Corner, Place de Clichy, 75017 Paris, France +Café Victor, 10 boulevard Victor, 75015 Paris, France +L'empreinte, 54, avenue Daumesnil, 75012 Paris, France 
+L'horizon, 93, rue de la Roquette, 75011 Paris, France +Waikiki, 10 rue d"Ulm, 75005 Paris, France +Au pays de Vannes, 34 bis rue de Wattignies, 75012 Paris, France +Café Varenne, 36 rue de Varenne, 75007 Paris, France +l'Eléphant du nil, 125 Rue Saint-Antoine, 75004 Paris, France +Le Comptoir, 354 bis rue Vaugirard, 75015 Paris, France +Le Parc Vaugirard, 358 rue de Vaugirard, 75015 Paris, France +le Zango, 58 rue Daguerre, 75014 Paris, France +Melting Pot, 3 rue de Lagny, 75020 Paris, France +Pari's Café, 174 avenue de Clichy, 75017 Paris, France \ No newline at end of file diff --git a/bonobo/examples/nodes/factory.py b/bonobo/examples/nodes/factory.py index d1aac89..c1f3818 100644 --- a/bonobo/examples/nodes/factory.py +++ b/bonobo/examples/nodes/factory.py @@ -1,33 +1,17 @@ -from functools import partial - -import itertools - import bonobo from bonobo.commands.run import get_default_services -from bonobo.config import Configurable from bonobo.nodes.factory import Factory -from bonobo.nodes.io.json import JsonDictReader - - -@Factory -def Normalize(self): - self[0].str().title() - self.move(0, 'title') - self.move(0, 'address') - - -class PrettyPrinter(Configurable): - def call(self, *args, **kwargs): - for i, ( - item, value - ) in enumerate(itertools.chain(enumerate(args), kwargs.items())): - print(' ' if i else '• ', item, '=', value) +from bonobo.nodes.io.json import JsonDictItemsReader +normalize = Factory() +normalize[0].str().title() +normalize.move(0, 'title') +normalize.move(0, 'address') graph = bonobo.Graph( - JsonDictReader('datasets/coffeeshops.json'), - Normalize(), - PrettyPrinter(), + JsonDictItemsReader('datasets/coffeeshops.json'), + normalize, + bonobo.PrettyPrinter(), ) if __name__ == '__main__': diff --git a/bonobo/nodes/factory.py b/bonobo/nodes/factory.py index 736ca2b..2a1c30b 100644 --- a/bonobo/nodes/factory.py +++ b/bonobo/nodes/factory.py @@ -3,7 +3,7 @@ import warnings from functools import partial from bonobo import Bag -from 
bonobo.config import Configurable +from bonobo.config import Configurable, Method _isarg = lambda item: type(item) is int _iskwarg = lambda item: type(item) is str @@ -143,16 +143,21 @@ CURSOR_TYPES['str'] = StringCursor class Factory(Configurable): - def __init__(self): + initialize = Method(required=False) + + def __init__(self, *args, **kwargs): warnings.warn( __file__ + ' is experimental, API may change in the future, use it as a preview only and knowing the risks.', FutureWarning ) - super(Factory, self).__init__() + super(Factory, self).__init__(*args, **kwargs) self.default_cursor_type = 'default' self.operations = [] + if self.initialize is not None: + self.initialize(self) + @factory_operation def move(self, _from, _to, *args, **kwargs): if _from == _to: @@ -175,10 +180,10 @@ class Factory(Configurable): raise RuntimeError('Houston, we have a problem...') def __call__(self, *args, **kwargs): - # print('factory call on', args, kwargs) + print('factory call on', args, kwargs) for operation in self.operations: args, kwargs = operation.apply(*args, **kwargs) - # print(' ... after', operation, 'got', args, kwargs) + print(' ... 
after', operation, 'got', args, kwargs) return Bag(*args, **kwargs) def __getitem__(self, item): diff --git a/bonobo/nodes/io/json.py b/bonobo/nodes/io/json.py index 81b6870..f1c6df0 100644 --- a/bonobo/nodes/io/json.py +++ b/bonobo/nodes/io/json.py @@ -1,11 +1,10 @@ import json -from itertools import starmap -from bonobo.structs.bags import Bag from bonobo.config.processors import ContextProcessor from bonobo.constants import NOT_MODIFIED from bonobo.nodes.io.base import FileHandler, IOFormatEnabled from bonobo.nodes.io.file import FileReader, FileWriter +from bonobo.structs.bags import Bag class JsonHandler(FileHandler): @@ -21,6 +20,12 @@ class JsonReader(IOFormatEnabled, FileReader, JsonHandler): yield self.get_output(line) +class JsonDictItemsReader(JsonReader): + def read(self, fs, file): + for line in self.loader(file).items(): + yield Bag(*line) + + class JsonWriter(IOFormatEnabled, FileWriter, JsonHandler): @ContextProcessor def envelope(self, context, fs, file, lineno): From 0eb5fd8a150c95e984321a30e7f7748c78416acd Mon Sep 17 00:00:00 2001 From: spagoc Date: Thu, 28 Sep 2017 08:42:35 +0200 Subject: [PATCH 41/71] More readable statistics on Ubuntu workstation standard terminal statistics are hard to read on the Ubuntu standard terminal issue #165 --- bonobo/ext/console.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bonobo/ext/console.py b/bonobo/ext/console.py index 6679092..4d8cb6f 100644 --- a/bonobo/ext/console.py +++ b/bonobo/ext/console.py @@ -81,7 +81,7 @@ class ConsoleOutputPlugin(Plugin): print(line + CLEAR_EOL, file=sys.stderr) alive_color = Style.BRIGHT - dead_color = (Style.BRIGHT + Fore.BLACK) if self.iswindows else Fore.BLACK + dead_color = Style.BRIGHT + Fore.BLACK for i in context.graph.topologically_sorted_indexes: node = context[i] From 6abdf09ef7615ad9de9e14e68afc77998fb8ea3b Mon Sep 17 00:00:00 2001 From: cwandrews Date: Sun, 1 Oct 2017 19:56:49 -0400 Subject: [PATCH 42/71] Moved tests/util/get_passed_env.py into 
/bonobo/examples/env_vars/get_passed_env.py. --- bonobo/examples/env_vars/__init__.py | 0 bonobo/examples/env_vars/get_passed_env.py | 22 ++++++++++++++++++++++ tests/test_commands.py | 4 ++-- 3 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 bonobo/examples/env_vars/__init__.py create mode 100644 bonobo/examples/env_vars/get_passed_env.py diff --git a/bonobo/examples/env_vars/__init__.py b/bonobo/examples/env_vars/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/examples/env_vars/get_passed_env.py b/bonobo/examples/env_vars/get_passed_env.py new file mode 100644 index 0000000..d9c4ba6 --- /dev/null +++ b/bonobo/examples/env_vars/get_passed_env.py @@ -0,0 +1,22 @@ +import os + +from bonobo import Graph + + +def extract(): + env_test_user = os.getenv('ENV_TEST_USER') + env_test_number = os.getenv('ENV_TEST_NUMBER') + env_test_string = os.getenv('ENV_TEST_STRING') + return env_test_user, env_test_number, env_test_string + + +def load(s: str): + print(s) + + +graph = Graph(extract, load) + +if __name__ == '__main__': + from bonobo import run + + run(graph) diff --git a/tests/test_commands.py b/tests/test_commands.py index 730bc0b..3f26e44 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -103,7 +103,7 @@ def test_version(runner, capsys): def test_run_with_env(runner, capsys): runner( 'run', '--quiet', - str(pathlib.Path(os.path.dirname(__file__), 'util', 'get_passed_env.py')), '--env', 'ENV_TEST_NUMBER=123', + get_examples_path('env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" ) out, err = capsys.readouterr() @@ -116,7 +116,7 @@ def test_run_with_env(runner, capsys): @all_runners def test_run_module_with_env(runner, capsys): runner( - 'run', '--quiet', '-m', 'tests.util.get_passed_env', '--env', 'ENV_TEST_NUMBER=123', '--env', + 'run', '--quiet', '-m', 'bonobo.examples.env_vars.get_passed_env', '--env', 
'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" ) out, err = capsys.readouterr() From e38ce3cd1144687b30df54d4f3ac75a68d23745d Mon Sep 17 00:00:00 2001 From: cwandrews Date: Sun, 1 Oct 2017 20:03:11 -0400 Subject: [PATCH 43/71] Switched --env / -e optional argument from register to register_generic_run_arguments per suggestion. Also removed tests/util/get_passed_env.py --- bonobo/commands/run.py | 2 +- tests/util/get_passed_env.py | 22 ---------------------- 2 files changed, 1 insertion(+), 23 deletions(-) delete mode 100644 tests/util/get_passed_env.py diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index 2204a3b..a37282c 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -106,6 +106,7 @@ def register_generic_run_arguments(parser, required=True): source_group = parser.add_mutually_exclusive_group(required=required) source_group.add_argument('filename', nargs='?', type=str) source_group.add_argument('--module', '-m', type=str) + parser.add_argument('--env', '-e', action='append') return parser @@ -115,5 +116,4 @@ def register(parser): verbosity_group.add_argument('--quiet', '-q', action='store_true') verbosity_group.add_argument('--verbose', '-v', action='store_true') parser.add_argument('--install', '-I', action='store_true') - parser.add_argument('--env', '-e', action='append') return execute diff --git a/tests/util/get_passed_env.py b/tests/util/get_passed_env.py deleted file mode 100644 index d9c4ba6..0000000 --- a/tests/util/get_passed_env.py +++ /dev/null @@ -1,22 +0,0 @@ -import os - -from bonobo import Graph - - -def extract(): - env_test_user = os.getenv('ENV_TEST_USER') - env_test_number = os.getenv('ENV_TEST_NUMBER') - env_test_string = os.getenv('ENV_TEST_STRING') - return env_test_user, env_test_number, env_test_string - - -def load(s: str): - print(s) - - -graph = Graph(extract, load) - -if __name__ == '__main__': - from bonobo import run - - run(graph) From
c1a5750b60b34539140bfc67996643f390dcfa50 Mon Sep 17 00:00:00 2001 From: cwandrews Date: Sun, 1 Oct 2017 20:16:24 -0400 Subject: [PATCH 44/71] Made a few spelling and grammar corrections/adjustments to the docs under guide. --- docs/guide/ext/jupyter.rst | 6 +++--- docs/guide/purity.rst | 2 +- docs/guide/services.rst | 6 +++--- docs/guide/transformations.rst | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/guide/ext/jupyter.rst b/docs/guide/ext/jupyter.rst index 6e96bf6..0d00c58 100644 --- a/docs/guide/ext/jupyter.rst +++ b/docs/guide/ext/jupyter.rst @@ -1,8 +1,8 @@ Bonobo with Jupyter =================== - -There is a builtin plugin that integrates (kind of minimalistically, for now) bonobo within jupyter notebooks, so -you can read the execution status of a graph within a nice (ok not so nice) html/javascript widget. +minimalistically +There is a builtin plugin that integrates (somewhat minimallistically, for now) bonobo within jupyter notebooks, so +you can read the execution status of a graph within a nice (ok, not so nice) html/javascript widget. See https://github.com/jupyter-widgets/widget-cookiecutter for the base template used. diff --git a/docs/guide/purity.rst b/docs/guide/purity.rst index bd20d4e..4b21735 100644 --- a/docs/guide/purity.rst +++ b/docs/guide/purity.rst @@ -128,7 +128,7 @@ Now let's see how to do it correctly: 'index': i } -I hear you think «Yeah, but if I create like millions of dicts ...». +I bet you think «Yeah, but if I create like millions of dicts ...». Let's say we chose the opposite way and copied the dict outside the transformation (in fact, `it's what we did in bonobo's ancestor `_). 
This means you will also create the diff --git a/docs/guide/services.rst b/docs/guide/services.rst index cf7ecc7..4e1a22c 100644 --- a/docs/guide/services.rst +++ b/docs/guide/services.rst @@ -12,8 +12,8 @@ If you're going a little further than that, you'll feel limited, for a few reaso * Hardcoded and tightly linked dependencies make your transformations hard to test, and hard to reuse. * Processing data on your laptop is great, but being able to do it on different target systems (or stages), in different - environments, is more realistic. You'll want to contigure a different database on a staging environment, - preprod environment or production system. Maybe you have silimar systems for different clients and want to select + environments, is more realistic. You'll want to configure a different database on a staging environment, + pre-production environment, or production system. Maybe you have similar systems for different clients and want to select the system at runtime. Etc. Service injection @@ -44,7 +44,7 @@ Let's define such a transformation: 'category': database.get_category_name_for_sku(row['sku']) } -This piece of code tells bonobo that your transformation expect a sercive called "primary_sql_database", that will be +This piece of code tells bonobo that your transformation expect a service called "primary_sql_database", that will be injected to your calls under the parameter name "database". 
Function-based transformations diff --git a/docs/guide/transformations.rst b/docs/guide/transformations.rst index 8222357..e0fc347 100644 --- a/docs/guide/transformations.rst +++ b/docs/guide/transformations.rst @@ -22,7 +22,7 @@ underscores and lowercase names: def uppercase(s: str) -> str: return s.upper() -If you're naming something which is configurable, that will need to be instanciated or called to obtain something that +If you're naming something which is configurable, that will need to be instantiated or called to obtain something that can be used as a graph node, then use camelcase names: .. code-block:: python From 074d173ea72d450544210f83061b9ac50fbdc746 Mon Sep 17 00:00:00 2001 From: cwandrews Date: Sun, 1 Oct 2017 21:26:10 -0400 Subject: [PATCH 45/71] Added environmental_variables page to the guides section. --- docs/guide/environmental_variables.rst | 71 ++++++++++++++++++++++++++ docs/guide/index.rst | 1 + 2 files changed, 72 insertions(+) create mode 100644 docs/guide/environmental_variables.rst diff --git a/docs/guide/environmental_variables.rst b/docs/guide/environmental_variables.rst new file mode 100644 index 0000000..aa8bbaf --- /dev/null +++ b/docs/guide/environmental_variables.rst @@ -0,0 +1,71 @@ +Environmental Variables +======================= + +Best practice holds that variables should be passed to graphs via environmental variables. +Doing this is important for keeping sensitive data out of the code - such as an +API token or username and password used to access a database. Not only is this +approach more secure, it also makes graphs more flexible by allowing adjustments +for a variety of environments and contexts. Importantly, environmental variables +are also the means by-which arguments can be passed to graphs.
+ + +Passing / Setting Environmental Variables +:::::::::::::::::::::::::::::::::::::::::::: + +The recommended way to set environmental variables for a given graph is simply to use +the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc). +``--env`` (or ``-e`` for short) should then be followed by the variable name and value using the +syntax `VAR_NAME=VAR_VALUE`. Multiple environmental variables can be passed by using +multiple ``--env`` / ``-e`` flags. + +Example: + +.. code-block:: bash + + # Using one environmental variable: + bonobo run csvsanitizer --env SECRET_TOKEN=secret123 + + # Using multiple environmental variables: + bonobo run csvsanitizer -e SRC_FILE=inventory.txt -e DST_FILE=inventory_processed.csv + +If you're naming something which is configurable, that is will need to be instantiated or called to obtain something that +can be used as a graph node, then use camelcase names: + + +Accessing Environmental Variables from within the Graph Context +::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +Environmental variables, whether global or only for the scope of the graph, +can be can be accessed using any of the normal means. It is important to note +that whether set globally for the system or just for the graph context, +environmental variables are accessed by bonobo in the same way. In the example +below the database user and password are accessed via the ``os`` module's ``getenv`` +function and used to get data from the database. + +.. code-block:: python + + import os + + from bonobo import Graph, run + + + def extract(): + database_user = os.getenv('DB_USER') + database_password = os.getenv('DB_PASS') + # ... + # (connect to database using database_user and database_password) + # (get data from database) + # ... 
+ + return database_data + + + def load(database_data: dict): + for k, v in database_data.items(): + print('{key} = {value}'.format(key=k, value=v)) + + + graph = Graph(extract, load) + + if __name__ == '__main__': + run(graph) diff --git a/docs/guide/index.rst b/docs/guide/index.rst index 18e5565..a05dbf0 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -12,6 +12,7 @@ There are a few things that you should know while writing transformations graphs purity transformations services + envrionmental_variables Third party integrations :::::::::::::::::::::::: From 1863c2cd57d2d09a8655ed15b741b1a038355321 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 2 Oct 2017 08:27:33 +0200 Subject: [PATCH 46/71] Change import style in example. --- tests/util/get_passed_env.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/util/get_passed_env.py b/tests/util/get_passed_env.py index d9c4ba6..54a3280 100644 --- a/tests/util/get_passed_env.py +++ b/tests/util/get_passed_env.py @@ -1,6 +1,6 @@ import os -from bonobo import Graph +import bonobo def extract(): @@ -14,9 +14,7 @@ def load(s: str): print(s) -graph = Graph(extract, load) +graph = bonobo.Graph(extract, load) if __name__ == '__main__': - from bonobo import run - - run(graph) + bonobo.run(graph) From d858b246ad3a44f4bbadf9393cfbfdffa0f9acf0 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 2 Oct 2017 06:16:30 +0200 Subject: [PATCH 47/71] [api] require/requires is confusing --- bonobo/config/__init__.py | 3 +++ bonobo/util/__init__.py | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/bonobo/config/__init__.py b/bonobo/config/__init__.py index 6a4247e..bd00845 100644 --- a/bonobo/config/__init__.py +++ b/bonobo/config/__init__.py @@ -3,6 +3,8 @@ from bonobo.config.options import Method, Option from bonobo.config.processors import ContextProcessor from bonobo.config.services import Container, Exclusive, Service, requires +use = requires + # Bonobo's Config API 
__all__ = [ 'Configurable', @@ -13,4 +15,5 @@ __all__ = [ 'Option', 'Service', 'requires', + 'use', ] diff --git a/bonobo/util/__init__.py b/bonobo/util/__init__.py index 8b13789..f4b4158 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -1 +1,6 @@ +from bonobo.util.python import require +# Bonobo's util API +__all__ = [ + 'require' +] From c09c1010745535fecb916b149dc84043e7616e42 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 2 Oct 2017 08:38:31 +0200 Subject: [PATCH 48/71] Making config/util/structs apis available at level 2 import (x.y), implements the roots for loopbacks (recursive transformations). This still needs work, as it's hard not to close an input queue as soon as the last item was read. --- bonobo/_api.py | 4 +-- bonobo/constants.py | 1 + bonobo/execution/node.py | 10 ++++--- bonobo/structs/__init__.py | 10 +++++-- bonobo/structs/bags.py | 16 +++++++---- bonobo/util/__init__.py | 24 +++++++++++++++- bonobo/util/errors.py | 7 ----- bonobo/util/inspect.py | 56 ++++++++++++++++++++++++++++++-------- 8 files changed, 96 insertions(+), 32 deletions(-) diff --git a/bonobo/_api.py b/bonobo/_api.py index 1b7e424..de75bd2 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,6 +1,6 @@ import logging -from bonobo.structs import Bag, ErrorBag, Graph, Token +from bonobo.structs import Bag, ErrorBag, Graph, Token from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ PickleReader, PickleWriter, PrettyPrinter, RateLimited, Tee, arg0_to_kwargs, count, identity, kwargs_to_arg0, noop from bonobo.strategies import create_strategy @@ -70,7 +70,7 @@ def run(graph, strategy=None, plugins=None, services=None): # bonobo.structs -register_api_group(Bag, Graph, Token) +register_api_group(Bag, ErrorBag, Graph, Token) # bonobo.strategies register_api(create_strategy) diff --git a/bonobo/constants.py b/bonobo/constants.py index 4187197..4a02f5e 100644 --- a/bonobo/constants.py +++ b/bonobo/constants.py
@@ -3,6 +3,7 @@ from bonobo.structs.tokens import Token BEGIN = Token('Begin') END = Token('End') INHERIT_INPUT = Token('InheritInput') +LOOPBACK = Token('Loopback') NOT_MODIFIED = Token('NotModified') DEFAULT_SERVICES_FILENAME = '_services.py' DEFAULT_SERVICES_ATTR = 'get_services' \ No newline at end of file diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index 45691a6..6f83f04 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -2,13 +2,13 @@ import traceback from queue import Empty from time import sleep -from bonobo.constants import INHERIT_INPUT, NOT_MODIFIED +from bonobo.constants import INHERIT_INPUT, NOT_MODIFIED, BEGIN, END from bonobo.errors import InactiveReadableError, UnrecoverableError from bonobo.execution.base import LoopingExecutionContext from bonobo.structs.bags import Bag from bonobo.structs.inputs import Input from bonobo.util.compat import deprecated_alias -from bonobo.util.errors import is_error +from bonobo.util.inspect import iserrorbag, isloopbackbag from bonobo.util.iterators import iter_if_not_sequence from bonobo.util.objects import get_name from bonobo.util.statistics import WithStatistics @@ -65,8 +65,10 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): if not _control: self.increment('out') - if is_error(value): + if iserrorbag(value): value.apply(self.handle_error) + elif isloopbackbag(value): + self.input.put(value) else: for output in self.outputs: output.put(value) @@ -137,7 +139,7 @@ def _resolve(input_bag, output): if output is NOT_MODIFIED: return input_bag - if is_error(output): + if iserrorbag(output): return output # If it does not look like a bag, let's create one for easier manipulation diff --git a/bonobo/structs/__init__.py b/bonobo/structs/__init__.py index 15e76a9..678cea1 100644 --- a/bonobo/structs/__init__.py +++ b/bonobo/structs/__init__.py @@ -1,5 +1,11 @@ -from bonobo.structs.bags import Bag +from bonobo.structs.bags import Bag, ErrorBag, LoopbackBag 
from bonobo.structs.graphs import Graph from bonobo.structs.tokens import Token -__all__ = ['Bag', 'Graph', 'Token'] +__all__ = [ + 'Bag', + 'ErrorBag', + 'Graph', + 'LoopbackBag', + 'Token', +] diff --git a/bonobo/structs/bags.py b/bonobo/structs/bags.py index 4ef2fa7..0c91274 100644 --- a/bonobo/structs/bags.py +++ b/bonobo/structs/bags.py @@ -1,6 +1,6 @@ import itertools -from bonobo.constants import INHERIT_INPUT +from bonobo.constants import INHERIT_INPUT, LOOPBACK __all__ = [ 'Bag', @@ -33,8 +33,10 @@ class Bag: """ + default_flags = () + def __init__(self, *args, _flags=None, _parent=None, **kwargs): - self._flags = _flags or () + self._flags = type(self).default_flags + (_flags or ()) self._parent = _parent self._args = args self._kwargs = kwargs @@ -43,7 +45,7 @@ class Bag: def args(self): if self._parent is None: return self._args - return (*self._parent.args, *self._args, ) + return (*self._parent.args, *self._args,) @property def kwargs(self): @@ -91,7 +93,7 @@ class Bag: @classmethod def inherit(cls, *args, **kwargs): - return cls(*args, _flags=(INHERIT_INPUT, ), **kwargs) + return cls(*args, _flags=(INHERIT_INPUT,), **kwargs) def __eq__(self, other): return isinstance(other, Bag) and other.args == self.args and other.kwargs == self.kwargs @@ -99,12 +101,16 @@ class Bag: def __repr__(self): return '<{} ({})>'.format( type(self).__name__, ', '. 
- join(itertools.chain( + join(itertools.chain( map(repr, self.args), ('{}={}'.format(k, repr(v)) for k, v in self.kwargs.items()), )) ) +class LoopbackBag(Bag): + default_flags = (LOOPBACK,) + + class ErrorBag(Bag): pass diff --git a/bonobo/util/__init__.py b/bonobo/util/__init__.py index f4b4158..4a5e8dc 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -1,6 +1,28 @@ +from bonobo.util.inspect import ( + inspect_node, + isbag, + isconfigurable, + isconfigurabletype, + iscontextprocessor, + iserrorbag, + isloopbackbag, + ismethod, + isoption, + istype, +) from bonobo.util.python import require # Bonobo's util API __all__ = [ - 'require' + 'require', + 'inspect_node', + 'isbag', + 'isconfigurable', + 'isconfigurabletype', + 'iscontextprocessor', + 'iserrorbag', + 'isloopbackbag', + 'ismethod', + 'isoption', + 'istype', ] diff --git a/bonobo/util/errors.py b/bonobo/util/errors.py index 0ea4e58..cae2789 100644 --- a/bonobo/util/errors.py +++ b/bonobo/util/errors.py @@ -1,13 +1,6 @@ import sys from textwrap import indent -from bonobo import settings -from bonobo.structs.bags import ErrorBag - - -def is_error(bag): - return isinstance(bag, ErrorBag) - def _get_error_message(exc): if hasattr(exc, '__str__'): diff --git a/bonobo/util/inspect.py b/bonobo/util/inspect.py index 42da6ed..1594d1e 100644 --- a/bonobo/util/inspect.py +++ b/bonobo/util/inspect.py @@ -1,5 +1,18 @@ from collections import namedtuple +from bonobo.constants import LOOPBACK + + +def isconfigurable(mixed): + """ + Check if the given argument is an instance of :class:`bonobo.config.Configurable`. + + :param mixed: + :return: bool + """ + from bonobo.config.configurables import Configurable + return isinstance(mixed, Configurable) + def isconfigurabletype(mixed): """ @@ -13,17 +26,6 @@ def isconfigurabletype(mixed): return isinstance(mixed, ConfigurableMeta) -def isconfigurable(mixed): - """ - Check if the given argument is an instance of :class:`bonobo.config.Configurable`. 
- - :param mixed: - :return: bool - """ - from bonobo.config.configurables import Configurable - return isinstance(mixed, Configurable) - - def isoption(mixed): """ Check if the given argument is an instance of :class:`bonobo.config.Option`. @@ -68,6 +70,38 @@ def istype(mixed): return isinstance(mixed, type) +def isbag(mixed): + """ + Check if the given argument is an instance of a :class:`bonobo.Bag`. + + :param mixed: + :return: bool + """ + from bonobo.structs.bags import Bag + return isinstance(mixed, Bag) + + +def iserrorbag(mixed): + """ + Check if the given argument is an instance of an :class:`bonobo.ErrorBag`. + + :param mixed: + :return: bool + """ + from bonobo.structs.bags import ErrorBag + return isinstance(mixed, ErrorBag) + + +def isloopbackbag(mixed): + """ + Check if the given argument is an instance of a :class:`bonobo.Bag`, marked for loopback behaviour. + + :param mixed: + :return: bool + """ + return isbag(mixed) and LOOPBACK in mixed.flags + + ConfigurableInspection = namedtuple( 'ConfigurableInspection', [ 'type', From d35598fe8ca1ea85267c475d4dc1e60a846e6786 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 2 Oct 2017 08:57:46 +0200 Subject: [PATCH 49/71] Add a reference to graph context (private) in service container. --- bonobo/execution/graph.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bonobo/execution/graph.py b/bonobo/execution/graph.py index 00d2c43..2c08589 100644 --- a/bonobo/execution/graph.py +++ b/bonobo/execution/graph.py @@ -25,6 +25,9 @@ class GraphExecutionContext: self.plugins = [PluginExecutionContext(plugin, parent=self) for plugin in plugins or ()] self.services = Container(services) if services else Container() + # Probably not a good idea to use it unless you really know what you're doing. But you can access the context. 
+ self.services['__graph_context'] = self + for i, node_context in enumerate(self): node_context.outputs = [self[j].input for j in self.graph.outputs_of(i)] node_context.input.on_begin = partial(node_context.send, BEGIN, _control=True) From e04c3dd84916a0b5cb08da9264104d76a4c5d6f5 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 2 Oct 2017 09:14:10 +0200 Subject: [PATCH 50/71] Uniformisation of API for graph and node contexts (recv -> write), removing publication of LoopbackBag as this is not stable with current BEGIN/END implementation. --- bonobo/config/features.py | 0 bonobo/execution/graph.py | 12 ++++++------ bonobo/execution/node.py | 2 +- bonobo/strategies/executor.py | 4 ++-- bonobo/strategies/naive.py | 2 +- bonobo/structs/__init__.py | 3 +-- bonobo/structs/bags.py | 8 ++++---- tests/test_commands.py | 4 ++-- tests/test_execution.py | 2 +- 9 files changed, 18 insertions(+), 19 deletions(-) create mode 100644 bonobo/config/features.py diff --git a/bonobo/config/features.py b/bonobo/config/features.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/execution/graph.py b/bonobo/execution/graph.py index 2c08589..1859adc 100644 --- a/bonobo/execution/graph.py +++ b/bonobo/execution/graph.py @@ -43,7 +43,7 @@ class GraphExecutionContext: def __iter__(self): yield from self.nodes - def recv(self, *messages): + def write(self, *messages): """Push a list of messages in the inputs of this graph's inputs, matching the output of special node "BEGIN" in our graph.""" @@ -56,12 +56,12 @@ class GraphExecutionContext: for node in self.nodes: node.start() - def loop(self): - # todo use strategy - for node in self.nodes: - node.loop() - def stop(self): # todo use strategy for node in self.nodes: node.stop() + + def loop(self): + # todo use strategy + for node in self.nodes: + node.loop() diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index 6f83f04..e8869ac 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -2,7 
+2,7 @@ import traceback from queue import Empty from time import sleep -from bonobo.constants import INHERIT_INPUT, NOT_MODIFIED, BEGIN, END +from bonobo.constants import INHERIT_INPUT, NOT_MODIFIED from bonobo.errors import InactiveReadableError, UnrecoverableError from bonobo.execution.base import LoopingExecutionContext from bonobo.structs.bags import Bag diff --git a/bonobo/strategies/executor.py b/bonobo/strategies/executor.py index 44d206e..a0bd4f4 100644 --- a/bonobo/strategies/executor.py +++ b/bonobo/strategies/executor.py @@ -21,7 +21,7 @@ class ExecutorStrategy(Strategy): def execute(self, graph, *args, plugins=None, services=None, **kwargs): context = self.create_graph_execution_context(graph, plugins=plugins, services=services) - context.recv(BEGIN, Bag(), END) + context.write(BEGIN, Bag(), END) executor = self.create_executor() @@ -57,7 +57,7 @@ class ExecutorStrategy(Strategy): futures.append(executor.submit(_runner)) while context.alive: - time.sleep(0.2) + time.sleep(0.1) for plugin_context in context.plugins: plugin_context.shutdown() diff --git a/bonobo/strategies/naive.py b/bonobo/strategies/naive.py index b93a2e9..cab9c57 100644 --- a/bonobo/strategies/naive.py +++ b/bonobo/strategies/naive.py @@ -6,7 +6,7 @@ from bonobo.structs.bags import Bag class NaiveStrategy(Strategy): def execute(self, graph, *args, plugins=None, **kwargs): context = self.create_graph_execution_context(graph, plugins=plugins) - context.recv(BEGIN, Bag(), END) + context.write(BEGIN, Bag(), END) # TODO: how to run plugins in "naive" mode ? 
context.start() diff --git a/bonobo/structs/__init__.py b/bonobo/structs/__init__.py index 678cea1..6c0d9ab 100644 --- a/bonobo/structs/__init__.py +++ b/bonobo/structs/__init__.py @@ -1,4 +1,4 @@ -from bonobo.structs.bags import Bag, ErrorBag, LoopbackBag +from bonobo.structs.bags import Bag, ErrorBag from bonobo.structs.graphs import Graph from bonobo.structs.tokens import Token @@ -6,6 +6,5 @@ __all__ = [ 'Bag', 'ErrorBag', 'Graph', - 'LoopbackBag', 'Token', ] diff --git a/bonobo/structs/bags.py b/bonobo/structs/bags.py index 0c91274..3eae9ff 100644 --- a/bonobo/structs/bags.py +++ b/bonobo/structs/bags.py @@ -45,7 +45,7 @@ class Bag: def args(self): if self._parent is None: return self._args - return (*self._parent.args, *self._args,) + return (*self._parent.args, *self._args, ) @property def kwargs(self): @@ -93,7 +93,7 @@ class Bag: @classmethod def inherit(cls, *args, **kwargs): - return cls(*args, _flags=(INHERIT_INPUT,), **kwargs) + return cls(*args, _flags=(INHERIT_INPUT, ), **kwargs) def __eq__(self, other): return isinstance(other, Bag) and other.args == self.args and other.kwargs == self.kwargs @@ -101,7 +101,7 @@ class Bag: def __repr__(self): return '<{} ({})>'.format( type(self).__name__, ', '. 
- join(itertools.chain( + join(itertools.chain( map(repr, self.args), ('{}={}'.format(k, repr(v)) for k, v in self.kwargs.items()), )) @@ -109,7 +109,7 @@ class Bag: class LoopbackBag(Bag): - default_flags = (LOOPBACK,) + default_flags = (LOOPBACK, ) class ErrorBag(Bag): diff --git a/tests/test_commands.py b/tests/test_commands.py index 3f26e44..1fca75a 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -103,8 +103,8 @@ def test_version(runner, capsys): def test_run_with_env(runner, capsys): runner( 'run', '--quiet', - get_examples_path('env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123', - '--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" + get_examples_path('env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123', '--env', + 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" ) out, err = capsys.readouterr() out = out.split('\n') diff --git a/tests/test_execution.py b/tests/test_execution.py index e6099fd..70e12ac 100644 --- a/tests/test_execution.py +++ b/tests/test_execution.py @@ -62,7 +62,7 @@ def test_simple_execution_context(): assert not ctx.started assert not ctx.stopped - ctx.recv(BEGIN, Bag(), END) + ctx.write(BEGIN, Bag(), END) assert not ctx.alive assert not ctx.started From a6b3787513685813249ac15190e7377e702472a5 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 2 Oct 2017 09:14:51 +0200 Subject: [PATCH 51/71] Removing features.py as it is experimental, will probably go in 0.6.
--- bonobo/config/features.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 bonobo/config/features.py diff --git a/bonobo/config/features.py b/bonobo/config/features.py deleted file mode 100644 index e69de29..0000000 From b83ba99beb2426a7a6e4a9160c34042b854517e1 Mon Sep 17 00:00:00 2001 From: CW Andrews Date: Mon, 2 Oct 2017 15:24:41 -0400 Subject: [PATCH 52/71] Update index.rst --- docs/guide/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guide/index.rst b/docs/guide/index.rst index a05dbf0..27b0a3e 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -12,7 +12,7 @@ There are a few things that you should know while writing transformations graphs purity transformations services - envrionmental_variables + envrionment_variables Third party integrations :::::::::::::::::::::::: From 5f83aef47d7af31b2b71a715d7edf28d57902467 Mon Sep 17 00:00:00 2001 From: CW Andrews Date: Mon, 2 Oct 2017 15:25:13 -0400 Subject: [PATCH 53/71] Update jupyter.rst --- docs/guide/ext/jupyter.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guide/ext/jupyter.rst b/docs/guide/ext/jupyter.rst index 0d00c58..8250853 100644 --- a/docs/guide/ext/jupyter.rst +++ b/docs/guide/ext/jupyter.rst @@ -1,6 +1,6 @@ Bonobo with Jupyter =================== -minimalistically + There is a builtin plugin that integrates (somewhat minimallistically, for now) bonobo within jupyter notebooks, so you can read the execution status of a graph within a nice (ok, not so nice) html/javascript widget. 
From 56d8f3291066348309121787f72cd0a67193c63a Mon Sep 17 00:00:00 2001 From: CW Andrews Date: Mon, 2 Oct 2017 15:34:00 -0400 Subject: [PATCH 54/71] Update environmental_variables.rst --- docs/guide/environmental_variables.rst | 30 ++++++++++++++------------ 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/docs/guide/environmental_variables.rst b/docs/guide/environmental_variables.rst index aa8bbaf..b81197a 100644 --- a/docs/guide/environmental_variables.rst +++ b/docs/guide/environmental_variables.rst @@ -1,44 +1,46 @@ -Environmental Variables +Environment Variables ======================= -Best practice holds that variables should be passed to graphs via environmental variables. +Best practice holds that variables should be passed to graphs via environment variables. Doing this is important for keeping sensitive data out of the code - such as an API token or username and password used to access a database. Not only is this approach more secure, it also makes graphs more flexible by allowing adjustments -for a variety of environments and contexts. Importantly, environmental variables +for a variety of environments and contexts. Importantly, environment variables are also the means by-which arguments can be passed to graphs. -Passing / Setting Environmental Variables +Passing / Setting Environment Variables :::::::::::::::::::::::::::::::::::::::::::: -The recommended way to set environmental variables for a given graph is simply to use +The recommended way to set environment variables for a given graph is simply to use the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc). ``--env`` (or ``-e`` for short) should then be followed by the variable name and value using the -syntax `VAR_NAME=VAR_VALUE`. Multiple environmental variables can be passed by using +syntax `VAR_NAME=VAR_VALUE`. Multiple environment variables can be passed by using multiple ``--env`` / ``-e`` flags. Example: .. 
code-block:: bash - # Using one environmental variable: + # Using one environment variable via --env flag: bonobo run csvsanitizer --env SECRET_TOKEN=secret123 - # Using multiple environmental variables: + # Using multiple environment variables via -e (env) flag: bonobo run csvsanitizer -e SRC_FILE=inventory.txt -e DST_FILE=inventory_processed.csv + + # Using one environment variable in bash (*bash only): + SECRET_TOKEN=secret123 bonobo run csvsanitizer -If you're naming something which is configurable, that is will need to be instantiated or called to obtain something that -can be used as a graph node, then use camelcase names: + # Using multiple environment variables in bash (*bash only): + SRC_FILE=inventory.txt DST_FILE=inventory_processed.csv bonobo run csvsanitizer - -Accessing Environmental Variables from within the Graph Context +Accessing Environment Variables from within the Graph Context ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -Environmental variables, whether global or only for the scope of the graph, +Environment variables, whether global or only for the scope of the graph, can be can be accessed using any of the normal means. It is important to note that whether set globally for the system or just for the graph context, -environmental variables are accessed by bonobo in the same way. In the example +environment variables are accessed by bonobo in the same way. In the example below the database user and password are accessed via the ``os`` module's ``getenv`` function and used to get data from the database. From 504e8920f250689a65834bf728bb478bceaa2ff4 Mon Sep 17 00:00:00 2001 From: CW Andrews Date: Mon, 2 Oct 2017 16:06:04 -0400 Subject: [PATCH 55/71] Update and rename environmental_variables.rst to environment_variables.rst Made review requests per @hardym. 
--- ...ental_variables.rst => environment_variables.rst} | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) rename docs/guide/{environmental_variables.rst => environment_variables.rst} (74%) diff --git a/docs/guide/environmental_variables.rst b/docs/guide/environment_variables.rst similarity index 74% rename from docs/guide/environmental_variables.rst rename to docs/guide/environment_variables.rst index b81197a..44feb0d 100644 --- a/docs/guide/environmental_variables.rst +++ b/docs/guide/environment_variables.rst @@ -12,13 +12,13 @@ are also the means by-which arguments can be passed to graphs. Passing / Setting Environment Variables :::::::::::::::::::::::::::::::::::::::::::: -The recommended way to set environment variables for a given graph is simply to use -the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc). +Setting environment variables for your graphs to use can be done in a variety of ways and which one used can vary +based-upon context. Perhaps the most immediate and simple way to set/override a variable for a given graph is +simply to use the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc). ``--env`` (or ``-e`` for short) should then be followed by the variable name and value using the -syntax `VAR_NAME=VAR_VALUE`. Multiple environment variables can be passed by using -multiple ``--env`` / ``-e`` flags. +syntax `VAR_NAME=VAR_VALUE`. Multiple environment variables can be passed by using multiple ``--env`` / ``-e`` flags. Additionally, in bash you can also set environment variables by listing those you wish to set before the `bonobo run` command with space separating the key-value pairs (i.e. `FIZZ=buzz bonobo run ...` or `FIZZ=buzz FOO=bar bonobo run ...`). -Example: +The Examples below demonstrate setting one or multiple variables using both of these methods: .. 
code-block:: bash @@ -33,6 +33,8 @@ Example: # Using multiple environment variables in bash (*bash only): SRC_FILE=inventory.txt DST_FILE=inventory_processed.csv bonobo run csvsanitizer + +*Though not-yet implemented, the bonobo roadmap includes implementing environment / .env files as well.* Accessing Environment Variables from within the Graph Context ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: From 142a6d60dfaf5f95198e0beb1f1fdd35812ad523 Mon Sep 17 00:00:00 2001 From: CW Andrews Date: Mon, 2 Oct 2017 16:11:41 -0400 Subject: [PATCH 56/71] Updated inline examples and fixed code markup. --- docs/guide/environment_variables.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/guide/environment_variables.rst b/docs/guide/environment_variables.rst index 44feb0d..003f0a1 100644 --- a/docs/guide/environment_variables.rst +++ b/docs/guide/environment_variables.rst @@ -16,7 +16,7 @@ Setting environment variables for your graphs to use can be done in a variety of based-upon context. Perhaps the most immediate and simple way to set/override a variable for a given graph is simply to use the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc). ``--env`` (or ``-e`` for short) should then be followed by the variable name and value using the -syntax `VAR_NAME=VAR_VALUE`. Multiple environment variables can be passed by using multiple ``--env`` / ``-e`` flags. Additionally, in bash you can also set environment variables by listing those you wish to set before the `bonobo run` command with space separating the key-value pairs (i.e. `FIZZ=buzz bonobo run ...` or `FIZZ=buzz FOO=bar bonobo run ...`). +syntax ``VAR_NAME=VAR_VALUE``. Multiple environment variables can be passed by using multiple ``--env`` / ``-e`` flags (i.e. ``bonobo run --env FIZZ=buzz ...`` and ``bonobo run --env FIZZ=buzz --env Foo=bar ...``). 
Additionally, in bash you can also set environment variables by listing those you wish to set before the `bonobo run` command with space separating the key-value pairs (i.e. ``FIZZ=buzz bonobo run ...`` or ``FIZZ=buzz FOO=bar bonobo run ...``). The Examples below demonstrate setting one or multiple variables using both of these methods: @@ -39,7 +39,7 @@ The Examples below demonstrate setting one or multiple variables using both of t Accessing Environment Variables from within the Graph Context ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -Environment variables, whether global or only for the scope of the graph, +Environment variables, whether set globally or only for the scope of the graph, can be can be accessed using any of the normal means. It is important to note that whether set globally for the system or just for the graph context, environment variables are accessed by bonobo in the same way. In the example From a1e381fad5501265d17f3fb2cbf2627a3c5874ca Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 3 Oct 2017 07:57:30 +0200 Subject: [PATCH 57/71] [doc] fix spaces in links. --- docs/_templates/index.html | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/_templates/index.html b/docs/_templates/index.html index 8f9185a..c3aa330 100644 --- a/docs/_templates/index.html +++ b/docs/_templates/index.html @@ -25,25 +25,25 @@ From bfa43e65e65c14636ae7f022b0f587c11a86e4d7 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 3 Oct 2017 08:09:50 +0200 Subject: [PATCH 58/71] [doc] tuning a few things in documentation. 
--- ...ironment_variables.rst => environment.rst} | 43 +++++++++---------- docs/guide/index.rst | 2 +- docs/reference/api_util.rst | 10 +++++ docs/reference/index.rst | 1 + docs/reference/settings.rst | 3 +- 5 files changed, 35 insertions(+), 24 deletions(-) rename docs/guide/{environment_variables.rst => environment.rst} (65%) create mode 100644 docs/reference/api_util.rst diff --git a/docs/guide/environment_variables.rst b/docs/guide/environment.rst similarity index 65% rename from docs/guide/environment_variables.rst rename to docs/guide/environment.rst index 003f0a1..5df9c9a 100644 --- a/docs/guide/environment_variables.rst +++ b/docs/guide/environment.rst @@ -1,5 +1,5 @@ Environment Variables -======================= +===================== Best practice holds that variables should be passed to graphs via environment variables. Doing this is important for keeping sensitive data out of the code - such as an @@ -10,13 +10,16 @@ are also the means by-which arguments can be passed to graphs. Passing / Setting Environment Variables -:::::::::::::::::::::::::::::::::::::::::::: +::::::::::::::::::::::::::::::::::::::: Setting environment variables for your graphs to use can be done in a variety of ways and which one used can vary based-upon context. Perhaps the most immediate and simple way to set/override a variable for a given graph is simply to use the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc). ``--env`` (or ``-e`` for short) should then be followed by the variable name and value using the -syntax ``VAR_NAME=VAR_VALUE``. Multiple environment variables can be passed by using multiple ``--env`` / ``-e`` flags (i.e. ``bonobo run --env FIZZ=buzz ...`` and ``bonobo run --env FIZZ=buzz --env Foo=bar ...``). Additionally, in bash you can also set environment variables by listing those you wish to set before the `bonobo run` command with space separating the key-value pairs (i.e. 
``FIZZ=buzz bonobo run ...`` or ``FIZZ=buzz FOO=bar bonobo run ...``). +syntax ``VAR_NAME=VAR_VALUE``. Multiple environment variables can be passed by using multiple ``--env`` / ``-e`` flags +(i.e. ``bonobo run --env FIZZ=buzz ...`` and ``bonobo run --env FIZZ=buzz --env Foo=bar ...``). Additionally, in bash +you can also set environment variables by listing those you wish to set before the `bonobo run` command with space +separating the key-value pairs (i.e. ``FIZZ=buzz bonobo run ...`` or ``FIZZ=buzz FOO=bar bonobo run ...``). The Examples below demonstrate setting one or multiple variables using both of these methods: @@ -28,16 +31,16 @@ The Examples below demonstrate setting one or multiple variables using both of t # Using multiple environment variables via -e (env) flag: bonobo run csvsanitizer -e SRC_FILE=inventory.txt -e DST_FILE=inventory_processed.csv - # Using one environment variable in bash (*bash only): + # Using one environment variable inline (bash only): SECRET_TOKEN=secret123 bonobo run csvsanitizer - # Using multiple environment variables in bash (*bash only): + # Using multiple environment variables inline (bash only): SRC_FILE=inventory.txt DST_FILE=inventory_processed.csv bonobo run csvsanitizer *Though not-yet implemented, the bonobo roadmap includes implementing environment / .env files as well.* Accessing Environment Variables from within the Graph Context -::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: Environment variables, whether set globally or only for the scope of the graph, can be can be accessed using any of the normal means. It is important to note @@ -50,26 +53,22 @@ function and used to get data from the database. import os - from bonobo import Graph, run + import bonobo + from bonobo.config import use - def extract(): - database_user = os.getenv('DB_USER') - database_password = os.getenv('DB_PASS') - # ... 
- # (connect to database using database_user and database_password) - # (get data from database) - # ... - - return database_data + DB_USER = os.getenv('DB_USER') + DB_PASS = os.getenv('DB_PASS') - def load(database_data: dict): - for k, v in database_data.items(): - print('{key} = {value}'.format(key=k, value=v)) + @use('database') + def extract(database): + with database.connect(DB_USER, DB_PASS) as conn: + yield from conn.query_all() - graph = Graph(extract, load) + graph = bonobo.Graph( + extract, + bonobo.PrettyPrinter(), + ) - if __name__ == '__main__': - run(graph) diff --git a/docs/guide/index.rst b/docs/guide/index.rst index 27b0a3e..8229cde 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -12,7 +12,7 @@ There are a few things that you should know while writing transformations graphs purity transformations services - envrionment_variables + envrionment Third party integrations :::::::::::::::::::::::: diff --git a/docs/reference/api_util.rst b/docs/reference/api_util.rst new file mode 100644 index 0000000..cf5dae8 --- /dev/null +++ b/docs/reference/api_util.rst @@ -0,0 +1,10 @@ +Util API +======== + +The Util API, located under the :mod:`bonobo.util` namespace, contains helpers functions and decorators to work with +and inspect transformations, graphs, and nodes. + +.. automodule:: bonobo.util + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/index.rst b/docs/reference/index.rst index cc3a36b..de78b72 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -9,6 +9,7 @@ means that the api is not yet 1.0-proof. api api_config + api_util commands settings examples diff --git a/docs/reference/settings.rst b/docs/reference/settings.rst index 1b2ffea..b0a7c48 100644 --- a/docs/reference/settings.rst +++ b/docs/reference/settings.rst @@ -3,7 +3,8 @@ Settings & Environment .. module:: bonobo.settings -All settings that you can find in the :module:`bonobo.settings` module. 
+All settings that you can find in the :mod:`bonobo.settings` module. You can override those settings using +environment variables. For your own settings and configuration values, see the :doc:`/guide/environment` guide. Debug ::::: From 0bfa873743281acae021cad014467d0d31b77d83 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 3 Oct 2017 08:13:17 +0200 Subject: [PATCH 59/71] [doc] fix typo --- docs/guide/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guide/index.rst b/docs/guide/index.rst index 8229cde..8b23ee6 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -12,7 +12,7 @@ There are a few things that you should know while writing transformations graphs purity transformations services - envrionment + environment Third party integrations :::::::::::::::::::::::: From e9dfcfe0d8d84b16d684efad10c2f642bf5be4be Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 3 Oct 2017 08:15:50 +0200 Subject: [PATCH 60/71] [doc] adds cross ref in env vars page --- docs/guide/environment.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/guide/environment.rst b/docs/guide/environment.rst index 5df9c9a..203368d 100644 --- a/docs/guide/environment.rst +++ b/docs/guide/environment.rst @@ -8,6 +8,10 @@ approach more secure, it also makes graphs more flexible by allowing adjustments for a variety of environments and contexts. Importantly, environment variables are also the means by-which arguments can be passed to graphs. +.. note:: + + This document is about using your own settings and configuration values. If you're looking for bonobo's builtin + settings, also configurable using environment variables, please check :doc:`/reference/settings`.
Passing / Setting Environment Variables ::::::::::::::::::::::::::::::::::::::: From 2ab48080e6f997cb9ce72a04c5c970fd97e4b812 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 3 Oct 2017 08:25:15 +0200 Subject: [PATCH 61/71] [doc] refactors guides index so the toc is showing up correctly in sidebar. --- docs/guide/ext/docker.rst | 4 ++-- docs/guide/ext/jupyter.rst | 4 ++-- docs/guide/ext/selenium.rst | 4 ++-- docs/guide/ext/sqlalchemy.rst | 4 ++-- docs/guide/index.rst | 10 ++-------- 5 files changed, 10 insertions(+), 16 deletions(-) diff --git a/docs/guide/ext/docker.rst b/docs/guide/ext/docker.rst index 8ff667f..f0fd17c 100644 --- a/docs/guide/ext/docker.rst +++ b/docs/guide/ext/docker.rst @@ -1,5 +1,5 @@ -Bonobo with Docker -================== +Docker Extension +================ .. todo:: The `bonobo-docker` package is at a very alpha stage, and things will change. This section is here to give a brief overview but is neither complete nor definitive. diff --git a/docs/guide/ext/jupyter.rst b/docs/guide/ext/jupyter.rst index 8250853..6c3385f 100644 --- a/docs/guide/ext/jupyter.rst +++ b/docs/guide/ext/jupyter.rst @@ -1,5 +1,5 @@ -Bonobo with Jupyter -=================== +Jupyter Extension +================= There is a builtin plugin that integrates (somewhat minimallistically, for now) bonobo within jupyter notebooks, so you can read the execution status of a graph within a nice (ok, not so nice) html/javascript widget. diff --git a/docs/guide/ext/selenium.rst b/docs/guide/ext/selenium.rst index e588cd6..c262c13 100644 --- a/docs/guide/ext/selenium.rst +++ b/docs/guide/ext/selenium.rst @@ -1,5 +1,5 @@ -Bonobo with Selenium -==================== +Selenium Extension +================== .. todo:: The `bonobo-selenium` package is at a very alpha stage, and things will change. This section is here to give a brief overview but is neither complete nor definitive. 
diff --git a/docs/guide/ext/sqlalchemy.rst b/docs/guide/ext/sqlalchemy.rst index d7da4e8..734bbf6 100644 --- a/docs/guide/ext/sqlalchemy.rst +++ b/docs/guide/ext/sqlalchemy.rst @@ -1,5 +1,5 @@ -Bonobo with SQLAlchemy -====================== +SQLAlchemy Extension +==================== .. todo:: The `bonobo-sqlalchemy` package is at a very alpha stage, and things will change. This section is here to give a brief overview but is neither complete nor definitive. diff --git a/docs/guide/index.rst b/docs/guide/index.rst index 8b23ee6..88d86a1 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -1,10 +1,7 @@ Guides ====== -Concepts and best practices -::::::::::::::::::::::::::: - -There are a few things that you should know while writing transformations graphs with bonobo. +Here are a few guides and best practices to work with bonobo. .. toctree:: :maxdepth: 2 @@ -14,10 +11,7 @@ There are a few things that you should know while writing transformations graphs services environment -Third party integrations -:::::::::::::::::::::::: - -There is a few **bonobo** extensions that ease the use of the library with third party tools. Each integration is +There is a also few extensions that ease the use of the library with third party tools. Each integration is available as an optional extra dependency, and the maturity stage of each extension vary. .. 
toctree:: From d936e164acc5dc8c1f1dd36ea3af76b81a77de76 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 3 Oct 2017 08:37:46 +0200 Subject: [PATCH 62/71] [doc] cleanup & refactorings --- docs/{guide/ext => extension}/docker.rst | 0 docs/extension/index.rst | 14 +++++ docs/{guide/ext => extension}/jupyter.rst | 0 docs/{guide/ext => extension}/selenium.rst | 0 docs/{guide/ext => extension}/sqlalchemy.rst | 0 docs/guide/graphs.rst | 11 ++++ docs/guide/index.rst | 13 +--- docs/guide/purity.rst | 63 +++++++++++--------- docs/index.rst | 1 + docs/install.rst | 40 +++++++++++-- 10 files changed, 97 insertions(+), 45 deletions(-) rename docs/{guide/ext => extension}/docker.rst (100%) create mode 100644 docs/extension/index.rst rename docs/{guide/ext => extension}/jupyter.rst (100%) rename docs/{guide/ext => extension}/selenium.rst (100%) rename docs/{guide/ext => extension}/sqlalchemy.rst (100%) create mode 100644 docs/guide/graphs.rst diff --git a/docs/guide/ext/docker.rst b/docs/extension/docker.rst similarity index 100% rename from docs/guide/ext/docker.rst rename to docs/extension/docker.rst diff --git a/docs/extension/index.rst b/docs/extension/index.rst new file mode 100644 index 0000000..53f10be --- /dev/null +++ b/docs/extension/index.rst @@ -0,0 +1,14 @@ +Extensions +========== + +Extensions contains all things needed to work with a few popular third party tools. + +Most of them are available as optional extra dependencies, and the maturity stage of each may vary. + +.. 
toctree:: + :maxdepth: 2 + + docker + jupyter + selenium + sqlalchemy diff --git a/docs/guide/ext/jupyter.rst b/docs/extension/jupyter.rst similarity index 100% rename from docs/guide/ext/jupyter.rst rename to docs/extension/jupyter.rst diff --git a/docs/guide/ext/selenium.rst b/docs/extension/selenium.rst similarity index 100% rename from docs/guide/ext/selenium.rst rename to docs/extension/selenium.rst diff --git a/docs/guide/ext/sqlalchemy.rst b/docs/extension/sqlalchemy.rst similarity index 100% rename from docs/guide/ext/sqlalchemy.rst rename to docs/extension/sqlalchemy.rst diff --git a/docs/guide/graphs.rst b/docs/guide/graphs.rst new file mode 100644 index 0000000..14af705 --- /dev/null +++ b/docs/guide/graphs.rst @@ -0,0 +1,11 @@ +Graphs +====== + +Writing graphs +:::::::::::::: + +Debugging graphs +:::::::::::::::: + +Executing graphs +:::::::::::::::: diff --git a/docs/guide/index.rst b/docs/guide/index.rst index 88d86a1..76e426a 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -6,18 +6,9 @@ Here are a few guides and best practices to work with bonobo. .. toctree:: :maxdepth: 2 - purity + graphs transformations services environment + purity -There is a also few extensions that ease the use of the library with third party tools. Each integration is -available as an optional extra dependency, and the maturity stage of each extension vary. - -.. toctree:: - :maxdepth: 2 - - ext/docker - ext/jupyter - ext/selenium - ext/sqlalchemy diff --git a/docs/guide/purity.rst b/docs/guide/purity.rst index 4b21735..ba0d56e 100644 --- a/docs/guide/purity.rst +++ b/docs/guide/purity.rst @@ -1,34 +1,39 @@ -Pure transformations -==================== +Best Practices +============== The nature of components, and how the data flow from one to another, can be a bit tricky. Hopefully, they should be very easy to write with a few hints. 
-The major problem we have is that one message (underlying implementation: :class:`bonobo.structs.bags.Bag`) can go -through more than one component, and at the same time. If you wanna be safe, you tend to :func:`copy.copy()` everything -between two calls to two different components, but that's very expensive. +Pure transformations +:::::::::::::::::::: -Instead, we chose the opposite: copies are never made, and you should not modify in place the inputs of your -component before yielding them, and that mostly means that you want to recreate dicts and lists before yielding (or -returning) them. Numeric values, strings and tuples being immutable in python, modifying a variable of one of those -type will already return a different instance. +One “message” (a.k.a :class:`bonobo.Bag` instance) may go through more than one component, and at the same time. +To ensure your code is safe, one could :func:`copy.copy()` each message on each transformation input but that's quite +expensive, especially because it may not be needed. + +Instead, we chose the opposite: copies are never made, instead you should not modify in place the inputs of your +component before yielding them, which that mostly means that you want to recreate dicts and lists before yielding if +their values changed. + +Numeric values, strings and tuples being immutable in python, modifying a variable of one of those type will already +return a different instance. Examples will be shown with `return` statements, of course you can do the same with `yield` statements in generators. Numbers -::::::: +------- In python, numbers are immutable. So you can't be wrong with numbers. All of the following are correct. .. 
code-block:: python - def do_your_number_thing(n: int) -> int: + def do_your_number_thing(n): return n - def do_your_number_thing(n: int) -> int: + def do_your_number_thing(n): return n + 1 - def do_your_number_thing(n: int) -> int: + def do_your_number_thing(n): # correct, but bad style n += 1 return n @@ -37,37 +42,37 @@ The same is true with other numeric types, so don't be shy. Tuples -:::::: +------ Tuples are immutable, so you risk nothing. .. code-block:: python - def do_your_tuple_thing(t: tuple) -> tuple: + def do_your_tuple_thing(t): return ('foo', ) + t - def do_your_tuple_thing(t: tuple) -> tuple: + def do_your_tuple_thing(t): return t + ('bar', ) - def do_your_tuple_thing(t: tuple) -> tuple: + def do_your_tuple_thing(t): # correct, but bad style t += ('baaaz', ) return t Strings -::::::: +------- -You know the drill, strings are immutable. +You know the drill, strings are immutable, too. .. code-block:: python - def do_your_str_thing(t: str) -> str: + def do_your_str_thing(t): return 'foo ' + t + ' bar' - def do_your_str_thing(t: str) -> str: + def do_your_str_thing(t): return ' '.join(('foo', t, 'bar', )) - def do_your_str_thing(t: str) -> str: + def do_your_str_thing(t): return 'foo {} bar'.format(t) You can, if you're using python 3.6+, use `f-strings `_, @@ -75,15 +80,15 @@ but the core bonobo libraries won't use it to stay 3.5 compatible. Dicts -::::: +----- So, now it gets interesting. Dicts are mutable. It means that you can mess things up if you're not cautious. -For example, doing the following may cause unexpected problems: +For example, doing the following may (will) cause unexpected problems: .. code-block:: python - def mutate_my_dict_like_crazy(d: dict) -> dict: + def mutate_my_dict_like_crazy(d): # Bad! Don't do that! d.update({ 'foo': compute_something() @@ -112,7 +117,7 @@ Now let's see how to do it correctly: .. 
code-block:: python - def new_dicts_like_crazy(d: dict) -> dict: + def new_dicts_like_crazy(d): # Creating a new dict is correct. return { **d, @@ -120,7 +125,7 @@ Now let's see how to do it correctly: 'bar': compute_anotherthing(), } - def new_dict_and_yield() -> dict: + def new_dict_and_yield(): d = {} for i in range(100): # Different dict each time. @@ -133,8 +138,8 @@ I bet you think «Yeah, but if I create like millions of dicts ...». Let's say we chose the opposite way and copied the dict outside the transformation (in fact, `it's what we did in bonobo's ancestor `_). This means you will also create the same number of dicts, the difference is that you won't even notice it. Also, it means that if you want to yield the same -dict 1 million times , going "pure" makes it efficient (you'll just yield the same object 1 million times) while going "copy -crazy" will create 1 million objects. +dict 1 million times, going "pure" makes it efficient (you'll just yield the same object 1 million times) while going +"copy crazy" would create 1 million identical objects. Using dicts like this will create a lot of dicts, but also free them as soon as all the future components that take this dict as input are done. Also, one important thing to note is that most primitive data structures in python are immutable, so creating diff --git a/docs/index.rst b/docs/index.rst index 8fbcd6e..1d6b708 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,6 +7,7 @@ Bonobo install tutorial/index guide/index + extension/index reference/index faq contribute/index diff --git a/docs/install.rst b/docs/install.rst index 87df3d3..c006c88 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -1,6 +1,7 @@ Installation ============ + Create an ETL project ::::::::::::::::::::: @@ -15,6 +16,7 @@ Creating a project and starting to write code should take less than a minute: Once you bootstrapped a project, you can start editing the default example transformation by editing `my-etl-project/main.py`. 
Now, you can head to :doc:`tutorial/index`. + Other installation options :::::::::::::::::::::::::: @@ -27,6 +29,7 @@ You can install it directly from the `Python Package Index Date: Tue, 3 Oct 2017 08:41:03 +0200 Subject: [PATCH 63/71] [doc] fix links --- docs/tutorial/index.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst index d449df5..3296afb 100644 --- a/docs/tutorial/index.rst +++ b/docs/tutorial/index.rst @@ -56,8 +56,8 @@ Read about best development practices Read about integrating external tools with bonobo ------------------------------------------------- -* :doc:`../guide/ext/docker`: run transformation graphs in isolated containers. -* :doc:`../guide/ext/jupyter`: run transformations within jupyter notebooks. -* :doc:`../guide/ext/selenium`: crawl the web using a real browser and work with the gathered data. -* :doc:`../guide/ext/sqlalchemy`: everything you need to interract with SQL databases. +* :doc:`../extension/docker`: run transformation graphs in isolated containers. +* :doc:`../extension/jupyter`: run transformations within jupyter notebooks. +* :doc:`../extension/selenium`: crawl the web using a real browser and work with the gathered data. +* :doc:`../extension/sqlalchemy`: everything you need to interract with SQL databases. From a830263efc7c9002de40ba8937726d0b9d6939ec Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 3 Oct 2017 23:26:31 +0200 Subject: [PATCH 64/71] [cli] inspect is buggy if there is more than one node with a given name. 
--- bonobo/commands/inspect.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bonobo/commands/inspect.py b/bonobo/commands/inspect.py index bb82704..988b10c 100644 --- a/bonobo/commands/inspect.py +++ b/bonobo/commands/inspect.py @@ -6,6 +6,8 @@ from bonobo.util.objects import get_name OUTPUT_GRAPHVIZ = 'graphviz' +def _ident(graph, i): + return json.dumps(get_name(graph[i])+' ('+str(i)+')') def execute(*, output, **kwargs): graph, plugins, services = read(**kwargs) @@ -16,11 +18,11 @@ def execute(*, output, **kwargs): print(' "BEGIN" [shape="point"];') for i in graph.outputs_of(BEGIN): - print(' "BEGIN" -> ' + json.dumps(get_name(graph[i])) + ';') + print(' "BEGIN" -> ' + _ident(graph, i) + ';') for ix in graph.topologically_sorted_indexes: for iy in graph.outputs_of(ix): - print(' {} -> {};'.format(json.dumps(get_name(graph[ix])), json.dumps(get_name(graph[iy])))) + print(' {} -> {};'.format(_ident(graph, ix), _ident(graph, iy))) print('}') else: From 54375e808f402927eff6be016b3fc971733a3177 Mon Sep 17 00:00:00 2001 From: Tomas Zubiri Date: Wed, 4 Oct 2017 00:03:10 -0300 Subject: [PATCH 65/71] removed id from graphviz label --- bonobo/commands/inspect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bonobo/commands/inspect.py b/bonobo/commands/inspect.py index 988b10c..dbb9a74 100644 --- a/bonobo/commands/inspect.py +++ b/bonobo/commands/inspect.py @@ -7,7 +7,7 @@ from bonobo.util.objects import get_name OUTPUT_GRAPHVIZ = 'graphviz' def _ident(graph, i): - return json.dumps(get_name(graph[i])+' ('+str(i)+')') + return json.dumps('{} [label="{}"]'.format(str(i),get_name(graph[i]))) def execute(*, output, **kwargs): graph, plugins, services = read(**kwargs) From 097a6cccdb0adc56b839ba597085e5ed7d0374be Mon Sep 17 00:00:00 2001 From: Tomas Zubiri Date: Wed, 4 Oct 2017 00:38:31 -0300 Subject: [PATCH 66/71] fixed escaping issues --- bonobo/commands/inspect.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff 
--git a/bonobo/commands/inspect.py b/bonobo/commands/inspect.py index dbb9a74..2646ce4 100644 --- a/bonobo/commands/inspect.py +++ b/bonobo/commands/inspect.py @@ -7,7 +7,9 @@ from bonobo.util.objects import get_name OUTPUT_GRAPHVIZ = 'graphviz' def _ident(graph, i): - return json.dumps('{} [label="{}"]'.format(str(i),get_name(graph[i]))) + escaped_index = str(i) + escaped_name = json.dumps(get_name(graph[i])) + return '{{{} [label={}]}}'.format(escaped_index,escaped_name) def execute(*, output, **kwargs): graph, plugins, services = read(**kwargs) From c337aad14d142d9e429d962c89a1ca06891f6948 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 4 Oct 2017 08:40:08 +0200 Subject: [PATCH 67/71] [doc] fix typos and errors. --- docs/_templates/index.html | 4 ++-- docs/_templates/sidebarintro.html | 14 +++++++------- docs/reference/examples/tutorials.rst | 2 +- docs/tutorial/tut03.rst | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/_templates/index.html b/docs/_templates/index.html index c3aa330..3fb8d38 100644 --- a/docs/_templates/index.html +++ b/docs/_templates/index.html @@ -103,8 +103,8 @@

{% trans %} You can also download PDF/EPUB versions of the Bonobo documentation: - PDF version, - EPUB version. + PDF version, + EPUB version. {% endtrans %}

diff --git a/docs/_templates/sidebarintro.html b/docs/_templates/sidebarintro.html index fc8acf2..ef4ad45 100644 --- a/docs/_templates/sidebarintro.html +++ b/docs/_templates/sidebarintro.html @@ -9,14 +9,14 @@ You can download the documentation in other formats as well:

Useful Links

\ No newline at end of file +
  • Bonobo ETL
  • +
  • Bonobo ETL @ PyPI
  • +
  • Bonobo ETL @ GitHub
  • + diff --git a/docs/reference/examples/tutorials.rst b/docs/reference/examples/tutorials.rst index 2a5ca4f..dbee6fd 100644 --- a/docs/reference/examples/tutorials.rst +++ b/docs/reference/examples/tutorials.rst @@ -42,7 +42,7 @@ Example 2: Write Example 3: Write as map ----------------------- -.. automodule:: bonobo.examples.tutorials.tut02e02_writeasmap +.. automodule:: bonobo.examples.tutorials.tut02e03_writeasmap :members: :undoc-members: :show-inheritance: diff --git a/docs/tutorial/tut03.rst b/docs/tutorial/tut03.rst index 325bc9d..4875bd8 100644 --- a/docs/tutorial/tut03.rst +++ b/docs/tutorial/tut03.rst @@ -98,7 +98,7 @@ string-options of the service names, and we provide an implementation at the las There are two ways of providing implementations: * Either file-wide, by providing a `get_services()` function that returns a dict of named implementations (we did so - with filesystems in the previous step, :doc:`tut02.rst`) + with filesystems in the previous step, :doc:`tut02`) * Either directory-wide, by providing a `get_services()` function in a specially named `_services.py` file. The first is simpler if you only have one transformation graph in one file, the second allows to group coherent From f7cf7ca4822b6daadf28a439b632c5991c568206 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 4 Oct 2017 19:14:36 +0200 Subject: [PATCH 68/71] [doc] homepage --- docs/_templates/index.html | 76 +++++++++++++++----------------------- 1 file changed, 29 insertions(+), 47 deletions(-) diff --git a/docs/_templates/index.html b/docs/_templates/index.html index 3fb8d38..33c4116 100644 --- a/docs/_templates/index.html +++ b/docs/_templates/index.html @@ -9,14 +9,37 @@

    {% trans %} - Bonobo is a line-by-line data-processing toolkit for python 3.5+ (extract-transform-load - framework, or ETL) emphasizing simple and atomic data transformations defined using a directed graph of plain old - python objects (functions, iterables, generators, ...). + Bonobo is an Extract Transform Load framework for the Python (3.5+) language. {% endtrans %}

    -
    - Bonobo is ALPHA software. Some APIs will change. +

    + {% trans %} + It works by streaming data through a directed acyclic graph of python callables, one row at a time. + {% endtrans %} +

    + +

    + {% trans %} + It is targeting small scale data (as in “not big data”), allowing it to be quick and easy to install (no + client-server, no daemon, lightweight requirements, no surprises). + {% endtrans %} +

    +

    + {% trans %} + Most common file formats (XML, CSV, JSON, Excel, ...) and basic services (SQL databases, REST web services, ...) can + be worked with using the built-in or extension libraries, other services can benefit from the richness of PyPI. + {% endtrans %} +

    +

    + {% trans %} + In short, Bonobo contains the logic to efficiently execute an ETL process, the glue to use plain old python + objects and common operations, readers and writers. For the rest, it's just python! + {% endtrans %} +

    + +
    + Bonobo is currently released as alpha version. Expect some APIs to change.
    @@ -65,53 +88,12 @@
    - -
    - - -
    -

    Features

    -
      -
    • - {% trans %} - 10 minutes to get started: Know some python? Writing your first data processor is an affair - of minutes. - {% endtrans %} -
    • -
    • - {% trans %} - Data sources and targets: HTML, JSON, XML, SQL databases, NoSQL databases, HTTP/REST APIs, - streaming APIs, python objects... - {% endtrans %} -
    • -
    • - {% trans %} - Service injection: Abstract the transformation dependencies to easily switch data sources and - dependant libraries. You'll be able to specify the concrete implementations or configurations at - runtime, for example to switch a database connection string or an API endpoint. - {% endtrans %} -
    • -
    • - {% trans %} - Plugins: Easily add features to all your transformations by using builtin plugins (Jupyter, - Console, ...) or write your own. - {% endtrans %} -
    • -
    • - {% trans %} - Bonobo is young, and the todo-list is huge. Read the roadmap. - {% endtrans %} -
    • -
    - -

    {% trans %} - You can also download PDF/EPUB versions of the Bonobo documentation: - PDF version, - EPUB version. - {% endtrans %} -

    Table of contents

    -
    {{ toctree(maxdepth=2, collapse=False)}}
    + {% endblock %} From b2f93b24166f5ee4d5dd6e7e2f27fab5b0da5ee7 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Thu, 5 Oct 2017 07:17:16 +0200 Subject: [PATCH 69/71] [config/dx] bundle a default fs (and http?) service if none is provided (#179) --- bonobo/_api.py | 4 ++-- bonobo/config/__init__.py | 3 ++- bonobo/config/configurables.py | 3 +-- bonobo/config/services.py | 24 ++++++++++++++++++++++++ bonobo/constants.py | 2 +- bonobo/execution/base.py | 4 ++-- bonobo/execution/graph.py | 4 ++-- bonobo/nodes/basics.py | 3 ++- bonobo/util/__init__.py | 11 ++++++++++- bonobo/util/inspect.py | 2 +- tests/config/test_services.py | 23 ++++++++++++++++++++++- tests/test_basics.py | 1 - tests/test_commands.py | 1 - 13 files changed, 69 insertions(+), 16 deletions(-) diff --git a/bonobo/_api.py b/bonobo/_api.py index de75bd2..84b5e19 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,10 +1,10 @@ import logging -from bonobo.structs import Bag, ErrorBag, Graph, Token from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ PickleReader, PickleWriter, PrettyPrinter, RateLimited, Tee, arg0_to_kwargs, count, identity, kwargs_to_arg0, noop from bonobo.strategies import create_strategy -from bonobo.util.objects import get_name +from bonobo.structs import Bag, ErrorBag, Graph, Token +from bonobo.util import get_name __all__ = [] diff --git a/bonobo/config/__init__.py b/bonobo/config/__init__.py index bd00845..a86e8ba 100644 --- a/bonobo/config/__init__.py +++ b/bonobo/config/__init__.py @@ -1,7 +1,7 @@ from bonobo.config.configurables import Configurable from bonobo.config.options import Method, Option from bonobo.config.processors import ContextProcessor -from bonobo.config.services import Container, Exclusive, Service, requires +from bonobo.config.services import Container, Exclusive, Service, requires, create_container use = requires @@ -14,6 +14,7 @@ __all__ = [ 'Method', 'Option', 'Service', + 'create_container', 
'requires', 'use', ] diff --git a/bonobo/config/configurables.py b/bonobo/config/configurables.py index 7b40303..1b0201f 100644 --- a/bonobo/config/configurables.py +++ b/bonobo/config/configurables.py @@ -1,6 +1,5 @@ -from bonobo.util.inspect import isoption, iscontextprocessor +from bonobo.util import isoption, iscontextprocessor, sortedlist from bonobo.errors import AbstractError -from bonobo.util.collections import sortedlist __all__ = [ 'Configurable', diff --git a/bonobo/config/services.py b/bonobo/config/services.py index 1fe066d..1810ebc 100644 --- a/bonobo/config/services.py +++ b/bonobo/config/services.py @@ -95,6 +95,30 @@ class Container(dict): return value +def create_container(services=None, factory=Container): + """ + Create a container with reasonable default service implementations for commonly use, standard-named, services. + + Services: + - `fs` defaults to a fs2 instance based on current working directory + - `http`defaults to requests + + :param services: + :return: + """ + container = factory(services) if services else factory() + + if not 'fs' in container: + import bonobo + container.setdefault('fs', bonobo.open_fs()) + + if not 'http' in container: + import requests + container.setdefault('http', requests) + + return container + + class Exclusive(ContextDecorator): """ Decorator and context manager used to require exclusive usage of an object, most probably a service. 
It's usefull diff --git a/bonobo/constants.py b/bonobo/constants.py index 4a02f5e..8c6eba5 100644 --- a/bonobo/constants.py +++ b/bonobo/constants.py @@ -6,4 +6,4 @@ INHERIT_INPUT = Token('InheritInput') LOOPBACK = Token('Loopback') NOT_MODIFIED = Token('NotModified') DEFAULT_SERVICES_FILENAME = '_services.py' -DEFAULT_SERVICES_ATTR = 'get_services' \ No newline at end of file +DEFAULT_SERVICES_ATTR = 'get_services' diff --git a/bonobo/execution/base.py b/bonobo/execution/base.py index 641d761..abb3516 100644 --- a/bonobo/execution/base.py +++ b/bonobo/execution/base.py @@ -2,7 +2,7 @@ import traceback from contextlib import contextmanager from time import sleep -from bonobo.config import Container +from bonobo.config import create_container from bonobo.config.processors import ContextCurrifier from bonobo.plugins import get_enhancers from bonobo.util.errors import print_error @@ -48,7 +48,7 @@ class LoopingExecutionContext(Wrapper): raise RuntimeError( 'Having services defined both in GraphExecutionContext and child NodeExecutionContext is not supported, for now.' 
) - self.services = Container(services) if services else Container() + self.services = create_container(services) else: self.services = None diff --git a/bonobo/execution/graph.py b/bonobo/execution/graph.py index 1859adc..91e4aef 100644 --- a/bonobo/execution/graph.py +++ b/bonobo/execution/graph.py @@ -1,6 +1,6 @@ from functools import partial -from bonobo.config.services import Container +from bonobo.config import create_container from bonobo.constants import BEGIN, END from bonobo.execution.node import NodeExecutionContext from bonobo.execution.plugin import PluginExecutionContext @@ -23,7 +23,7 @@ class GraphExecutionContext: self.graph = graph self.nodes = [NodeExecutionContext(node, parent=self) for node in self.graph] self.plugins = [PluginExecutionContext(plugin, parent=self) for plugin in plugins or ()] - self.services = Container(services) if services else Container() + self.services = create_container(services) # Probably not a good idea to use it unless you really know what you're doing. But you can access the context. 
self.services['__graph_context'] = self diff --git a/bonobo/nodes/basics.py b/bonobo/nodes/basics.py index ea05c29..e23dd05 100644 --- a/bonobo/nodes/basics.py +++ b/bonobo/nodes/basics.py @@ -4,11 +4,12 @@ import itertools from bonobo import settings from bonobo.config import Configurable, Option from bonobo.config.processors import ContextProcessor -from bonobo.constants import NOT_MODIFIED from bonobo.structs.bags import Bag from bonobo.util.objects import ValueHolder from bonobo.util.term import CLEAR_EOL +from bonobo.constants import NOT_MODIFIED + __all__ = [ 'Limit', 'PrettyPrinter', diff --git a/bonobo/util/__init__.py b/bonobo/util/__init__.py index 4a5e8dc..682cbe7 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -1,3 +1,4 @@ +from bonobo.util.collections import sortedlist from bonobo.util.inspect import ( inspect_node, isbag, @@ -10,11 +11,18 @@ from bonobo.util.inspect import ( isoption, istype, ) +from bonobo.util.objects import ( + get_name, + get_attribute_or_create, + ValueHolder +) from bonobo.util.python import require # Bonobo's util API __all__ = [ - 'require', + 'ValueHolder', + 'get_attribute_or_create', + 'get_name', 'inspect_node', 'isbag', 'isconfigurable', @@ -25,4 +33,5 @@ __all__ = [ 'ismethod', 'isoption', 'istype', + 'require', ] diff --git a/bonobo/util/inspect.py b/bonobo/util/inspect.py index 1594d1e..2a19803 100644 --- a/bonobo/util/inspect.py +++ b/bonobo/util/inspect.py @@ -1,6 +1,5 @@ from collections import namedtuple -from bonobo.constants import LOOPBACK def isconfigurable(mixed): @@ -99,6 +98,7 @@ def isloopbackbag(mixed): :param mixed: :return: bool """ + from bonobo.constants import LOOPBACK return isbag(mixed) and LOOPBACK in mixed.flags diff --git a/tests/config/test_services.py b/tests/config/test_services.py index 66f554e..fb74098 100644 --- a/tests/config/test_services.py +++ b/tests/config/test_services.py @@ -3,8 +3,9 @@ import time import pytest +from bonobo.util import get_name from 
bonobo.config import Configurable, Container, Exclusive, Service, requires -from bonobo.config.services import validate_service_name +from bonobo.config.services import validate_service_name, create_container class PrinterInterface(): @@ -108,3 +109,23 @@ def test_requires(): svcargs = services.args_for(append) assert len(svcargs) == 1 assert svcargs[0] == vcr.append + + +@pytest.mark.parametrize('services', [None, {}]) +def test_create_container_empty_values(services): + c = create_container(services) + assert len(c) == 2 + assert 'fs' in c and get_name(c['fs']) == 'OSFS' + assert 'http' in c and get_name(c['http']) == 'requests' + + +def test_create_container_override(): + c = create_container({ + 'http': 'http', + 'fs': 'fs', + }) + assert len(c) == 2 + assert 'fs' in c and c['fs'] == 'fs' + assert 'http' in c and c['http'] == 'http' + + diff --git a/tests/test_basics.py b/tests/test_basics.py index 5230b0b..283e3d7 100644 --- a/tests/test_basics.py +++ b/tests/test_basics.py @@ -5,7 +5,6 @@ import pytest import bonobo from bonobo.config.processors import ContextCurrifier from bonobo.constants import NOT_MODIFIED -from bonobo.util.inspect import inspect_node def test_count(): diff --git a/tests/test_commands.py b/tests/test_commands.py index 1fca75a..a29465c 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -3,7 +3,6 @@ import runpy import sys from unittest.mock import patch -import pathlib import pkg_resources import pytest From ee65cd736bf6e24545dab490f409cf5b4a5edca4 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Thu, 5 Oct 2017 18:52:53 +0200 Subject: [PATCH 70/71] Update dependencies. --- Makefile | 2 +- requirements-dev.txt | 2 +- requirements-docker.txt | 2 +- requirements-jupyter.txt | 2 +- requirements.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index d082166..277dbad 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # This file has been auto-generated. 
# All changes will be lost, see Projectfile. # -# Updated at 2017-09-30 11:26:44.075878 +# Updated at 2017-10-05 18:52:31.477250 PACKAGE ?= bonobo PYTHON ?= $(shell which python) diff --git a/requirements-dev.txt b/requirements-dev.txt index 92123d5..4e005a7 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -21,7 +21,7 @@ pygments==2.2.0 pytest-cov==2.5.1 pytest-sugar==0.8.0 pytest-timeout==1.2.0 -pytest==3.2.2 +pytest==3.2.3 python-dateutil==2.6.1 pytz==2017.2 requests==2.18.4 diff --git a/requirements-docker.txt b/requirements-docker.txt index 870223d..976b56d 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -15,6 +15,6 @@ pyparsing==2.2.0 pytz==2017.2 requests==2.18.4 six==1.11.0 -stevedore==1.27.0 +stevedore==1.27.1 urllib3==1.22 websocket-client==0.44.0 diff --git a/requirements-jupyter.txt b/requirements-jupyter.txt index 94b10ea..e1b0ba7 100644 --- a/requirements-jupyter.txt +++ b/requirements-jupyter.txt @@ -1,6 +1,6 @@ -e .[jupyter] appnope==0.1.0 -bleach==2.1 +bleach==2.1.1 decorator==4.1.2 entrypoints==0.2.3 html5lib==0.999999999 diff --git a/requirements.txt b/requirements.txt index 61ab27d..d6439df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,5 +12,5 @@ pyparsing==2.2.0 pytz==2017.2 requests==2.18.4 six==1.11.0 -stevedore==1.27.0 +stevedore==1.27.1 urllib3==1.22 From 32e222787d50bfe69a43ab4bc9b346120ee6ad86 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Thu, 5 Oct 2017 19:16:03 +0200 Subject: [PATCH 71/71] release: 0.5.0 --- Makefile | 2 +- bonobo/_version.py | 2 +- bonobo/commands/inspect.py | 10 +++--- bonobo/util/__init__.py | 6 +--- bonobo/util/inspect.py | 1 - docs/changelog.rst | 64 +++++++++++++++++++++++++++++++++++ tests/config/test_services.py | 2 -- 7 files changed, 73 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index 277dbad..221b012 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # This file has been auto-generated. # All changes will be lost, see Projectfile. 
# -# Updated at 2017-10-05 18:52:31.477250 +# Updated at 2017-10-05 18:56:33.985014 PACKAGE ?= bonobo PYTHON ?= $(shell which python) diff --git a/bonobo/_version.py b/bonobo/_version.py index 908c0bb..2b8877c 100644 --- a/bonobo/_version.py +++ b/bonobo/_version.py @@ -1 +1 @@ -__version__ = '0.4.3' +__version__ = '0.5.0' diff --git a/bonobo/commands/inspect.py b/bonobo/commands/inspect.py index 2646ce4..1ab6b5b 100644 --- a/bonobo/commands/inspect.py +++ b/bonobo/commands/inspect.py @@ -6,10 +6,12 @@ from bonobo.util.objects import get_name OUTPUT_GRAPHVIZ = 'graphviz' + def _ident(graph, i): - escaped_index = str(i) - escaped_name = json.dumps(get_name(graph[i])) - return '{{{} [label={}]}}'.format(escaped_index,escaped_name) + escaped_index = str(i) + escaped_name = json.dumps(get_name(graph[i])) + return '{{{} [label={}]}}'.format(escaped_index, escaped_name) + def execute(*, output, **kwargs): graph, plugins, services = read(**kwargs) @@ -20,7 +22,7 @@ def execute(*, output, **kwargs): print(' "BEGIN" [shape="point"];') for i in graph.outputs_of(BEGIN): - print(' "BEGIN" -> ' + _ident(graph, i) + ';') + print(' "BEGIN" -> ' + _ident(graph, i) + ';') for ix in graph.topologically_sorted_indexes: for iy in graph.outputs_of(ix): diff --git a/bonobo/util/__init__.py b/bonobo/util/__init__.py index 682cbe7..df14e9a 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -11,11 +11,7 @@ from bonobo.util.inspect import ( isoption, istype, ) -from bonobo.util.objects import ( - get_name, - get_attribute_or_create, - ValueHolder -) +from bonobo.util.objects import (get_name, get_attribute_or_create, ValueHolder) from bonobo.util.python import require # Bonobo's util API diff --git a/bonobo/util/inspect.py b/bonobo/util/inspect.py index 2a19803..f9ae4d8 100644 --- a/bonobo/util/inspect.py +++ b/bonobo/util/inspect.py @@ -1,7 +1,6 @@ from collections import namedtuple - def isconfigurable(mixed): """ Check if the given argument is an instance of 
:class:`bonobo.config.Configurable`. diff --git a/docs/changelog.rst b/docs/changelog.rst index 2f12063..a049822 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,70 @@ Changelog ========= +v.0.5.0 - 5 october 2017 +:::::::::::::::::::::::: + +Important highlights +-------------------- + +* `bonobo.pprint` and `bonobo.PrettyPrint` have been removed, in favor of `bonobo.PrettyPrinter` (BC break). +* The `bonobo.config` API has suffered a major refactoring. It has been done carefully and most of your code should + work unchanged, but you may have surprises. This was necessary for this API to be more uniform (potential BC break). +* bonobo.pprint and bonobo.PrettyPrint have been removed, in favor of new bonobo.PrettyPrinter() generic printer. If + you're still using the old versions, time to switch (BC break). +* Secondary APIs start to be more uniform (bonobo.config, bonobo.util). + +New features +------------ + +Graphs & Nodes +.............. + +* Graphs now have a .copy() method. +* New helper transformations arg0_to_kwargs and kwargs_to_arg0. +* The unique pretty printer provided by the core library is now bonobo.PrettyPrinter(). +* Services now have "fs" and "http" configured by default. + +Command line +............ + +* New `bonobo convert` command now allows to run simple conversion jobs without coding anything. +* New `bonobo inspect` command now allows to generate graphviz source for graph visualization. +* Passing environment variables to graph executions now can be done using -e/--env. (cwandrews) +* Add ability to install requirements from a requirements.txt residing in the same dir (Alex Vykaliuk) + +Preview +....... + +* A "transformation factory" makes its first appearance. It is considered a preview unstable feature. Stay + tuned. + +Internals +--------- + +* Configurables have undergone a refactoring, all types of descriptors should now behave in the same way. 
+* An UnrecoverableError exception subclass allows for some errors to stop the whole execution. +* Refactoring of Settings (bonobo.settings). +* Add a reference to graph context (private) in service container. +* A few internal API changes and refactorings. + +Bugfixes +-------- + +* Check if PluginExecutionContext was started before shutting it down. (Vitalii Vokhmin) +* Move patch one level up because importlib breaks all the CI tools. (Alex Vykaliuk) +* Do not fail in ipykernel without ipywidgets. (Alex Vykaliuk) +* Escaping issues (Tomas Zubiri) + +Miscellaneous +------------- + +* Windows console output should now be correct. (Parthiv20) +* Various bugfixes. +* More readable statistics on Ubuntu workstation standard terminal (spagoc) +* Documentation, more documentation, documentation again. + + v.0.4.3 - 16 july 2017 :::::::::::::::::::::: diff --git a/tests/config/test_services.py b/tests/config/test_services.py index fb74098..b12ae78 100644 --- a/tests/config/test_services.py +++ b/tests/config/test_services.py @@ -127,5 +127,3 @@ def test_create_container_override(): assert len(c) == 2 assert 'fs' in c and c['fs'] == 'fs' assert 'http' in c and c['http'] == 'http' - -