diff --git a/Makefile b/Makefile index 0c1f0b4..221b012 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # This file has been auto-generated. # All changes will be lost, see Projectfile. # -# Updated at 2017-07-16 10:42:53.988109 +# Updated at 2017-10-05 18:56:33.985014 PACKAGE ?= bonobo PYTHON ?= $(shell which python) @@ -27,13 +27,13 @@ VERSION ?= $(shell git describe 2>/dev/null || echo dev) # Installs the local project dependencies. install: if [ -z "$(QUICK)" ]; then \ - $(PIP) install -U pip wheel $(PYTHON_PIP_INSTALL_OPTIONS) -r $(PYTHON_REQUIREMENTS_FILE) ; \ + $(PIP) install -U pip wheel $(PIP_INSTALL_OPTIONS) -r $(PYTHON_REQUIREMENTS_FILE) ; \ fi # Installs the local project dependencies, including development-only libraries. install-dev: if [ -z "$(QUICK)" ]; then \ - $(PIP) install -U pip wheel $(PYTHON_PIP_INSTALL_OPTIONS) -r $(PYTHON_REQUIREMENTS_DEV_FILE) ; \ + $(PIP) install -U pip wheel $(PIP_INSTALL_OPTIONS) -r $(PYTHON_REQUIREMENTS_DEV_FILE) ; \ fi # Cleans up the local mess. diff --git a/Projectfile b/Projectfile index 6fe6c2b..0973c1f 100644 --- a/Projectfile +++ b/Projectfile @@ -29,7 +29,9 @@ python.setup( 'bonobo = bonobo.commands:entrypoint', ], 'bonobo.commands': [ + 'convert = bonobo.commands.convert:register', 'init = bonobo.commands.init:register', + 'inspect = bonobo.commands.inspect:register', 'run = bonobo.commands.run:register', 'version = bonobo.commands.version:register', ], @@ -56,3 +58,5 @@ python.add_requirements( 'ipywidgets >=6.0.0,<7', ] ) + +# vim: ft=python: diff --git a/bin/imgcat b/bin/imgcat new file mode 100755 index 0000000..001d2b8 --- /dev/null +++ b/bin/imgcat @@ -0,0 +1,112 @@ +#!/bin/bash + +# tmux requires unrecognized OSC sequences to be wrapped with DCS tmux; +# ST, and for all ESCs in to be replaced with ESC ESC. It +# only accepts ESC backslash for ST. 
+function print_osc() { + if [[ $TERM == screen* ]] ; then + printf "\033Ptmux;\033\033]" + else + printf "\033]" + fi +} + +# More of the tmux workaround described above. +function print_st() { + if [[ $TERM == screen* ]] ; then + printf "\a\033\\" + else + printf "\a" + fi +} + +# print_image filename inline base64contents print_filename +# filename: Filename to convey to client +# inline: 0 or 1 +# base64contents: Base64-encoded contents +# print_filename: If non-empty, print the filename +# before outputting the image +function print_image() { + print_osc + printf '1337;File=' + if [[ -n "$1" ]]; then + printf 'name='`printf "%s" "$1" | base64`";" + fi + + VERSION=$(base64 --version 2>&1) + if [[ "$VERSION" =~ fourmilab ]]; then + BASE64ARG=-d + elif [[ "$VERSION" =~ GNU ]]; then + BASE64ARG=-di + else + BASE64ARG=-D + fi + + printf "%s" "$3" | base64 $BASE64ARG | wc -c | awk '{printf "size=%d",$1}' + printf ";inline=$2" + printf ":" + printf "%s" "$3" + print_st + printf '\n' + if [[ -n "$4" ]]; then + echo $1 + fi +} + +function error() { + echo "ERROR: $*" 1>&2 +} + +function show_help() { + echo "Usage: imgcat [-p] filename ..." 1>& 2 + echo " or: cat filename | imgcat" 1>& 2 +} + +## Main + +if [ -t 0 ]; then + has_stdin=f +else + has_stdin=t +fi + +# Show help if no arguments and no stdin. +if [ $has_stdin = f -a $# -eq 0 ]; then + show_help + exit +fi + +# Look for command line flags. 
+while [ $# -gt 0 ]; do + case "$1" in + -h|--h|--help) + show_help + exit + ;; + -p|--p|--print) + print_filename=1 + ;; + -*) + error "Unknown option flag: $1" + show_help + exit 1 + ;; + *) + if [ -r "$1" ] ; then + has_stdin=f + print_image "$1" 1 "$(base64 < "$1")" "$print_filename" + else + error "imgcat: $1: No such file or directory" + exit 2 + fi + ;; + esac + shift +done + +# Read and print stdin +if [ $has_stdin = t ]; then + print_image "" 1 "$(cat | base64)" "" +fi + +exit 0 diff --git a/bin/test_graph b/bin/test_graph new file mode 100644 index 0000000..29841f5 --- /dev/null +++ b/bin/test_graph @@ -0,0 +1 @@ +bonobo inspect --graph bonobo/examples/tutorials/tut02e03_writeasmap.py | dot -o test_output.png -T png && bin/imgcat test_output.png diff --git a/bonobo/_api.py b/bonobo/_api.py index cf28a33..84b5e19 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,8 +1,10 @@ -from bonobo.structs import Bag, Graph, Token +import logging + from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ - PrettyPrinter, PickleWriter, PickleReader, Tee, count, identity, noop, pprint + PickleReader, PickleWriter, PrettyPrinter, RateLimited, Tee, arg0_to_kwargs, count, identity, kwargs_to_arg0, noop from bonobo.strategies import create_strategy -from bonobo.util.objects import get_name +from bonobo.structs import Bag, ErrorBag, Graph, Token +from bonobo.util import get_name __all__ = [] @@ -21,17 +23,17 @@ def register_api_group(*args): def run(graph, strategy=None, plugins=None, services=None): """ Main entry point of bonobo. It takes a graph and creates all the necessary plumbery around to execute it. - + The only necessary argument is a :class:`Graph` instance, containing the logic you actually want to execute. - + By default, this graph will be executed using the "threadpool" strategy: each graph node will be wrapped in a thread, and executed in a loop until there is no more input to this node. 
- + You can provide plugins factory objects in the plugins list, this function will add the necessary plugins for interactive console execution and jupyter notebook execution if it detects correctly that it runs in this context. - + You'll probably want to provide a services dictionary mapping service names to service instances. - + :param Graph graph: The :class:`Graph` to execute. :param str strategy: The :class:`bonobo.strategies.base.Strategy` to use. :param list plugins: The list of plugins to enhance execution. @@ -45,22 +47,30 @@ def run(graph, strategy=None, plugins=None, services=None): from bonobo import settings settings.check() - if not settings.QUIET: # pragma: no cover + if not settings.QUIET.get(): # pragma: no cover if _is_interactive_console(): from bonobo.ext.console import ConsoleOutputPlugin if ConsoleOutputPlugin not in plugins: plugins.append(ConsoleOutputPlugin) if _is_jupyter_notebook(): - from bonobo.ext.jupyter import JupyterOutputPlugin - if JupyterOutputPlugin not in plugins: - plugins.append(JupyterOutputPlugin) + try: + from bonobo.ext.jupyter import JupyterOutputPlugin + except ImportError: + logging.warning( + 'Failed to load jupyter widget. Easiest way is to install the optional "jupyter" ' + 'dependencies with «pip install bonobo[jupyter]», but you can also install a specific ' + 'version by yourself.' + ) + else: + if JupyterOutputPlugin not in plugins: + plugins.append(JupyterOutputPlugin) return strategy.execute(graph, plugins=plugins, services=services) # bonobo.structs -register_api_group(Bag, Graph, Token) +register_api_group(Bag, ErrorBag, Graph, Token) # bonobo.strategies register_api(create_strategy) @@ -71,7 +81,7 @@ register_api(create_strategy) def open_fs(fs_url=None, *args, **kwargs): """ Wraps :func:`fs.open_fs` function with a few candies. - + :param str fs_url: A filesystem URL :param parse_result: A parsed filesystem URL. 
:type parse_result: :class:`ParseResult` @@ -101,14 +111,16 @@ register_api_group( JsonReader, JsonWriter, Limit, - PrettyPrinter, PickleReader, PickleWriter, + PrettyPrinter, + RateLimited, Tee, + arg0_to_kwargs, count, identity, + kwargs_to_arg0, noop, - pprint, ) diff --git a/bonobo/_version.py b/bonobo/_version.py index 908c0bb..2b8877c 100644 --- a/bonobo/_version.py +++ b/bonobo/_version.py @@ -1 +1 @@ -__version__ = '0.4.3' +__version__ = '0.5.0' diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index 59e6dfb..4e183a3 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -27,9 +27,9 @@ def entrypoint(args=None): args = parser.parse_args(args).__dict__ if args.pop('debug', False): - settings.DEBUG = True - settings.LOGGING_LEVEL = logging.DEBUG - logging.set_level(settings.LOGGING_LEVEL) + settings.DEBUG.set(True) + settings.LOGGING_LEVEL.set(logging.DEBUG) + logging.set_level(settings.LOGGING_LEVEL.get()) logger.debug('Command: ' + args['command'] + ' Arguments: ' + repr(args)) commands[args.pop('command')](**args) diff --git a/bonobo/commands/convert.py b/bonobo/commands/convert.py new file mode 100644 index 0000000..17b98c2 --- /dev/null +++ b/bonobo/commands/convert.py @@ -0,0 +1,81 @@ +import mimetypes +import os + +import bonobo + +SHORTCUTS = { + 'csv': 'text/csv', + 'json': 'application/json', + 'pickle': 'pickle', + 'plain': 'text/plain', + 'text': 'text/plain', + 'txt': 'text/plain', +} + +REGISTRY = { + 'application/json': (bonobo.JsonReader, bonobo.JsonWriter), + 'pickle': (bonobo.PickleReader, bonobo.PickleWriter), + 'text/csv': (bonobo.CsvReader, bonobo.CsvWriter), + 'text/plain': (bonobo.FileReader, bonobo.FileWriter), +} + +READER = 'reader' +WRITER = 'writer' + + +def resolve_factory(name, filename, factory_type): + """ + Try to resolve which transformation factory to use for this filename. 
User eventually provided a name, which has + priority, otherwise we try to detect it using the mimetype detection on filename. + + """ + if name is None: + name = mimetypes.guess_type(filename)[0] + + if name in SHORTCUTS: + name = SHORTCUTS[name] + + if name is None: + _, _ext = os.path.splitext(filename) + if _ext: + _ext = _ext[1:] + if _ext in SHORTCUTS: + name = SHORTCUTS[_ext] + + if not name in REGISTRY: + raise RuntimeError( + 'Could not resolve {factory_type} factory for {filename} ({name}). Try providing it explicitely using -{opt} .'. + format(name=name, filename=filename, factory_type=factory_type, opt=factory_type[0]) + ) + + if factory_type == READER: + return REGISTRY[name][0] + elif factory_type == WRITER: + return REGISTRY[name][1] + else: + raise ValueError('Invalid factory type.') + + +def execute(input, output, reader=None, reader_options=None, writer=None, writer_options=None, options=None): + reader = resolve_factory(reader, input, READER)(input) + writer = resolve_factory(writer, output, WRITER)(output) + + graph = bonobo.Graph() + graph.add_chain(reader, writer) + + return bonobo.run( + graph, services={ + 'fs': bonobo.open_fs(), + } + ) + + +def register(parser): + parser.add_argument('input') + parser.add_argument('output') + parser.add_argument('--' + READER, '-r') + parser.add_argument('--' + WRITER, '-w') + # parser.add_argument('--reader-option', '-ro', dest='reader_options') + # parser.add_argument('--writer-option', '-wo', dest='writer_options') + # parser.add_argument('--option', '-o', dest='options') + return execute diff --git a/bonobo/commands/inspect.py b/bonobo/commands/inspect.py new file mode 100644 index 0000000..1ab6b5b --- /dev/null +++ b/bonobo/commands/inspect.py @@ -0,0 +1,40 @@ +import json + +from bonobo.commands.run import read, register_generic_run_arguments +from bonobo.constants import BEGIN +from bonobo.util.objects import get_name + +OUTPUT_GRAPHVIZ = 'graphviz' + + +def _ident(graph, i): + escaped_index = 
str(i) + escaped_name = json.dumps(get_name(graph[i])) + return '{{{} [label={}]}}'.format(escaped_index, escaped_name) + + +def execute(*, output, **kwargs): + graph, plugins, services = read(**kwargs) + + if output == OUTPUT_GRAPHVIZ: + print('digraph {') + print(' rankdir = LR;') + print(' "BEGIN" [shape="point"];') + + for i in graph.outputs_of(BEGIN): + print(' "BEGIN" -> ' + _ident(graph, i) + ';') + + for ix in graph.topologically_sorted_indexes: + for iy in graph.outputs_of(ix): + print(' {} -> {};'.format(_ident(graph, ix), _ident(graph, iy))) + + print('}') + else: + raise NotImplementedError('Output type not implemented.') + + +def register(parser): + register_generic_run_arguments(parser) + parser.add_argument('--graph', '-g', dest='output', action='store_const', const=OUTPUT_GRAPHVIZ) + parser.set_defaults(output=OUTPUT_GRAPHVIZ) + return execute diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index 7f29d3f..a37282c 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -1,7 +1,7 @@ import os -DEFAULT_SERVICES_FILENAME = '_services.py' -DEFAULT_SERVICES_ATTR = 'get_services' +import bonobo +from bonobo.constants import DEFAULT_SERVICES_ATTR, DEFAULT_SERVICES_FILENAME DEFAULT_GRAPH_FILENAMES = ('__main__.py', 'main.py', ) DEFAULT_GRAPH_ATTR = 'get_graph' @@ -26,29 +26,42 @@ def get_default_services(filename, services=None): return services or {} -def execute(filename, module, install=False, quiet=False, verbose=False): +def _install_requirements(requirements): + """Install requirements given a path to requirements.txt file.""" + import importlib + import pip + + pip.main(['install', '-r', requirements]) + # Some shenanigans to be sure everything is importable after this, especially .egg-link files which + # are referenced in *.pth files and apparently loaded by site.py at some magic bootstrap moment of the + # python interpreter. 
+ pip.utils.pkg_resources = importlib.reload(pip.utils.pkg_resources) + import site + importlib.reload(site) + + +def read(filename, module, install=False, quiet=False, verbose=False, env=None): + import re import runpy - from bonobo import Graph, run, settings + from bonobo import Graph, settings if quiet: - settings.QUIET = True + settings.QUIET.set(True) if verbose: - settings.DEBUG = True + settings.DEBUG.set(True) + + if env: + quote_killer = re.compile('["\']') + for e in env: + var_name, var_value = e.split('=') + os.environ[var_name] = quote_killer.sub('', var_value) if filename: if os.path.isdir(filename): if install: - import importlib - import pip requirements = os.path.join(filename, 'requirements.txt') - pip.main(['install', '-r', requirements]) - # Some shenanigans to be sure everything is importable after this, especially .egg-link files which - # are referenced in *.pth files and apparently loaded by site.py at some magic bootstrap moment of the - # python interpreter. - pip.utils.pkg_resources = importlib.reload(pip.utils.pkg_resources) - import site - importlib.reload(site) + _install_requirements(requirements) pathname = filename for filename in DEFAULT_GRAPH_FILENAMES: @@ -58,7 +71,8 @@ def execute(filename, module, install=False, quiet=False, verbose=False): if not os.path.exists(filename): raise IOError('Could not find entrypoint (candidates: {}).'.format(', '.join(DEFAULT_GRAPH_FILENAMES))) elif install: - raise RuntimeError('Cannot --install on a file (only available for dirs containing requirements.txt).') + requirements = os.path.join(os.path.dirname(filename), 'requirements.txt') + _install_requirements(requirements) context = runpy.run_path(filename, run_name='__bonobo__') elif module: context = runpy.run_module(module, run_name='__bonobo__') @@ -74,22 +88,25 @@ def execute(filename, module, install=False, quiet=False, verbose=False): ).format(len(graphs)) graph = list(graphs.values())[0] - - # todo if console and not quiet, then add the 
console plugin - # todo when better console plugin, add it if console and just disable display - return run( - graph, - plugins=[], - services=get_default_services( - filename, context.get(DEFAULT_SERVICES_ATTR)() if DEFAULT_SERVICES_ATTR in context else None - ) + plugins = [] + services = get_default_services( + filename, context.get(DEFAULT_SERVICES_ATTR)() if DEFAULT_SERVICES_ATTR in context else None ) + return graph, plugins, services + + +def execute(filename, module, install=False, quiet=False, verbose=False, env=None): + graph, plugins, services = read(filename, module, install, quiet, verbose, env) + + return bonobo.run(graph, plugins=plugins, services=services) + def register_generic_run_arguments(parser, required=True): source_group = parser.add_mutually_exclusive_group(required=required) source_group.add_argument('filename', nargs='?', type=str) source_group.add_argument('--module', '-m', type=str) + parser.add_argument('--env', '-e', action='append') return parser diff --git a/bonobo/config/__init__.py b/bonobo/config/__init__.py index 08be544..a86e8ba 100644 --- a/bonobo/config/__init__.py +++ b/bonobo/config/__init__.py @@ -1,9 +1,11 @@ from bonobo.config.configurables import Configurable from bonobo.config.options import Method, Option from bonobo.config.processors import ContextProcessor -from bonobo.config.services import Container, Exclusive, Service, requires +from bonobo.config.services import Container, Exclusive, Service, requires, create_container -# bonobo.config public programming interface +use = requires + +# Bonobo's Config API __all__ = [ 'Configurable', 'Container', @@ -12,5 +14,7 @@ __all__ = [ 'Method', 'Option', 'Service', + 'create_container', 'requires', + 'use', ] diff --git a/bonobo/config/configurables.py b/bonobo/config/configurables.py index 43cb8c2..1b0201f 100644 --- a/bonobo/config/configurables.py +++ b/bonobo/config/configurables.py @@ -1,12 +1,13 @@ -from bonobo.config.options import Method, Option -from 
bonobo.config.processors import ContextProcessor -from bonobo.errors import ConfigurationError, AbstractError +from bonobo.util import isoption, iscontextprocessor, sortedlist +from bonobo.errors import AbstractError __all__ = [ 'Configurable', 'Option', ] +get_creation_counter = lambda v: v._creation_counter + class ConfigurableMeta(type): """ @@ -15,36 +16,78 @@ class ConfigurableMeta(type): def __init__(cls, what, bases=None, dict=None): super().__init__(what, bases, dict) - cls.__options__ = {} - cls.__positional_options__ = [] - cls.__processors__ = [] - cls.__wrappable__ = None + + cls.__processors = sortedlist() + cls.__methods = sortedlist() + cls.__options = sortedlist() + cls.__names = set() + + # cls.__kwoptions = [] for typ in cls.__mro__: - for name, value in typ.__dict__.items(): - if isinstance(value, Option): - if isinstance(value, ContextProcessor): - cls.__processors__.append(value) - else: - if not value.name: - value.name = name + for name, value in filter(lambda x: isoption(x[1]), typ.__dict__.items()): + if iscontextprocessor(value): + cls.__processors.insort((value._creation_counter, value)) + continue - if isinstance(value, Method): - if cls.__wrappable__: - raise ConfigurationError( - 'Cannot define more than one "Method" option in a configurable. That may change in the future.' - ) - cls.__wrappable__ = name + if not value.name: + value.name = name - if not name in cls.__options__: - cls.__options__[name] = value + if not name in cls.__names: + cls.__names.add(name) + cls.__options.insort((not value.positional, value._creation_counter, name, value)) - if value.positional: - cls.__positional_options__.append(name) + @property + def __options__(cls): + return ((name, option) for _, _, name, option in cls.__options) - # This can be done before, more efficiently. Not so bad neither as this is only done at type() creation time - # (aka class Xxx(...) time) and there should not be hundreds of processors. Still not very elegant. 
- cls.__processors__ = sorted(cls.__processors__, key=lambda v: v._creation_counter) + @property + def __options_dict__(cls): + return dict(cls.__options__) + + @property + def __processors__(cls): + return (processor for _, processor in cls.__processors) + + def __repr__(self): + return ' '.join(('= position + 1 else None + position += 1 + + return self.__options_values + + def __getattr__(self, item): + _dict = self.func.__options_dict__ + if item in _dict: + return _dict[item].__get__(self, self.func) + return getattr(self.func, item) class Configurable(metaclass=ConfigurableMeta): @@ -54,61 +97,106 @@ class Configurable(metaclass=ConfigurableMeta): """ - def __new__(cls, *args, **kwargs): - if cls.__wrappable__ and len(args) == 1 and hasattr(args[0], '__call__'): - return type(args[0].__name__, (cls, ), {cls.__wrappable__: args[0]}) + def __new__(cls, *args, _final=False, **kwargs): + """ + Custom instance builder. If not all options are fulfilled, will return a :class:`PartiallyConfigured` instance + which is just a :class:`functools.partial` object that behaves like a :class:`Configurable` instance. - return super(Configurable, cls).__new__(cls) - - def __init__(self, *args, **kwargs): - super().__init__() - - # initialize option's value dictionary, used by descriptor implementation (see Option). - self.__options_values__ = {} + The special `_final` argument can be used to force final instance to be created, or an error raised if options + are missing. + :param args: + :param _final: bool + :param kwargs: + :return: Configurable or PartiallyConfigured + """ + options = tuple(cls.__options__) # compute missing options, given the kwargs. missing = set() - for name, option in type(self).__options__.items(): + for name, option in options: if option.required and not option.name in kwargs: missing.add(name) # transform positional arguments in keyword arguments if possible. 
position = 0 - for positional_option in self.__positional_options__: - if len(args) <= position: - break - kwargs[positional_option] = args[position] - position += 1 - if positional_option in missing: - missing.remove(positional_option) + for name, option in options: + if not option.positional: + break # option orders make all positional options first, job done. - # complain if there are still missing options. - if len(missing): - raise TypeError( - '{}() missing {} required option{}: {}.'.format( - type(self).__name__, - len(missing), 's' if len(missing) > 1 else '', ', '.join(map(repr, sorted(missing))) - ) - ) + if not isoption(getattr(cls, name)): + missing.remove(name) + continue + + if len(args) <= position: + break # no more positional arguments given. + + position += 1 + if name in missing: + missing.remove(name) # complain if there is more options than possible. - extraneous = set(kwargs.keys()) - set(type(self).__options__.keys()) + extraneous = set(kwargs.keys()) - (set(next(zip(*options))) if len(options) else set()) if len(extraneous): raise TypeError( '{}() got {} unexpected option{}: {}.'.format( - type(self).__name__, + cls.__name__, len(extraneous), 's' if len(extraneous) > 1 else '', ', '.join(map(repr, sorted(extraneous))) ) ) + # missing options? we'll return a partial instance to finish the work later, unless we're required to be + # "final". + if len(missing): + if _final: + raise TypeError( + '{}() missing {} required option{}: {}.'.format( + cls.__name__, + len(missing), 's' if len(missing) > 1 else '', ', '.join(map(repr, sorted(missing))) + ) + ) + return PartiallyConfigured(cls, *args, **kwargs) + + return super(Configurable, cls).__new__(cls) + + def __init__(self, *args, **kwargs): + # initialize option's value dictionary, used by descriptor implementation (see Option). + self._options_values = {**kwargs} + # set option values. 
for name, value in kwargs.items(): setattr(self, name, value) + position = 0 + for name, option in self.__options__: + if not option.positional: + break # option orders make all positional options first + + # value was overriden? Skip. + maybe_value = getattr(type(self), name) + if not isoption(maybe_value): + continue + + if len(args) <= position: + break + + if name in self._options_values: + raise ValueError('Already got a value for option {}'.format(name)) + + setattr(self, name, args[position]) + position += 1 + def __call__(self, *args, **kwargs): """ You can implement a configurable callable behaviour by implemenenting the call(...) method. Of course, it is also backward compatible with legacy __call__ override. """ return self.call(*args, **kwargs) + @property + def __options__(self): + return type(self).__options__ + + @property + def __processors__(self): + return type(self).__processors__ + def call(self, *args, **kwargs): raise AbstractError('Not implemented.') diff --git a/bonobo/config/options.py b/bonobo/config/options.py index 51f4a20..065cc9d 100644 --- a/bonobo/config/options.py +++ b/bonobo/config/options.py @@ -1,3 +1,6 @@ +from bonobo.util.inspect import istype + + class Option: """ An Option is a descriptor for Configurable's parameters. @@ -14,7 +17,9 @@ class Option: If an option is required, an error will be raised if no value is provided (at runtime). If it is not, option will have the default value if user does not override it at runtime. - (default: False) + Ignored if a default is provided, meaning that the option cannot be required. + + (default: True) .. 
attribute:: positional @@ -48,10 +53,10 @@ class Option: _creation_counter = 0 - def __init__(self, type=None, *, required=False, positional=False, default=None): + def __init__(self, type=None, *, required=True, positional=False, default=None): self.name = None self.type = type - self.required = required + self.required = required if default is None else False self.positional = positional self.default = default @@ -60,12 +65,27 @@ class Option: Option._creation_counter += 1 def __get__(self, inst, typ): - if not self.name in inst.__options_values__: - inst.__options_values__[self.name] = self.get_default() - return inst.__options_values__[self.name] + # XXX If we call this on the type, then either return overriden value or ... ??? + if inst is None: + return vars(type).get(self.name, self) + + if not self.name in inst._options_values: + inst._options_values[self.name] = self.get_default() + + return inst._options_values[self.name] def __set__(self, inst, value): - inst.__options_values__[self.name] = self.clean(value) + inst._options_values[self.name] = self.clean(value) + + def __repr__(self): + return '<{positional}{typename} {type}{name} default={default!r}{required}>'.format( + typename=type(self).__name__, + type='({})'.format(self.type) if istype(self.type) else '', + name=self.name, + positional='*' if self.positional else '**', + default=self.default, + required=' (required)' if self.required else '', + ) def clean(self, value): return self.type(value) if self.type else value @@ -105,20 +125,17 @@ class Method(Option): """ - def __init__(self): - super().__init__(None, required=False) - - def __get__(self, inst, typ): - if not self.name in inst.__options_values__: - inst.__options_values__[self.name] = getattr(inst, self.name) - return inst.__options_values__[self.name] + def __init__(self, *, required=True, positional=True): + super().__init__(None, required=required, positional=positional) def __set__(self, inst, value): - if isinstance(value, str): - 
raise ValueError('should be callable') - inst.__options_values__[self.name] = self.type(value) if self.type else value - - def clean(self, value): if not hasattr(value, '__call__'): - raise ValueError('{} value must be callable.'.format(type(self).__name__)) - return value + raise TypeError( + 'Option of type {!r} is expecting a callable value, got {!r} object (which is not).'. + format(type(self).__name__, type(value).__name__) + ) + inst._options_values[self.name] = self.type(value) if self.type else value + + def __call__(self, *args, **kwargs): + # only here to trick IDEs into thinking this is callable. + raise NotImplementedError('You cannot call the descriptor') diff --git a/bonobo/config/processors.py b/bonobo/config/processors.py index d441b6e..27f8703 100644 --- a/bonobo/config/processors.py +++ b/bonobo/config/processors.py @@ -74,8 +74,7 @@ class ContextCurrifier: def __init__(self, wrapped, *initial_context): self.wrapped = wrapped self.context = tuple(initial_context) - self._stack = [] - self._stack_values = [] + self._stack, self._stack_values = None, None def __iter__(self): yield from self.wrapped @@ -86,8 +85,10 @@ class ContextCurrifier: return self.wrapped(*self.context, *args, **kwargs) def setup(self, *context): - if len(self._stack): + if self._stack is not None: raise RuntimeError('Cannot setup context currification twice.') + + self._stack, self._stack_values = list(), list() for processor in resolve_processors(self.wrapped): _processed = processor(self.wrapped, *context, *self.context) _append_to_context = next(_processed) @@ -97,7 +98,7 @@ class ContextCurrifier: self._stack.append(_processed) def teardown(self): - while len(self._stack): + while self._stack: processor = self._stack.pop() try: # todo yield from ? how to ? @@ -108,6 +109,7 @@ class ContextCurrifier: else: # No error ? We should have had StopIteration ... 
raise RuntimeError('Context processors should not yield more than once.') + self._stack, self._stack_values = None, None @contextmanager def as_contextmanager(self, *context): diff --git a/bonobo/config/services.py b/bonobo/config/services.py index d792175..1810ebc 100644 --- a/bonobo/config/services.py +++ b/bonobo/config/services.py @@ -53,7 +53,7 @@ class Service(Option): super().__init__(str, required=False, default=name) def __set__(self, inst, value): - inst.__options_values__[self.name] = validate_service_name(value) + inst._options_values[self.name] = validate_service_name(value) def resolve(self, inst, services): try: @@ -75,7 +75,7 @@ class Container(dict): def args_for(self, mixed): try: - options = mixed.__options__ + options = dict(mixed.__options__) except AttributeError: options = {} @@ -95,6 +95,30 @@ class Container(dict): return value +def create_container(services=None, factory=Container): + """ + Create a container with reasonable default service implementations for commonly use, standard-named, services. + + Services: + - `fs` defaults to a fs2 instance based on current working directory + - `http`defaults to requests + + :param services: + :return: + """ + container = factory(services) if services else factory() + + if not 'fs' in container: + import bonobo + container.setdefault('fs', bonobo.open_fs()) + + if not 'http' in container: + import requests + container.setdefault('http', requests) + + return container + + class Exclusive(ContextDecorator): """ Decorator and context manager used to require exclusive usage of an object, most probably a service. 
It's usefull diff --git a/bonobo/constants.py b/bonobo/constants.py index d567229..8c6eba5 100644 --- a/bonobo/constants.py +++ b/bonobo/constants.py @@ -3,4 +3,7 @@ from bonobo.structs.tokens import Token BEGIN = Token('Begin') END = Token('End') INHERIT_INPUT = Token('InheritInput') +LOOPBACK = Token('Loopback') NOT_MODIFIED = Token('NotModified') +DEFAULT_SERVICES_FILENAME = '_services.py' +DEFAULT_SERVICES_ATTR = 'get_services' diff --git a/bonobo/errors.py b/bonobo/errors.py index 564950d..08b97d4 100644 --- a/bonobo/errors.py +++ b/bonobo/errors.py @@ -58,5 +58,22 @@ class ConfigurationError(Exception): pass -class MissingServiceImplementationError(KeyError): +class UnrecoverableError(Exception): + """Flag for errors that must interrupt the workflow, either because they will happen for sure on each node run, or + because you know that your transformation has no point continuing runnning after a bad event.""" + + +class UnrecoverableValueError(UnrecoverableError, ValueError): + pass + + +class UnrecoverableRuntimeError(UnrecoverableError, RuntimeError): + pass + + +class UnrecoverableNotImplementedError(UnrecoverableError, NotImplementedError): + pass + + +class MissingServiceImplementationError(UnrecoverableError, KeyError): pass diff --git a/bonobo/examples/datasets/coffeeshops.json b/bonobo/examples/datasets/coffeeshops.json index 60e89b1..391b5e8 100644 --- a/bonobo/examples/datasets/coffeeshops.json +++ b/bonobo/examples/datasets/coffeeshops.json @@ -1,182 +1,182 @@ -{"Le Reynou": "2 bis quai de la m\u00e9gisserie, 75001 Paris, France", -"les montparnos": "65 boulevard Pasteur, 75015 Paris, France", -"Le Saint Jean": "23 rue des abbesses, 75018 Paris, France", -"Le Felteu": "1 rue Pecquay, 75004 Paris, France", +{"les montparnos": "65 boulevard Pasteur, 75015 Paris, France", +"Coffee Chope": "344Vrue Vaugirard, 75015 Paris, France", +"Caf\u00e9 Lea": "5 rue Claude Bernard, 75005 Paris, France", +"Le Bellerive": "71 quai de Seine, 75019 Paris, France", 
+"Le drapeau de la fidelit\u00e9": "21 rue Copreaux, 75015 Paris, France", "O q de poule": "53 rue du ruisseau, 75018 Paris, France", +"Le caf\u00e9 des amis": "125 rue Blomet, 75015 Paris, France", "Le chantereine": "51 Rue Victoire, 75009 Paris, France", "Le M\u00fcller": "11 rue Feutrier, 75018 Paris, France", -"La Caravane": "Rue de la Fontaine au Roi, 75011 Paris, France", -"Le Pas Sage": "1 Passage du Grand Cerf, 75002 Paris, France", -"La Renaissance": "112 Rue Championnet, 75018 Paris, France", "Ext\u00e9rieur Quai": "5, rue d'Alsace, 75010 Paris, France", -"Le Sully": "6 Bd henri IV, 75004 Paris, France", -"Le drapeau de la fidelit\u00e9": "21 rue Copreaux, 75015 Paris, France", -"Le caf\u00e9 des amis": "125 rue Blomet, 75015 Paris, France", -"Le Kleemend's": "34 avenue Pierre Mend\u00e8s-France, 75013 Paris, France", -"Assaporare Dix sur Dix": "75, avenue Ledru-Rollin, 75012 Paris, France", -"Caf\u00e9 Pierre": "202 rue du faubourg st antoine, 75012 Paris, France", -"Le Caf\u00e9 Livres": "10 rue Saint Martin, 75004 Paris, France", -"Le Chaumontois": "12 rue Armand Carrel, 75018 Paris, France", -"Le Square": "31 rue Saint-Dominique, 75007 Paris, France", -"Les Arcades": "61 rue de Ponthieu, 75008 Paris, France", +"La Bauloise": "36 rue du hameau, 75015 Paris, France", +"Le Dellac": "14 rue Rougemont, 75009 Paris, France", "Le Bosquet": "46 avenue Bosquet, 75007 Paris, France", +"Le Sully": "6 Bd henri IV, 75004 Paris, France", +"Le Felteu": "1 rue Pecquay, 75004 Paris, France", "Le bistrot de Ma\u00eblle et Augustin": "42 rue coquill\u00e8re, 75001 Paris, France", "D\u00e9d\u00e9 la frite": "52 rue Notre-Dame des Victoires, 75002 Paris, France", "Cardinal Saint-Germain": "11 boulevard Saint-Germain, 75005 Paris, France", +"Le Reynou": "2 bis quai de la m\u00e9gisserie, 75001 Paris, France", +"Aux cadrans": "21 ter boulevard Diderot, 75012 Paris, France", +"Le Saint Jean": "23 rue des abbesses, 75018 Paris, France", +"La Renaissance": "112 Rue 
Championnet, 75018 Paris, France", +"Le Square": "31 rue Saint-Dominique, 75007 Paris, France", +"Les Arcades": "61 rue de Ponthieu, 75008 Paris, France", +"Le Kleemend's": "34 avenue Pierre Mend\u00e8s-France, 75013 Paris, France", +"Assaporare Dix sur Dix": "75, avenue Ledru-Rollin, 75012 Paris, France", +"Caf\u00e9 Pierre": "202 rue du faubourg st antoine, 75012 Paris, France", "Caf\u00e9 antoine": "17 rue Jean de la Fontaine, 75016 Paris, France", "Au cerceau d'or": "129 boulevard sebastopol, 75002 Paris, France", -"Aux cadrans": "21 ter boulevard Diderot, 75012 Paris, France", -"Caf\u00e9 Lea": "5 rue Claude Bernard, 75005 Paris, France", -"Le Bellerive": "71 quai de Seine, 75019 Paris, France", -"La Bauloise": "36 rue du hameau, 75015 Paris, France", -"Le Dellac": "14 rue Rougemont, 75009 Paris, France", +"La Caravane": "Rue de la Fontaine au Roi, 75011 Paris, France", +"Le Pas Sage": "1 Passage du Grand Cerf, 75002 Paris, France", +"Le Caf\u00e9 Livres": "10 rue Saint Martin, 75004 Paris, France", +"Le Chaumontois": "12 rue Armand Carrel, 75018 Paris, France", +"Drole d'endroit pour une rencontre": "58 rue de Montorgueil, 75002 Paris, France", +"Le pari's caf\u00e9": "104 rue caulaincourt, 75018 Paris, France", +"Le Poulailler": "60 rue saint-sabin, 75011 Paris, France", +"Chai 33": "33 Cour Saint Emilion, 75012 Paris, France", +"L'Assassin": "99 rue Jean-Pierre Timbaud, 75011 Paris, France", +"l'Usine": "1 rue d'Avron, 75020 Paris, France", +"La Bricole": "52 rue Liebniz, 75018 Paris, France", +"le ronsard": "place maubert, 75005 Paris, France", +"Face Bar": "82 rue des archives, 75003 Paris, France", +"American Kitchen": "49 rue bichat, 75010 Paris, France", +"La Marine": "55 bis quai de valmy, 75010 Paris, France", +"Le Bloc": "21 avenue Brochant, 75017 Paris, France", +"La Recoleta au Manoir": "229 avenue Gambetta, 75020 Paris, France", +"Le Pareloup": "80 Rue Saint-Charles, 75015 Paris, France", +"La Brasserie Gait\u00e9": "3 rue de la Gait\u00e9, 75014 
Paris, France", +"Caf\u00e9 Zen": "46 rue Victoire, 75009 Paris, France", +"O'Breizh": "27 rue de Penthi\u00e8vre, 75008 Paris, France", +"Le Petit Choiseul": "23 rue saint augustin, 75002 Paris, France", +"Invitez vous chez nous": "7 rue Ep\u00e9e de Bois, 75005 Paris, France", +"La Cordonnerie": "142 Rue Saint-Denis 75002 Paris, 75002 Paris, France", +"Le Supercoin": "3, rue Baudelique, 75018 Paris, France", +"Populettes": "86 bis rue Riquet, 75018 Paris, France", +"Au bon coin": "49 rue des Cloys, 75018 Paris, France", +"Le Couvent": "69 rue Broca, 75013 Paris, France", +"La Br\u00fblerie des Ternes": "111 rue mouffetard, 75005 Paris, France", +"L'\u00c9cir": "59 Boulevard Saint-Jacques, 75014 Paris, France", +"Le Chat bossu": "126, rue du Faubourg Saint Antoine, 75012 Paris, France", +"Denfert caf\u00e9": "58 boulvevard Saint Jacques, 75014 Paris, France", +"Le Caf\u00e9 frapp\u00e9": "95 rue Montmartre, 75002 Paris, France", +"La Perle": "78 rue vieille du temple, 75003 Paris, France", +"Le Descartes": "1 rue Thouin, 75005 Paris, France", +"Bagels & Coffee Corner": "Place de Clichy, 75017 Paris, France", +"Le petit club": "55 rue de la tombe Issoire, 75014 Paris, France", +"Le Plein soleil": "90 avenue Parmentier, 75011 Paris, France", +"Le Relais Haussmann": "146, boulevard Haussmann, 75008 Paris, France", +"Le Malar": "88 rue Saint-Dominique, 75007 Paris, France", +"Au panini de la place": "47 rue Belgrand, 75020 Paris, France", +"Le Village": "182 rue de Courcelles, 75017 Paris, France", +"Pause Caf\u00e9": "41 rue de Charonne, 75011 Paris, France", +"Le Pure caf\u00e9": "14 rue Jean Mac\u00e9, 75011 Paris, France", +"Extra old caf\u00e9": "307 fg saint Antoine, 75011 Paris, France", +"Chez Fafa": "44 rue Vinaigriers, 75010 Paris, France", +"En attendant l'or": "3 rue Faidherbe, 75011 Paris, France", "Br\u00fblerie San Jos\u00e9": "30 rue des Petits-Champs, 75002 Paris, France", "Caf\u00e9 de la Mairie (du VIII)": "rue de Lisbonne, 75008 Paris, France", 
-"Le General Beuret": "9 Place du General Beuret, 75015 Paris, France", -"Le Cap Bourbon": "1 rue Louis le Grand, 75002 Paris, France", -"En attendant l'or": "3 rue Faidherbe, 75011 Paris, France", "Caf\u00e9 Martin": "2 place Martin Nadaud, 75001 Paris, France", "Etienne": "14 rue Turbigo, Paris, 75001 Paris, France", "L'ing\u00e9nu": "184 bd Voltaire, 75011 Paris, France", -"Le Biz": "18 rue Favart, 75002 Paris, France", "L'Olive": "8 rue L'Olive, 75018 Paris, France", -"Le pari's caf\u00e9": "104 rue caulaincourt, 75018 Paris, France", -"Le Poulailler": "60 rue saint-sabin, 75011 Paris, France", -"La Marine": "55 bis quai de valmy, 75010 Paris, France", -"American Kitchen": "49 rue bichat, 75010 Paris, France", -"Chai 33": "33 Cour Saint Emilion, 75012 Paris, France", -"Face Bar": "82 rue des archives, 75003 Paris, France", -"Le Bloc": "21 avenue Brochant, 75017 Paris, France", -"La Bricole": "52 rue Liebniz, 75018 Paris, France", -"le ronsard": "place maubert, 75005 Paris, France", -"l'Usine": "1 rue d'Avron, 75020 Paris, France", -"La Cordonnerie": "142 Rue Saint-Denis 75002 Paris, 75002 Paris, France", -"Invitez vous chez nous": "7 rue Ep\u00e9e de Bois, 75005 Paris, France", -"Le sully": "13 rue du Faubourg Saint Denis, 75010 Paris, France", -"Le Ragueneau": "202 rue Saint-Honor\u00e9, 75001 Paris, France", +"Le Biz": "18 rue Favart, 75002 Paris, France", +"Le Cap Bourbon": "1 rue Louis le Grand, 75002 Paris, France", +"Le General Beuret": "9 Place du General Beuret, 75015 Paris, France", "Le Germinal": "95 avenue Emile Zola, 75015 Paris, France", +"Le Ragueneau": "202 rue Saint-Honor\u00e9, 75001 Paris, France", "Le refuge": "72 rue lamarck, 75018 Paris, France", -"Drole d'endroit pour une rencontre": "58 rue de Montorgueil, 75002 Paris, France", -"Le Petit Choiseul": "23 rue saint augustin, 75002 Paris, France", -"O'Breizh": "27 rue de Penthi\u00e8vre, 75008 Paris, France", -"Le Supercoin": "3, rue Baudelique, 75018 Paris, France", -"Populettes": "86 bis 
rue Riquet, 75018 Paris, France", -"La Recoleta au Manoir": "229 avenue Gambetta, 75020 Paris, France", -"L'Assassin": "99 rue Jean-Pierre Timbaud, 75011 Paris, France", -"Le Pareloup": "80 Rue Saint-Charles, 75015 Paris, France", -"Caf\u00e9 Zen": "46 rue Victoire, 75009 Paris, France", -"La Brasserie Gait\u00e9": "3 rue de la Gait\u00e9, 75014 Paris, France", -"Au bon coin": "49 rue des Cloys, 75018 Paris, France", -"La Br\u00fblerie des Ternes": "111 rue mouffetard, 75005 Paris, France", -"Le Chat bossu": "126, rue du Faubourg Saint Antoine, 75012 Paris, France", -"Denfert caf\u00e9": "58 boulvevard Saint Jacques, 75014 Paris, France", -"Le Couvent": "69 rue Broca, 75013 Paris, France", -"Bagels & Coffee Corner": "Place de Clichy, 75017 Paris, France", -"La Perle": "78 rue vieille du temple, 75003 Paris, France", -"Le Caf\u00e9 frapp\u00e9": "95 rue Montmartre, 75002 Paris, France", -"L'\u00c9cir": "59 Boulevard Saint-Jacques, 75014 Paris, France", -"Le Descartes": "1 rue Thouin, 75005 Paris, France", -"Le petit club": "55 rue de la tombe Issoire, 75014 Paris, France", -"Le Relais Haussmann": "146, boulevard Haussmann, 75008 Paris, France", -"Au panini de la place": "47 rue Belgrand, 75020 Paris, France", -"Extra old caf\u00e9": "307 fg saint Antoine, 75011 Paris, France", -"Le Plein soleil": "90 avenue Parmentier, 75011 Paris, France", -"Le Pure caf\u00e9": "14 rue Jean Mac\u00e9, 75011 Paris, France", -"Le Village": "182 rue de Courcelles, 75017 Paris, France", -"Le Malar": "88 rue Saint-Dominique, 75007 Paris, France", -"Pause Caf\u00e9": "41 rue de Charonne, 75011 Paris, France", -"Chez Fafa": "44 rue Vinaigriers, 75010 Paris, France", -"Caf\u00e9 dans l'aerogare Air France Invalides": "2 rue Robert Esnault Pelterie, 75007 Paris, France", -"Le relais de la victoire": "73 rue de la Victoire, 75009 Paris, France", -"Caprice caf\u00e9": "12 avenue Jean Moulin, 75014 Paris, France", -"Caves populaires": "22 rue des Dames, 75017 Paris, France", -"Cafe de 
grenelle": "188 rue de Grenelle, 75007 Paris, France", -"Chez Prune": "36 rue Beaurepaire, 75010 Paris, France", -"L'anjou": "1 rue de Montholon, 75009 Paris, France", -"Le Brio": "216, rue Marcadet, 75018 Paris, France", -"Tamm Bara": "7 rue Clisson, 75013 Paris, France", -"La chaumi\u00e8re gourmande": "Route de la Muette \u00e0 Neuilly", -"Club hippique du Jardin d\u2019Acclimatation": "75016 Paris, France", -"Les P\u00e8res Populaires": "46 rue de Buzenval, 75020 Paris, France", -"Epicerie Musicale": "55bis quai de Valmy, 75010 Paris, France", -"Le Centenaire": "104 rue amelot, 75011 Paris, France", -"Le Zazabar": "116 Rue de M\u00e9nilmontant, 75020 Paris, France", -"Ragueneau": "202 rue Saint Honor\u00e9, 75001 Paris, France", -"L'In\u00e9vitable": "22 rue Linn\u00e9, 75005 Paris, France", +"Le sully": "13 rue du Faubourg Saint Denis, 75010 Paris, France", "Le Dunois": "77 rue Dunois, 75013 Paris, France", "La Montagne Sans Genevi\u00e8ve": "13 Rue du Pot de Fer, 75005 Paris, France", "Le Caminito": "48 rue du Dessous des Berges, 75013 Paris, France", "Le petit Bretonneau": "Le petit Bretonneau - \u00e0 l'int\u00e9rieur de l'H\u00f4pital, 75018 Paris, France", +"La chaumi\u00e8re gourmande": "Route de la Muette \u00e0 Neuilly", +"Club hippique du Jardin d\u2019Acclimatation": "75016 Paris, France", "Le bal du pirate": "60 rue des bergers, 75015 Paris, France", +"Le Zazabar": "116 Rue de M\u00e9nilmontant, 75020 Paris, France", "L'antre d'eux": "16 rue DE MEZIERES, 75006 Paris, France", "l'orillon bar": "35 rue de l'orillon, 75011 Paris, France", "zic zinc": "95 rue claude decaen, 75012 Paris, France", +"Les P\u00e8res Populaires": "46 rue de Buzenval, 75020 Paris, France", +"Epicerie Musicale": "55bis quai de Valmy, 75010 Paris, France", +"Le relais de la victoire": "73 rue de la Victoire, 75009 Paris, France", +"Le Centenaire": "104 rue amelot, 75011 Paris, France", +"Cafe de grenelle": "188 rue de Grenelle, 75007 Paris, France", +"Ragueneau": "202 rue Saint 
Honor\u00e9, 75001 Paris, France", "Caf\u00e9 Pistache": "9 rue des petits champs, 75001 Paris, France", "La Cagnotte": "13 Rue Jean-Baptiste Dumay, 75020 Paris, France", -"bistrot les timbr\u00e9s": "14 rue d'alleray, 75015 Paris, France", "Le Killy Jen": "28 bis boulevard Diderot, 75012 Paris, France", "Caf\u00e9 beauveau": "9 rue de Miromesnil, 75008 Paris, France", "le 1 cinq": "172 rue de vaugirard, 75015 Paris, France", -"Au Vin Des Rues": "21 rue Boulard, 75014 Paris, France", "Les Artisans": "106 rue Lecourbe, 75015 Paris, France", "Peperoni": "83 avenue de Wagram, 75001 Paris, France", -"Le BB (Bouchon des Batignolles)": "2 rue Lemercier, 75017 Paris, France", -"La Libert\u00e9": "196 rue du faubourg saint-antoine, 75012 Paris, France", -"Chez Rutabaga": "16 rue des Petits Champs, 75002 Paris, France", -"La cantoche de Paname": "40 Boulevard Beaumarchais, 75011 Paris, France", -"Le Saint Ren\u00e9": "148 Boulevard de Charonne, 75020 Paris, France", -"La Brocante": "10 rue Rossini, 75009 Paris, France", -"Caf\u00e9 Clochette": "16 avenue Richerand, 75010 Paris, France", -"L'europ\u00e9en": "21 Bis Boulevard Diderot, 75012 Paris, France", -"NoMa": "39 rue Notre Dame de Nazareth, 75003 Paris, France", -"O'Paris": "1 Rue des Envierges, 75020 Paris, France", +"Le Brio": "216, rue Marcadet, 75018 Paris, France", +"Tamm Bara": "7 rue Clisson, 75013 Paris, France", +"Caf\u00e9 dans l'aerogare Air France Invalides": "2 rue Robert Esnault Pelterie, 75007 Paris, France", +"bistrot les timbr\u00e9s": "14 rue d'alleray, 75015 Paris, France", +"Caprice caf\u00e9": "12 avenue Jean Moulin, 75014 Paris, France", +"Caves populaires": "22 rue des Dames, 75017 Paris, France", +"Au Vin Des Rues": "21 rue Boulard, 75014 Paris, France", +"Chez Prune": "36 rue Beaurepaire, 75010 Paris, France", +"L'In\u00e9vitable": "22 rue Linn\u00e9, 75005 Paris, France", +"L'anjou": "1 rue de Montholon, 75009 Paris, France", "Botak cafe": "1 rue Paul albert, 75018 Paris, France", -"La cantine 
de Zo\u00e9": "136 rue du Faubourg poissonni\u00e8re, 75010 Paris, France", -"Les caves populaires": "22 rue des Dames, 75017 Paris, France", -"Le Plomb du cantal": "3 rue Ga\u00eet\u00e9, 75014 Paris, France", -"Trois pi\u00e8ces cuisine": "101 rue des dames, 75017 Paris, France", -"Le Zinc": "61 avenue de la Motte Picquet, 75015 Paris, France", -"L'avant comptoir": "3 carrefour de l'Od\u00e9on, 75006 Paris, France", -"Les Vendangeurs": "6/8 rue Stanislas, 75006 Paris, France", -"Chez Luna": "108 rue de M\u00e9nilmontant, 75020 Paris, France", -"Le bar Fleuri": "1 rue du Plateau, 75019 Paris, France", "Bistrot Saint-Antoine": "58 rue du Fbg Saint-Antoine, 75012 Paris, France", "Chez Oscar": "11/13 boulevard Beaumarchais, 75004 Paris, France", "Le Piquet": "48 avenue de la Motte Picquet, 75015 Paris, France", +"L'avant comptoir": "3 carrefour de l'Od\u00e9on, 75006 Paris, France", "le chateau d'eau": "67 rue du Ch\u00e2teau d'eau, 75010 Paris, France", +"Les Vendangeurs": "6/8 rue Stanislas, 75006 Paris, France", "maison du vin": "52 rue des plantes, 75014 Paris, France", "Le Tournebride": "104 rue Mouffetard, 75005 Paris, France", "Le Fronton": "63 rue de Ponthieu, 75008 Paris, France", +"Le BB (Bouchon des Batignolles)": "2 rue Lemercier, 75017 Paris, France", +"La cantine de Zo\u00e9": "136 rue du Faubourg poissonni\u00e8re, 75010 Paris, France", +"Chez Rutabaga": "16 rue des Petits Champs, 75002 Paris, France", +"Les caves populaires": "22 rue des Dames, 75017 Paris, France", +"Le Plomb du cantal": "3 rue Ga\u00eet\u00e9, 75014 Paris, France", +"Trois pi\u00e8ces cuisine": "101 rue des dames, 75017 Paris, France", +"La Brocante": "10 rue Rossini, 75009 Paris, France", +"Le Zinc": "61 avenue de la Motte Picquet, 75015 Paris, France", +"Chez Luna": "108 rue de M\u00e9nilmontant, 75020 Paris, France", +"Le bar Fleuri": "1 rue du Plateau, 75019 Paris, France", +"La Libert\u00e9": "196 rue du faubourg saint-antoine, 75012 Paris, France", +"La cantoche de Paname": 
"40 Boulevard Beaumarchais, 75011 Paris, France", +"Le Saint Ren\u00e9": "148 Boulevard de Charonne, 75020 Paris, France", +"Caf\u00e9 Clochette": "16 avenue Richerand, 75010 Paris, France", +"L'europ\u00e9en": "21 Bis Boulevard Diderot, 75012 Paris, France", +"NoMa": "39 rue Notre Dame de Nazareth, 75003 Paris, France", "le lutece": "380 rue de vaugirard, 75015 Paris, France", +"O'Paris": "1 Rue des Envierges, 75020 Paris, France", "Rivolux": "16 rue de Rivoli, 75004 Paris, France", "Brasiloja": "16 rue Ganneron, 75018 Paris, France", -"Le caf\u00e9 Monde et M\u00e9dias": "Place de la R\u00e9publique, 75003 Paris, France", -"L'entrep\u00f4t": "157 rue Bercy 75012 Paris, 75012 Paris, France", -"Coffee Chope": "344Vrue Vaugirard, 75015 Paris, France", -"l'El\u00e9phant du nil": "125 Rue Saint-Antoine, 75004 Paris, France", -"Le Parc Vaugirard": "358 rue de Vaugirard, 75015 Paris, France", -"Pari's Caf\u00e9": "174 avenue de Clichy, 75017 Paris, France", -"Le Comptoir": "354 bis rue Vaugirard, 75015 Paris, France", -"Caf\u00e9 Varenne": "36 rue de Varenne, 75007 Paris, France", -"Melting Pot": "3 rue de Lagny, 75020 Paris, France", -"le Zango": "58 rue Daguerre, 75014 Paris, France", -"Chez Miamophile": "6 rue M\u00e9lingue, 75019 Paris, France", "Institut des Cultures d'Islam": "19-23 rue L\u00e9on, 75018 Paris, France", "Canopy Caf\u00e9 associatif": "19 rue Pajol, 75018 Paris, France", -"Caf\u00e9 rallye tournelles": "11 Quai de la Tournelle, 75005 Paris, France", "Petits Freres des Pauvres": "47 rue de Batignolles, 75017 Paris, France", -"Brasserie le Morvan": "61 rue du ch\u00e2teau d'eau, 75010 Paris, France", +"Le Lucernaire": "53 rue Notre-Dame des Champs, 75006 Paris, France", "L'Angle": "28 rue de Ponthieu, 75008 Paris, France", +"Le Caf\u00e9 d'avant": "35 rue Claude Bernard, 75005 Paris, France", "Caf\u00e9 Dupont": "198 rue de la Convention, 75015 Paris, France", +"Le S\u00e9vign\u00e9": "15 rue du Parc Royal, 75003 Paris, France", "L'Entracte": "place 
de l'opera, 75002 Paris, France", "Panem": "18 rue de Crussol, 75011 Paris, France", "Au pays de Vannes": "34 bis rue de Wattignies, 75012 Paris, France", -"Le Lucernaire": "53 rue Notre-Dame des Champs, 75006 Paris, France", -"Le Caf\u00e9 d'avant": "35 rue Claude Bernard, 75005 Paris, France", +"l'El\u00e9phant du nil": "125 Rue Saint-Antoine, 75004 Paris, France", "L'\u00e2ge d'or": "26 rue du Docteur Magnan, 75013 Paris, France", -"Le S\u00e9vign\u00e9": "15 rue du Parc Royal, 75003 Paris, France", +"Le Comptoir": "354 bis rue Vaugirard, 75015 Paris, France", "L'horizon": "93, rue de la Roquette, 75011 Paris, France", "L'empreinte": "54, avenue Daumesnil, 75012 Paris, France", "Caf\u00e9 Victor": "10 boulevard Victor, 75015 Paris, France", +"Caf\u00e9 Varenne": "36 rue de Varenne, 75007 Paris, France", "Le Brigadier": "12 rue Blanche, 75009 Paris, France", -"Waikiki": "10 rue d\"Ulm, 75005 Paris, France"} \ No newline at end of file +"Waikiki": "10 rue d\"Ulm, 75005 Paris, France", +"Le Parc Vaugirard": "358 rue de Vaugirard, 75015 Paris, France", +"Pari's Caf\u00e9": "174 avenue de Clichy, 75017 Paris, France", +"Melting Pot": "3 rue de Lagny, 75020 Paris, France", +"le Zango": "58 rue Daguerre, 75014 Paris, France", +"Chez Miamophile": "6 rue M\u00e9lingue, 75019 Paris, France", +"Le caf\u00e9 Monde et M\u00e9dias": "Place de la R\u00e9publique, 75003 Paris, France", +"Caf\u00e9 rallye tournelles": "11 Quai de la Tournelle, 75005 Paris, France", +"Brasserie le Morvan": "61 rue du ch\u00e2teau d'eau, 75010 Paris, France", +"L'entrep\u00f4t": "157 rue Bercy 75012 Paris, 75012 Paris, France"} \ No newline at end of file diff --git a/bonobo/examples/datasets/coffeeshops.txt b/bonobo/examples/datasets/coffeeshops.txt index 5fe1ef6..9e3c181 100644 --- a/bonobo/examples/datasets/coffeeshops.txt +++ b/bonobo/examples/datasets/coffeeshops.txt @@ -1,182 +1,182 @@ -Le Reynou, 2 bis quai de la mégisserie, 75001 Paris, France -les montparnos, 65 boulevard Pasteur, 75015 
Paris, France -Le Saint Jean, 23 rue des abbesses, 75018 Paris, France -Le Felteu, 1 rue Pecquay, 75004 Paris, France -O q de poule, 53 rue du ruisseau, 75018 Paris, France -Le chantereine, 51 Rue Victoire, 75009 Paris, France -Le Müller, 11 rue Feutrier, 75018 Paris, France -La Caravane, Rue de la Fontaine au Roi, 75011 Paris, France -Le Pas Sage, 1 Passage du Grand Cerf, 75002 Paris, France -La Renaissance, 112 Rue Championnet, 75018 Paris, France Extérieur Quai, 5, rue d'Alsace, 75010 Paris, France Le Sully, 6 Bd henri IV, 75004 Paris, France +O q de poule, 53 rue du ruisseau, 75018 Paris, France +Le Pas Sage, 1 Passage du Grand Cerf, 75002 Paris, France +La Renaissance, 112 Rue Championnet, 75018 Paris, France +La Caravane, Rue de la Fontaine au Roi, 75011 Paris, France +Le chantereine, 51 Rue Victoire, 75009 Paris, France +Le Müller, 11 rue Feutrier, 75018 Paris, France Le drapeau de la fidelité, 21 rue Copreaux, 75015 Paris, France Le café des amis, 125 rue Blomet, 75015 Paris, France -Le Kleemend's, 34 avenue Pierre Mendès-France, 75013 Paris, France -Assaporare Dix sur Dix, 75, avenue Ledru-Rollin, 75012 Paris, France -Café Pierre, 202 rue du faubourg st antoine, 75012 Paris, France Le Café Livres, 10 rue Saint Martin, 75004 Paris, France -Le Chaumontois, 12 rue Armand Carrel, 75018 Paris, France -Le Square, 31 rue Saint-Dominique, 75007 Paris, France -Les Arcades, 61 rue de Ponthieu, 75008 Paris, France Le Bosquet, 46 avenue Bosquet, 75007 Paris, France -Le bistrot de Maëlle et Augustin, 42 rue coquillère, 75001 Paris, France -Dédé la frite, 52 rue Notre-Dame des Victoires, 75002 Paris, France -Cardinal Saint-Germain, 11 boulevard Saint-Germain, 75005 Paris, France -Café antoine, 17 rue Jean de la Fontaine, 75016 Paris, France +Le Chaumontois, 12 rue Armand Carrel, 75018 Paris, France +Le Kleemend's, 34 avenue Pierre Mendès-France, 75013 Paris, France +Café Pierre, 202 rue du faubourg st antoine, 75012 Paris, France +Les Arcades, 61 rue de Ponthieu, 75008 
Paris, France +Le Square, 31 rue Saint-Dominique, 75007 Paris, France +Assaporare Dix sur Dix, 75, avenue Ledru-Rollin, 75012 Paris, France Au cerceau d'or, 129 boulevard sebastopol, 75002 Paris, France Aux cadrans, 21 ter boulevard Diderot, 75012 Paris, France -Café Lea, 5 rue Claude Bernard, 75005 Paris, France -Le Bellerive, 71 quai de Seine, 75019 Paris, France -La Bauloise, 36 rue du hameau, 75015 Paris, France -Le Dellac, 14 rue Rougemont, 75009 Paris, France -Brûlerie San José, 30 rue des Petits-Champs, 75002 Paris, France +Café antoine, 17 rue Jean de la Fontaine, 75016 Paris, France Café de la Mairie (du VIII), rue de Lisbonne, 75008 Paris, France -Le General Beuret, 9 Place du General Beuret, 75015 Paris, France -Le Cap Bourbon, 1 rue Louis le Grand, 75002 Paris, France +Café Lea, 5 rue Claude Bernard, 75005 Paris, France +Cardinal Saint-Germain, 11 boulevard Saint-Germain, 75005 Paris, France +Dédé la frite, 52 rue Notre-Dame des Victoires, 75002 Paris, France +La Bauloise, 36 rue du hameau, 75015 Paris, France +Le Bellerive, 71 quai de Seine, 75019 Paris, France +Le bistrot de Maëlle et Augustin, 42 rue coquillère, 75001 Paris, France +Le Dellac, 14 rue Rougemont, 75009 Paris, France +Le Felteu, 1 rue Pecquay, 75004 Paris, France +Le Reynou, 2 bis quai de la mégisserie, 75001 Paris, France +Le Saint Jean, 23 rue des abbesses, 75018 Paris, France +les montparnos, 65 boulevard Pasteur, 75015 Paris, France +L'antre d'eux, 16 rue DE MEZIERES, 75006 Paris, France +Drole d'endroit pour une rencontre, 58 rue de Montorgueil, 75002 Paris, France +Le pari's café, 104 rue caulaincourt, 75018 Paris, France +Le Poulailler, 60 rue saint-sabin, 75011 Paris, France +Chai 33, 33 Cour Saint Emilion, 75012 Paris, France +L'Assassin, 99 rue Jean-Pierre Timbaud, 75011 Paris, France +l'Usine, 1 rue d'Avron, 75020 Paris, France +La Bricole, 52 rue Liebniz, 75018 Paris, France +le ronsard, place maubert, 75005 Paris, France +Face Bar, 82 rue des archives, 75003 Paris, France 
+American Kitchen, 49 rue bichat, 75010 Paris, France +La Marine, 55 bis quai de valmy, 75010 Paris, France +Le Bloc, 21 avenue Brochant, 75017 Paris, France +La Recoleta au Manoir, 229 avenue Gambetta, 75020 Paris, France +Le Pareloup, 80 Rue Saint-Charles, 75015 Paris, France +La Brasserie Gaité, 3 rue de la Gaité, 75014 Paris, France +Café Zen, 46 rue Victoire, 75009 Paris, France +O'Breizh, 27 rue de Penthièvre, 75008 Paris, France +Le Petit Choiseul, 23 rue saint augustin, 75002 Paris, France +Invitez vous chez nous, 7 rue Epée de Bois, 75005 Paris, France +La Cordonnerie, 142 Rue Saint-Denis 75002 Paris, 75002 Paris, France +Le Supercoin, 3, rue Baudelique, 75018 Paris, France +Populettes, 86 bis rue Riquet, 75018 Paris, France +Au bon coin, 49 rue des Cloys, 75018 Paris, France +Le Couvent, 69 rue Broca, 75013 Paris, France +La Brûlerie des Ternes, 111 rue mouffetard, 75005 Paris, France +L'Écir, 59 Boulevard Saint-Jacques, 75014 Paris, France +Le Chat bossu, 126, rue du Faubourg Saint Antoine, 75012 Paris, France +Denfert café, 58 boulvevard Saint Jacques, 75014 Paris, France +Le Café frappé, 95 rue Montmartre, 75002 Paris, France +La Perle, 78 rue vieille du temple, 75003 Paris, France +Le Descartes, 1 rue Thouin, 75005 Paris, France +Le petit club, 55 rue de la tombe Issoire, 75014 Paris, France +Le Plein soleil, 90 avenue Parmentier, 75011 Paris, France +Le Relais Haussmann, 146, boulevard Haussmann, 75008 Paris, France +Le Malar, 88 rue Saint-Dominique, 75007 Paris, France +Au panini de la place, 47 rue Belgrand, 75020 Paris, France +Le Village, 182 rue de Courcelles, 75017 Paris, France +Pause Café, 41 rue de Charonne, 75011 Paris, France +Le Pure café, 14 rue Jean Macé, 75011 Paris, France +Extra old café, 307 fg saint Antoine, 75011 Paris, France +Chez Fafa, 44 rue Vinaigriers, 75010 Paris, France En attendant l'or, 3 rue Faidherbe, 75011 Paris, France +Brûlerie San José, 30 rue des Petits-Champs, 75002 Paris, France Café Martin, 2 place Martin 
Nadaud, 75001 Paris, France Etienne, 14 rue Turbigo, Paris, 75001 Paris, France L'ingénu, 184 bd Voltaire, 75011 Paris, France -Le Biz, 18 rue Favart, 75002 Paris, France L'Olive, 8 rue L'Olive, 75018 Paris, France -Le pari's café, 104 rue caulaincourt, 75018 Paris, France -Le Poulailler, 60 rue saint-sabin, 75011 Paris, France -La Marine, 55 bis quai de valmy, 75010 Paris, France -American Kitchen, 49 rue bichat, 75010 Paris, France -Chai 33, 33 Cour Saint Emilion, 75012 Paris, France -Face Bar, 82 rue des archives, 75003 Paris, France -Le Bloc, 21 avenue Brochant, 75017 Paris, France -La Bricole, 52 rue Liebniz, 75018 Paris, France -le ronsard, place maubert, 75005 Paris, France -l'Usine, 1 rue d'Avron, 75020 Paris, France -La Cordonnerie, 142 Rue Saint-Denis 75002 Paris, 75002 Paris, France -Invitez vous chez nous, 7 rue Epée de Bois, 75005 Paris, France -Le sully, 13 rue du Faubourg Saint Denis, 75010 Paris, France -Le Ragueneau, 202 rue Saint-Honoré, 75001 Paris, France +Le Biz, 18 rue Favart, 75002 Paris, France +Le Cap Bourbon, 1 rue Louis le Grand, 75002 Paris, France +Le General Beuret, 9 Place du General Beuret, 75015 Paris, France Le Germinal, 95 avenue Emile Zola, 75015 Paris, France +Le Ragueneau, 202 rue Saint-Honoré, 75001 Paris, France Le refuge, 72 rue lamarck, 75018 Paris, France -Drole d'endroit pour une rencontre, 58 rue de Montorgueil, 75002 Paris, France -Le Petit Choiseul, 23 rue saint augustin, 75002 Paris, France -O'Breizh, 27 rue de Penthièvre, 75008 Paris, France -Le Supercoin, 3, rue Baudelique, 75018 Paris, France -Populettes, 86 bis rue Riquet, 75018 Paris, France -La Recoleta au Manoir, 229 avenue Gambetta, 75020 Paris, France -L'Assassin, 99 rue Jean-Pierre Timbaud, 75011 Paris, France -Le Pareloup, 80 Rue Saint-Charles, 75015 Paris, France -Café Zen, 46 rue Victoire, 75009 Paris, France -La Brasserie Gaité, 3 rue de la Gaité, 75014 Paris, France -Au bon coin, 49 rue des Cloys, 75018 Paris, France -La Brûlerie des Ternes, 111 rue 
mouffetard, 75005 Paris, France -Le Chat bossu, 126, rue du Faubourg Saint Antoine, 75012 Paris, France -Denfert café, 58 boulvevard Saint Jacques, 75014 Paris, France -Le Couvent, 69 rue Broca, 75013 Paris, France -Bagels & Coffee Corner, Place de Clichy, 75017 Paris, France -La Perle, 78 rue vieille du temple, 75003 Paris, France -Le Café frappé, 95 rue Montmartre, 75002 Paris, France -L'Écir, 59 Boulevard Saint-Jacques, 75014 Paris, France -Le Descartes, 1 rue Thouin, 75005 Paris, France -Le petit club, 55 rue de la tombe Issoire, 75014 Paris, France -Le Relais Haussmann, 146, boulevard Haussmann, 75008 Paris, France -Au panini de la place, 47 rue Belgrand, 75020 Paris, France -Extra old café, 307 fg saint Antoine, 75011 Paris, France -Le Plein soleil, 90 avenue Parmentier, 75011 Paris, France -Le Pure café, 14 rue Jean Macé, 75011 Paris, France -Le Village, 182 rue de Courcelles, 75017 Paris, France -Le Malar, 88 rue Saint-Dominique, 75007 Paris, France -Pause Café, 41 rue de Charonne, 75011 Paris, France -Chez Fafa, 44 rue Vinaigriers, 75010 Paris, France -Café dans l'aerogare Air France Invalides, 2 rue Robert Esnault Pelterie, 75007 Paris, France -Le relais de la victoire, 73 rue de la Victoire, 75009 Paris, France -Caprice café, 12 avenue Jean Moulin, 75014 Paris, France -Caves populaires, 22 rue des Dames, 75017 Paris, France -Cafe de grenelle, 188 rue de Grenelle, 75007 Paris, France -Chez Prune, 36 rue Beaurepaire, 75010 Paris, France -L'anjou, 1 rue de Montholon, 75009 Paris, France -Le Brio, 216, rue Marcadet, 75018 Paris, France -Tamm Bara, 7 rue Clisson, 75013 Paris, France -La chaumière gourmande, Route de la Muette à Neuilly -Club hippique du Jardin d’Acclimatation, 75016 Paris, France -Les Pères Populaires, 46 rue de Buzenval, 75020 Paris, France -Epicerie Musicale, 55bis quai de Valmy, 75010 Paris, France -Le Centenaire, 104 rue amelot, 75011 Paris, France +Le sully, 13 rue du Faubourg Saint Denis, 75010 Paris, France +Coffee Chope, 344Vrue 
Vaugirard, 75015 Paris, France +Le bal du pirate, 60 rue des bergers, 75015 Paris, France +zic zinc, 95 rue claude decaen, 75012 Paris, France +l'orillon bar, 35 rue de l'orillon, 75011 Paris, France Le Zazabar, 116 Rue de Ménilmontant, 75020 Paris, France -Ragueneau, 202 rue Saint Honoré, 75001 Paris, France L'Inévitable, 22 rue Linné, 75005 Paris, France Le Dunois, 77 rue Dunois, 75013 Paris, France -La Montagne Sans Geneviève, 13 Rue du Pot de Fer, 75005 Paris, France +Ragueneau, 202 rue Saint Honoré, 75001 Paris, France Le Caminito, 48 rue du Dessous des Berges, 75013 Paris, France +Epicerie Musicale, 55bis quai de Valmy, 75010 Paris, France Le petit Bretonneau, Le petit Bretonneau - à l'intérieur de l'Hôpital, 75018 Paris, France -Le bal du pirate, 60 rue des bergers, 75015 Paris, France -L'antre d'eux, 16 rue DE MEZIERES, 75006 Paris, France -l'orillon bar, 35 rue de l'orillon, 75011 Paris, France -zic zinc, 95 rue claude decaen, 75012 Paris, France +Le Centenaire, 104 rue amelot, 75011 Paris, France +La Montagne Sans Geneviève, 13 Rue du Pot de Fer, 75005 Paris, France +Les Pères Populaires, 46 rue de Buzenval, 75020 Paris, France +Cafe de grenelle, 188 rue de Grenelle, 75007 Paris, France +Le relais de la victoire, 73 rue de la Victoire, 75009 Paris, France +La chaumière gourmande, Route de la Muette à Neuilly +Club hippique du Jardin d’Acclimatation, 75016 Paris, France +Le Brio, 216, rue Marcadet, 75018 Paris, France +Caves populaires, 22 rue des Dames, 75017 Paris, France +Caprice café, 12 avenue Jean Moulin, 75014 Paris, France +Tamm Bara, 7 rue Clisson, 75013 Paris, France +L'anjou, 1 rue de Montholon, 75009 Paris, France +Café dans l'aerogare Air France Invalides, 2 rue Robert Esnault Pelterie, 75007 Paris, France +Chez Prune, 36 rue Beaurepaire, 75010 Paris, France +Au Vin Des Rues, 21 rue Boulard, 75014 Paris, France +bistrot les timbrés, 14 rue d'alleray, 75015 Paris, France +Café beauveau, 9 rue de Miromesnil, 75008 Paris, France Café Pistache, 9 
rue des petits champs, 75001 Paris, France La Cagnotte, 13 Rue Jean-Baptiste Dumay, 75020 Paris, France -bistrot les timbrés, 14 rue d'alleray, 75015 Paris, France -Le Killy Jen, 28 bis boulevard Diderot, 75012 Paris, France -Café beauveau, 9 rue de Miromesnil, 75008 Paris, France le 1 cinq, 172 rue de vaugirard, 75015 Paris, France -Au Vin Des Rues, 21 rue Boulard, 75014 Paris, France +Le Killy Jen, 28 bis boulevard Diderot, 75012 Paris, France Les Artisans, 106 rue Lecourbe, 75015 Paris, France Peperoni, 83 avenue de Wagram, 75001 Paris, France -Le BB (Bouchon des Batignolles), 2 rue Lemercier, 75017 Paris, France -La Liberté, 196 rue du faubourg saint-antoine, 75012 Paris, France -Chez Rutabaga, 16 rue des Petits Champs, 75002 Paris, France -La cantoche de Paname, 40 Boulevard Beaumarchais, 75011 Paris, France -Le Saint René, 148 Boulevard de Charonne, 75020 Paris, France -La Brocante, 10 rue Rossini, 75009 Paris, France -Café Clochette, 16 avenue Richerand, 75010 Paris, France +le lutece, 380 rue de vaugirard, 75015 Paris, France +Brasiloja, 16 rue Ganneron, 75018 Paris, France +Rivolux, 16 rue de Rivoli, 75004 Paris, France L'européen, 21 Bis Boulevard Diderot, 75012 Paris, France NoMa, 39 rue Notre Dame de Nazareth, 75003 Paris, France O'Paris, 1 Rue des Envierges, 75020 Paris, France -Botak cafe, 1 rue Paul albert, 75018 Paris, France -La cantine de Zoé, 136 rue du Faubourg poissonnière, 75010 Paris, France -Les caves populaires, 22 rue des Dames, 75017 Paris, France +Café Clochette, 16 avenue Richerand, 75010 Paris, France +La cantoche de Paname, 40 Boulevard Beaumarchais, 75011 Paris, France +Le Saint René, 148 Boulevard de Charonne, 75020 Paris, France +La Liberté, 196 rue du faubourg saint-antoine, 75012 Paris, France +Chez Rutabaga, 16 rue des Petits Champs, 75002 Paris, France +Le BB (Bouchon des Batignolles), 2 rue Lemercier, 75017 Paris, France +La Brocante, 10 rue Rossini, 75009 Paris, France Le Plomb du cantal, 3 rue Gaîté, 75014 Paris, France 
-Trois pièces cuisine, 101 rue des dames, 75017 Paris, France -Le Zinc, 61 avenue de la Motte Picquet, 75015 Paris, France -L'avant comptoir, 3 carrefour de l'Odéon, 75006 Paris, France -Les Vendangeurs, 6/8 rue Stanislas, 75006 Paris, France +Les caves populaires, 22 rue des Dames, 75017 Paris, France Chez Luna, 108 rue de Ménilmontant, 75020 Paris, France Le bar Fleuri, 1 rue du Plateau, 75019 Paris, France +Trois pièces cuisine, 101 rue des dames, 75017 Paris, France +Le Zinc, 61 avenue de la Motte Picquet, 75015 Paris, France +La cantine de Zoé, 136 rue du Faubourg poissonnière, 75010 Paris, France +Les Vendangeurs, 6/8 rue Stanislas, 75006 Paris, France +L'avant comptoir, 3 carrefour de l'Odéon, 75006 Paris, France +Botak cafe, 1 rue Paul albert, 75018 Paris, France +le chateau d'eau, 67 rue du Château d'eau, 75010 Paris, France Bistrot Saint-Antoine, 58 rue du Fbg Saint-Antoine, 75012 Paris, France Chez Oscar, 11/13 boulevard Beaumarchais, 75004 Paris, France -Le Piquet, 48 avenue de la Motte Picquet, 75015 Paris, France -le chateau d'eau, 67 rue du Château d'eau, 75010 Paris, France -maison du vin, 52 rue des plantes, 75014 Paris, France -Le Tournebride, 104 rue Mouffetard, 75005 Paris, France Le Fronton, 63 rue de Ponthieu, 75008 Paris, France -le lutece, 380 rue de vaugirard, 75015 Paris, France -Rivolux, 16 rue de Rivoli, 75004 Paris, France -Brasiloja, 16 rue Ganneron, 75018 Paris, France -Le café Monde et Médias, Place de la République, 75003 Paris, France +Le Piquet, 48 avenue de la Motte Picquet, 75015 Paris, France +Le Tournebride, 104 rue Mouffetard, 75005 Paris, France +maison du vin, 52 rue des plantes, 75014 Paris, France L'entrepôt, 157 rue Bercy 75012 Paris, 75012 Paris, France -Coffee Chope, 344Vrue Vaugirard, 75015 Paris, France -l'Eléphant du nil, 125 Rue Saint-Antoine, 75004 Paris, France -Le Parc Vaugirard, 358 rue de Vaugirard, 75015 Paris, France -Pari's Café, 174 avenue de Clichy, 75017 Paris, France -Le Comptoir, 354 bis rue Vaugirard, 
75015 Paris, France -Café Varenne, 36 rue de Varenne, 75007 Paris, France -Melting Pot, 3 rue de Lagny, 75020 Paris, France -le Zango, 58 rue Daguerre, 75014 Paris, France +Le café Monde et Médias, Place de la République, 75003 Paris, France +Café rallye tournelles, 11 Quai de la Tournelle, 75005 Paris, France +Brasserie le Morvan, 61 rue du château d'eau, 75010 Paris, France Chez Miamophile, 6 rue Mélingue, 75019 Paris, France +Panem, 18 rue de Crussol, 75011 Paris, France +Petits Freres des Pauvres, 47 rue de Batignolles, 75017 Paris, France +Café Dupont, 198 rue de la Convention, 75015 Paris, France +L'Angle, 28 rue de Ponthieu, 75008 Paris, France Institut des Cultures d'Islam, 19-23 rue Léon, 75018 Paris, France Canopy Café associatif, 19 rue Pajol, 75018 Paris, France -Café rallye tournelles, 11 Quai de la Tournelle, 75005 Paris, France -Petits Freres des Pauvres, 47 rue de Batignolles, 75017 Paris, France -Brasserie le Morvan, 61 rue du château d'eau, 75010 Paris, France -L'Angle, 28 rue de Ponthieu, 75008 Paris, France -Café Dupont, 198 rue de la Convention, 75015 Paris, France L'Entracte, place de l'opera, 75002 Paris, France -Panem, 18 rue de Crussol, 75011 Paris, France -Au pays de Vannes, 34 bis rue de Wattignies, 75012 Paris, France -Le Lucernaire, 53 rue Notre-Dame des Champs, 75006 Paris, France -Le Café d'avant, 35 rue Claude Bernard, 75005 Paris, France -L'âge d'or, 26 rue du Docteur Magnan, 75013 Paris, France Le Sévigné, 15 rue du Parc Royal, 75003 Paris, France -L'horizon, 93, rue de la Roquette, 75011 Paris, France -L'empreinte, 54, avenue Daumesnil, 75012 Paris, France -Café Victor, 10 boulevard Victor, 75015 Paris, France +Le Café d'avant, 35 rue Claude Bernard, 75005 Paris, France +Le Lucernaire, 53 rue Notre-Dame des Champs, 75006 Paris, France Le Brigadier, 12 rue Blanche, 75009 Paris, France -Waikiki, 10 rue d"Ulm, 75005 Paris, France \ No newline at end of file +L'âge d'or, 26 rue du Docteur Magnan, 75013 Paris, France +Bagels & Coffee 
Corner, Place de Clichy, 75017 Paris, France +Café Victor, 10 boulevard Victor, 75015 Paris, France +L'empreinte, 54, avenue Daumesnil, 75012 Paris, France +L'horizon, 93, rue de la Roquette, 75011 Paris, France +Waikiki, 10 rue d"Ulm, 75005 Paris, France +Au pays de Vannes, 34 bis rue de Wattignies, 75012 Paris, France +Café Varenne, 36 rue de Varenne, 75007 Paris, France +l'Eléphant du nil, 125 Rue Saint-Antoine, 75004 Paris, France +Le Comptoir, 354 bis rue Vaugirard, 75015 Paris, France +Le Parc Vaugirard, 358 rue de Vaugirard, 75015 Paris, France +le Zango, 58 rue Daguerre, 75014 Paris, France +Melting Pot, 3 rue de Lagny, 75020 Paris, France +Pari's Café, 174 avenue de Clichy, 75017 Paris, France \ No newline at end of file diff --git a/bonobo/examples/datasets/fablabs.py b/bonobo/examples/datasets/fablabs.py index be95fe1..b87019f 100644 --- a/bonobo/examples/datasets/fablabs.py +++ b/bonobo/examples/datasets/fablabs.py @@ -48,11 +48,6 @@ def normalize(row): return result -def filter_france(row): - if row.get('country') == 'France': - yield row - - def display(row): print(Style.BRIGHT, row.get('name'), Style.RESET_ALL, sep='') @@ -95,9 +90,9 @@ graph = bonobo.Graph( dataset=API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris' ), normalize, - filter_france, + bonobo.Filter(filter=lambda row: row.get('country') == 'France'), + bonobo.JsonWriter(path='fablabs.txt', ioformat='arg0'), bonobo.Tee(display), - bonobo.JsonWriter(path='fablabs.txt'), ) if __name__ == '__main__': diff --git a/bonobo/examples/env_vars/__init__.py b/bonobo/examples/env_vars/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/examples/env_vars/get_passed_env.py b/bonobo/examples/env_vars/get_passed_env.py new file mode 100644 index 0000000..54a3280 --- /dev/null +++ b/bonobo/examples/env_vars/get_passed_env.py @@ -0,0 +1,20 @@ +import os + +import bonobo + + +def extract(): + env_test_user = os.getenv('ENV_TEST_USER') + env_test_number = 
os.getenv('ENV_TEST_NUMBER') + env_test_string = os.getenv('ENV_TEST_STRING') + return env_test_user, env_test_number, env_test_string + + +def load(s: str): + print(s) + + +graph = bonobo.Graph(extract, load) + +if __name__ == '__main__': + bonobo.run(graph) diff --git a/bonobo/examples/nodes/bags.py b/bonobo/examples/nodes/bags.py new file mode 100644 index 0000000..2bfe5de --- /dev/null +++ b/bonobo/examples/nodes/bags.py @@ -0,0 +1,41 @@ +""" +Example on how to use :class:`bonobo.Bag` instances to pass flexible args/kwargs to the next callable. + +.. graphviz:: + + digraph { + rankdir = LR; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "extract()" -> "transform(...)" -> "load(...)"; + } + +""" + +from random import randint + +from bonobo import Bag, Graph + + +def extract(): + yield Bag(topic='foo') + yield Bag(topic='bar') + yield Bag(topic='baz') + + +def transform(topic: str): + return Bag.inherit(title=topic.title(), rand=randint(10, 99)) + + +def load(topic: str, title: str, rand: int): + print('{} ({}) wait={}'.format(title, topic, rand)) + + +graph = Graph() +graph.add_chain(extract, transform, load) + +if __name__ == '__main__': + from bonobo import run + + run(graph) diff --git a/bonobo/examples/nodes/dicts.py b/bonobo/examples/nodes/dicts.py new file mode 100644 index 0000000..fde4b08 --- /dev/null +++ b/bonobo/examples/nodes/dicts.py @@ -0,0 +1,43 @@ +""" +Example on how to use symple python dictionaries to communicate between transformations. + +.. 
graphviz:: + + digraph { + rankdir = LR; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "extract()" -> "transform(row: dict)" -> "load(row: dict)"; + } + +""" + +from random import randint + +from bonobo import Graph + + +def extract(): + yield {'topic': 'foo'} + yield {'topic': 'bar'} + yield {'topic': 'baz'} + + +def transform(row: dict): + return { + 'topic': row['topic'].title(), + 'randint': randint(10, 99), + } + + +def load(row: dict): + print(row) + + +graph = Graph(extract, transform, load) + +if __name__ == '__main__': + from bonobo import run + + run(graph) diff --git a/bonobo/examples/nodes/factory.py b/bonobo/examples/nodes/factory.py new file mode 100644 index 0000000..c1f3818 --- /dev/null +++ b/bonobo/examples/nodes/factory.py @@ -0,0 +1,18 @@ +import bonobo +from bonobo.commands.run import get_default_services +from bonobo.nodes.factory import Factory +from bonobo.nodes.io.json import JsonDictItemsReader + +normalize = Factory() +normalize[0].str().title() +normalize.move(0, 'title') +normalize.move(0, 'address') + +graph = bonobo.Graph( + JsonDictItemsReader('datasets/coffeeshops.json'), + normalize, + bonobo.PrettyPrinter(), +) + +if __name__ == '__main__': + bonobo.run(graph, services=get_default_services(__file__)) diff --git a/bonobo/examples/nodes/filter.py b/bonobo/examples/nodes/filter.py index bf390e9..4f7219a 100644 --- a/bonobo/examples/nodes/filter.py +++ b/bonobo/examples/nodes/filter.py @@ -9,13 +9,16 @@ class OddOnlyFilter(Filter): @Filter -def MultiplesOfThreeOnlyFilter(self, i): +def multiples_of_three(i): return not (i % 3) graph = bonobo.Graph( lambda: tuple(range(50)), OddOnlyFilter(), - MultiplesOfThreeOnlyFilter(), + multiples_of_three, print, ) + +if __name__ == '__main__': + bonobo.run(graph) diff --git a/bonobo/examples/nodes/slow.py b/bonobo/examples/nodes/slow.py index b9623af..ecaaf44 100644 --- a/bonobo/examples/nodes/slow.py +++ b/bonobo/examples/nodes/slow.py @@ -14,3 +14,6 @@ graph = 
bonobo.Graph( pause, print, ) + +if __name__ == '__main__': + bonobo.run(graph) diff --git a/bonobo/examples/nodes/strings.py b/bonobo/examples/nodes/strings.py new file mode 100644 index 0000000..1903151 --- /dev/null +++ b/bonobo/examples/nodes/strings.py @@ -0,0 +1,39 @@ +""" +Example on how to use symple python strings to communicate between transformations. + +.. graphviz:: + + digraph { + rankdir = LR; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "extract()" -> "transform(s: str)" -> "load(s: str)"; + } + +""" +from random import randint + +from bonobo import Graph + + +def extract(): + yield 'foo' + yield 'bar' + yield 'baz' + + +def transform(s: str): + return '{} ({})'.format(s.title(), randint(10, 99)) + + +def load(s: str): + print(s) + + +graph = Graph(extract, transform, load) + +if __name__ == '__main__': + from bonobo import run + + run(graph) diff --git a/bonobo/execution/base.py b/bonobo/execution/base.py index 641d761..abb3516 100644 --- a/bonobo/execution/base.py +++ b/bonobo/execution/base.py @@ -2,7 +2,7 @@ import traceback from contextlib import contextmanager from time import sleep -from bonobo.config import Container +from bonobo.config import create_container from bonobo.config.processors import ContextCurrifier from bonobo.plugins import get_enhancers from bonobo.util.errors import print_error @@ -48,7 +48,7 @@ class LoopingExecutionContext(Wrapper): raise RuntimeError( 'Having services defined both in GraphExecutionContext and child NodeExecutionContext is not supported, for now.' 
) - self.services = Container(services) if services else Container() + self.services = create_container(services) else: self.services = None diff --git a/bonobo/execution/graph.py b/bonobo/execution/graph.py index 00d2c43..91e4aef 100644 --- a/bonobo/execution/graph.py +++ b/bonobo/execution/graph.py @@ -1,6 +1,6 @@ from functools import partial -from bonobo.config.services import Container +from bonobo.config import create_container from bonobo.constants import BEGIN, END from bonobo.execution.node import NodeExecutionContext from bonobo.execution.plugin import PluginExecutionContext @@ -23,7 +23,10 @@ class GraphExecutionContext: self.graph = graph self.nodes = [NodeExecutionContext(node, parent=self) for node in self.graph] self.plugins = [PluginExecutionContext(plugin, parent=self) for plugin in plugins or ()] - self.services = Container(services) if services else Container() + self.services = create_container(services) + + # Probably not a good idea to use it unless you really know what you're doing. But you can access the context. 
+ self.services['__graph_context'] = self for i, node_context in enumerate(self): node_context.outputs = [self[j].input for j in self.graph.outputs_of(i)] @@ -40,7 +43,7 @@ class GraphExecutionContext: def __iter__(self): yield from self.nodes - def recv(self, *messages): + def write(self, *messages): """Push a list of messages in the inputs of this graph's inputs, matching the output of special node "BEGIN" in our graph.""" @@ -53,12 +56,12 @@ class GraphExecutionContext: for node in self.nodes: node.start() - def loop(self): - # todo use strategy - for node in self.nodes: - node.loop() - def stop(self): # todo use strategy for node in self.nodes: node.stop() + + def loop(self): + # todo use strategy + for node in self.nodes: + node.loop() diff --git a/bonobo/execution/node.py b/bonobo/execution/node.py index 635068e..e8869ac 100644 --- a/bonobo/execution/node.py +++ b/bonobo/execution/node.py @@ -3,12 +3,12 @@ from queue import Empty from time import sleep from bonobo.constants import INHERIT_INPUT, NOT_MODIFIED -from bonobo.errors import InactiveReadableError +from bonobo.errors import InactiveReadableError, UnrecoverableError from bonobo.execution.base import LoopingExecutionContext from bonobo.structs.bags import Bag from bonobo.structs.inputs import Input from bonobo.util.compat import deprecated_alias -from bonobo.util.errors import is_error +from bonobo.util.inspect import iserrorbag, isloopbackbag from bonobo.util.iterators import iter_if_not_sequence from bonobo.util.objects import get_name from bonobo.util.statistics import WithStatistics @@ -65,8 +65,10 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): if not _control: self.increment('out') - if is_error(value): + if iserrorbag(value): value.apply(self.handle_error) + elif isloopbackbag(value): + self.input.put(value) else: for output in self.outputs: output.put(value) @@ -93,6 +95,10 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext): except Empty: 
sleep(self.PERIOD) continue + except UnrecoverableError as exc: + self.handle_error(exc, traceback.format_exc()) + self.input.shutdown() + break except Exception as exc: # pylint: disable=broad-except self.handle_error(exc, traceback.format_exc()) @@ -133,7 +139,7 @@ def _resolve(input_bag, output): if output is NOT_MODIFIED: return input_bag - if is_error(output): + if iserrorbag(output): return output # If it does not look like a bag, let's create one for easier manipulation diff --git a/bonobo/execution/plugin.py b/bonobo/execution/plugin.py index d928f4a..a207f23 100644 --- a/bonobo/execution/plugin.py +++ b/bonobo/execution/plugin.py @@ -16,8 +16,9 @@ class PluginExecutionContext(LoopingExecutionContext): self.wrapped.initialize() def shutdown(self): - with recoverable(self.handle_error): - self.wrapped.finalize() + if self.started: + with recoverable(self.handle_error): + self.wrapped.finalize() self.alive = False def step(self): diff --git a/bonobo/ext/console.py b/bonobo/ext/console.py index edc6436..4d8cb6f 100644 --- a/bonobo/ext/console.py +++ b/bonobo/ext/console.py @@ -2,7 +2,9 @@ import io import sys from contextlib import redirect_stdout -from colorama import Style, Fore +from colorama import Style, Fore, init + +init(wrap=True) from bonobo import settings from bonobo.plugins import Plugin @@ -10,6 +12,13 @@ from bonobo.util.term import CLEAR_EOL, MOVE_CURSOR_UP class IOBuffer(): + """ + The role of IOBuffer is to overcome the problem of multiple threads wanting to write to stdout at the same time. It + works a bit like a videogame: there are two buffers, one that is used to write, and one which is used to read from. + On each cycle, we swap the buffers, and the console plugin handle output of the one which is not anymore "active". + + """ + def __init__(self): self.current = io.StringIO() self.write = self.current.write @@ -32,6 +41,9 @@ class ConsoleOutputPlugin(Plugin): Outputs status information to the connected stdout. 
Can be a TTY, with or without support for colors/cursor movements, or a non tty (pipe, file, ...). The features are adapted to terminal capabilities. + On Windows, we'll play a bit differently because we don't know how to manipulate cursor position. We'll only + display stats at the very end, and there won't be this "buffering" logic we need to display both stats and stdout. + .. attribute:: prefix String prefix of output lines. @@ -43,17 +55,18 @@ class ConsoleOutputPlugin(Plugin): self.counter = 0 self._append_cache = '' self.isatty = sys.stdout.isatty() + self.iswindows = (sys.platform == 'win32') self._stdout = sys.stdout self.stdout = IOBuffer() - self.redirect_stdout = redirect_stdout(self.stdout) + self.redirect_stdout = redirect_stdout(self._stdout if self.iswindows else self.stdout) self.redirect_stdout.__enter__() def run(self): - if self.isatty: + if self.isatty and not self.iswindows: self._write(self.context.parent, rewind=True) else: - pass # not a tty + pass # not a tty, or windows, so we'll ignore stats output def finalize(self): self._write(self.context.parent, rewind=False) @@ -62,24 +75,28 @@ class ConsoleOutputPlugin(Plugin): def write(self, context, prefix='', rewind=True, append=None): t_cnt = len(context) - buffered = self.stdout.switch() - for line in buffered.split('\n')[:-1]: - print(line + CLEAR_EOL, file=sys.stderr) + if not self.iswindows: + buffered = self.stdout.switch() + for line in buffered.split('\n')[:-1]: + print(line + CLEAR_EOL, file=sys.stderr) + + alive_color = Style.BRIGHT + dead_color = Style.BRIGHT + Fore.BLACK for i in context.graph.topologically_sorted_indexes: node = context[i] - name_suffix = '({})'.format(i) if settings.DEBUG else '' + name_suffix = '({})'.format(i) if settings.DEBUG.get() else '' if node.alive: _line = ''.join( ( - ' ', Style.BRIGHT, '+', Style.RESET_ALL, ' ', node.name, name_suffix, ' ', + ' ', alive_color, '+', Style.RESET_ALL, ' ', node.name, name_suffix, ' ', node.get_statistics_as_string(), 
Style.RESET_ALL, ' ', ) ) else: _line = ''.join( ( - ' ', Fore.BLACK, '-', ' ', node.name, name_suffix, ' ', node.get_statistics_as_string(), + ' ', dead_color, '-', ' ', node.name, name_suffix, ' ', node.get_statistics_as_string(), Style.RESET_ALL, ' ', ) ) @@ -103,7 +120,7 @@ class ConsoleOutputPlugin(Plugin): print(MOVE_CURSOR_UP(t_cnt + 2), file=sys.stderr) def _write(self, graph_context, rewind): - if settings.PROFILE: + if settings.PROFILE.get(): if self.counter % 10 and self._append_cache: append = self._append_cache else: diff --git a/bonobo/ext/jupyter/plugin.py b/bonobo/ext/jupyter/plugin.py index a72141c..715b057 100644 --- a/bonobo/ext/jupyter/plugin.py +++ b/bonobo/ext/jupyter/plugin.py @@ -1,11 +1,11 @@ +import logging + from bonobo.ext.jupyter.widget import BonoboWidget from bonobo.plugins import Plugin try: import IPython.core.display except ImportError as e: - import logging - logging.exception( 'You must install Jupyter to use the bonobo Jupyter extension. Easiest way is to install the ' 'optional "jupyter" dependencies with «pip install bonobo[jupyter]», but you can also install a ' diff --git a/bonobo/ext/opendatasoft.py b/bonobo/ext/opendatasoft.py index 4be3134..2dc54c0 100644 --- a/bonobo/ext/opendatasoft.py +++ b/bonobo/ext/opendatasoft.py @@ -13,13 +13,13 @@ def path_str(path): class OpenDataSoftAPI(Configurable): - dataset = Option(str, required=True) + dataset = Option(str, positional=True) endpoint = Option(str, default='{scheme}://{netloc}{path}') scheme = Option(str, default='https') netloc = Option(str, default='data.opendatasoft.com') path = Option(path_str, default='/api/records/1.0/search/') rows = Option(int, default=500) - limit = Option(int, default=None) + limit = Option(int, required=False) timezone = Option(str, default='Europe/Paris') kwargs = Option(dict, default=dict) diff --git a/bonobo/logging.py b/bonobo/logging.py index 17bdeb7..071fcd3 100644 --- a/bonobo/logging.py +++ b/bonobo/logging.py @@ -8,19 +8,22 @@ from 
colorama import Fore, Style from bonobo import settings from bonobo.util.term import CLEAR_EOL +iswindows = (sys.platform == 'win32') + def get_format(): yield '{b}[%(fg)s%(levelname)s{b}][{w}' yield '{b}][{w}'.join(('%(spent)04d', '%(name)s')) yield '{b}]' yield ' %(fg)s%(message)s{r}' - yield CLEAR_EOL + if not iswindows: + yield CLEAR_EOL colors = { - 'b': Fore.BLACK, - 'w': Fore.LIGHTBLACK_EX, - 'r': Style.RESET_ALL, + 'b': '' if iswindows else Fore.BLACK, + 'w': '' if iswindows else Fore.LIGHTBLACK_EX, + 'r': '' if iswindows else Style.RESET_ALL, } format = (''.join(get_format())).format(**colors) @@ -28,7 +31,9 @@ format = (''.join(get_format())).format(**colors) class Filter(logging.Filter): def filter(self, record): record.spent = record.relativeCreated // 1000 - if record.levelname == 'DEBG': + if iswindows: + record.fg = '' + elif record.levelname == 'DEBG': record.fg = Fore.LIGHTBLACK_EX elif record.levelname == 'INFO': record.fg = Fore.LIGHTWHITE_EX @@ -46,7 +51,10 @@ class Filter(logging.Filter): class Formatter(logging.Formatter): def formatException(self, ei): tb = super().formatException(ei) - return textwrap.indent(tb, Fore.BLACK + ' | ' + Fore.WHITE) + if iswindows: + return textwrap.indent(tb, ' | ') + else: + return textwrap.indent(tb, Fore.BLACK + ' | ' + Fore.WHITE) def setup(level): @@ -75,4 +83,4 @@ def get_logger(name='bonobo'): getLogger = get_logger # Setup formating and level. 
-setup(level=settings.LOGGING_LEVEL) +setup(level=settings.LOGGING_LEVEL.get()) diff --git a/bonobo/nodes/__init__.py b/bonobo/nodes/__init__.py index c25b580..2cdd1e9 100644 --- a/bonobo/nodes/__init__.py +++ b/bonobo/nodes/__init__.py @@ -1,9 +1,8 @@ -from bonobo.nodes.io import __all__ as _all_io -from bonobo.nodes.io import * - -from bonobo.nodes.basics import __all__ as _all_basics from bonobo.nodes.basics import * - +from bonobo.nodes.basics import __all__ as _all_basics from bonobo.nodes.filter import Filter +from bonobo.nodes.io import * +from bonobo.nodes.io import __all__ as _all_io +from bonobo.nodes.throttle import RateLimited -__all__ = _all_basics + _all_io + ['Filter'] +__all__ = _all_basics + _all_io + ['Filter', 'RateLimited'] diff --git a/bonobo/nodes/basics.py b/bonobo/nodes/basics.py index c21757a..e23dd05 100644 --- a/bonobo/nodes/basics.py +++ b/bonobo/nodes/basics.py @@ -1,8 +1,5 @@ import functools -from pprint import pprint as _pprint - import itertools -from colorama import Fore, Style from bonobo import settings from bonobo.config import Configurable, Option @@ -10,15 +7,17 @@ from bonobo.config.processors import ContextProcessor from bonobo.structs.bags import Bag from bonobo.util.objects import ValueHolder from bonobo.util.term import CLEAR_EOL + from bonobo.constants import NOT_MODIFIED __all__ = [ - 'identity', 'Limit', - 'Tee', - 'count', - 'pprint', 'PrettyPrinter', + 'Tee', + 'arg0_to_kwargs', + 'count', + 'identity', + 'kwargs_to_arg0', 'noop', ] @@ -73,7 +72,7 @@ def _count_counter(self, context): class PrettyPrinter(Configurable): def call(self, *args, **kwargs): - formater = self._format_quiet if settings.QUIET else self._format_console + formater = self._format_quiet if settings.QUIET.get() else self._format_console for i, (item, value) in enumerate(itertools.chain(enumerate(args), kwargs.items())): print(formater(i, item, value)) @@ -87,47 +86,28 @@ class PrettyPrinter(Configurable): ) -pprint = PrettyPrinter() 
-pprint.__name__ = 'pprint' - - -def PrettyPrint(title_keys=('title', 'name', 'id'), print_values=True, sort=True): - from bonobo.constants import NOT_MODIFIED - - def _pprint(*args, **kwargs): - nonlocal title_keys, sort, print_values - - row = args[0] - for key in title_keys: - if key in row: - print(Style.BRIGHT, row.get(key), Style.RESET_ALL, sep='') - break - - if print_values: - for k in sorted(row) if sort else row: - print( - ' • ', - Fore.BLUE, - k, - Style.RESET_ALL, - ' : ', - Fore.BLACK, - '(', - type(row[k]).__name__, - ')', - Style.RESET_ALL, - ' ', - repr(row[k]), - CLEAR_EOL, - ) - - yield NOT_MODIFIED - - _pprint.__name__ = 'pprint' - - return _pprint - - def noop(*args, **kwargs): # pylint: disable=unused-argument from bonobo.constants import NOT_MODIFIED return NOT_MODIFIED + + +def arg0_to_kwargs(row): + """ + Transform items in a stream from "arg0" format (each call only has one positional argument, which is a dict-like + object) to "kwargs" format (each call only has keyword arguments that represent a row). + + :param row: + :return: bonobo.Bag + """ + return Bag(**row) + + +def kwargs_to_arg0(**row): + """ + Transform items in a stream from "kwargs" format (each call only has keyword arguments that represent a row) to + "arg0" format (each call only has one positional argument, which is a dict-like object) . 
+ + :param **row: + :return: bonobo.Bag + """ + return Bag(row) diff --git a/bonobo/nodes/factory.py b/bonobo/nodes/factory.py new file mode 100644 index 0000000..2a1c30b --- /dev/null +++ b/bonobo/nodes/factory.py @@ -0,0 +1,219 @@ +import functools +import warnings +from functools import partial + +from bonobo import Bag +from bonobo.config import Configurable, Method + +_isarg = lambda item: type(item) is int +_iskwarg = lambda item: type(item) is str + + +class Operation(): + def __init__(self, item, callable): + self.item = item + self.callable = callable + + def __repr__(self): + return ''.format(self.callable.__name__, self.item) + + def apply(self, *args, **kwargs): + if _isarg(self.item): + return (*args[0:self.item], self.callable(args[self.item]), *args[self.item + 1:]), kwargs + if _iskwarg(self.item): + return args, {**kwargs, self.item: self.callable(kwargs.get(self.item))} + raise RuntimeError('Houston, we have a problem...') + + +class FactoryOperation(): + def __init__(self, factory, callable): + self.factory = factory + self.callable = callable + + def __repr__(self): + return ''.format(self.callable.__name__) + + def apply(self, *args, **kwargs): + return self.callable(*args, **kwargs) + + +CURSOR_TYPES = {} + + +def operation(mixed): + def decorator(m, ctype=mixed): + def lazy_operation(self, *args, **kwargs): + @functools.wraps(m) + def actual_operation(x): + return m(self, x, *args, **kwargs) + + self.factory.operations.append(Operation(self.item, actual_operation)) + return CURSOR_TYPES[ctype](self.factory, self.item) if ctype else self + + return lazy_operation + + return decorator if isinstance(mixed, str) else decorator(mixed, ctype=None) + + +def factory_operation(m): + def lazy_operation(self, *config): + @functools.wraps(m) + def actual_operation(*args, **kwargs): + return m(self, *config, *args, **kwargs) + + self.operations.append(FactoryOperation(self, actual_operation)) + return self + + return lazy_operation + + +class Cursor(): + 
_type = None + + def __init__(self, factory, item): + self.factory = factory + self.item = item + + @operation('dict') + def dict(self, x): + return x if isinstance(x, dict) else dict(x) + + @operation('int') + def int(self): + pass + + @operation('str') + def str(self, x): + return x if isinstance(x, str) else str(x) + + @operation('list') + def list(self): + pass + + @operation('tuple') + def tuple(self): + pass + + def __getattr__(self, item): + """ + Fallback to type methods if they exist, for example StrCursor.upper will use str.upper if not overriden, etc. + + :param item: + """ + if self._type and item in self._type.__dict__: + method = self._type.__dict__[item] + + @operation + @functools.wraps(method) + def _operation(self, x, *args, **kwargs): + return method(x, *args, **kwargs) + + setattr(self, item, partial(_operation, self)) + return getattr(self, item) + + raise AttributeError('Unknown operation {}.{}().'.format( + type(self).__name__, + item, + )) + + +CURSOR_TYPES['default'] = Cursor + + +class DictCursor(Cursor): + _type = dict + + @operation('default') + def get(self, x, path): + return x.get(path) + + @operation + def map_keys(self, x, mapping): + return {mapping.get(k): v for k, v in x.items()} + + +CURSOR_TYPES['dict'] = DictCursor + + +class StringCursor(Cursor): + _type = str + + +CURSOR_TYPES['str'] = StringCursor + + +class Factory(Configurable): + initialize = Method(required=False) + + def __init__(self, *args, **kwargs): + warnings.warn( + __file__ + + ' is experimental, API may change in the future, use it as a preview only and knowing the risks.', + FutureWarning + ) + super(Factory, self).__init__(*args, **kwargs) + self.default_cursor_type = 'default' + self.operations = [] + + if self.initialize is not None: + self.initialize(self) + + @factory_operation + def move(self, _from, _to, *args, **kwargs): + if _from == _to: + return args, kwargs + + if _isarg(_from): + value = args[_from] + args = args[:_from] + args[_from + 1:] + elif 
_iskwarg(_from): + value = kwargs[_from] + kwargs = {k: v for k, v in kwargs if k != _from} + else: + raise RuntimeError('Houston, we have a problem...') + + if _isarg(_to): + return (*args[:_to], value, *args[_to + 1:]), kwargs + elif _iskwarg(_to): + return args, {**kwargs, _to: value} + else: + raise RuntimeError('Houston, we have a problem...') + + def __call__(self, *args, **kwargs): + print('factory call on', args, kwargs) + for operation in self.operations: + args, kwargs = operation.apply(*args, **kwargs) + print(' ... after', operation, 'got', args, kwargs) + return Bag(*args, **kwargs) + + def __getitem__(self, item): + return CURSOR_TYPES[self.default_cursor_type](self, item) + + +if __name__ == '__main__': + f = Factory() + + f[0].dict().map_keys({'foo': 'F00'}) + f['foo'].str().upper() + + print('operations:', f.operations) + print(f({'foo': 'bisou'}, foo='blah')) +''' +specs: + +- rename keys of an input dict (in args, or kwargs) using a translation map. + + +f = Factory() + +f[0] +f['xxx'] = + +f[0].dict().get('foo.bar').move_to('foo.baz').apply(str.upper) +f[0].get('foo.*').items().map(str.lower) + +f['foo'].keys_map({ + 'a': 'b' +}) + +''' diff --git a/bonobo/nodes/io/base.py b/bonobo/nodes/io/base.py index d9b3212..496a0e8 100644 --- a/bonobo/nodes/io/base.py +++ b/bonobo/nodes/io/base.py @@ -1,5 +1,6 @@ from bonobo import settings from bonobo.config import Configurable, ContextProcessor, Option, Service +from bonobo.errors import UnrecoverableValueError, UnrecoverableNotImplementedError from bonobo.structs.bags import Bag @@ -9,7 +10,7 @@ class IOFormatEnabled(Configurable): def get_input(self, *args, **kwargs): if self.ioformat == settings.IOFORMAT_ARG0: if len(args) != 1 or len(kwargs): - raise ValueError( + raise UnrecoverableValueError( 'Wrong input formating: IOFORMAT=ARG0 implies one arg and no kwargs, got args={!r} and kwargs={!r}.'. 
format(args, kwargs) ) @@ -17,13 +18,13 @@ class IOFormatEnabled(Configurable): if self.ioformat == settings.IOFORMAT_KWARGS: if len(args) or not len(kwargs): - raise ValueError( + raise UnrecoverableValueError( 'Wrong input formating: IOFORMAT=KWARGS ioformat implies no arg, got args={!r} and kwargs={!r}.'. format(args, kwargs) ) return kwargs - raise NotImplementedError('Unsupported format.') + raise UnrecoverableNotImplementedError('Unsupported format.') def get_output(self, row): if self.ioformat == settings.IOFORMAT_ARG0: @@ -32,7 +33,7 @@ class IOFormatEnabled(Configurable): if self.ioformat == settings.IOFORMAT_KWARGS: return Bag(**row) - raise NotImplementedError('Unsupported format.') + raise UnrecoverableNotImplementedError('Unsupported format.') class FileHandler(Configurable): diff --git a/bonobo/nodes/io/csv.py b/bonobo/nodes/io/csv.py index ae68bd0..75fffe8 100644 --- a/bonobo/nodes/io/csv.py +++ b/bonobo/nodes/io/csv.py @@ -26,7 +26,7 @@ class CsvHandler(FileHandler): """ delimiter = Option(str, default=';') quotechar = Option(str, default='"') - headers = Option(tuple) + headers = Option(tuple, required=False) class CsvReader(IOFormatEnabled, FileReader, CsvHandler): diff --git a/bonobo/nodes/io/json.py b/bonobo/nodes/io/json.py index c6d9bf5..f1c6df0 100644 --- a/bonobo/nodes/io/json.py +++ b/bonobo/nodes/io/json.py @@ -4,6 +4,7 @@ from bonobo.config.processors import ContextProcessor from bonobo.constants import NOT_MODIFIED from bonobo.nodes.io.base import FileHandler, IOFormatEnabled from bonobo.nodes.io.file import FileReader, FileWriter +from bonobo.structs.bags import Bag class JsonHandler(FileHandler): @@ -19,6 +20,12 @@ class JsonReader(IOFormatEnabled, FileReader, JsonHandler): yield self.get_output(line) +class JsonDictItemsReader(JsonReader): + def read(self, fs, file): + for line in self.loader(file).items(): + yield Bag(*line) + + class JsonWriter(IOFormatEnabled, FileWriter, JsonHandler): @ContextProcessor def envelope(self, context, 
fs, file, lineno): diff --git a/bonobo/nodes/io/pickle.py b/bonobo/nodes/io/pickle.py index e94f94a..d9da55f 100644 --- a/bonobo/nodes/io/pickle.py +++ b/bonobo/nodes/io/pickle.py @@ -17,7 +17,7 @@ class PickleHandler(FileHandler): """ - item_names = Option(tuple) + item_names = Option(tuple, required=False) class PickleReader(IOFormatEnabled, FileReader, PickleHandler): diff --git a/bonobo/nodes/throttle.py b/bonobo/nodes/throttle.py new file mode 100644 index 0000000..58f5c09 --- /dev/null +++ b/bonobo/nodes/throttle.py @@ -0,0 +1,52 @@ +import threading +import time + +from bonobo.config import Configurable, ContextProcessor, Method, Option + + +class RateLimitBucket(threading.Thread): + daemon = True + + @property + def stopped(self): + return self._stop_event.is_set() + + def __init__(self, initial=1, period=1, amount=1): + super(RateLimitBucket, self).__init__() + self.semaphore = threading.BoundedSemaphore(initial) + self.amount = amount + self.period = period + + self._stop_event = threading.Event() + + def stop(self): + self._stop_event.set() + + def run(self): + while not self.stopped: + time.sleep(self.period) + for _ in range(self.amount): + self.semaphore.release() + + def wait(self): + return self.semaphore.acquire() + + +class RateLimited(Configurable): + handler = Method() + + initial = Option(int, positional=True, default=1) + period = Option(int, positional=True, default=1) + amount = Option(int, positional=True, default=1) + + @ContextProcessor + def bucket(self, context): + bucket = RateLimitBucket(self.initial, self.amount, self.period) + bucket.start() + yield bucket + bucket.stop() + bucket.join() + + def call(self, bucket, *args, **kwargs): + bucket.wait() + return self.handler(*args, **kwargs) diff --git a/bonobo/settings.py b/bonobo/settings.py index e0e5289..e5edd83 100644 --- a/bonobo/settings.py +++ b/bonobo/settings.py @@ -5,6 +5,10 @@ from bonobo.errors import ValidationError def to_bool(s): + if s is None: + return False + if type(s) 
is bool: + return s if len(s): if s.lower() in ('f', 'false', 'n', 'no', '0'): return False @@ -13,7 +17,18 @@ def to_bool(s): class Setting: - def __init__(self, name, default=None, validator=None): + __all__ = {} + + @classmethod + def clear_all(cls): + for setting in Setting.__all__.values(): + setting.clear() + + def __new__(cls, name, *args, **kwargs): + Setting.__all__[name] = super().__new__(cls) + return Setting.__all__[name] + + def __init__(self, name, default=None, validator=None, formatter=None): self.name = name if default: @@ -21,15 +36,14 @@ class Setting: else: self.default = lambda: None - if validator: - self.validator = validator - else: - self.validator = None + self.validator = validator + self.formatter = formatter def __repr__(self): - return ''.format(self.name, self.value) + return ''.format(self.name, self.get()) def set(self, value): + value = self.formatter(value) if self.formatter else value if self.validator and not self.validator(value): raise ValidationError('Invalid value {!r} for setting {}.'.format(value, self.name)) self.value = value @@ -38,21 +52,35 @@ class Setting: try: return self.value except AttributeError: - self.value = self.default() + value = os.environ.get(self.name, None) + if value is None: + value = self.default() + self.set(value) return self.value + def clear(self): + try: + del self.value + except AttributeError: + pass + # Debug/verbose mode. -DEBUG = to_bool(os.environ.get('DEBUG', 'f')) +DEBUG = Setting('DEBUG', formatter=to_bool, default=False) # Profile mode. -PROFILE = to_bool(os.environ.get('PROFILE', 'f')) +PROFILE = Setting('PROFILE', formatter=to_bool, default=False) # Quiet mode. -QUIET = to_bool(os.environ.get('QUIET', 'f')) +QUIET = Setting('QUIET', formatter=to_bool, default=False) # Logging level. 
-LOGGING_LEVEL = logging.DEBUG if DEBUG else logging.INFO +LOGGING_LEVEL = Setting( + 'LOGGING_LEVEL', + formatter=logging._checkLevel, + validator=logging._checkLevel, + default=lambda: logging.DEBUG if DEBUG.get() else logging.INFO +) # Input/Output format for transformations IOFORMAT_ARG0 = 'arg0' @@ -67,5 +95,8 @@ IOFORMAT = Setting('IOFORMAT', default=IOFORMAT_KWARGS, validator=IOFORMATS.__co def check(): - if DEBUG and QUIET: + if DEBUG.get() and QUIET.get(): raise RuntimeError('I cannot be verbose and quiet at the same time.') + + +clear_all = Setting.clear_all diff --git a/bonobo/strategies/executor.py b/bonobo/strategies/executor.py index 44d206e..a0bd4f4 100644 --- a/bonobo/strategies/executor.py +++ b/bonobo/strategies/executor.py @@ -21,7 +21,7 @@ class ExecutorStrategy(Strategy): def execute(self, graph, *args, plugins=None, services=None, **kwargs): context = self.create_graph_execution_context(graph, plugins=plugins, services=services) - context.recv(BEGIN, Bag(), END) + context.write(BEGIN, Bag(), END) executor = self.create_executor() @@ -57,7 +57,7 @@ class ExecutorStrategy(Strategy): futures.append(executor.submit(_runner)) while context.alive: - time.sleep(0.2) + time.sleep(0.1) for plugin_context in context.plugins: plugin_context.shutdown() diff --git a/bonobo/strategies/naive.py b/bonobo/strategies/naive.py index b93a2e9..cab9c57 100644 --- a/bonobo/strategies/naive.py +++ b/bonobo/strategies/naive.py @@ -6,7 +6,7 @@ from bonobo.structs.bags import Bag class NaiveStrategy(Strategy): def execute(self, graph, *args, plugins=None, **kwargs): context = self.create_graph_execution_context(graph, plugins=plugins) - context.recv(BEGIN, Bag(), END) + context.write(BEGIN, Bag(), END) # TODO: how to run plugins in "naive" mode ? 
context.start() diff --git a/bonobo/structs/__init__.py b/bonobo/structs/__init__.py index 15e76a9..6c0d9ab 100644 --- a/bonobo/structs/__init__.py +++ b/bonobo/structs/__init__.py @@ -1,5 +1,10 @@ -from bonobo.structs.bags import Bag +from bonobo.structs.bags import Bag, ErrorBag from bonobo.structs.graphs import Graph from bonobo.structs.tokens import Token -__all__ = ['Bag', 'Graph', 'Token'] +__all__ = [ + 'Bag', + 'ErrorBag', + 'Graph', + 'Token', +] diff --git a/bonobo/structs/bags.py b/bonobo/structs/bags.py index 4ef2fa7..3eae9ff 100644 --- a/bonobo/structs/bags.py +++ b/bonobo/structs/bags.py @@ -1,6 +1,6 @@ import itertools -from bonobo.constants import INHERIT_INPUT +from bonobo.constants import INHERIT_INPUT, LOOPBACK __all__ = [ 'Bag', @@ -33,8 +33,10 @@ class Bag: """ + default_flags = () + def __init__(self, *args, _flags=None, _parent=None, **kwargs): - self._flags = _flags or () + self._flags = type(self).default_flags + (_flags or ()) self._parent = _parent self._args = args self._kwargs = kwargs @@ -106,5 +108,9 @@ class Bag: ) +class LoopbackBag(Bag): + default_flags = (LOOPBACK, ) + + class ErrorBag(Bag): pass diff --git a/bonobo/structs/graphs.py b/bonobo/structs/graphs.py index ccafb6b..fe7c1df 100644 --- a/bonobo/structs/graphs.py +++ b/bonobo/structs/graphs.py @@ -1,3 +1,5 @@ +from copy import copy + from bonobo.constants import BEGIN @@ -62,6 +64,15 @@ class Graph: return self + def copy(self): + g = Graph() + + g.edges = copy(self.edges) + g.named = copy(self.named) + g.nodes = copy(self.nodes) + + return g + @property def topologically_sorted_indexes(self): """Iterate in topological order, based on networkx's topological_sort() function. 
diff --git a/bonobo/structs/inputs.py b/bonobo/structs/inputs.py index cf9a6ec..7cfe12f 100644 --- a/bonobo/structs/inputs.py +++ b/bonobo/structs/inputs.py @@ -77,6 +77,12 @@ class Input(Queue, Readable, Writable): return Queue.put(self, data, block, timeout) + def _decrement_runlevel(self): + if self._runlevel == 1: + self.on_finalize() + self._runlevel -= 1 + self.on_end() + def get(self, block=True, timeout=None): if not self.alive: raise InactiveReadableError('Cannot get() on an inactive {}.'.format(Readable.__name__)) @@ -84,13 +90,7 @@ class Input(Queue, Readable, Writable): data = Queue.get(self, block, timeout) if data == END: - if self._runlevel == 1: - self.on_finalize() - - self._runlevel -= 1 - - # callback - self.on_end() + self._decrement_runlevel() if not self.alive: raise InactiveReadableError( @@ -100,6 +100,10 @@ class Input(Queue, Readable, Writable): return data + def shutdown(self): + while self._runlevel >= 1: + self._decrement_runlevel() + def empty(self): self.mutex.acquire() while self._qsize() and self.queue[0] == END: diff --git a/bonobo/util/__init__.py b/bonobo/util/__init__.py index 8b13789..df14e9a 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -1 +1,33 @@ +from bonobo.util.collections import sortedlist +from bonobo.util.inspect import ( + inspect_node, + isbag, + isconfigurable, + isconfigurabletype, + iscontextprocessor, + iserrorbag, + isloopbackbag, + ismethod, + isoption, + istype, +) +from bonobo.util.objects import (get_name, get_attribute_or_create, ValueHolder) +from bonobo.util.python import require +# Bonobo's util API +__all__ = [ + 'ValueHolder', + 'get_attribute_or_create', + 'get_name', + 'inspect_node', + 'isbag', + 'isconfigurable', + 'isconfigurabletype', + 'iscontextprocessor', + 'iserrorbag', + 'isloopbackbag', + 'ismethod', + 'isoption', + 'istype', + 'require', +] diff --git a/bonobo/util/collections.py b/bonobo/util/collections.py new file mode 100644 index 0000000..b97630a --- /dev/null 
+++ b/bonobo/util/collections.py @@ -0,0 +1,6 @@ +import bisect + + +class sortedlist(list): + def insort(self, x): + bisect.insort(self, x) \ No newline at end of file diff --git a/bonobo/util/errors.py b/bonobo/util/errors.py index 0ea4e58..cae2789 100644 --- a/bonobo/util/errors.py +++ b/bonobo/util/errors.py @@ -1,13 +1,6 @@ import sys from textwrap import indent -from bonobo import settings -from bonobo.structs.bags import ErrorBag - - -def is_error(bag): - return isinstance(bag, ErrorBag) - def _get_error_message(exc): if hasattr(exc, '__str__'): diff --git a/bonobo/util/inspect.py b/bonobo/util/inspect.py new file mode 100644 index 0000000..f9ae4d8 --- /dev/null +++ b/bonobo/util/inspect.py @@ -0,0 +1,149 @@ +from collections import namedtuple + + +def isconfigurable(mixed): + """ + Check if the given argument is an instance of :class:`bonobo.config.Configurable`. + + :param mixed: + :return: bool + """ + from bonobo.config.configurables import Configurable + return isinstance(mixed, Configurable) + + +def isconfigurabletype(mixed): + """ + Check if the given argument is an instance of :class:`bonobo.config.ConfigurableMeta`, meaning it has all the + plumbery necessary to build :class:`bonobo.config.Configurable`-like instances. + + :param mixed: + :return: bool + """ + from bonobo.config.configurables import ConfigurableMeta + return isinstance(mixed, ConfigurableMeta) + + +def isoption(mixed): + """ + Check if the given argument is an instance of :class:`bonobo.config.Option`. + + :param mixed: + :return: bool + """ + + from bonobo.config.options import Option + return isinstance(mixed, Option) + + +def ismethod(mixed): + """ + Check if the given argument is an instance of :class:`bonobo.config.Method`. + + :param mixed: + :return: bool + """ + from bonobo.config.options import Method + return isinstance(mixed, Method) + + +def iscontextprocessor(x): + """ + Check if the given argument is an instance of :class:`bonobo.config.ContextProcessor`. 
+ + :param mixed: + :return: bool + """ + from bonobo.config.processors import ContextProcessor + return isinstance(x, ContextProcessor) + + +def istype(mixed): + """ + Check if the given argument is a type object. + + :param mixed: + :return: bool + """ + return isinstance(mixed, type) + + +def isbag(mixed): + """ + Check if the given argument is an instance of a :class:`bonobo.Bag`. + + :param mixed: + :return: bool + """ + from bonobo.structs.bags import Bag + return isinstance(mixed, Bag) + + +def iserrorbag(mixed): + """ + Check if the given argument is an instance of an :class:`bonobo.ErrorBag`. + + :param mixed: + :return: bool + """ + from bonobo.structs.bags import ErrorBag + return isinstance(mixed, ErrorBag) + + +def isloopbackbag(mixed): + """ + Check if the given argument is an instance of a :class:`bonobo.Bag`, marked for loopback behaviour. + + :param mixed: + :return: bool + """ + from bonobo.constants import LOOPBACK + return isbag(mixed) and LOOPBACK in mixed.flags + + +ConfigurableInspection = namedtuple( + 'ConfigurableInspection', [ + 'type', + 'instance', + 'options', + 'processors', + 'partial', + ] +) + +ConfigurableInspection.__enter__ = lambda self: self +ConfigurableInspection.__exit__ = lambda *exc_details: None + + +def inspect_node(mixed, *, _partial=None): + """ + If the given argument is somehow a :class:`bonobo.config.Configurable` object (either a subclass, an instance, or + a partially configured instance), then it will return a :class:`ConfigurableInspection` namedtuple, used to inspect + the configurable metadata (options). If you want to get the option values, you don't need this, it is only usefull + to perform introspection on a configurable. + + If it's not looking like a configurable, it will raise a :class:`TypeError`. 
+ + :param mixed: + :return: ConfigurableInspection + + :raise: TypeError + """ + if isconfigurabletype(mixed): + inst, typ = None, mixed + elif isconfigurable(mixed): + inst, typ = mixed, type(mixed) + elif hasattr(mixed, 'func'): + return inspect_node(mixed.func, _partial=(mixed.args, mixed.keywords)) + else: + raise TypeError( + 'Not a Configurable, nor a Configurable instance and not even a partially configured Configurable. Check your inputs.' + ) + + return ConfigurableInspection( + typ, + inst, + list(typ.__options__), + list(typ.__processors__), + _partial, + ) diff --git a/docs/_templates/index.html b/docs/_templates/index.html index 8f9185a..33c4116 100644 --- a/docs/_templates/index.html +++ b/docs/_templates/index.html @@ -9,14 +9,37 @@

{% trans %} - Bonobo is a line-by-line data-processing toolkit for python 3.5+ (extract-transform-load - framework, or ETL) emphasizing simple and atomic data transformations defined using a directed graph of plain old - python objects (functions, iterables, generators, ...). + Bonobo is an Extract Transform Load framework for the Python (3.5+) language. {% endtrans %}

-
- Bonobo is ALPHA software. Some APIs will change. +

+ {% trans %} + It works by streaming data through a directed acyclic graph of python callables, one row at a time. + {% endtrans %} +

+ +

+ {% trans %} + It is targeting small scale data (as in “not big data”), allowing it to be quick and easy to install (no + client-server, no daemon, lightweight requirements, no surprises). + {% endtrans %} +

+

+ {% trans %} + Most common file formats (XML, CSV, JSON, Excel, ...) and basic services (SQL databases, REST web services, ...) can + be worked with using the built-in or extension libraries; other services can benefit from the richness of PyPI. + {% endtrans %} +

+

+ {% trans %} + In short, Bonobo contains the logic to efficiently execute an ETL process, the glue to use plain old python + objects and common operations, readers and writers. For the rest, it's just python! + {% endtrans %} +

+ +
+ Bonobo is currently released as alpha version. Expect some APIs to change.
@@ -25,25 +48,25 @@
- -
- - -
-

Features

-
    -
  • - {% trans %} - 10 minutes to get started: Know some python? Writing your first data processor is an affair - of minutes. - {% endtrans %} -
  • -
  • - {% trans %} - Data sources and targets: HTML, JSON, XML, SQL databases, NoSQL databases, HTTP/REST APIs, - streaming APIs, python objects... - {% endtrans %} -
  • -
  • - {% trans %} - Service injection: Abstract the transformation dependencies to easily switch data sources and - dependant libraries. You'll be able to specify the concrete implementations or configurations at - runtime, for example to switch a database connection string or an API endpoint. - {% endtrans %} -
  • -
  • - {% trans %} - Plugins: Easily add features to all your transformations by using builtin plugins (Jupyter, - Console, ...) or write your own. - {% endtrans %} -
  • -
  • - {% trans %} - Bonobo is young, and the todo-list is huge. Read the roadmap. - {% endtrans %} -
  • -
- -

{% trans %} - You can also download PDF/EPUB versions of the Bonobo documentation: - PDF version, - EPUB version. - {% endtrans %} -

Table of contents

-
{{ toctree(maxdepth=2, collapse=False)}}
+ {% endblock %} diff --git a/docs/_templates/sidebarintro.html b/docs/_templates/sidebarintro.html index fc8acf2..ef4ad45 100644 --- a/docs/_templates/sidebarintro.html +++ b/docs/_templates/sidebarintro.html @@ -9,14 +9,14 @@ You can download the documentation in other formats as well:

Useful Links

\ No newline at end of file +
  • Bonobo ETL
  • +
  • Bonobo ETL @ PyPI
  • +
  • Bonobo ETL @ GitHub
  • + diff --git a/docs/changelog.rst b/docs/changelog.rst index 2f12063..a049822 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,70 @@ Changelog ========= +v.0.5.0 - 5 october 2017 +:::::::::::::::::::::::: + +Important highlights +-------------------- + +* `bonobo.pprint` and `bonobo.PrettyPrint` have been removed, in favor of `bonobo.PrettyPrinter` (BC break). +* The `bonobo.config` API has suffered a major refactoring. It has been done carefully and most of your code should + work unchanged, but you may have surprises. This was necessary for this API to be more uniform (potential BC break). +* bonobo.pprint and bonobo.PrettyPrint have been removed, in favor of new bonobo.PrettyPrinter() generic printer. If + you're still using the old versions, time to switch (BC break). +* Secondary APIs start to be more uniform (bonobo.config, bonobo.util). + +New features +------------ + +Graphs & Nodes +.............. + +* Graphs now have a .copy() method. +* New helper transformations arg0_to_kwargs and kwargs_to_arg0. +* The unique pretty printer provided by the core library is now bonobo.PrettyPrinter(). +* Services now have "fs" and "http" configured by default. + +Command line +............ + +* New `bonobo convert` command now allows to run simple conversion jobs without coding anything. +* New `bonobo inspect` command now allows to generate graphviz source for graph visualization. +* Passing environment variables to graph executions now can be done using -e/--env. (cwandrews) +* Add ability to install requirements with for a requirements.txt residing in the same dir (Alex Vykaliuk) + +Preview +....... + +* A "transformation factory" makes its first appearance. It is considered a preview unstable feature. Stay + tuned. + +Internals +--------- + +* Configurables have undergone a refactoring, all types of descriptors should now behave in the same way. +* An UnrecoverrableError exception subclass allows for some errors to stop the whole execution. 
+* Refactoring of Settings (bonobo.settings). +* Add a reference to graph context (private) in service container. +* A few internal API changes and refactorings. + +Bugfixes +-------- + +* Check if PluginExecutionContext was started before shutting it down. (Vitalii Vokhmin) +* Move patch one level up because importlib breaks all the CI tools. (Alex Vykaliuk) +* Do not fail in ipykernel without ipywidgets. (Alex Vykaliuk) +* Escaping issues (Tomas Zubiri) + +Miscellaneous +------------- + +* Windows console output should now be correct. (Parthiv20) +* Various bugfixes. +* More readable statistics on Ubuntu workstation standard terminal (spagoc) +* Documentation, more documentation, documentation again. + + v.0.4.3 - 16 july 2017 :::::::::::::::::::::: diff --git a/docs/guide/ext/docker.rst b/docs/extension/docker.rst similarity index 86% rename from docs/guide/ext/docker.rst rename to docs/extension/docker.rst index 8ff667f..f0fd17c 100644 --- a/docs/guide/ext/docker.rst +++ b/docs/extension/docker.rst @@ -1,5 +1,5 @@ -Bonobo with Docker -================== +Docker Extension +================ .. todo:: The `bonobo-docker` package is at a very alpha stage, and things will change. This section is here to give a brief overview but is neither complete nor definitive. diff --git a/docs/extension/index.rst b/docs/extension/index.rst new file mode 100644 index 0000000..53f10be --- /dev/null +++ b/docs/extension/index.rst @@ -0,0 +1,14 @@ +Extensions +========== + +Extensions contain all things needed to work with a few popular third party tools. + +Most of them are available as optional extra dependencies, and the maturity stage of each may vary. + +.. 
toctree:: + :maxdepth: 2 + + docker + jupyter + selenium + sqlalchemy diff --git a/docs/guide/ext/jupyter.rst b/docs/extension/jupyter.rst similarity index 81% rename from docs/guide/ext/jupyter.rst rename to docs/extension/jupyter.rst index 6e96bf6..6c3385f 100644 --- a/docs/guide/ext/jupyter.rst +++ b/docs/extension/jupyter.rst @@ -1,8 +1,8 @@ -Bonobo with Jupyter -=================== +Jupyter Extension +================= -There is a builtin plugin that integrates (kind of minimalistically, for now) bonobo within jupyter notebooks, so -you can read the execution status of a graph within a nice (ok not so nice) html/javascript widget. +There is a builtin plugin that integrates (somewhat minimallistically, for now) bonobo within jupyter notebooks, so +you can read the execution status of a graph within a nice (ok, not so nice) html/javascript widget. See https://github.com/jupyter-widgets/widget-cookiecutter for the base template used. diff --git a/docs/guide/ext/selenium.rst b/docs/extension/selenium.rst similarity index 95% rename from docs/guide/ext/selenium.rst rename to docs/extension/selenium.rst index e588cd6..c262c13 100644 --- a/docs/guide/ext/selenium.rst +++ b/docs/extension/selenium.rst @@ -1,5 +1,5 @@ -Bonobo with Selenium -==================== +Selenium Extension +================== .. todo:: The `bonobo-selenium` package is at a very alpha stage, and things will change. This section is here to give a brief overview but is neither complete nor definitive. diff --git a/docs/guide/ext/sqlalchemy.rst b/docs/extension/sqlalchemy.rst similarity index 87% rename from docs/guide/ext/sqlalchemy.rst rename to docs/extension/sqlalchemy.rst index d7da4e8..734bbf6 100644 --- a/docs/guide/ext/sqlalchemy.rst +++ b/docs/extension/sqlalchemy.rst @@ -1,5 +1,5 @@ -Bonobo with SQLAlchemy -====================== +SQLAlchemy Extension +==================== .. todo:: The `bonobo-sqlalchemy` package is at a very alpha stage, and things will change. 
This section is here to give a brief overview but is neither complete nor definitive. diff --git a/docs/guide/environment.rst b/docs/guide/environment.rst new file mode 100644 index 0000000..203368d --- /dev/null +++ b/docs/guide/environment.rst @@ -0,0 +1,78 @@ +Environment Variables +===================== + +Best practice holds that variables should be passed to graphs via environment variables. +Doing this is important for keeping sensitive data out of the code - such as an +API token or username and password used to access a database. Not only is this +approach more secure, it also makes graphs more flexible by allowing adjustments +for a variety of environments and contexts. Importantly, environment variables +are also the means by-which arguments can be passed to graphs. + +.. note:: + + This document is about using your own settings and configuration values. If you're looking for bonobo's builtin + settings, also configurable using environment variables, please check :doc:`/reference/settings`. + +Passing / Setting Environment Variables +::::::::::::::::::::::::::::::::::::::: + +Setting environment variables for your graphs to use can be done in a variety of ways and which one used can vary +based-upon context. Perhaps the most immediate and simple way to set/override a variable for a given graph is +simply to use the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc). +``--env`` (or ``-e`` for short) should then be followed by the variable name and value using the +syntax ``VAR_NAME=VAR_VALUE``. Multiple environment variables can be passed by using multiple ``--env`` / ``-e`` flags +(i.e. ``bonobo run --env FIZZ=buzz ...`` and ``bonobo run --env FIZZ=buzz --env Foo=bar ...``). Additionally, in bash +you can also set environment variables by listing those you wish to set before the `bonobo run` command with space +separating the key-value pairs (i.e. 
``FIZZ=buzz bonobo run ...`` or ``FIZZ=buzz FOO=bar bonobo run ...``). + +The examples below demonstrate setting one or multiple variables using both of these methods: + +.. code-block:: bash + + # Using one environment variable via --env flag: + bonobo run csvsanitizer --env SECRET_TOKEN=secret123 + + # Using multiple environment variables via -e (env) flag: + bonobo run csvsanitizer -e SRC_FILE=inventory.txt -e DST_FILE=inventory_processed.csv + + # Using one environment variable inline (bash only): + SECRET_TOKEN=secret123 bonobo run csvsanitizer + + # Using multiple environment variables inline (bash only): + SRC_FILE=inventory.txt DST_FILE=inventory_processed.csv bonobo run csvsanitizer + +*Though not yet implemented, the bonobo roadmap includes implementing environment / .env files as well.* + +Accessing Environment Variables from within the Graph Context +::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +Environment variables, whether set globally or only for the scope of the graph, +can be accessed using any of the normal means. It is important to note +that whether set globally for the system or just for the graph context, +environment variables are accessed by bonobo in the same way. In the example +below the database user and password are accessed via the ``os`` module's ``getenv`` +function and used to get data from the database. + +.. 
code-block:: python + + import os + + import bonobo + from bonobo.config import use + + + DB_USER = os.getenv('DB_USER') + DB_PASS = os.getenv('DB_PASS') + + + @use('database') + def extract(database): + with database.connect(DB_USER, DB_PASS) as conn: + yield from conn.query_all() + + + graph = bonobo.Graph( + extract, + bonobo.PrettyPrinter(), + ) + diff --git a/docs/guide/graphs.rst b/docs/guide/graphs.rst new file mode 100644 index 0000000..14af705 --- /dev/null +++ b/docs/guide/graphs.rst @@ -0,0 +1,11 @@ +Graphs +====== + +Writing graphs +:::::::::::::: + +Debugging graphs +:::::::::::::::: + +Executing graphs +:::::::::::::::: diff --git a/docs/guide/index.rst b/docs/guide/index.rst index 18e5565..76e426a 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -1,28 +1,14 @@ Guides ====== -Concepts and best practices -::::::::::::::::::::::::::: - -There are a few things that you should know while writing transformations graphs with bonobo. +Here are a few guides and best practices to work with bonobo. .. toctree:: :maxdepth: 2 - purity + graphs transformations services + environment + purity -Third party integrations -:::::::::::::::::::::::: - -There is a few **bonobo** extensions that ease the use of the library with third party tools. Each integration is -available as an optional extra dependency, and the maturity stage of each extension vary. - -.. toctree:: - :maxdepth: 2 - - ext/docker - ext/jupyter - ext/selenium - ext/sqlalchemy diff --git a/docs/guide/purity.rst b/docs/guide/purity.rst index e01ff38..ba0d56e 100644 --- a/docs/guide/purity.rst +++ b/docs/guide/purity.rst @@ -1,34 +1,39 @@ -Pure transformations -==================== +Best Practices +============== The nature of components, and how the data flow from one to another, can be a bit tricky. Hopefully, they should be very easy to write with a few hints. 
-The major problem we have is that one message (underlying implementation: :class:`bonobo.structs.bags.Bag`) can go -through more than one component, and at the same time. If you wanna be safe, you tend to :func:`copy.copy()` everything -between two calls to two different components, but that's very expensive. +Pure transformations +:::::::::::::::::::: -Instead of that, we chosed the oposite: copies are never made, and you should not modify in place the inputs of your -component before yielding them, and that mostly means that you want to recreate dicts and lists before yielding (or -returning) them. Numeric values, strings and tuples being immutable in python, modifying a variable of one of those -type will already return a different instance. +One “message” (a.k.a :class:`bonobo.Bag` instance) may go through more than one component, and at the same time. +To ensure your code is safe, one could :func:`copy.copy()` each message on each transformation input but that's quite +expensive, especially because it may not be needed. + +Instead, we chose the opposite: copies are never made; instead, you should not modify in place the inputs of your +component before yielding them, which mostly means that you want to recreate dicts and lists before yielding if +their values changed. + +Numeric values, strings and tuples being immutable in python, modifying a variable of one of those type will already +return a different instance. Examples will be shown with `return` statements, of course you can do the same with `yield` statements in generators. Numbers -::::::: +------- In python, numbers are immutable. So you can't be wrong with numbers. All of the following are correct. .. 
code-block:: python - def do_your_number_thing(n: int) -> int: + def do_your_number_thing(n): return n - def do_your_number_thing(n: int) -> int: + def do_your_number_thing(n): return n + 1 - def do_your_number_thing(n: int) -> int: + def do_your_number_thing(n): # correct, but bad style n += 1 return n @@ -37,37 +42,37 @@ The same is true with other numeric types, so don't be shy. Tuples -:::::: +------ Tuples are immutable, so you risk nothing. .. code-block:: python - def do_your_tuple_thing(t: tuple) -> tuple: + def do_your_tuple_thing(t): return ('foo', ) + t - def do_your_tuple_thing(t: tuple) -> tuple: + def do_your_tuple_thing(t): return t + ('bar', ) - def do_your_tuple_thing(t: tuple) -> tuple: + def do_your_tuple_thing(t): # correct, but bad style t += ('baaaz', ) return t Strings -::::::: +------- -You know the drill, strings are immutable. +You know the drill, strings are immutable, too. .. code-block:: python - def do_your_str_thing(t: str) -> str: + def do_your_str_thing(t): return 'foo ' + t + ' bar' - def do_your_str_thing(t: str) -> str: + def do_your_str_thing(t): return ' '.join(('foo', t, 'bar', )) - def do_your_str_thing(t: str) -> str: + def do_your_str_thing(t): return 'foo {} bar'.format(t) You can, if you're using python 3.6+, use `f-strings `_, @@ -75,15 +80,15 @@ but the core bonobo libraries won't use it to stay 3.5 compatible. Dicts -::::: +----- So, now it gets interesting. Dicts are mutable. It means that you can mess things up if you're not cautious. -For example, doing the following may cause unexpected problems: +For example, doing the following may (will) cause unexpected problems: .. code-block:: python - def mutate_my_dict_like_crazy(d: dict) -> dict: + def mutate_my_dict_like_crazy(d): # Bad! Don't do that! d.update({ 'foo': compute_something() @@ -112,7 +117,7 @@ Now let's see how to do it correctly: .. 
code-block:: python - def new_dicts_like_crazy(d: dict) -> dict: + def new_dicts_like_crazy(d): # Creating a new dict is correct. return { **d, @@ -120,7 +125,7 @@ Now let's see how to do it correctly: 'bar': compute_anotherthing(), } - def new_dict_and_yield() -> dict: + def new_dict_and_yield(): d = {} for i in range(100): # Different dict each time. @@ -128,13 +133,13 @@ Now let's see how to do it correctly: 'index': i } -I hear you think «Yeah, but if I create like millions of dicts ...». +I bet you think «Yeah, but if I create like millions of dicts ...». -Let's say we chosed the oposite way and copy the dict outside the transformation (in fact, `it's what we did in bonobo's +Let's say we chose the opposite way and copied the dict outside the transformation (in fact, `it's what we did in bonobo's ancestor `_). This means you will also create the -same number of dicts, the difference is that you won't even notice it. Also, it means that if you want to yield 1 million -times the same dict, going "pure" makes it efficient (you'll just yield the same object 1 million times) while going "copy -crazy" will create 1 million objects. +same number of dicts, the difference is that you won't even notice it. Also, it means that if you want to yield the same +dict 1 million times, going "pure" makes it efficient (you'll just yield the same object 1 million times) while going +"copy crazy" would create 1 million identical objects. Using dicts like this will create a lot of dicts, but also free them as soon as all the future components that take this dict as input are done. 
Also, one important thing to note is that most primitive data structures in python are immutable, so creating diff --git a/docs/guide/services.rst b/docs/guide/services.rst index cf7ecc7..4e1a22c 100644 --- a/docs/guide/services.rst +++ b/docs/guide/services.rst @@ -12,8 +12,8 @@ If you're going a little further than that, you'll feel limited, for a few reaso * Hardcoded and tightly linked dependencies make your transformations hard to test, and hard to reuse. * Processing data on your laptop is great, but being able to do it on different target systems (or stages), in different - environments, is more realistic. You'll want to contigure a different database on a staging environment, - preprod environment or production system. Maybe you have silimar systems for different clients and want to select + environments, is more realistic. You'll want to configure a different database on a staging environment, + pre-production environment, or production system. Maybe you have similar systems for different clients and want to select the system at runtime. Etc. Service injection @@ -44,7 +44,7 @@ Let's define such a transformation: 'category': database.get_category_name_for_sku(row['sku']) } -This piece of code tells bonobo that your transformation expect a sercive called "primary_sql_database", that will be +This piece of code tells bonobo that your transformation expect a service called "primary_sql_database", that will be injected to your calls under the parameter name "database". 
Function-based transformations diff --git a/docs/guide/transformations.rst b/docs/guide/transformations.rst index 8222357..e0fc347 100644 --- a/docs/guide/transformations.rst +++ b/docs/guide/transformations.rst @@ -22,7 +22,7 @@ underscores and lowercase names: def uppercase(s: str) -> str: return s.upper() -If you're naming something which is configurable, that will need to be instanciated or called to obtain something that +If you're naming something which is configurable, that will need to be instantiated or called to obtain something that can be used as a graph node, then use camelcase names: .. code-block:: python diff --git a/docs/index.rst b/docs/index.rst index 8fbcd6e..1d6b708 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,6 +7,7 @@ Bonobo install tutorial/index guide/index + extension/index reference/index faq contribute/index diff --git a/docs/install.rst b/docs/install.rst index 87df3d3..c006c88 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -1,6 +1,7 @@ Installation ============ + Create an ETL project ::::::::::::::::::::: @@ -15,6 +16,7 @@ Creating a project and starting to write code should take less than a minute: Once you bootstrapped a project, you can start editing the default example transformation by editing `my-etl-project/main.py`. Now, you can head to :doc:`tutorial/index`. + Other installation options :::::::::::::::::::::::::: @@ -27,6 +29,7 @@ You can install it directly from the `Python Package Index