diff --git a/Makefile b/Makefile index 6db0f7c..3ea8912 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # This file has been auto-generated. # All changes will be lost, see Projectfile. # -# Updated at 2017-05-03 18:02:59.359160 +# Updated at 2017-05-22 19:54:27.969596 PACKAGE ?= bonobo PYTHON ?= $(shell which python) diff --git a/README.rst b/README.rst index 5e7deba..59b1d10 100644 --- a/README.rst +++ b/README.rst @@ -7,11 +7,12 @@ Data-processing for humans. .. image:: https://img.shields.io/pypi/v/bonobo.svg :target: https://pypi.python.org/pypi/bonobo :alt: PyPI + .. image:: https://img.shields.io/pypi/pyversions/bonobo.svg :target: https://pypi.python.org/pypi/bonobo :alt: Versions -.. image:: https://readthedocs.org/projects/bonobo/badge/?version=0.3 +.. image:: https://readthedocs.org/projects/bonobo/badge/?version=latest :target: http://docs.bonobo-project.org/ :alt: Documentation @@ -50,12 +51,12 @@ so as though it may not yet be complete or fully stable (please, allow us to rea ---- +Homepage: https://www.bonobo-project.org/ (`Roadmap `_) + Documentation: http://docs.bonobo-project.org/ Issues: https://github.com/python-bonobo/bonobo/issues -Roadmap: https://www.bonobo-project.org/roadmap - Slack: https://bonobo-slack.herokuapp.com/ Release announcements: http://eepurl.com/csHFKL diff --git a/bonobo/_api.py b/bonobo/_api.py index 41e9623..ca5f363 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,5 +1,3 @@ -import warnings - from bonobo.structs import Bag, Graph, Token from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ PrettyPrint, Tee, count, identity, noop, pprint @@ -85,7 +83,9 @@ def open_fs(fs_url, *args, **kwargs): :returns: :class:`~fs.base.FS` object """ from fs import open_fs as _open_fs - return _open_fs(str(fs_url), *args, **kwargs) + from os.path import expanduser + + return _open_fs(expanduser(str(fs_url)), *args, **kwargs) # bonobo.nodes diff --git a/bonobo/_version.py b/bonobo/_version.py index 15e8324..0404d81 100644 --- a/bonobo/_version.py +++ b/bonobo/_version.py @@ -1 +1 @@ -__version__ = '0.3.0a1' +__version__ = '0.3.0' diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index 2ff2283..25fe956 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -20,10 +20,8 @@ def get_default_services(filename, services=None): '__name__': '__bonobo__', '__file__': services_filename, } - try: - exec(code, context) - except Exception: - raise + exec(code, context) + return { **context[DEFAULT_SERVICES_ATTR](), **(services or {}), diff --git a/bonobo/config/__init__.py b/bonobo/config/__init__.py index 9fc9971..8e662c4 100644 --- a/bonobo/config/__init__.py +++ b/bonobo/config/__init__.py @@ -1,13 +1,15 @@ from bonobo.config.configurables import Configurable from bonobo.config.options import Option, Method from bonobo.config.processors import ContextProcessor -from bonobo.config.services import Container, Service +from bonobo.config.services import Container, Service, Exclusive +# bonobo.config public programming interface __all__ = [ 'Configurable', 'Container', 'ContextProcessor', - 'Option', + 'Exclusive', 'Method', + 'Option', 'Service', ] diff --git a/bonobo/config/configurables.py b/bonobo/config/configurables.py index aef371b..43cb8c2 100644 --- a/bonobo/config/configurables.py +++ b/bonobo/config/configurables.py @@ -1,6 +1,6 @@ from bonobo.config.options import Method, Option from bonobo.config.processors import ContextProcessor -from bonobo.errors import ConfigurationError +from bonobo.errors import ConfigurationError, AbstractError __all__ = [ 'Configurable', diff --git a/bonobo/config/processors.py b/bonobo/config/processors.py index fc89fd4..d441b6e 100644 --- a/bonobo/config/processors.py +++ b/bonobo/config/processors.py @@ -1,4 +1,3 @@ -import types from collections import Iterable from contextlib import contextmanager @@ -132,14 +131,7 @@ def resolve_processors(mixed): try: yield from mixed.__processors__ except AttributeError: - # old code, deprecated usage - if isinstance(mixed, types.FunctionType): - yield from getattr(mixed, _CONTEXT_PROCESSORS_ATTR, ()) - - for cls in reversed((mixed if isinstance(mixed, type) else type(mixed)).__mro__): - yield from cls.__dict__.get(_CONTEXT_PROCESSORS_ATTR, ()) - - return () + yield from () get_context_processors = deprecated_alias('get_context_processors', resolve_processors) diff --git a/bonobo/config/services.py b/bonobo/config/services.py index 8d6c95e..4a91668 100644 --- a/bonobo/config/services.py +++ b/bonobo/config/services.py @@ -1,7 +1,10 @@ import re +import threading import types +from contextlib import ContextDecorator from bonobo.config.options import Option +from bonobo.errors import MissingServiceImplementationError _service_name_re = re.compile(r"^[^\d\W]\w*(:?\.[^\d\W]\w*)*$", re.UNICODE) @@ -78,8 +81,48 @@ class Container(dict): if not name in self: if default: return default - raise KeyError('Cannot resolve service {!r} using provided service collection.'.format(name)) + raise MissingServiceImplementationError( + 'Cannot resolve service {!r} using provided service collection.'.format(name) + ) value = super().get(name) + # XXX this is not documented and can lead to errors. if isinstance(value, types.LambdaType): value = value(self) return value + + +class Exclusive(ContextDecorator): + """ + Decorator and context manager used to require exclusive usage of an object, most probably a service. It's usefull + for example if call order matters on a service implementation (think of an http api that requires a nonce or version + parameter ...). + + Usage: + + >>> def handler(some_service): + ... with Exclusive(some_service): + ... some_service.call_1() + ... some_service.call_2() + ... some_service.call_3() + + This will ensure that nobody else is using the same service while in the "with" block, using a lock primitive to + ensure that. + + """ + _locks = {} + + def __init__(self, wrapped): + self._wrapped = wrapped + + def get_lock(self): + _id = id(self._wrapped) + if not _id in Exclusive._locks: + Exclusive._locks[_id] = threading.RLock() + return Exclusive._locks[_id] + + def __enter__(self): + self.get_lock().acquire() + return self._wrapped + + def __exit__(self, *exc): + self.get_lock().release() diff --git a/bonobo/errors.py b/bonobo/errors.py index 4a2e9c5..564950d 100644 --- a/bonobo/errors.py +++ b/bonobo/errors.py @@ -55,4 +55,8 @@ class ProhibitedOperationError(RuntimeError): class ConfigurationError(Exception): - pass \ No newline at end of file + pass + + +class MissingServiceImplementationError(KeyError): + pass diff --git a/bonobo/examples/nodes/_services.py b/bonobo/examples/nodes/_services.py new file mode 100644 index 0000000..337bf6b --- /dev/null +++ b/bonobo/examples/nodes/_services.py @@ -0,0 +1,5 @@ +from bonobo import get_examples_path, open_fs + + +def get_services(): + return {'fs': open_fs(get_examples_path())} diff --git a/bonobo/examples/types/__main__.py b/bonobo/examples/types/__main__.py new file mode 100644 index 0000000..3d1549f --- /dev/null +++ b/bonobo/examples/types/__main__.py @@ -0,0 +1,3 @@ +from bonobo.util.python import require + +graph = require('strings').graph diff --git a/bonobo/execution/base.py b/bonobo/execution/base.py index 6ca22f2..779f212 100644 --- a/bonobo/execution/base.py +++ b/bonobo/execution/base.py @@ -9,6 +9,14 @@ from bonobo.util.errors import print_error from bonobo.util.objects import Wrapper, get_name +@contextmanager +def recoverable(error_handler): + try: + yield + except Exception as exc: # pylint: disable=broad-except + error_handler(exc, traceback.format_exc()) + + @contextmanager def unrecoverable(error_handler): try: @@ -55,10 +63,9 @@ class LoopingExecutionContext(Wrapper): raise RuntimeError('Cannot start a node twice ({}).'.format(get_name(self))) self._started = True - self._stack = ContextCurrifier(self.wrapped, *self._get_initial_context()) - with unrecoverable(self.handle_error): - self._stack.setup(self) + self._stack = ContextCurrifier(self.wrapped, *self._get_initial_context()) + self._stack.setup(self) for enhancer in self._enhancers: with unrecoverable(self.handle_error): @@ -82,7 +89,7 @@ class LoopingExecutionContext(Wrapper): return try: - with unrecoverable(self.handle_error): + if self._stack: self._stack.teardown() finally: self._stopped = True diff --git a/bonobo/execution/plugin.py b/bonobo/execution/plugin.py index db5c0db..d928f4a 100644 --- a/bonobo/execution/plugin.py +++ b/bonobo/execution/plugin.py @@ -1,6 +1,4 @@ -import traceback - -from bonobo.execution.base import LoopingExecutionContext +from bonobo.execution.base import LoopingExecutionContext, recoverable class PluginExecutionContext(LoopingExecutionContext): @@ -14,21 +12,14 @@ class PluginExecutionContext(LoopingExecutionContext): def start(self): super().start() - try: + with recoverable(self.handle_error): self.wrapped.initialize() - except Exception as exc: # pylint: disable=broad-except - self.handle_error(exc, traceback.format_exc()) def shutdown(self): - try: + with recoverable(self.handle_error): self.wrapped.finalize() - except Exception as exc: # pylint: disable=broad-except - self.handle_error(exc, traceback.format_exc()) - finally: - self.alive = False + self.alive = False def step(self): - try: + with recoverable(self.handle_error): self.wrapped.run() - except Exception as exc: # pylint: disable=broad-except - self.handle_error(exc, traceback.format_exc()) diff --git a/bonobo/nodes/basics.py b/bonobo/nodes/basics.py index 195cd8e..5ce550c 100644 --- a/bonobo/nodes/basics.py +++ b/bonobo/nodes/basics.py @@ -1,6 +1,7 @@ import functools from pprint import pprint as _pprint +import itertools from colorama import Fore, Style from bonobo.config import Configurable, Option @@ -69,6 +70,12 @@ def _count_counter(self, context): context.send(Bag(counter._value)) +class PrettyPrinter(Configurable): + def call(self, *args, **kwargs): + for i, (item, value) in enumerate(itertools.chain(enumerate(args), kwargs.items())): + print(' ' if i else '•', item, '=', str(value).strip().replace('\n', '\n' + CLEAR_EOL), CLEAR_EOL) + + pprint = Tee(_pprint) diff --git a/bonobo/nodes/io/csv.py b/bonobo/nodes/io/csv.py index 45b40de..e0412fa 100644 --- a/bonobo/nodes/io/csv.py +++ b/bonobo/nodes/io/csv.py @@ -3,6 +3,8 @@ import csv from bonobo.config import Option from bonobo.config.processors import ContextProcessor from bonobo.constants import NOT_MODIFIED +from bonobo.errors import ConfigurationError, ValidationError +from bonobo.structs import Bag from bonobo.util.objects import ValueHolder from .file import FileHandler, FileReader, FileWriter @@ -28,6 +30,14 @@ class CsvHandler(FileHandler): headers = Option(tuple) +def validate_csv_output_format(v): + if callable(v): + return v + if v in {'dict', 'kwargs'}: + return v + raise ValidationError('Unsupported format {!r}.'.format(v)) + + class CsvReader(CsvHandler, FileReader): """ Reads a CSV and yield the values as dicts. @@ -39,13 +49,23 @@ class CsvReader(CsvHandler, FileReader): """ skip = Option(int, default=0) + output_format = Option(validate_csv_output_format, default='dict') @ContextProcessor def csv_headers(self, context, fs, file): yield ValueHolder(self.headers) + def get_output_formater(self): + if callable(self.output_format): + return self.output_format + elif isinstance(self.output_format, str): + return getattr(self, '_format_as_' + self.output_format) + else: + raise ConfigurationError('Unsupported format {!r} for {}.'.format(self.output_format, type(self).__name__)) + def read(self, fs, file, headers): reader = csv.reader(file, delimiter=self.delimiter, quotechar=self.quotechar) + formater = self.get_output_formater() if not headers.get(): headers.set(next(reader)) @@ -60,7 +80,13 @@ class CsvReader(CsvHandler, FileReader): if len(row) != field_count: raise ValueError('Got a line with %d fields, expecting %d.' % (len(row), field_count, )) - yield dict(zip(headers.value, row)) + yield formater(headers.get(), row) + + def _format_as_dict(self, headers, values): + return dict(zip(headers, values)) + + def _format_as_kwargs(self, headers, values): + return Bag(**dict(zip(headers, values))) class CsvWriter(CsvHandler, FileWriter): diff --git a/bonobo/nodes/io/file.py b/bonobo/nodes/io/file.py index c88f601..b06fae3 100644 --- a/bonobo/nodes/io/file.py +++ b/bonobo/nodes/io/file.py @@ -4,11 +4,6 @@ from bonobo.config.processors import ContextProcessor from bonobo.constants import NOT_MODIFIED from bonobo.util.objects import ValueHolder -__all__ = [ - 'FileReader', - 'FileWriter', -] - class FileHandler(Configurable): """Abstract component factory for file-related components. @@ -23,6 +18,7 @@ class FileHandler(Configurable): path = Option(str, required=True, positional=True) # type: str eol = Option(str, default='\n') # type: str mode = Option(str) # type: str + encoding = Option(str, default='utf-8') # type: str fs = Service('fs') # type: str @@ -32,7 +28,7 @@ class FileHandler(Configurable): yield file def open(self, fs): - return fs.open(self.path, self.mode) + return fs.open(self.path, self.mode, encoding=self.encoding) class Reader(FileHandler): diff --git a/bonobo/nodes/io/json.py b/bonobo/nodes/io/json.py index fdb49b8..b2db708 100644 --- a/bonobo/nodes/io/json.py +++ b/bonobo/nodes/io/json.py @@ -3,10 +3,6 @@ import json from bonobo.config.processors import ContextProcessor from .file import FileWriter, FileReader -__all__ = [ - 'JsonWriter', -] - class JsonHandler(): eol = ',\n' diff --git a/bonobo/nodes/io/xml.py b/bonobo/nodes/io/xml.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/strategies/executor.py b/bonobo/strategies/executor.py index 3f34862..26b810b 100644 --- a/bonobo/strategies/executor.py +++ b/bonobo/strategies/executor.py @@ -36,7 +36,7 @@ class ExecutorStrategy(Strategy): plugin_context.loop() plugin_context.stop() except Exception as exc: - print_error(exc, traceback.format_exc(), prefix='Error in plugin context', context=plugin_context) + print_error(exc, traceback.format_exc(), context=plugin_context) futures.append(executor.submit(_runner)) @@ -46,9 +46,7 @@ class ExecutorStrategy(Strategy): try: node_context.start() except Exception as exc: - print_error( - exc, traceback.format_exc(), prefix='Could not start node context', context=node_context - ) + print_error(exc, traceback.format_exc(), context=node_context, method='start') node_context.input.on_end() else: node_context.loop() @@ -56,7 +54,7 @@ class ExecutorStrategy(Strategy): try: node_context.stop() except Exception as exc: - print_error(exc, traceback.format_exc(), prefix='Could not stop node context', context=node_context) + print_error(exc, traceback.format_exc(), context=node_context, method='stop') futures.append(executor.submit(_runner)) diff --git a/bonobo/structs/bags.py b/bonobo/structs/bags.py index 5fec1f2..4ef2fa7 100644 --- a/bonobo/structs/bags.py +++ b/bonobo/structs/bags.py @@ -75,6 +75,14 @@ class Bag: raise TypeError('Could not apply bag to {}.'.format(func_or_iter)) + def get(self): + """ + Get a 2 element tuple of this bag's args and kwargs. + + :return: tuple + """ + return self.args, self.kwargs + def extend(self, *args, **kwargs): return type(self)(*args, _parent=self, **kwargs) diff --git a/bonobo/util/errors.py b/bonobo/util/errors.py index bd1f51f..0ea4e58 100644 --- a/bonobo/util/errors.py +++ b/bonobo/util/errors.py @@ -1,5 +1,7 @@ import sys +from textwrap import indent +from bonobo import settings from bonobo.structs.bags import ErrorBag @@ -7,7 +9,14 @@ def is_error(bag): return isinstance(bag, ErrorBag) -def print_error(exc, trace, context=None, prefix=''): +def _get_error_message(exc): + if hasattr(exc, '__str__'): + message = str(exc) + return message[0].upper() + message[1:] + return '\n'.join(exc.args), + + +def print_error(exc, trace, context=None, method=None): """ Error handler. Whatever happens in a plugin or component, if it looks like an exception, taste like an exception or somehow make me think it is an exception, I'll handle it. @@ -18,14 +27,20 @@ def print_error(exc, trace, context=None, prefix=''): """ from colorama import Fore, Style + + prefix = '{}{} | {}'.format(Fore.RED, Style.BRIGHT, Style.RESET_ALL) + print( Style.BRIGHT, Fore.RED, - '\U0001F4A3 {}{}{}'.format( - (prefix + ': ') if prefix else '', type(exc).__name__, ' in {!r}'.format(context) if context else '' - ), + type(exc).__name__, + ' (in {}{})'.format(type(context).__name__, '.{}()'.format(method) if method else '') if context else '', + Style.RESET_ALL, + '\n', + indent(_get_error_message(exc), prefix + Style.BRIGHT), Style.RESET_ALL, sep='', file=sys.stderr, ) - print(trace) + print(prefix, file=sys.stderr) + print(indent(trace, prefix, predicate=lambda line: True), file=sys.stderr) diff --git a/bonobo/util/testing.py b/bonobo/util/testing.py index 7cf3da0..d5b6cc8 100644 --- a/bonobo/util/testing.py +++ b/bonobo/util/testing.py @@ -1,3 +1,4 @@ +from contextlib import contextmanager from unittest.mock import MagicMock from bonobo.execution.node import NodeExecutionContext @@ -7,3 +8,12 @@ class CapturingNodeExecutionContext(NodeExecutionContext): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.send = MagicMock() + + +@contextmanager +def optional_contextmanager(cm, *, ignore=False): + if cm is None or ignore: + yield + else: + with cm: + yield diff --git a/config/__init__.py b/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docs/changelog.rst b/docs/changelog.rst index f9e2145..061e02c 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,51 @@ Changelog ========= +v.0.3.0 - 22 may 2017 +::::::::::::::::::::: + +Features +-------- + +* ContextProcessors can now be implemented by getting the "yield" value (v = yield x), shortening the teardown-only context processors by one line. +* File related writers (file, csv, json ...) now returns NOT_MODIFIED, making it easier to chain something after. +* More consistent console output, nodes are now sorted in a topological order before display. +* Graph.add_chain(...) now takes _input and _output parameters the same way, accepting indexes, instances or names (subject to change). +* Graph.add_chain(...) now allows to "name" a chain, using _name keyword argument, to easily reference its output later (subject to change). +* New settings module (bonobo.settings) read environment for some global configuration stuff (DEBUG and PROFILE, for now). +* New Method subclass of Option allows to use Configurable objects as decorator (see bonobo.nodes.filter.Filter for a simple example). +* New Filter transformation in standard library. + +Internal features +----------------- + +* Better ContextProcessor implementation, avoiding to use a decorator on the parent class. Now works with Configurable instances like Option, Service and Method. +* ContextCurrifier replaces the logic that was in NodeExecutionContext, that setup and teardown the context stack. Maybe the name is not ideal. +* All builtin transformations are of course updated to use the improved API, and should be 100% backward compatible. +* The "core" package has been dismantled, and its rare remaining members are now in "structs" and "util" packages. +* Standard transformation library has been moved under the bonobo.nodes package. It does not change anything if you used bonobo.* (which you should). +* ValueHolder is now more restrictive, not allowing to use .value anymore. + +Miscellaneous +------------- + +* Code cleanup, dead code removal, more tests, etc. +* More documentation. + +v.0.2.4 - 2 may 2017 +:::::::::::::::::::: + +* Cosmetic release for PyPI package page formating. Same content as v.0.2.3. + +v.0.2.3 - 1 may 2017 +::::::::::::::::::::: + +* Positional options now supported, backward compatible. All FileHandler subclasses supports their path argument as positional. +* Better transformation lifecycle management (still work needed here). +* Windows continuous integration now works. +* Refactoring the "API" a lot to have a much cleaner first glance at it. +* More documentation, tutorials, and tuning project artifacts. + v.0.2.2 - 28 apr 2017 ::::::::::::::::::::: @@ -36,4 +81,4 @@ Initial release * Input/output MUX DEMUX removed, maybe no need for that in the real world. May come back, but not in 1.0 * Change dependency policy. We need to include only the very basic requirements (and very required). Everything related to transforms that we may not use (bs, sqla, ...) should be optional dependencies. -* Execution strategies, threaded by default. \ No newline at end of file +* Execution strategies, threaded by default. diff --git a/docs/faq.rst b/docs/faq.rst index 6938dcd..6f3cd3f 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -3,6 +3,7 @@ F.A.Q. List of questions that went up about the project, in no particuliar order. + Too long; didn't read. ---------------------- @@ -19,8 +20,22 @@ It's lean manufacturing for data. .. note:: - This is NOT a «big data» tool. We process around 5 millions database lines in around 1 hour with rdc.etl, bonobo - ancestor (algorithms are the same, we still need to run a bit of benchmarks). + This is NOT a «big data» tool. Neither a «data analysis» tool. We process around 5 millions database lines in around + 1 hour with rdc.etl, bonobo ancestor (algorithms are the same, we still need to run a bit of benchmarks). + + +What versions of python does bonobo support? Why not more? +---------------------------------------------------------- + +Bonobo is battle-tested against the latest python 3.5 and python 3.6. It may work well using other patch releases of those +versions, but we cannot guarantee it. + +The main reasons about why 3.5+: + +* Creating a tool that works well under both python 2 and 3 is a lot more work. +* Python 3 is nearly 10 years old. Consider moving on. +* Python 3.5 contains syntaxic sugar that makes working with data a lot more convenient. + Can a graph contain another graph? ---------------------------------- @@ -30,8 +45,14 @@ No, not for now. There are no tools today in bonobo to insert a graph as a subgr It would be great to allow it, but there is a few design questions behind this, like what node you use as input and output of the subgraph, etc. +On another hand, if you don't consider a graph as the container but by the nodes and edges it contains, its pretty +easy to add a set of nodes and edge to a subgraph, and thus simulate it. But there will be more threads, more copies +of the same nodes, so it's not really an acceptable answer for big graphs. If it was possible to use a Graph as a +node, then the problem would be correctly solved. + It is something to be seriously considered post 1.0 (probably way post 1.0). + How would one access contextual data from a transformation? Are there parameter injections like pytest's fixtures? ------------------------------------------------------------------------------------------------------------------ @@ -43,20 +64,26 @@ to find a better way to apply it. To understand how it works today, look at https://github.com/python-bonobo/bonobo/blob/0.3/bonobo/io/csv.py#L63 and class hierarchy. + What is a plugin? Do I need to write one? ----------------------------------------- Plugins are special classes added to an execution context, used to enhance or change the actual behavior of an execution in a generic way. You don't need to write plugins to code transformation graphs. + Is there a difference between a transformation node and a regular python function or generator? ----------------------------------------------------------------------------------------------- -No. +Short answer: no. Transformation callables are just regular callables, and there is nothing that differentiate it from regular python callables. You can even use some callables both in an imperative programming context and in a transformation graph, no problem. +Longer answer: yes, sometimes, but you should not care. The function-based transformations are plain old python callable. The +class-based transformations can be plain-old-python-objects, but can also subclass Configurable which brings a lot of +fancy features, like options, service injections, class factories as decorators... + Why did you include the word «marketing» in a commit message? Why is there a marketing-automation tag on the project? Isn't marketing evil? ------------------------------------------------------------------------------------------------------------------------------------------- @@ -83,6 +110,7 @@ See https://github.com/python-bonobo/bonobo/issues/1 Bonobo is not a replacement for pandas, nor dask, nor luigi, nor airflow... It may be a replacement for Pentaho, Talend or other data integration suites but targets people more comfortable with code as an interface. + All those references to monkeys hurt my head. Bonobos are not monkeys. ---------------------------------------------------------------------- @@ -96,6 +124,7 @@ known primate typing feature.» See https://github.com/python-bonobo/bonobo/issues/24 + Who is behind this? ------------------- @@ -104,6 +133,7 @@ Me (as an individual), and a few great people that helped me along the way. Not The code, documentation, and surrounding material is created using spare time and may lack a bit velocity. Feel free to jump in so we can go faster! + Documentation seriously lacks X, there is a problem in Y... ----------------------------------------------------------- diff --git a/docs/guide/services.rst b/docs/guide/services.rst index 0b12d96..cf7ecc7 100644 --- a/docs/guide/services.rst +++ b/docs/guide/services.rst @@ -81,6 +81,26 @@ A dictionary, or dictionary-like, "services" named argument can be passed to the provided is pretty basic, and feature-less. But you can use much more evolved libraries instead of the provided stub, and as long as it works the same (a.k.a implements a dictionary-like interface), the system will use it. +Solving concurrency problems +---------------------------- + +If a service cannot be used by more than one thread at a time, either because it's just not threadsafe, or because +it requires to carefully order the calls made (apis that includes nonces, or work on results returned by previous +calls are usually good candidates), you can use the :class:`bonobo.config.Exclusive` context processor to lock the +use of a dependency for a time period. + +.. code-block:: python + + from bonobo.config import Exclusive + + def t1(api): + with Exclusive(api): + api.first_call() + api.second_call() + # ... etc + api.last_call() + + Service configuration (to be decided and implemented) ::::::::::::::::::::::::::::::::::::::::::::::::::::: diff --git a/docs/install.rst b/docs/install.rst index ec617fb..943ffbe 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -1,7 +1,16 @@ Installation ============ -Bonobo is `available on PyPI `_, and it's the easiest solution to get started. +Create an ETL project +::::::::::::::::::::: + +If you only want to use Bonobo to code ETLs, your easiest option to get started is to use our +`cookiecutter template `_. + +Install from PyPI +::::::::::::::::: + +You can also install it directly from the `Python Package Index `_. .. code-block:: shell-session diff --git a/docs/roadmap.rst b/docs/roadmap.rst index 4bfcc91..182cf71 100644 --- a/docs/roadmap.rst +++ b/docs/roadmap.rst @@ -1,12 +1,12 @@ -Detailed roadmap -================ +Internal roadmap notes +====================== -initialize / finalize better than start / stop ? +Things that should be thought about and/or implemented, but that I don't know where to store. Graph and node level plugins :::::::::::::::::::::::::::: - * Enhancers or nide-level plugins + * Enhancers or node-level plugins * Graph level plugins * Documentation @@ -15,21 +15,19 @@ Command line interface and environment * How do we manage environment ? .env ? * How do we configure plugins ? -* Console run should allow console plugin as a command line argument (or silence it). Services and Processors ::::::::::::::::::::::: -* ContextProcessors not clean +* ContextProcessors not clean (a bit better, but still not in love with the api) Next... ::::::: * Release process specialised for bonobo. With changelog production, etc. * Document how to upgrade version, like, minor need change badges, etc. -* PyPI page looks like crap: https://pypi.python.org/pypi/bonobo/0.2.1 -* Windows break because of readme encoding. Fix in edgy. -* bonobo init --with sqlalchemy,docker +* Windows console looks crappy. +* bonobo init --with sqlalchemy,docker; cookiecutter? * logger, vebosity level @@ -39,22 +37,15 @@ External libs that looks good * dask.distributed * mediator (event dispatcher) -Version 0.3 +Version 0.4 ::::::::::: -* Services ! * SQLAlchemy 101 -Version 0.2 -::::::::::: +Design decisions +:::::::::::::::: -* Autodetect if within jupyter notebook context, and apply plugin if it's the case. -* New bonobo.structs package with simple data structures (bags, graphs, tokens). - -Plugins API -::::::::::: - -* Stabilize, find other things to do. +* initialize / finalize better than start / stop ? Minor stuff ::::::::::: diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst index 4627357..c00f27a 100644 --- a/docs/tutorial/index.rst +++ b/docs/tutorial/index.rst @@ -1,8 +1,6 @@ First steps =========== -Bonobo uses simple python and should be quick and easy to learn. - What is Bonobo? ::::::::::::::: @@ -13,10 +11,16 @@ Bonobo *is not* a statistical or data-science tool. If you're looking for a data Bonobo is a lean manufacturing assembly line for data that let you focus on the actual work instead of the plumbery. +Bonobo uses simple python and should be quick and easy to learn. Tutorial :::::::: +Warning: the documentation is still in progress. Although all content here should be accurate, you may feel a lack of +completeness, for which we plaid guilty and apologize. If there is something blocking, please come on our +`slack channel `_ and complain, we'll figure something out. If there is something +that did not block you but can be a no-go for others, please consider contributing to the docs. + .. toctree:: :maxdepth: 2 @@ -43,6 +47,6 @@ Read about integrating external tools with bonobo * :doc:`../guide/ext/docker`: run transformation graphs in isolated containers. * :doc:`../guide/ext/jupyter`: run transformations within jupyter notebooks. -* :doc:`../guide/ext/selenium`: run +* :doc:`../guide/ext/selenium`: crawl the web using a real browser and work with the gathered data. * :doc:`../guide/ext/sqlalchemy`: everything you need to interract with SQL databases. diff --git a/setup.py b/setup.py index 844240a..feabb8c 100644 --- a/setup.py +++ b/setup.py @@ -18,13 +18,19 @@ except NameError: # Get the long description from the README file -with open(path.join(here, 'README.rst'), encoding='utf-8') as f: - long_description = f.read() +try: + with open(path.join(here, 'README.rst'), encoding='utf-8') as f: + long_description = f.read() +except: + long_description = '' # Get the classifiers from the classifiers file tolines = lambda c: list(filter(None, map(lambda s: s.strip(), c.split('\n')))) -with open(path.join(here, 'classifiers.txt'), encoding='utf-8') as f: - classifiers = tolines(f.read()) +try: + with open(path.join(here, 'classifiers.txt'), encoding='utf-8') as f: + classifiers = tolines(f.read()) +except: + classifiers = [] version_ns = {} try: diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_config.py b/tests/config/test_configurables.py similarity index 61% rename from tests/test_config.py rename to tests/config/test_configurables.py index 3f17a53..178c188 100644 --- a/tests/test_config.py +++ b/tests/config/test_configurables.py @@ -2,7 +2,6 @@ import pytest from bonobo.config.configurables import Configurable from bonobo.config.options import Option -from bonobo.config.services import Container, Service, validate_service_name class MyConfigurable(Configurable): @@ -25,28 +24,6 @@ class MyConfigurableUsingPositionalOptions(MyConfigurable): third = Option(str, required=False, positional=True) -class PrinterInterface(): - def print(self, *args): - raise NotImplementedError() - - -class ConcretePrinter(PrinterInterface): - def __init__(self, prefix): - self.prefix = prefix - - def print(self, *args): - return ';'.join((self.prefix, *args)) - - -class MyServiceDependantConfigurable(Configurable): - printer = Service( - PrinterInterface, - ) - - def __call__(self, printer: PrinterInterface, *args): - return printer.print(*args) - - def test_missing_required_option_error(): with pytest.raises(TypeError) as exc: MyConfigurable() @@ -107,39 +84,5 @@ def test_option_resolution_order(): assert o.integer == None -def test_service_name_validator(): - assert validate_service_name('foo') == 'foo' - assert validate_service_name('foo.bar') == 'foo.bar' - assert validate_service_name('Foo') == 'Foo' - assert validate_service_name('Foo.Bar') == 'Foo.Bar' - assert validate_service_name('Foo.a0') == 'Foo.a0' - - with pytest.raises(ValueError): - validate_service_name('foo.0') - - with pytest.raises(ValueError): - validate_service_name('0.foo') - - -SERVICES = Container( - printer0=ConcretePrinter(prefix='0'), - printer1=ConcretePrinter(prefix='1'), -) - - -def test_service_dependency(): - o = MyServiceDependantConfigurable(printer='printer0') - - assert o(SERVICES.get('printer0'), 'foo', 'bar') == '0;foo;bar' - assert o(SERVICES.get('printer1'), 'bar', 'baz') == '1;bar;baz' - assert o(*SERVICES.args_for(o), 'foo', 'bar') == '0;foo;bar' - - -def test_service_dependency_unavailable(): - o = MyServiceDependantConfigurable(printer='printer2') - with pytest.raises(KeyError): - SERVICES.args_for(o) - - def test_option_positional(): o = MyConfigurableUsingPositionalOptions('1', '2', '3', required_str='hello') diff --git a/tests/test_config_method.py b/tests/config/test_methods.py similarity index 97% rename from tests/test_config_method.py rename to tests/config/test_methods.py index 13eb873..3a5f6a3 100644 --- a/tests/test_config_method.py +++ b/tests/config/test_methods.py @@ -28,8 +28,6 @@ def test_define_with_decorator(): def Concrete(self, *args, **kwargs): calls.append((args, kwargs, )) - print('handler', Concrete.handler) - assert callable(Concrete.handler) t = Concrete('foo', bar='baz') diff --git a/tests/test_config_processors.py b/tests/config/test_processors.py similarity index 100% rename from tests/test_config_processors.py rename to tests/config/test_processors.py diff --git a/tests/config/test_services.py b/tests/config/test_services.py new file mode 100644 index 0000000..b762dbe --- /dev/null +++ b/tests/config/test_services.py @@ -0,0 +1,96 @@ +import threading +import time + +import pytest + +from bonobo.config import Configurable, Container, Exclusive, Service +from bonobo.config.services import validate_service_name + + +class PrinterInterface(): + def print(self, *args): + raise NotImplementedError() + + +class ConcretePrinter(PrinterInterface): + def __init__(self, prefix): + self.prefix = prefix + + def print(self, *args): + return ';'.join((self.prefix, *args)) + + +SERVICES = Container( + printer0=ConcretePrinter(prefix='0'), + printer1=ConcretePrinter(prefix='1'), +) + + +class MyServiceDependantConfigurable(Configurable): + printer = Service( + PrinterInterface, + ) + + def __call__(self, printer: PrinterInterface, *args): + return printer.print(*args) + + +def test_service_name_validator(): + assert validate_service_name('foo') == 'foo' + assert validate_service_name('foo.bar') == 'foo.bar' + assert validate_service_name('Foo') == 'Foo' + assert validate_service_name('Foo.Bar') == 'Foo.Bar' + assert validate_service_name('Foo.a0') == 'Foo.a0' + + with pytest.raises(ValueError): + validate_service_name('foo.0') + + with pytest.raises(ValueError): + validate_service_name('0.foo') + + +def test_service_dependency(): + o = MyServiceDependantConfigurable(printer='printer0') + + assert o(SERVICES.get('printer0'), 'foo', 'bar') == '0;foo;bar' + assert o(SERVICES.get('printer1'), 'bar', 'baz') == '1;bar;baz' + assert o(*SERVICES.args_for(o), 'foo', 'bar') == '0;foo;bar' + + +def test_service_dependency_unavailable(): + o = MyServiceDependantConfigurable(printer='printer2') + with pytest.raises(KeyError): + SERVICES.args_for(o) + + +class VCR: + def __init__(self): + self.tape = [] + + def append(self, x): + return self.tape.append(x) + + +def test_exclusive(): + vcr = VCR() + vcr.append('hello') + + def record(prefix, vcr=vcr): + with Exclusive(vcr): + for i in range(5): + vcr.append(' '.join((prefix, str(i)))) + time.sleep(0.05) + + threads = [threading.Thread(target=record, args=(str(i), )) for i in range(5)] + + for thread in threads: + thread.start() + time.sleep(0.01) # this is not good practice, how to test this without sleeping ?? XXX + + for thread in threads: + thread.join() + + assert vcr.tape == [ + 'hello', '0 0', '0 1', '0 2', '0 3', '0 4', '1 0', '1 1', '1 2', '1 3', '1 4', '2 0', '2 1', '2 2', '2 3', + '2 4', '3 0', '3 1', '3 2', '3 3', '3 4', '4 0', '4 1', '4 2', '4 3', '4 4' + ] diff --git a/tests/io/test_csv.py b/tests/io/test_csv.py index 59f7197..bded111 100644 --- a/tests/io/test_csv.py +++ b/tests/io/test_csv.py @@ -55,3 +55,38 @@ def test_read_csv_from_file(tmpdir): 'b': 'b bar', 'c': 'c bar', } + + +def test_read_csv_kwargs_output_formater(tmpdir): + fs, filename = open_fs(tmpdir), 'input.csv' + fs.open(filename, 'w').write('a,b,c\na foo,b foo,c foo\na bar,b bar,c bar') + + reader = CsvReader(path=filename, delimiter=',', output_format='kwargs') + + context = CapturingNodeExecutionContext(reader, services={'fs': fs}) + + context.start() + context.write(BEGIN, Bag(), END) + context.step() + context.stop() + + assert len(context.send.mock_calls) == 2 + + args0, kwargs0 = context.send.call_args_list[0] + assert len(args0) == 1 and not len(kwargs0) + args1, kwargs1 = context.send.call_args_list[1] + assert len(args1) == 1 and not len(kwargs1) + + _args, _kwargs = args0[0].get() + assert not len(_args) and _kwargs == { + 'a': 'a foo', + 'b': 'b foo', + 'c': 'c foo', + } + + _args, _kwargs = args1[0].get() + assert not len(_args) and _kwargs == { + 'a': 'a bar', + 'b': 'b bar', + 'c': 'c bar', + } diff --git a/tests/test_commands.py b/tests/test_commands.py index 55032b2..40a6ed5 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -1,10 +1,26 @@ +import runpy +import sys +from unittest.mock import patch + import pkg_resources import pytest -from bonobo import get_examples_path +from bonobo import __main__, __version__, get_examples_path from bonobo.commands import entrypoint +def runner_entrypoint(*args): + return entrypoint(list(args)) + + +def runner_module(*args): + with patch.object(sys, 'argv', ['bonobo', *args]): + return runpy.run_path(__main__.__file__, run_name='__main__') + + +all_runners = pytest.mark.parametrize('runner', [runner_entrypoint, runner_module]) + + def test_entrypoint(): commands = {} @@ -13,23 +29,51 @@ def test_entrypoint(): assert 'init' in commands assert 'run' in commands + assert 'version' in commands -def test_no_command(capsys): +@all_runners +def test_no_command(runner, capsys): with pytest.raises(SystemExit): - entrypoint([]) + runner() _, err = capsys.readouterr() assert 'error: the following arguments are required: command' in err -def test_init(): - pass # need ext dir - - -def test_run(capsys): - entrypoint(['run', '--quiet', get_examples_path('types/strings.py')]) +@all_runners +def test_run(runner, capsys): + runner('run', '--quiet', get_examples_path('types/strings.py')) out, err = capsys.readouterr() out = out.split('\n') assert out[0].startswith('Foo ') assert out[1].startswith('Bar ') assert out[2].startswith('Baz ') + + +@all_runners +def test_run_module(runner, capsys): + runner('run', '--quiet', '-m', 'bonobo.examples.types.strings') + out, err = capsys.readouterr() + out = out.split('\n') + assert out[0].startswith('Foo ') + assert out[1].startswith('Bar ') + assert out[2].startswith('Baz ') + + +@all_runners +def test_run_path(runner, capsys): + runner('run', '--quiet', get_examples_path('types')) + out, err = capsys.readouterr() + out = out.split('\n') + assert out[0].startswith('Foo ') + assert out[1].startswith('Bar ') + assert out[2].startswith('Baz ') + + +@all_runners +def test_version(runner, capsys): + runner('version') + out, err = capsys.readouterr() + out = out.strip() + assert out.startswith('bonobo ') + assert out.endswith(__version__) diff --git a/tests/test_publicapi.py b/tests/test_publicapi.py index 0ce6323..6b554e1 100644 --- a/tests/test_publicapi.py +++ b/tests/test_publicapi.py @@ -1,4 +1,4 @@ -import types +import inspect def test_wildcard_import(): @@ -10,7 +10,7 @@ def test_wildcard_import(): if name.startswith('_'): continue attr = getattr(bonobo, name) - if isinstance(attr, types.ModuleType): + if inspect.ismodule(attr): continue assert name in bonobo.__all__ diff --git a/tests/util/test_objects.py b/tests/util/test_objects.py index d8e6f7d..c6e30b2 100644 --- a/tests/util/test_objects.py +++ b/tests/util/test_objects.py @@ -1,4 +1,9 @@ +import operator + +import pytest + from bonobo.util.objects import Wrapper, get_name, ValueHolder +from bonobo.util.testing import optional_contextmanager class foo: @@ -52,3 +57,56 @@ def test_valueholder(): assert y == x assert y is not x assert repr(x) == repr(y) == repr(43) + + +unsupported_operations = { + int: {operator.matmul}, + str: { + operator.sub, operator.mul, operator.matmul, operator.floordiv, operator.truediv, operator.mod, divmod, + operator.pow, operator.lshift, operator.rshift, operator.and_, operator.xor, operator.or_ + }, +} + + +@pytest.mark.parametrize('x,y', [(5, 3), (0, 10), (0, 0), (1, 1), ('foo', 'bar'), ('', 'baz!')]) +@pytest.mark.parametrize( + 'operation,inplace_operation', [ + (operator.add, operator.iadd), + (operator.sub, operator.isub), + (operator.mul, operator.imul), + (operator.matmul, operator.imatmul), + (operator.truediv, operator.itruediv), + (operator.floordiv, operator.ifloordiv), + (operator.mod, operator.imod), + (divmod, None), + (operator.pow, operator.ipow), + (operator.lshift, operator.ilshift), + (operator.rshift, operator.irshift), + (operator.and_, operator.iand), + (operator.xor, operator.ixor), + (operator.or_, operator.ior), + ] +) +def test_valueholder_integer_operations(x, y, operation, inplace_operation): + v = ValueHolder(x) + + is_supported = operation not in unsupported_operations.get(type(x), set()) + + isdiv = ('div' in operation.__name__) or ('mod' in operation.__name__) + + # forward... + with optional_contextmanager(pytest.raises(TypeError), ignore=is_supported): + with optional_contextmanager(pytest.raises(ZeroDivisionError), ignore=y or not isdiv): + assert operation(x, y) == operation(v, y) + + # backward... + with optional_contextmanager(pytest.raises(TypeError), ignore=is_supported): + with optional_contextmanager(pytest.raises(ZeroDivisionError), ignore=x or not isdiv): + assert operation(y, x) == operation(y, v) + + # in place... + if inplace_operation is not None: + with optional_contextmanager(pytest.raises(TypeError), ignore=is_supported): + with optional_contextmanager(pytest.raises(ZeroDivisionError), ignore=y or not isdiv): + inplace_operation(v, y) + assert v == operation(x, y)