Merge pull request #75 from hartym/develop

Develop (for 0.4)
This commit is contained in:
Romain Dorgueil
2017-05-25 06:30:11 -07:00
committed by GitHub
40 changed files with 548 additions and 167 deletions

View File

@ -1,7 +1,7 @@
# This file has been auto-generated.
# All changes will be lost, see Projectfile.
#
# Updated at 2017-05-03 18:02:59.359160
# Updated at 2017-05-22 19:54:27.969596
PACKAGE ?= bonobo
PYTHON ?= $(shell which python)

View File

@ -7,11 +7,12 @@ Data-processing for humans.
.. image:: https://img.shields.io/pypi/v/bonobo.svg
:target: https://pypi.python.org/pypi/bonobo
:alt: PyPI
.. image:: https://img.shields.io/pypi/pyversions/bonobo.svg
:target: https://pypi.python.org/pypi/bonobo
:alt: Versions
.. image:: https://readthedocs.org/projects/bonobo/badge/?version=0.3
.. image:: https://readthedocs.org/projects/bonobo/badge/?version=latest
:target: http://docs.bonobo-project.org/
:alt: Documentation
@ -50,12 +51,12 @@ so as though it may not yet be complete or fully stable (please, allow us to rea
----
Homepage: https://www.bonobo-project.org/ (`Roadmap <https://www.bonobo-project.org/roadmap>`_)
Documentation: http://docs.bonobo-project.org/
Issues: https://github.com/python-bonobo/bonobo/issues
Roadmap: https://www.bonobo-project.org/roadmap
Slack: https://bonobo-slack.herokuapp.com/
Release announcements: http://eepurl.com/csHFKL

View File

@ -1,5 +1,3 @@
import warnings
from bonobo.structs import Bag, Graph, Token
from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \
PrettyPrint, Tee, count, identity, noop, pprint
@ -85,7 +83,9 @@ def open_fs(fs_url, *args, **kwargs):
:returns: :class:`~fs.base.FS` object
"""
from fs import open_fs as _open_fs
return _open_fs(str(fs_url), *args, **kwargs)
from os.path import expanduser
return _open_fs(expanduser(str(fs_url)), *args, **kwargs)
# bonobo.nodes

View File

@ -1 +1 @@
__version__ = '0.3.0a1'
__version__ = '0.3.0'

View File

@ -20,10 +20,8 @@ def get_default_services(filename, services=None):
'__name__': '__bonobo__',
'__file__': services_filename,
}
try:
exec(code, context)
except Exception:
raise
exec(code, context)
return {
**context[DEFAULT_SERVICES_ATTR](),
**(services or {}),

View File

@ -1,13 +1,15 @@
from bonobo.config.configurables import Configurable
from bonobo.config.options import Option, Method
from bonobo.config.processors import ContextProcessor
from bonobo.config.services import Container, Service
from bonobo.config.services import Container, Service, Exclusive
# bonobo.config public programming interface
__all__ = [
'Configurable',
'Container',
'ContextProcessor',
'Option',
'Exclusive',
'Method',
'Option',
'Service',
]

View File

@ -1,6 +1,6 @@
from bonobo.config.options import Method, Option
from bonobo.config.processors import ContextProcessor
from bonobo.errors import ConfigurationError
from bonobo.errors import ConfigurationError, AbstractError
__all__ = [
'Configurable',

View File

@ -1,4 +1,3 @@
import types
from collections import Iterable
from contextlib import contextmanager
@ -132,14 +131,7 @@ def resolve_processors(mixed):
try:
yield from mixed.__processors__
except AttributeError:
# old code, deprecated usage
if isinstance(mixed, types.FunctionType):
yield from getattr(mixed, _CONTEXT_PROCESSORS_ATTR, ())
for cls in reversed((mixed if isinstance(mixed, type) else type(mixed)).__mro__):
yield from cls.__dict__.get(_CONTEXT_PROCESSORS_ATTR, ())
return ()
yield from ()
get_context_processors = deprecated_alias('get_context_processors', resolve_processors)

View File

@ -1,7 +1,10 @@
import re
import threading
import types
from contextlib import ContextDecorator
from bonobo.config.options import Option
from bonobo.errors import MissingServiceImplementationError
_service_name_re = re.compile(r"^[^\d\W]\w*(:?\.[^\d\W]\w*)*$", re.UNICODE)
@ -78,8 +81,48 @@ class Container(dict):
if not name in self:
if default:
return default
raise KeyError('Cannot resolve service {!r} using provided service collection.'.format(name))
raise MissingServiceImplementationError(
'Cannot resolve service {!r} using provided service collection.'.format(name)
)
value = super().get(name)
# XXX this is not documented and can lead to errors.
if isinstance(value, types.LambdaType):
value = value(self)
return value
class Exclusive(ContextDecorator):
"""
Decorator and context manager used to require exclusive usage of an object, most probably a service. It's usefull
for example if call order matters on a service implementation (think of an http api that requires a nonce or version
parameter ...).
Usage:
>>> def handler(some_service):
... with Exclusive(some_service):
... some_service.call_1()
... some_service.call_2()
... some_service.call_3()
This will ensure that nobody else is using the same service while in the "with" block, using a lock primitive to
ensure that.
"""
_locks = {}
def __init__(self, wrapped):
self._wrapped = wrapped
def get_lock(self):
_id = id(self._wrapped)
if not _id in Exclusive._locks:
Exclusive._locks[_id] = threading.RLock()
return Exclusive._locks[_id]
def __enter__(self):
self.get_lock().acquire()
return self._wrapped
def __exit__(self, *exc):
self.get_lock().release()

View File

@ -55,4 +55,8 @@ class ProhibitedOperationError(RuntimeError):
class ConfigurationError(Exception):
pass
pass
class MissingServiceImplementationError(KeyError):
pass

View File

@ -0,0 +1,5 @@
from bonobo import get_examples_path, open_fs
def get_services():
return {'fs': open_fs(get_examples_path())}

View File

@ -0,0 +1,3 @@
from bonobo.util.python import require
graph = require('strings').graph

View File

@ -9,6 +9,14 @@ from bonobo.util.errors import print_error
from bonobo.util.objects import Wrapper, get_name
@contextmanager
def recoverable(error_handler):
try:
yield
except Exception as exc: # pylint: disable=broad-except
error_handler(exc, traceback.format_exc())
@contextmanager
def unrecoverable(error_handler):
try:
@ -55,10 +63,9 @@ class LoopingExecutionContext(Wrapper):
raise RuntimeError('Cannot start a node twice ({}).'.format(get_name(self)))
self._started = True
self._stack = ContextCurrifier(self.wrapped, *self._get_initial_context())
with unrecoverable(self.handle_error):
self._stack.setup(self)
self._stack = ContextCurrifier(self.wrapped, *self._get_initial_context())
self._stack.setup(self)
for enhancer in self._enhancers:
with unrecoverable(self.handle_error):
@ -82,7 +89,7 @@ class LoopingExecutionContext(Wrapper):
return
try:
with unrecoverable(self.handle_error):
if self._stack:
self._stack.teardown()
finally:
self._stopped = True

View File

@ -1,6 +1,4 @@
import traceback
from bonobo.execution.base import LoopingExecutionContext
from bonobo.execution.base import LoopingExecutionContext, recoverable
class PluginExecutionContext(LoopingExecutionContext):
@ -14,21 +12,14 @@ class PluginExecutionContext(LoopingExecutionContext):
def start(self):
super().start()
try:
with recoverable(self.handle_error):
self.wrapped.initialize()
except Exception as exc: # pylint: disable=broad-except
self.handle_error(exc, traceback.format_exc())
def shutdown(self):
try:
with recoverable(self.handle_error):
self.wrapped.finalize()
except Exception as exc: # pylint: disable=broad-except
self.handle_error(exc, traceback.format_exc())
finally:
self.alive = False
self.alive = False
def step(self):
try:
with recoverable(self.handle_error):
self.wrapped.run()
except Exception as exc: # pylint: disable=broad-except
self.handle_error(exc, traceback.format_exc())

View File

@ -1,6 +1,7 @@
import functools
from pprint import pprint as _pprint
import itertools
from colorama import Fore, Style
from bonobo.config import Configurable, Option
@ -69,6 +70,12 @@ def _count_counter(self, context):
context.send(Bag(counter._value))
class PrettyPrinter(Configurable):
def call(self, *args, **kwargs):
for i, (item, value) in enumerate(itertools.chain(enumerate(args), kwargs.items())):
print(' ' if i else '', item, '=', str(value).strip().replace('\n', '\n' + CLEAR_EOL), CLEAR_EOL)
pprint = Tee(_pprint)

View File

@ -3,6 +3,8 @@ import csv
from bonobo.config import Option
from bonobo.config.processors import ContextProcessor
from bonobo.constants import NOT_MODIFIED
from bonobo.errors import ConfigurationError, ValidationError
from bonobo.structs import Bag
from bonobo.util.objects import ValueHolder
from .file import FileHandler, FileReader, FileWriter
@ -28,6 +30,14 @@ class CsvHandler(FileHandler):
headers = Option(tuple)
def validate_csv_output_format(v):
if callable(v):
return v
if v in {'dict', 'kwargs'}:
return v
raise ValidationError('Unsupported format {!r}.'.format(v))
class CsvReader(CsvHandler, FileReader):
"""
Reads a CSV and yield the values as dicts.
@ -39,13 +49,23 @@ class CsvReader(CsvHandler, FileReader):
"""
skip = Option(int, default=0)
output_format = Option(validate_csv_output_format, default='dict')
@ContextProcessor
def csv_headers(self, context, fs, file):
yield ValueHolder(self.headers)
def get_output_formater(self):
if callable(self.output_format):
return self.output_format
elif isinstance(self.output_format, str):
return getattr(self, '_format_as_' + self.output_format)
else:
raise ConfigurationError('Unsupported format {!r} for {}.'.format(self.output_format, type(self).__name__))
def read(self, fs, file, headers):
reader = csv.reader(file, delimiter=self.delimiter, quotechar=self.quotechar)
formater = self.get_output_formater()
if not headers.get():
headers.set(next(reader))
@ -60,7 +80,13 @@ class CsvReader(CsvHandler, FileReader):
if len(row) != field_count:
raise ValueError('Got a line with %d fields, expecting %d.' % (len(row), field_count, ))
yield dict(zip(headers.value, row))
yield formater(headers.get(), row)
def _format_as_dict(self, headers, values):
return dict(zip(headers, values))
def _format_as_kwargs(self, headers, values):
return Bag(**dict(zip(headers, values)))
class CsvWriter(CsvHandler, FileWriter):

View File

@ -4,11 +4,6 @@ from bonobo.config.processors import ContextProcessor
from bonobo.constants import NOT_MODIFIED
from bonobo.util.objects import ValueHolder
__all__ = [
'FileReader',
'FileWriter',
]
class FileHandler(Configurable):
"""Abstract component factory for file-related components.
@ -23,6 +18,7 @@ class FileHandler(Configurable):
path = Option(str, required=True, positional=True) # type: str
eol = Option(str, default='\n') # type: str
mode = Option(str) # type: str
encoding = Option(str, default='utf-8') # type: str
fs = Service('fs') # type: str
@ -32,7 +28,7 @@ class FileHandler(Configurable):
yield file
def open(self, fs):
return fs.open(self.path, self.mode)
return fs.open(self.path, self.mode, encoding=self.encoding)
class Reader(FileHandler):

View File

@ -3,10 +3,6 @@ import json
from bonobo.config.processors import ContextProcessor
from .file import FileWriter, FileReader
__all__ = [
'JsonWriter',
]
class JsonHandler():
eol = ',\n'

0
bonobo/nodes/io/xml.py Normal file
View File

View File

@ -36,7 +36,7 @@ class ExecutorStrategy(Strategy):
plugin_context.loop()
plugin_context.stop()
except Exception as exc:
print_error(exc, traceback.format_exc(), prefix='Error in plugin context', context=plugin_context)
print_error(exc, traceback.format_exc(), context=plugin_context)
futures.append(executor.submit(_runner))
@ -46,9 +46,7 @@ class ExecutorStrategy(Strategy):
try:
node_context.start()
except Exception as exc:
print_error(
exc, traceback.format_exc(), prefix='Could not start node context', context=node_context
)
print_error(exc, traceback.format_exc(), context=node_context, method='start')
node_context.input.on_end()
else:
node_context.loop()
@ -56,7 +54,7 @@ class ExecutorStrategy(Strategy):
try:
node_context.stop()
except Exception as exc:
print_error(exc, traceback.format_exc(), prefix='Could not stop node context', context=node_context)
print_error(exc, traceback.format_exc(), context=node_context, method='stop')
futures.append(executor.submit(_runner))

View File

@ -75,6 +75,14 @@ class Bag:
raise TypeError('Could not apply bag to {}.'.format(func_or_iter))
def get(self):
"""
Get a 2 element tuple of this bag's args and kwargs.
:return: tuple
"""
return self.args, self.kwargs
def extend(self, *args, **kwargs):
return type(self)(*args, _parent=self, **kwargs)

View File

@ -1,5 +1,7 @@
import sys
from textwrap import indent
from bonobo import settings
from bonobo.structs.bags import ErrorBag
@ -7,7 +9,14 @@ def is_error(bag):
return isinstance(bag, ErrorBag)
def print_error(exc, trace, context=None, prefix=''):
def _get_error_message(exc):
if hasattr(exc, '__str__'):
message = str(exc)
return message[0].upper() + message[1:]
return '\n'.join(exc.args),
def print_error(exc, trace, context=None, method=None):
"""
Error handler. Whatever happens in a plugin or component, if it looks like an exception, taste like an exception
or somehow make me think it is an exception, I'll handle it.
@ -18,14 +27,20 @@ def print_error(exc, trace, context=None, prefix=''):
"""
from colorama import Fore, Style
prefix = '{}{} | {}'.format(Fore.RED, Style.BRIGHT, Style.RESET_ALL)
print(
Style.BRIGHT,
Fore.RED,
'\U0001F4A3 {}{}{}'.format(
(prefix + ': ') if prefix else '', type(exc).__name__, ' in {!r}'.format(context) if context else ''
),
type(exc).__name__,
' (in {}{})'.format(type(context).__name__, '.{}()'.format(method) if method else '') if context else '',
Style.RESET_ALL,
'\n',
indent(_get_error_message(exc), prefix + Style.BRIGHT),
Style.RESET_ALL,
sep='',
file=sys.stderr,
)
print(trace)
print(prefix, file=sys.stderr)
print(indent(trace, prefix, predicate=lambda line: True), file=sys.stderr)

View File

@ -1,3 +1,4 @@
from contextlib import contextmanager
from unittest.mock import MagicMock
from bonobo.execution.node import NodeExecutionContext
@ -7,3 +8,12 @@ class CapturingNodeExecutionContext(NodeExecutionContext):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.send = MagicMock()
@contextmanager
def optional_contextmanager(cm, *, ignore=False):
if cm is None or ignore:
yield
else:
with cm:
yield

0
config/__init__.py Normal file
View File

View File

@ -1,6 +1,51 @@
Changelog
=========
v.0.3.0 - 22 may 2017
:::::::::::::::::::::
Features
--------
* ContextProcessors can now be implemented by getting the "yield" value (v = yield x), shortening the teardown-only context processors by one line.
* File related writers (file, csv, json ...) now returns NOT_MODIFIED, making it easier to chain something after.
* More consistent console output, nodes are now sorted in a topological order before display.
* Graph.add_chain(...) now takes _input and _output parameters the same way, accepting indexes, instances or names (subject to change).
* Graph.add_chain(...) now allows to "name" a chain, using _name keyword argument, to easily reference its output later (subject to change).
* New settings module (bonobo.settings) read environment for some global configuration stuff (DEBUG and PROFILE, for now).
* New Method subclass of Option allows to use Configurable objects as decorator (see bonobo.nodes.filter.Filter for a simple example).
* New Filter transformation in standard library.
Internal features
-----------------
* Better ContextProcessor implementation, avoiding to use a decorator on the parent class. Now works with Configurable instances like Option, Service and Method.
* ContextCurrifier replaces the logic that was in NodeExecutionContext, that setup and teardown the context stack. Maybe the name is not ideal.
* All builtin transformations are of course updated to use the improved API, and should be 100% backward compatible.
* The "core" package has been dismantled, and its rare remaining members are now in "structs" and "util" packages.
* Standard transformation library has been moved under the bonobo.nodes package. It does not change anything if you used bonobo.* (which you should).
* ValueHolder is now more restrictive, not allowing to use .value anymore.
Miscellaneous
-------------
* Code cleanup, dead code removal, more tests, etc.
* More documentation.
v.0.2.4 - 2 may 2017
::::::::::::::::::::
* Cosmetic release for PyPI package page formating. Same content as v.0.2.3.
v.0.2.3 - 1 may 2017
:::::::::::::::::::::
* Positional options now supported, backward compatible. All FileHandler subclasses supports their path argument as positional.
* Better transformation lifecycle management (still work needed here).
* Windows continuous integration now works.
* Refactoring the "API" a lot to have a much cleaner first glance at it.
* More documentation, tutorials, and tuning project artifacts.
v.0.2.2 - 28 apr 2017
:::::::::::::::::::::
@ -36,4 +81,4 @@ Initial release
* Input/output MUX DEMUX removed, maybe no need for that in the real world. May come back, but not in 1.0
* Change dependency policy. We need to include only the very basic requirements (and very required). Everything related
to transforms that we may not use (bs, sqla, ...) should be optional dependencies.
* Execution strategies, threaded by default.
* Execution strategies, threaded by default.

View File

@ -3,6 +3,7 @@ F.A.Q.
List of questions that went up about the project, in no particuliar order.
Too long; didn't read.
----------------------
@ -19,8 +20,22 @@ It's lean manufacturing for data.
.. note::
This is NOT a «big data» tool. We process around 5 millions database lines in around 1 hour with rdc.etl, bonobo
ancestor (algorithms are the same, we still need to run a bit of benchmarks).
This is NOT a «big data» tool. Neither a «data analysis» tool. We process around 5 millions database lines in around
1 hour with rdc.etl, bonobo ancestor (algorithms are the same, we still need to run a bit of benchmarks).
What versions of python does bonobo support? Why not more?
----------------------------------------------------------
Bonobo is battle-tested against the latest python 3.5 and python 3.6. It may work well using other patch releases of those
versions, but we cannot guarantee it.
The main reasons about why 3.5+:
* Creating a tool that works well under both python 2 and 3 is a lot more work.
* Python 3 is nearly 10 years old. Consider moving on.
* Python 3.5 contains syntaxic sugar that makes working with data a lot more convenient.
Can a graph contain another graph?
----------------------------------
@ -30,8 +45,14 @@ No, not for now. There are no tools today in bonobo to insert a graph as a subgr
It would be great to allow it, but there is a few design questions behind this, like what node you use as input and
output of the subgraph, etc.
On another hand, if you don't consider a graph as the container but by the nodes and edges it contains, its pretty
easy to add a set of nodes and edge to a subgraph, and thus simulate it. But there will be more threads, more copies
of the same nodes, so it's not really an acceptable answer for big graphs. If it was possible to use a Graph as a
node, then the problem would be correctly solved.
It is something to be seriously considered post 1.0 (probably way post 1.0).
How would one access contextual data from a transformation? Are there parameter injections like pytest's fixtures?
------------------------------------------------------------------------------------------------------------------
@ -43,20 +64,26 @@ to find a better way to apply it.
To understand how it works today, look at https://github.com/python-bonobo/bonobo/blob/0.3/bonobo/io/csv.py#L63 and class hierarchy.
What is a plugin? Do I need to write one?
-----------------------------------------
Plugins are special classes added to an execution context, used to enhance or change the actual behavior of an execution
in a generic way. You don't need to write plugins to code transformation graphs.
Is there a difference between a transformation node and a regular python function or generator?
-----------------------------------------------------------------------------------------------
No.
Short answer: no.
Transformation callables are just regular callables, and there is nothing that differentiate it from regular python callables.
You can even use some callables both in an imperative programming context and in a transformation graph, no problem.
Longer answer: yes, sometimes, but you should not care. The function-based transformations are plain old python callable. The
class-based transformations can be plain-old-python-objects, but can also subclass Configurable which brings a lot of
fancy features, like options, service injections, class factories as decorators...
Why did you include the word «marketing» in a commit message? Why is there a marketing-automation tag on the project? Isn't marketing evil?
-------------------------------------------------------------------------------------------------------------------------------------------
@ -83,6 +110,7 @@ See https://github.com/python-bonobo/bonobo/issues/1
Bonobo is not a replacement for pandas, nor dask, nor luigi, nor airflow... It may be a replacement for Pentaho, Talend
or other data integration suites but targets people more comfortable with code as an interface.
All those references to monkeys hurt my head. Bonobos are not monkeys.
----------------------------------------------------------------------
@ -96,6 +124,7 @@ known primate typing feature.»
See https://github.com/python-bonobo/bonobo/issues/24
Who is behind this?
-------------------
@ -104,6 +133,7 @@ Me (as an individual), and a few great people that helped me along the way. Not
The code, documentation, and surrounding material is created using spare time and may lack a bit velocity. Feel free
to jump in so we can go faster!
Documentation seriously lacks X, there is a problem in Y...
-----------------------------------------------------------

View File

@ -81,6 +81,26 @@ A dictionary, or dictionary-like, "services" named argument can be passed to the
provided is pretty basic, and feature-less. But you can use much more evolved libraries instead of the provided
stub, and as long as it works the same (a.k.a implements a dictionary-like interface), the system will use it.
Solving concurrency problems
----------------------------
If a service cannot be used by more than one thread at a time, either because it's just not threadsafe, or because
it requires to carefully order the calls made (apis that includes nonces, or work on results returned by previous
calls are usually good candidates), you can use the :class:`bonobo.config.Exclusive` context processor to lock the
use of a dependency for a time period.
.. code-block:: python
from bonobo.config import Exclusive
def t1(api):
with Exclusive(api):
api.first_call()
api.second_call()
# ... etc
api.last_call()
Service configuration (to be decided and implemented)
:::::::::::::::::::::::::::::::::::::::::::::::::::::

View File

@ -1,7 +1,16 @@
Installation
============
Bonobo is `available on PyPI <https://pypi.python.org/pypi/bonobo>`_, and it's the easiest solution to get started.
Create an ETL project
:::::::::::::::::::::
If you only want to use Bonobo to code ETLs, your easiest option to get started is to use our
`cookiecutter template <https://github.com/python-bonobo/cookiecutter-bonobo>`_.
Install from PyPI
:::::::::::::::::
You can also install it directly from the `Python Package Index <https://pypi.python.org/pypi/bonobo>`_.
.. code-block:: shell-session

View File

@ -1,12 +1,12 @@
Detailed roadmap
================
Internal roadmap notes
======================
initialize / finalize better than start / stop ?
Things that should be thought about and/or implemented, but that I don't know where to store.
Graph and node level plugins
::::::::::::::::::::::::::::
* Enhancers or nide-level plugins
* Enhancers or node-level plugins
* Graph level plugins
* Documentation
@ -15,21 +15,19 @@ Command line interface and environment
* How do we manage environment ? .env ?
* How do we configure plugins ?
* Console run should allow console plugin as a command line argument (or silence it).
Services and Processors
:::::::::::::::::::::::
* ContextProcessors not clean
* ContextProcessors not clean (a bit better, but still not in love with the api)
Next...
:::::::
* Release process specialised for bonobo. With changelog production, etc.
* Document how to upgrade version, like, minor need change badges, etc.
* PyPI page looks like crap: https://pypi.python.org/pypi/bonobo/0.2.1
* Windows break because of readme encoding. Fix in edgy.
* bonobo init --with sqlalchemy,docker
* Windows console looks crappy.
* bonobo init --with sqlalchemy,docker; cookiecutter?
* logger, vebosity level
@ -39,22 +37,15 @@ External libs that looks good
* dask.distributed
* mediator (event dispatcher)
Version 0.3
Version 0.4
:::::::::::
* Services !
* SQLAlchemy 101
Version 0.2
:::::::::::
Design decisions
::::::::::::::::
* Autodetect if within jupyter notebook context, and apply plugin if it's the case.
* New bonobo.structs package with simple data structures (bags, graphs, tokens).
Plugins API
:::::::::::
* Stabilize, find other things to do.
* initialize / finalize better than start / stop ?
Minor stuff
:::::::::::

View File

@ -1,8 +1,6 @@
First steps
===========
Bonobo uses simple python and should be quick and easy to learn.
What is Bonobo?
:::::::::::::::
@ -13,10 +11,16 @@ Bonobo *is not* a statistical or data-science tool. If you're looking for a data
Bonobo is a lean manufacturing assembly line for data that let you focus on the actual work instead of the plumbery.
Bonobo uses simple python and should be quick and easy to learn.
Tutorial
::::::::
Warning: the documentation is still in progress. Although all content here should be accurate, you may feel a lack of
completeness, for which we plaid guilty and apologize. If there is something blocking, please come on our
`slack channel <https://bonobo-slack.herokuapp.com/>`_ and complain, we'll figure something out. If there is something
that did not block you but can be a no-go for others, please consider contributing to the docs.
.. toctree::
:maxdepth: 2
@ -43,6 +47,6 @@ Read about integrating external tools with bonobo
* :doc:`../guide/ext/docker`: run transformation graphs in isolated containers.
* :doc:`../guide/ext/jupyter`: run transformations within jupyter notebooks.
* :doc:`../guide/ext/selenium`: run
* :doc:`../guide/ext/selenium`: crawl the web using a real browser and work with the gathered data.
* :doc:`../guide/ext/sqlalchemy`: everything you need to interract with SQL databases.

View File

@ -18,13 +18,19 @@ except NameError:
# Get the long description from the README file
with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
long_description = f.read()
try:
with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
long_description = f.read()
except:
long_description = ''
# Get the classifiers from the classifiers file
tolines = lambda c: list(filter(None, map(lambda s: s.strip(), c.split('\n'))))
with open(path.join(here, 'classifiers.txt'), encoding='utf-8') as f:
classifiers = tolines(f.read())
try:
with open(path.join(here, 'classifiers.txt'), encoding='utf-8') as f:
classifiers = tolines(f.read())
except:
classifiers = []
version_ns = {}
try:

0
tests/__init__.py Normal file
View File

View File

@ -2,7 +2,6 @@ import pytest
from bonobo.config.configurables import Configurable
from bonobo.config.options import Option
from bonobo.config.services import Container, Service, validate_service_name
class MyConfigurable(Configurable):
@ -25,28 +24,6 @@ class MyConfigurableUsingPositionalOptions(MyConfigurable):
third = Option(str, required=False, positional=True)
class PrinterInterface():
def print(self, *args):
raise NotImplementedError()
class ConcretePrinter(PrinterInterface):
def __init__(self, prefix):
self.prefix = prefix
def print(self, *args):
return ';'.join((self.prefix, *args))
class MyServiceDependantConfigurable(Configurable):
printer = Service(
PrinterInterface,
)
def __call__(self, printer: PrinterInterface, *args):
return printer.print(*args)
def test_missing_required_option_error():
with pytest.raises(TypeError) as exc:
MyConfigurable()
@ -107,39 +84,5 @@ def test_option_resolution_order():
assert o.integer == None
def test_service_name_validator():
assert validate_service_name('foo') == 'foo'
assert validate_service_name('foo.bar') == 'foo.bar'
assert validate_service_name('Foo') == 'Foo'
assert validate_service_name('Foo.Bar') == 'Foo.Bar'
assert validate_service_name('Foo.a0') == 'Foo.a0'
with pytest.raises(ValueError):
validate_service_name('foo.0')
with pytest.raises(ValueError):
validate_service_name('0.foo')
SERVICES = Container(
printer0=ConcretePrinter(prefix='0'),
printer1=ConcretePrinter(prefix='1'),
)
def test_service_dependency():
o = MyServiceDependantConfigurable(printer='printer0')
assert o(SERVICES.get('printer0'), 'foo', 'bar') == '0;foo;bar'
assert o(SERVICES.get('printer1'), 'bar', 'baz') == '1;bar;baz'
assert o(*SERVICES.args_for(o), 'foo', 'bar') == '0;foo;bar'
def test_service_dependency_unavailable():
o = MyServiceDependantConfigurable(printer='printer2')
with pytest.raises(KeyError):
SERVICES.args_for(o)
def test_option_positional():
o = MyConfigurableUsingPositionalOptions('1', '2', '3', required_str='hello')

View File

@ -28,8 +28,6 @@ def test_define_with_decorator():
def Concrete(self, *args, **kwargs):
calls.append((args, kwargs, ))
print('handler', Concrete.handler)
assert callable(Concrete.handler)
t = Concrete('foo', bar='baz')

View File

@ -0,0 +1,96 @@
import threading
import time
import pytest
from bonobo.config import Configurable, Container, Exclusive, Service
from bonobo.config.services import validate_service_name
class PrinterInterface():
def print(self, *args):
raise NotImplementedError()
class ConcretePrinter(PrinterInterface):
def __init__(self, prefix):
self.prefix = prefix
def print(self, *args):
return ';'.join((self.prefix, *args))
SERVICES = Container(
printer0=ConcretePrinter(prefix='0'),
printer1=ConcretePrinter(prefix='1'),
)
class MyServiceDependantConfigurable(Configurable):
printer = Service(
PrinterInterface,
)
def __call__(self, printer: PrinterInterface, *args):
return printer.print(*args)
def test_service_name_validator():
assert validate_service_name('foo') == 'foo'
assert validate_service_name('foo.bar') == 'foo.bar'
assert validate_service_name('Foo') == 'Foo'
assert validate_service_name('Foo.Bar') == 'Foo.Bar'
assert validate_service_name('Foo.a0') == 'Foo.a0'
with pytest.raises(ValueError):
validate_service_name('foo.0')
with pytest.raises(ValueError):
validate_service_name('0.foo')
def test_service_dependency():
o = MyServiceDependantConfigurable(printer='printer0')
assert o(SERVICES.get('printer0'), 'foo', 'bar') == '0;foo;bar'
assert o(SERVICES.get('printer1'), 'bar', 'baz') == '1;bar;baz'
assert o(*SERVICES.args_for(o), 'foo', 'bar') == '0;foo;bar'
def test_service_dependency_unavailable():
o = MyServiceDependantConfigurable(printer='printer2')
with pytest.raises(KeyError):
SERVICES.args_for(o)
class VCR:
def __init__(self):
self.tape = []
def append(self, x):
return self.tape.append(x)
def test_exclusive():
vcr = VCR()
vcr.append('hello')
def record(prefix, vcr=vcr):
with Exclusive(vcr):
for i in range(5):
vcr.append(' '.join((prefix, str(i))))
time.sleep(0.05)
threads = [threading.Thread(target=record, args=(str(i), )) for i in range(5)]
for thread in threads:
thread.start()
time.sleep(0.01) # this is not good practice, how to test this without sleeping ?? XXX
for thread in threads:
thread.join()
assert vcr.tape == [
'hello', '0 0', '0 1', '0 2', '0 3', '0 4', '1 0', '1 1', '1 2', '1 3', '1 4', '2 0', '2 1', '2 2', '2 3',
'2 4', '3 0', '3 1', '3 2', '3 3', '3 4', '4 0', '4 1', '4 2', '4 3', '4 4'
]

View File

@ -55,3 +55,38 @@ def test_read_csv_from_file(tmpdir):
'b': 'b bar',
'c': 'c bar',
}
def test_read_csv_kwargs_output_formater(tmpdir):
fs, filename = open_fs(tmpdir), 'input.csv'
fs.open(filename, 'w').write('a,b,c\na foo,b foo,c foo\na bar,b bar,c bar')
reader = CsvReader(path=filename, delimiter=',', output_format='kwargs')
context = CapturingNodeExecutionContext(reader, services={'fs': fs})
context.start()
context.write(BEGIN, Bag(), END)
context.step()
context.stop()
assert len(context.send.mock_calls) == 2
args0, kwargs0 = context.send.call_args_list[0]
assert len(args0) == 1 and not len(kwargs0)
args1, kwargs1 = context.send.call_args_list[1]
assert len(args1) == 1 and not len(kwargs1)
_args, _kwargs = args0[0].get()
assert not len(_args) and _kwargs == {
'a': 'a foo',
'b': 'b foo',
'c': 'c foo',
}
_args, _kwargs = args1[0].get()
assert not len(_args) and _kwargs == {
'a': 'a bar',
'b': 'b bar',
'c': 'c bar',
}

View File

@ -1,10 +1,26 @@
import runpy
import sys
from unittest.mock import patch
import pkg_resources
import pytest
from bonobo import get_examples_path
from bonobo import __main__, __version__, get_examples_path
from bonobo.commands import entrypoint
def runner_entrypoint(*args):
return entrypoint(list(args))
def runner_module(*args):
with patch.object(sys, 'argv', ['bonobo', *args]):
return runpy.run_path(__main__.__file__, run_name='__main__')
all_runners = pytest.mark.parametrize('runner', [runner_entrypoint, runner_module])
def test_entrypoint():
commands = {}
@ -13,23 +29,51 @@ def test_entrypoint():
assert 'init' in commands
assert 'run' in commands
assert 'version' in commands
def test_no_command(capsys):
@all_runners
def test_no_command(runner, capsys):
with pytest.raises(SystemExit):
entrypoint([])
runner()
_, err = capsys.readouterr()
assert 'error: the following arguments are required: command' in err
def test_init():
pass # need ext dir
def test_run(capsys):
entrypoint(['run', '--quiet', get_examples_path('types/strings.py')])
@all_runners
def test_run(runner, capsys):
runner('run', '--quiet', get_examples_path('types/strings.py'))
out, err = capsys.readouterr()
out = out.split('\n')
assert out[0].startswith('Foo ')
assert out[1].startswith('Bar ')
assert out[2].startswith('Baz ')
@all_runners
def test_run_module(runner, capsys):
runner('run', '--quiet', '-m', 'bonobo.examples.types.strings')
out, err = capsys.readouterr()
out = out.split('\n')
assert out[0].startswith('Foo ')
assert out[1].startswith('Bar ')
assert out[2].startswith('Baz ')
@all_runners
def test_run_path(runner, capsys):
runner('run', '--quiet', get_examples_path('types'))
out, err = capsys.readouterr()
out = out.split('\n')
assert out[0].startswith('Foo ')
assert out[1].startswith('Bar ')
assert out[2].startswith('Baz ')
@all_runners
def test_version(runner, capsys):
runner('version')
out, err = capsys.readouterr()
out = out.strip()
assert out.startswith('bonobo ')
assert out.endswith(__version__)

View File

@ -1,4 +1,4 @@
import types
import inspect
def test_wildcard_import():
@ -10,7 +10,7 @@ def test_wildcard_import():
if name.startswith('_'):
continue
attr = getattr(bonobo, name)
if isinstance(attr, types.ModuleType):
if inspect.ismodule(attr):
continue
assert name in bonobo.__all__

View File

@ -1,4 +1,9 @@
import operator
import pytest
from bonobo.util.objects import Wrapper, get_name, ValueHolder
from bonobo.util.testing import optional_contextmanager
class foo:
@ -52,3 +57,56 @@ def test_valueholder():
assert y == x
assert y is not x
assert repr(x) == repr(y) == repr(43)
unsupported_operations = {
int: {operator.matmul},
str: {
operator.sub, operator.mul, operator.matmul, operator.floordiv, operator.truediv, operator.mod, divmod,
operator.pow, operator.lshift, operator.rshift, operator.and_, operator.xor, operator.or_
},
}
@pytest.mark.parametrize('x,y', [(5, 3), (0, 10), (0, 0), (1, 1), ('foo', 'bar'), ('', 'baz!')])
@pytest.mark.parametrize(
'operation,inplace_operation', [
(operator.add, operator.iadd),
(operator.sub, operator.isub),
(operator.mul, operator.imul),
(operator.matmul, operator.imatmul),
(operator.truediv, operator.itruediv),
(operator.floordiv, operator.ifloordiv),
(operator.mod, operator.imod),
(divmod, None),
(operator.pow, operator.ipow),
(operator.lshift, operator.ilshift),
(operator.rshift, operator.irshift),
(operator.and_, operator.iand),
(operator.xor, operator.ixor),
(operator.or_, operator.ior),
]
)
def test_valueholder_integer_operations(x, y, operation, inplace_operation):
v = ValueHolder(x)
is_supported = operation not in unsupported_operations.get(type(x), set())
isdiv = ('div' in operation.__name__) or ('mod' in operation.__name__)
# forward...
with optional_contextmanager(pytest.raises(TypeError), ignore=is_supported):
with optional_contextmanager(pytest.raises(ZeroDivisionError), ignore=y or not isdiv):
assert operation(x, y) == operation(v, y)
# backward...
with optional_contextmanager(pytest.raises(TypeError), ignore=is_supported):
with optional_contextmanager(pytest.raises(ZeroDivisionError), ignore=x or not isdiv):
assert operation(y, x) == operation(y, v)
# in place...
if inplace_operation is not None:
with optional_contextmanager(pytest.raises(TypeError), ignore=is_supported):
with optional_contextmanager(pytest.raises(ZeroDivisionError), ignore=y or not isdiv):
inplace_operation(v, y)
assert v == operation(x, y)