Merge branch 'filesystem' into 0.2

This commit is contained in:
Romain Dorgueil
2017-04-28 06:33:37 +02:00
30 changed files with 423 additions and 266 deletions

View File

@ -1,7 +1,7 @@
# This file has been auto-generated. # This file has been auto-generated.
# All changes will be lost, see Projectfile. # All changes will be lost, see Projectfile.
# #
# Updated at 2017-04-27 10:59:55.259076 # Updated at 2017-04-28 06:33:29.712011
PYTHON ?= $(shell which python) PYTHON ?= $(shell which python)
PYTHON_BASENAME ?= $(shell basename $(PYTHON)) PYTHON_BASENAME ?= $(shell basename $(PYTHON))

View File

@ -22,6 +22,7 @@ enable_features = {
install_requires = [ install_requires = [
'colorama ==0.3.9', 'colorama ==0.3.9',
'fs ==2.0.3',
'psutil ==5.2.2', 'psutil ==5.2.2',
'requests ==2.13.0', 'requests ==2.13.0',
'stevedore ==1.21.0', 'stevedore ==1.21.0',

View File

@ -7,113 +7,10 @@
"""Bonobo data-processing toolkit main module.""" """Bonobo data-processing toolkit main module."""
import sys import sys
import warnings
assert (sys.version_info >= (3, 5)), 'Python 3.5+ is required to use Bonobo.' assert (sys.version_info >= (3, 5)), 'Python 3.5+ is required to use Bonobo.'
from bonobo._api import *
from bonobo._api import __all__
from ._version import __version__ __all__ = __all__
from .basics import __all__ as __all_basics__
from .config import __all__ as __all_config__
from .execution import __all__ as __all_execution__
from .io import __all__ as __all_io__
from .strategies import __all__ as __all_strategies__
# Public API of the package: the export lists of the sub-packages, followed by
# the names defined (or re-exported) directly by this module.
__all__ = __all_basics__ + __all_config__ + __all_execution__ + __all_io__ + __all_strategies__ + [
    'Bag',
    # The trailing comma matters: without it, Python concatenates the adjacent
    # literals into 'ErrorBagGraph' and neither name is exported.
    'ErrorBag',
    'Graph',
    'Token',
    '__version__',
    'create_strategy',
    'get_examples_path',
    'run',
]
from .basics import *
from .config import *
from .execution import *
from .io import *
from .strategies import *
from .structs.bags import *
from .structs.graphs import *
from .structs.tokens import *
DEFAULT_STRATEGY = 'threadpool'
STRATEGIES = {
'naive': NaiveStrategy,
'processpool': ProcessPoolExecutorStrategy,
'threadpool': ThreadPoolExecutorStrategy,
}
def get_examples_path(*pathsegments):
    """Return, as a string, the path made of this module's directory, ``examples`` and ``pathsegments``.

    :param pathsegments: path components appended after the ``examples`` directory.
    :return: str
    """
    from os.path import dirname
    from pathlib import Path

    module_directory = dirname(__file__)
    examples_path = Path(module_directory, 'examples', *pathsegments)
    return str(examples_path)
def create_strategy(name=None):
    """
    Return a strategy for ``name``.

    A ``Strategy`` instance is returned as is. ``None`` selects ``DEFAULT_STRATEGY``. Any other value is looked up
    in ``STRATEGIES`` and the factory found there is called without arguments.

    :param name: a Strategy instance, a key of ``STRATEGIES``, or None
    :return: Strategy
    :raises RuntimeError: if ``name`` is not a key of ``STRATEGIES``
    """
    from bonobo.strategies.base import Strategy
    import logging

    if isinstance(name, Strategy):
        return name

    strategy_name = DEFAULT_STRATEGY if name is None else name
    logging.debug('Creating strategy {}...'.format(strategy_name))

    try:
        factory = STRATEGIES[strategy_name]
    except KeyError as exc:
        available = ', '.join(sorted(STRATEGIES.keys()))
        message = 'Invalid strategy {}. Available choices: {}.'.format(repr(strategy_name), available)
        raise RuntimeError(message) from exc
    else:
        # Called outside the ``try`` so that a KeyError raised by the factory itself is not masked.
        return factory()
def _is_interactive_console():
import sys
return sys.stdout.isatty()
def _is_jupyter_notebook():
try:
return get_ipython().__class__.__name__ == 'ZMQInteractiveShell'
except NameError:
return False
def run(graph, *chain, strategy=None, plugins=None, services=None):
    """Execute a graph using the given strategy, plugins and services.

    :param graph: the Graph to execute (or, deprecated, the first element of a chain).
    :param chain: deprecated; when given, ``graph`` and ``chain`` are wrapped together into a new Graph.
    :param strategy: passed to ``create_strategy`` to obtain the strategy that executes the graph.
    :param plugins: optional iterable of plugins; the console and jupyter output plugins are appended to a copy
        of it when the matching environment is detected.
    :param services: passed through to ``strategy.execute``.
    :return: the value returned by ``strategy.execute``.
    """
    if chain:
        # Imported locally so the function does not depend on a module-level ``warnings`` name still being
        # available at call time.
        import warnings
        warnings.warn('DEPRECATED. You should pass a Graph instance instead of a chain.')
        from bonobo import Graph
        graph = Graph(graph, *chain)

    strategy = create_strategy(strategy)

    # Work on a copy of the caller's plugins (the argument used to be discarded), so the caller's collection is
    # never mutated.
    plugins = list(plugins) if plugins else []

    if _is_interactive_console():
        from bonobo.ext.console import ConsoleOutputPlugin
        if ConsoleOutputPlugin not in plugins:
            plugins.append(ConsoleOutputPlugin)

    if _is_jupyter_notebook():
        from bonobo.ext.jupyter import JupyterOutputPlugin
        if JupyterOutputPlugin not in plugins:
            plugins.append(JupyterOutputPlugin)

    return strategy.execute(graph, plugins=plugins, services=services)
del sys del sys
del warnings

80
bonobo/_api.py Normal file
View File

@ -0,0 +1,80 @@
from bonobo._version import __version__
__all__ = [
'__version__',
]
from bonobo.structs import Bag, Graph
__all__ += ['Bag', 'Graph']
# Filesystem. This is a shortcut from the excellent filesystem2 library, that we make available there for convenience.
from fs import open_fs as _open_fs
def open_fs(url, *args, **kwargs):
    """Open a filesystem with ``fs.open_fs``, converting ``url`` to ``str`` first.

    :param url: filesystem url or path; ``str(url)`` is what gets passed on.
    :param args: extra positional arguments, forwarded unchanged.
    :param kwargs: extra keyword arguments, forwarded unchanged.
    :return: whatever ``fs.open_fs`` returns.
    """
    return _open_fs(str(url), *args, **kwargs)
__all__ += ['open_fs']
# Basic transformations.
from bonobo.basics import *
from bonobo.basics import __all__ as _all_basics
__all__ += _all_basics
# Execution strategies.
from bonobo.strategies import create_strategy
__all__ += ['create_strategy']
# Extract and loads from stdlib.
from bonobo.io import *
from bonobo.io import __all__ as _all_io
__all__ += _all_io
# XXX This may be belonging to the bonobo.examples package.
def get_examples_path(*pathsegments):
    """Build the path ``<directory of this module>/examples/<pathsegments...>`` and return it as a string.

    :param pathsegments: path components appended after the ``examples`` directory.
    :return: str
    """
    import os.path
    from pathlib import Path

    here = os.path.dirname(__file__)
    return str(Path(here, 'examples', *pathsegments))
__all__.append(get_examples_path.__name__)
def _is_interactive_console():
import sys
return sys.stdout.isatty()
def _is_jupyter_notebook():
try:
return get_ipython().__class__.__name__ == 'ZMQInteractiveShell'
except NameError:
return False
# @api
def run(graph, *chain, strategy=None, plugins=None, services=None):
    """Execute a graph using the given strategy, plugins and services.

    :param graph: the Graph to execute (or, deprecated, the first element of a chain).
    :param chain: deprecated; when given, ``graph`` and ``chain`` are wrapped together into a new Graph.
    :param strategy: passed to ``create_strategy`` to obtain the strategy that executes the graph.
    :param plugins: optional iterable of plugins; the console and jupyter output plugins are appended to a copy
        of it when the matching environment is detected.
    :param services: passed through to ``strategy.execute``.
    :return: the value returned by ``strategy.execute``.
    """
    if chain:
        # ``warnings`` is imported here because the deprecation path is the only user of it, and the name must
        # be in scope when the warning is emitted.
        import warnings
        warnings.warn('DEPRECATED. You should pass a Graph instance instead of a chain.')
        from bonobo import Graph
        graph = Graph(graph, *chain)

    strategy = create_strategy(strategy)

    # Work on a copy of the caller's plugins (the argument used to be discarded), so the caller's collection is
    # never mutated.
    plugins = list(plugins) if plugins else []

    if _is_interactive_console():
        from bonobo.ext.console import ConsoleOutputPlugin
        if ConsoleOutputPlugin not in plugins:
            plugins.append(ConsoleOutputPlugin)

    if _is_jupyter_notebook():
        from bonobo.ext.jupyter import JupyterOutputPlugin
        if JupyterOutputPlugin not in plugins:
            plugins.append(JupyterOutputPlugin)

    return strategy.execute(graph, plugins=plugins, services=services)
__all__.append(run.__name__)

View File

@ -19,6 +19,7 @@ __all__ = [
'noop', 'noop',
] ]
def identity(x): def identity(x):
return x return x

View File

@ -1,11 +1,12 @@
from bonobo.config.configurables import Configurable from bonobo.config.configurables import Configurable
from bonobo.config.options import Option from bonobo.config.options import Option
from bonobo.config.services import Container, Service
from bonobo.config.processors import ContextProcessor from bonobo.config.processors import ContextProcessor
from bonobo.config.services import Container, Service
__all__ = [ __all__ = [
'Configurable', 'Configurable',
'Container', 'Container',
'ContextProcessor',
'Option', 'Option',
'Service', 'Service',
] ]

View File

@ -1,16 +1,21 @@
from os.path import dirname, realpath, join
import bonobo import bonobo
from bonobo.ext.opendatasoft import OpenDataSoftAPI from bonobo.ext.opendatasoft import OpenDataSoftAPI
OUTPUT_FILENAME = realpath(join(dirname(__file__), 'coffeeshops.txt')) filename = 'coffeeshops.txt'
graph = bonobo.Graph( graph = bonobo.Graph(
OpenDataSoftAPI(dataset='liste-des-cafes-a-un-euro', netloc='opendata.paris.fr'), OpenDataSoftAPI(dataset='liste-des-cafes-a-un-euro', netloc='opendata.paris.fr'),
lambda row: '{nom_du_cafe}, {adresse}, {arrondissement} Paris, France'.format(**row), lambda row: '{nom_du_cafe}, {adresse}, {arrondissement} Paris, France'.format(**row),
bonobo.FileWriter(path=OUTPUT_FILENAME), bonobo.FileWriter(path=filename),
) )
def get_services():
    """Return the services of this example: ``fs`` is a filesystem opened on the directory of this file."""
    from os.path import dirname

    example_directory = dirname(__file__)
    return {'fs': bonobo.open_fs(example_directory)}
if __name__ == '__main__': if __name__ == '__main__':
bonobo.run(graph) bonobo.run(graph, services=get_services())
print('Import done, read {} for results.'.format(OUTPUT_FILENAME))

View File

@ -1,11 +1,10 @@
import json import json
import os
from bonobo import JsonWriter, Graph, get_examples_path
from bonobo.basics import Tee
from bonobo.ext.opendatasoft import OpenDataSoftAPI
from colorama import Fore, Style from colorama import Fore, Style
import bonobo
from bonobo.ext.opendatasoft import OpenDataSoftAPI
try: try:
import pycountry import pycountry
except ImportError as exc: except ImportError as exc:
@ -15,8 +14,6 @@ API_DATASET = 'fablabs-in-the-world'
API_NETLOC = 'datanova.laposte.fr' API_NETLOC = 'datanova.laposte.fr'
ROWS = 100 ROWS = 100
__path__ = os.path.dirname(__file__)
def _getlink(x): def _getlink(x):
return x.get('url', None) return x.get('url', None)
@ -55,15 +52,21 @@ def display(row):
print(' - {}source{}: {source}'.format(Fore.BLUE, Style.RESET_ALL, source='datanova/' + API_DATASET)) print(' - {}source{}: {source}'.format(Fore.BLUE, Style.RESET_ALL, source='datanova/' + API_DATASET))
graph = Graph( graph = bonobo.Graph(
OpenDataSoftAPI(dataset=API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris'), OpenDataSoftAPI(dataset=API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris'),
normalize, normalize,
filter_france, filter_france,
Tee(display), bonobo.Tee(display),
JsonWriter(path=get_examples_path('datasets/fablabs.txt')), bonobo.JsonWriter(path='datasets/fablabs.txt'),
) )
if __name__ == '__main__':
from bonobo import run
run(graph) def get_services():
from os.path import dirname
return {
'fs': bonobo.open_fs(dirname(__file__))
}
if __name__ == '__main__':
bonobo.run(graph, services=get_services())

View File

@ -1,11 +1,11 @@
from bonobo import CsvReader, Graph, get_examples_path import bonobo
graph = Graph( from ._services import get_services
CsvReader(path=get_examples_path('datasets/coffeeshops.txt')),
graph = bonobo.Graph(
bonobo.CsvReader(path='datasets/coffeeshops.txt'),
print, print,
) )
if __name__ == '__main__': if __name__ == '__main__':
import bonobo bonobo.run(graph, services=get_services())
bonobo.run(graph)

View File

@ -1,8 +1,13 @@
import bonobo as bb import bonobo
from ._services import get_services
url = 'https://data.toulouse-metropole.fr/explore/dataset/theatres-et-salles-de-spectacles/download?format=json&timezone=Europe/Berlin&use_labels_for_header=true' url = 'https://data.toulouse-metropole.fr/explore/dataset/theatres-et-salles-de-spectacles/download?format=json&timezone=Europe/Berlin&use_labels_for_header=true'
graph = bb.Graph(bb.JsonReader(path=url), print) graph = bonobo.Graph(
bonobo.JsonReader(path=url),
print
)
if __name__ == '__main__': if __name__ == '__main__':
bb.run(graph) bonobo.run(graph)

View File

@ -1,9 +1,3 @@
from bonobo.execution.graph import GraphExecutionContext, NodeExecutionContext, PluginExecutionContext from bonobo.execution.graph import GraphExecutionContext, NodeExecutionContext, PluginExecutionContext
__all__ = [
'GraphExecutionContext',
'NodeExecutionContext',
'PluginExecutionContext',
]

View File

@ -2,6 +2,7 @@ import sys
import traceback import traceback
from time import sleep from time import sleep
from bonobo.config import Container
from bonobo.config.processors import resolve_processors from bonobo.config.processors import resolve_processors
from bonobo.util.iterators import ensure_tuple from bonobo.util.iterators import ensure_tuple
from bonobo.util.objects import Wrapper from bonobo.util.objects import Wrapper
@ -23,9 +24,17 @@ class LoopingExecutionContext(Wrapper):
def stopped(self): def stopped(self):
return self._stopped return self._stopped
def __init__(self, wrapped, parent): def __init__(self, wrapped, parent, services=None):
super().__init__(wrapped) super().__init__(wrapped)
self.parent = parent self.parent = parent
if services:
if parent:
raise RuntimeError(
'Having services defined both in GraphExecutionContext and child NodeExecutionContext is not supported, for now.')
self.services = Container(services) if services else Container()
else:
self.services = None
self._started, self._stopped, self._context, self._stack = False, False, None, [] self._started, self._stopped, self._context, self._stack = False, False, None, []
def start(self): def start(self):
@ -34,7 +43,12 @@ class LoopingExecutionContext(Wrapper):
assert self._context is None assert self._context is None
self._started = True self._started = True
try: try:
self._context = self.parent.services.args_for(self.wrapped) if self.parent else () if self.parent:
self._context = self.parent.services.args_for(self.wrapped)
elif self.services:
self._context = self.services.args_for(self.wrapped)
else:
self._context = ()
except Exception as exc: # pylint: disable=broad-except except Exception as exc: # pylint: disable=broad-except
self.handle_error(exc, traceback.format_exc()) self.handle_error(exc, traceback.format_exc())
raise raise
@ -102,4 +116,4 @@ class LoopingExecutionContext(Wrapper):
sep='', sep='',
file=sys.stderr, file=sys.stderr,
) )
print(trace) print(trace)

View File

@ -2,12 +2,12 @@ import traceback
from queue import Empty from queue import Empty
from time import sleep from time import sleep
from bonobo.structs.bags import Bag, ErrorBag
from bonobo.constants import INHERIT_INPUT, NOT_MODIFIED from bonobo.constants import INHERIT_INPUT, NOT_MODIFIED
from bonobo.core.inputs import Input from bonobo.core.inputs import Input
from bonobo.core.statistics import WithStatistics from bonobo.core.statistics import WithStatistics
from bonobo.errors import InactiveReadableError from bonobo.errors import InactiveReadableError
from bonobo.execution.base import LoopingExecutionContext from bonobo.execution.base import LoopingExecutionContext
from bonobo.structs.bags import Bag, ErrorBag
from bonobo.util.iterators import iter_if_not_sequence from bonobo.util.iterators import iter_if_not_sequence
@ -21,8 +21,8 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext):
"""todo check if this is right, and where it is used""" """todo check if this is right, and where it is used"""
return self.input.alive and self._started and not self._stopped return self.input.alive and self._started and not self._stopped
def __init__(self, wrapped, parent): def __init__(self, wrapped, parent=None, services=None):
LoopingExecutionContext.__init__(self, wrapped, parent) LoopingExecutionContext.__init__(self, wrapped, parent=parent, services=services)
WithStatistics.__init__(self, 'in', 'out', 'err') WithStatistics.__init__(self, 'in', 'out', 'err')
self.input = Input() self.input = Input()
@ -115,9 +115,11 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext):
else: else:
self.push(_resolve(input_bag, result)) self.push(_resolve(input_bag, result))
def is_error(bag): def is_error(bag):
return isinstance(bag, ErrorBag) return isinstance(bag, ErrorBag)
def _resolve(input_bag, output): def _resolve(input_bag, output):
# NotModified means to send the input unmodified to output. # NotModified means to send the input unmodified to output.
if output is NOT_MODIFIED: if output is NOT_MODIFIED:

View File

@ -3,7 +3,7 @@ import csv
from bonobo.config import Option from bonobo.config import Option
from bonobo.config.processors import ContextProcessor, contextual from bonobo.config.processors import ContextProcessor, contextual
from bonobo.util.objects import ValueHolder from bonobo.util.objects import ValueHolder
from .file import FileReader, FileWriter, FileHandler from .file import FileHandler, FileReader, FileWriter
class CsvHandler(FileHandler): class CsvHandler(FileHandler):
@ -41,10 +41,10 @@ class CsvReader(CsvHandler, FileReader):
skip = Option(int, default=0) skip = Option(int, default=0)
@ContextProcessor @ContextProcessor
def csv_headers(self, context, file): def csv_headers(self, context, fs, file):
yield ValueHolder(self.headers) yield ValueHolder(self.headers)
def read(self, file, headers): def read(self, fs, file, headers):
reader = csv.reader(file, delimiter=self.delimiter, quotechar=self.quotechar) reader = csv.reader(file, delimiter=self.delimiter, quotechar=self.quotechar)
headers.value = headers.value or next(reader) headers.value = headers.value or next(reader)
field_count = len(headers.value) field_count = len(headers.value)
@ -55,7 +55,7 @@ class CsvReader(CsvHandler, FileReader):
for row in reader: for row in reader:
if len(row) != field_count: if len(row) != field_count:
raise ValueError('Got a line with %d fields, expecting %d.' % (len(row), field_count, )) raise ValueError('Got a line with %d fields, expecting %d.' % (len(row), field_count,))
yield dict(zip(headers.value, row)) yield dict(zip(headers.value, row))
@ -63,12 +63,12 @@ class CsvReader(CsvHandler, FileReader):
@contextual @contextual
class CsvWriter(CsvHandler, FileWriter): class CsvWriter(CsvHandler, FileWriter):
@ContextProcessor @ContextProcessor
def writer(self, context, file, lineno): def writer(self, context, fs, file, lineno):
writer = csv.writer(file, delimiter=self.delimiter, quotechar=self.quotechar) writer = csv.writer(file, delimiter=self.delimiter, quotechar=self.quotechar, lineterminator=self.eol)
headers = ValueHolder(list(self.headers) if self.headers else None) headers = ValueHolder(list(self.headers) if self.headers else None)
yield writer, headers yield writer, headers
def write(self, file, lineno, writer, headers, row): def write(self, fs, file, lineno, writer, headers, row):
if not lineno.value: if not lineno.value:
headers.value = headers.value or row.keys() headers.value = headers.value or row.keys()
writer.writerow(headers.value) writer.writerow(headers.value)

View File

@ -1,8 +1,6 @@
from io import BytesIO from bonobo.config import Option, Service
from bonobo.config import Option
from bonobo.config.processors import ContextProcessor, contextual
from bonobo.config.configurables import Configurable from bonobo.config.configurables import Configurable
from bonobo.config.processors import ContextProcessor, contextual
from bonobo.util.objects import ValueHolder from bonobo.util.objects import ValueHolder
__all__ = [ __all__ = [
@ -13,30 +11,34 @@ __all__ = [
@contextual @contextual
class FileHandler(Configurable): class FileHandler(Configurable):
""" """Abstract component factory for file-related components.
Abstract component factory for file-related components.
Args:
path (str): which path to use within the provided filesystem.
eol (str): which character to use to separate lines.
mode (str): which mode to use when opening the file.
fs (str): service name to use for filesystem.
""" """
path = Option(str, required=True) path = Option(str, required=True) # type: str
eol = Option(str, default='\n') eol = Option(str, default='\n') # type: str
mode = Option(str) mode = Option(str) # type: str
fs = Service('fs') # type: str
@ContextProcessor @ContextProcessor
def file(self, context): def file(self, context, fs):
if self.path.find('http://') == 0 or self.path.find('https://') == 0: with self.open(fs) as file:
import requests yield file
response = requests.get(self.path)
yield BytesIO(response.content)
else:
with self.open() as file:
yield file
def open(self): def open(self, fs):
return open(self.path, self.mode) return fs.open(self.path, self.mode)
class Reader(FileHandler): class Reader(FileHandler):
"""Abstract component factory for readers.
"""
def __call__(self, *args): def __call__(self, *args):
yield from self.read(*args) yield from self.read(*args)
@ -45,6 +47,9 @@ class Reader(FileHandler):
class Writer(FileHandler): class Writer(FileHandler):
"""Abstract component factory for writers.
"""
def __call__(self, *args): def __call__(self, *args):
return self.write(*args) return self.write(*args)
@ -53,23 +58,18 @@ class Writer(FileHandler):
class FileReader(Reader): class FileReader(Reader):
""" """Component factory for file-like readers.
Component factory for file-like readers.
On its own, it can be used to read a file and yield one row per line, trimming the "eol" character at the end if On its own, it can be used to read a file and yield one row per line, trimming the "eol" character at the end if
present. Extending it is usually the right way to create more specific file readers (like json, csv, etc.) present. Extending it is usually the right way to create more specific file readers (like json, csv, etc.)
""" """
mode = Option(str, default='r') mode = Option(str, default='r')
def read(self, file): def read(self, fs, file):
""" """
Write a row on the next line of given file. Write a row on the next line of given file.
Prefix is used for newlines. Prefix is used for newlines.
:param ctx:
:param row:
""" """
for line in file: for line in file:
yield line.rstrip(self.eol) yield line.rstrip(self.eol)
@ -77,28 +77,22 @@ class FileReader(Reader):
@contextual @contextual
class FileWriter(Writer): class FileWriter(Writer):
""" """Component factory for file or file-like writers.
Component factory for file or file-like writers.
On its own, it can be used to write in a file one line per row that comes into this component. Extending it is On its own, it can be used to write in a file one line per row that comes into this component. Extending it is
usually the right way to create more specific file writers (like json, csv, etc.) usually the right way to create more specific file writers (like json, csv, etc.)
""" """
mode = Option(str, default='w+') mode = Option(str, default='w+')
@ContextProcessor @ContextProcessor
def lineno(self, context, file): def lineno(self, context, fs, file):
lineno = ValueHolder(0, type=int) lineno = ValueHolder(0, type=int)
yield lineno yield lineno
def write(self, file, lineno, row): def write(self, fs, file, lineno, row):
""" """
Write a row on the next line of opened file in context. Write a row on the next line of opened file in context.
:param file fp:
:param str row:
:param str prefix:
""" """
self._write_line(file, (self.eol if lineno.value else '') + row) self._write_line(file, (self.eol if lineno.value else '') + row)
lineno.value += 1 lineno.value += 1

View File

@ -15,7 +15,7 @@ class JsonHandler:
class JsonReader(JsonHandler, FileReader): class JsonReader(JsonHandler, FileReader):
loader = staticmethod(json.load) loader = staticmethod(json.load)
def read(self, file): def read(self, fs, file):
for line in self.loader(file): for line in self.loader(file):
yield line yield line
@ -23,16 +23,16 @@ class JsonReader(JsonHandler, FileReader):
@contextual @contextual
class JsonWriter(JsonHandler, FileWriter): class JsonWriter(JsonHandler, FileWriter):
@ContextProcessor @ContextProcessor
def envelope(self, context, file, lineno): def envelope(self, context, fs, file, lineno):
file.write('[\n') file.write('[\n')
yield yield
file.write('\n]') file.write('\n]')
def write(self, file, lineno, row): def write(self, fs, file, lineno, row):
""" """
Write a json row on the next line of file pointed by ctx.file. Write a json row on the next line of file pointed by ctx.file.
:param ctx: :param ctx:
:param row: :param row:
""" """
return super().write(file, lineno, json.dumps(row)) return super().write(fs, file, lineno, json.dumps(row))

View File

@ -1,8 +1,42 @@
from bonobo.strategies.executor import ThreadPoolExecutorStrategy, ProcessPoolExecutorStrategy from bonobo.strategies.executor import ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy
from bonobo.strategies.naive import NaiveStrategy from bonobo.strategies.naive import NaiveStrategy
__all__ = [ __all__ = [
'NaiveStrategy', 'create_strategy',
'ProcessPoolExecutorStrategy',
'ThreadPoolExecutorStrategy',
] ]
STRATEGIES = {
'naive': NaiveStrategy,
'processpool': ProcessPoolExecutorStrategy,
'threadpool': ThreadPoolExecutorStrategy,
}
DEFAULT_STRATEGY = 'threadpool'
def create_strategy(name=None):
    """
    Return a strategy for ``name``.

    A ``Strategy`` instance is returned as is. ``None`` selects ``DEFAULT_STRATEGY``. Any other value is looked up
    in ``STRATEGIES`` and the factory found there is called without arguments.

    :param name: a Strategy instance, a key of ``STRATEGIES``, or None
    :return: Strategy
    :raises RuntimeError: if ``name`` is not a key of ``STRATEGIES``
    """
    from bonobo.strategies.base import Strategy
    import logging

    if isinstance(name, Strategy):
        return name

    strategy_name = DEFAULT_STRATEGY if name is None else name
    logging.debug('Creating strategy {}...'.format(strategy_name))

    try:
        factory = STRATEGIES[strategy_name]
    except KeyError as exc:
        available = ', '.join(sorted(STRATEGIES.keys()))
        message = 'Invalid strategy {}. Available choices: {}.'.format(repr(strategy_name), available)
        raise RuntimeError(message) from exc
    else:
        # Called outside the ``try`` so that a KeyError raised by the factory itself is not masked.
        return factory()

View File

@ -0,0 +1,7 @@
from bonobo.structs.bags import Bag
from bonobo.structs.graphs import Graph
from bonobo.structs.tokens import Token
__all__ = [
'Bag', 'Graph', 'Token'
]

View File

@ -4,6 +4,6 @@ from bonobo.execution.node import NodeExecutionContext
class CapturingNodeExecutionContext(NodeExecutionContext): class CapturingNodeExecutionContext(NodeExecutionContext):
def __init__(self, wrapped, parent): def __init__(self, *args, **kwargs):
super().__init__(wrapped, parent) super().__init__(*args, **kwargs)
self.send = MagicMock() self.send = MagicMock()

View File

@ -10,6 +10,7 @@ There are a few things that you should know while writing transformations graphs
:maxdepth: 2 :maxdepth: 2
purity purity
services
Third party integrations Third party integrations
:::::::::::::::::::::::: ::::::::::::::::::::::::

View File

@ -1,21 +1,35 @@
Services and dependencies (draft implementation) Services and dependencies (draft implementation)
================================================ ================================================
:Status: Draft implementation
:Stability: Alpha
:Last-Modified: 27 apr 2017
Most probably, you'll want to use external systems within your transformations. Those systems may include databases, Most probably, you'll want to use external systems within your transformations. Those systems may include databases,
apis (using http, for example), filesystems, etc. apis (using http, for example), filesystems, etc.
For a start, including those services hardcoded in your transformations can do the job, but you'll pretty soon feel You can start by hardcoding those services. That does the job, at first.
limited, for two main reasons:
* Hardcoded and tightly linked dependencies make your transformation atoms hard to test. If you're going a little further than that, you'll feel limited, for a few reasons:
* Hardcoded and tightly linked dependencies make your transformations hard to test, and hard to reuse.
* Processing data on your laptop is great, but being able to do it on different systems (or stages), in different * Processing data on your laptop is great, but being able to do it on different systems (or stages), in different
environments, is more realistic. environments, is more realistic. You probably want to configure a different database on a staging environment,
preprod environment or production system. Maybe you have similar systems for different clients and want to select
the system at runtime. Etc.
Service injection Service injection
::::::::::::::::: :::::::::::::::::
To solve this problem, we introduce a light dependency injection system that basically allows you to define named To solve this problem, we introduce a light dependency injection system. It allows you to define named dependencies in
dependencies in your transformations, and provide an implementation at runtime. your transformations, and provide an implementation at runtime.
Class-based transformations
---------------------------
To define a service dependency in a class-based transformation, use :class:`bonobo.config.Service`, a special
descriptor (and subclass of :class:`bonobo.config.Option`) that will hold the service names and act as a marker
for runtime resolution of service instances.
Let's define such a transformation: Let's define such a transformation:
@ -24,7 +38,7 @@ Let's define such a transformation:
from bonobo.config import Configurable, Service from bonobo.config import Configurable, Service
class JoinDatabaseCategories(Configurable): class JoinDatabaseCategories(Configurable):
database = Service(default='primary_sql_database') database = Service('primary_sql_database')
def __call__(self, database, row): def __call__(self, database, row):
return { return {
@ -35,28 +49,46 @@ Let's define such a transformation:
This piece of code tells bonobo that your transformation expects a service called "primary_sql_database", that will be This piece of code tells bonobo that your transformation expects a service called "primary_sql_database", that will be
injected to your calls under the parameter name "database". injected to your calls under the parameter name "database".
Function-based transformations
------------------------------
No implementation yet, but expect something similar to CBT API, maybe using a `@Service(...)` decorator.
Execution
---------
Let's see how to execute it: Let's see how to execute it:
.. code-block:: python .. code-block:: python
import bonobo import bonobo
bonobo.run( graph = bonobo.Graph(
[...extract...], *before,
JoinDatabaseCategories(), JoinDatabaseCategories(),
[...load...], *after,
services={
'primary_sql_database': my_database_service,
}
) )
if __name__ == '__main__':
bonobo.run(
graph,
services={
'primary_sql_database': my_database_service,
}
)
A dictionary, or dictionary-like, "services" named argument can be passed to the :func:`bonobo.run` helper. The
"dictionary-like" part is the real keyword here. Bonobo is not a DIC library, and won't become one. So the implementation
provided is pretty basic, and feature-less. But you can use much more evolved libraries instead of the provided
stub, and as long as it works the same (i.e. implements a dictionary-like interface), the system will use it.
Future Future and proposals
:::::: ::::::::::::::::::::
This is the first proposed implementation and it will evolve, but looks a lot like how we used bonobo ancestor in This is the first proposed implementation and it will evolve, but looks a lot like how we used bonobo ancestor in
production. production.
You can expect to see the following features pretty soon: May or may not happen, depending on discussions.
* Singleton or prototype based injection (to use spring terminology, see * Singleton or prototype based injection (to use spring terminology, see
https://www.tutorialspoint.com/spring/spring_bean_scopes.htm), allowing smart factory usage and efficient sharing of https://www.tutorialspoint.com/spring/spring_bean_scopes.htm), allowing smart factory usage and efficient sharing of
@ -64,11 +96,43 @@ You can expect to see the following features pretty soon:
* Lazily resolved parameters, eventually overridden by command line or environment, so you can for example override the * Lazily resolved parameters, eventually overridden by command line or environment, so you can for example override the
database DSN or target filesystem on command line (or with shell environment). database DSN or target filesystem on command line (or with shell environment).
* Pool based locks that ensure that only one (or n) transformations are using a given service at the same time. * Pool based locks that ensure that only one (or n) transformations are using a given service at the same time.
* Simple config implementation, using a python file for config (ex: bonobo run ... --services=services_prod.py).
* Default configuration for services, using an optional callable (`def get_services(args): ...`). Maybe tie default
configuration to graph, but not really a fan because this is unrelated to graph logic.
* Default implementation for a service in a transformation or in the descriptor. Maybe not a good idea, because it
tends to push forward multiple instances of the same thing, but we maybe...
A few ideas on how it can be implemented, from the user perspective.
.. code-block:: python
# using call
http = Service('http.client')(requests)
# using more explicit call
http = Service('http.client').set_default_impl(requests)
# using a decorator
@Service('http.client')
def http(self, services):
import requests
return requests
# as a default in a subclass of Service
class HttpService(Service):
def get_default_impl(self, services):
import requests
return requests
# ... then use it as another service
http = HttpService('http.client')
This is under heavy development, let us know what you think (slack may be a good place for this). This is under development, let us know what you think (slack may be a good place for this).
The basics already work, and you can try it.
Read more Read more
::::::::: :::::::::
todo: example code. * See https://github.com/hartym/bonobo-sqlalchemy/blob/work-in-progress/bonobo_sqlalchemy/writers.py#L19 for example usage (work in progress).

View File

@ -1,8 +1,7 @@
Installation Installation
============ ============
Install with pip Bonobo is `available on PyPI <https://pypi.python.org/pypi/bonobo>`_, and it's the easiest solution to get started.
::::::::::::::::
.. code-block:: shell-session .. code-block:: shell-session
@ -11,29 +10,61 @@ Install with pip
Install from source Install from source
::::::::::::::::::: :::::::::::::::::::
If you want to install an unreleased version, you can use git urls with pip. This is useful when using bonobo as a
dependency of your code and you want to try a forked version of bonobo with your software. You can use the git+http
string in your `requirements.txt` file. However, the best option for development on bonobo directly is not this one,
but editable installs (see below).
.. code-block:: shell-session .. code-block:: shell-session
$ pip install git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo $ pip install git+https://github.com/python-bonobo/bonobo.git@0.2#egg=bonobo
Editable install Editable install
:::::::::::::::: ::::::::::::::::
If you plan on making patches to Bonobo, you should install it as an "editable" package. If you plan on making patches to Bonobo, you should install it as an "editable" package, which is a really great pip feature.
Pip will clone your repository into a source directory and create a symlink for it in the site-packages directory of your
python interpreter.
.. code-block:: shell-session .. code-block:: shell-session
$ pip install --editable git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo $ pip install --editable git+https://github.com/python-bonobo/bonobo.git@0.2#egg=bonobo
Note: `-e` is the shorthand version of `--editable`. .. note:: You can also use the `-e` flag instead of the long version.
If you can't find the "source" directory, try running this:
.. code-block:: shell-session
$ python -c "import bonobo; print(bonobo.__path__)"
Another option is to have a "local" editable install, which means you create the clone by yourself and make an editable install
from the local clone.
.. code-block:: shell-session
  $ git clone git@github.com:python-bonobo/bonobo.git
$ cd bonobo
$ pip install --editable .
You can develop on this clone, but you probably want to add your own repository if you want to push code back and make pull requests.
I usually name the git remote for the main bonobo repository "upstream", and my own repository "origin".
.. code-block:: shell-session
$ git remote rename origin upstream
$ git remote add origin git@github.com:hartym/bonobo.git
Of course, replace my GitHub username with the one you used to fork bonobo. You should be good to go!
Windows support Windows support
::::::::::::::: :::::::::::::::
We had some people report that there are problems on the windows platform, mostly due to terminal features. We're trying There are problems on the windows platform, mostly due to the fact bonobo was not developed by experienced windows users.
to look into that but we don't have good windows experience, no windows box and not enough energy to provide serious
support there. If you have experience in this domain and you're willing to help, you're more than welcome! We're trying to look into that but energy available to provide serious support on windows is very limited.
If you have experience in this domain and you're willing to help, you're more than welcome!
.. todo:: .. todo::

View File

@ -41,8 +41,8 @@ setup(
description='Bonobo', description='Bonobo',
license='Apache License, Version 2.0', license='Apache License, Version 2.0',
install_requires=[ install_requires=[
'colorama ==0.3.9', 'psutil ==5.2.2', 'requests ==2.13.0', 'colorama ==0.3.9', 'fs ==2.0.3', 'psutil ==5.2.2',
'stevedore ==1.21.0' 'requests ==2.13.0', 'stevedore ==1.21.0'
], ],
version=version, version=version,
long_description=read('README.rst'), long_description=read('README.rst'),

View File

@ -1,15 +1,16 @@
import pytest import pytest
from bonobo import Bag, CsvReader, CsvWriter from bonobo import Bag, CsvReader, CsvWriter, open_fs
from bonobo.constants import BEGIN, END from bonobo.constants import BEGIN, END
from bonobo.execution.node import NodeExecutionContext from bonobo.execution.node import NodeExecutionContext
from bonobo.util.testing import CapturingNodeExecutionContext from bonobo.util.testing import CapturingNodeExecutionContext
def test_write_csv_to_file(tmpdir): def test_write_csv_to_file(tmpdir):
file = tmpdir.join('output.json') fs, filename = open_fs(tmpdir), 'output.csv'
writer = CsvWriter(path=str(file))
context = NodeExecutionContext(writer, None) writer = CsvWriter(path=filename)
context = NodeExecutionContext(writer, services={'fs': fs})
context.recv(BEGIN, Bag({'foo': 'bar'}), Bag({'foo': 'baz', 'ignore': 'this'}), END) context.recv(BEGIN, Bag({'foo': 'bar'}), Bag({'foo': 'baz', 'ignore': 'this'}), END)
@ -18,19 +19,19 @@ def test_write_csv_to_file(tmpdir):
context.step() context.step()
context.stop() context.stop()
assert file.read() == 'foo\nbar\nbaz\n' assert fs.open(filename).read() == 'foo\nbar\nbaz\n'
with pytest.raises(AttributeError): with pytest.raises(AttributeError):
getattr(context, 'file') getattr(context, 'file')
def test_read_csv_from_file(tmpdir): def test_read_csv_from_file(tmpdir):
file = tmpdir.join('input.csv') fs, filename = open_fs(tmpdir), 'input.csv'
file.write('a,b,c\na foo,b foo,c foo\na bar,b bar,c bar') fs.open(filename, 'w').write('a,b,c\na foo,b foo,c foo\na bar,b bar,c bar')
reader = CsvReader(path=str(file), delimiter=',') reader = CsvReader(path=filename, delimiter=',')
context = CapturingNodeExecutionContext(reader, None) context = CapturingNodeExecutionContext(reader, services={'fs': fs})
context.start() context.start()
context.recv(BEGIN, Bag(), END) context.recv(BEGIN, Bag(), END)

View File

@ -1,6 +1,6 @@
import pytest import pytest
from bonobo import Bag, FileReader, FileWriter from bonobo import Bag, FileReader, FileWriter, open_fs
from bonobo.constants import BEGIN, END from bonobo.constants import BEGIN, END
from bonobo.execution.node import NodeExecutionContext from bonobo.execution.node import NodeExecutionContext
from bonobo.util.testing import CapturingNodeExecutionContext from bonobo.util.testing import CapturingNodeExecutionContext
@ -14,10 +14,10 @@ from bonobo.util.testing import CapturingNodeExecutionContext
] ]
) )
def test_file_writer_in_context(tmpdir, lines, output): def test_file_writer_in_context(tmpdir, lines, output):
file = tmpdir.join('output.txt') fs, filename = open_fs(tmpdir), 'output.txt'
writer = FileWriter(path=str(file)) writer = FileWriter(path=filename)
context = NodeExecutionContext(writer, None) context = NodeExecutionContext(writer, services={'fs': fs})
context.start() context.start()
context.recv(BEGIN, *map(Bag, lines), END) context.recv(BEGIN, *map(Bag, lines), END)
@ -25,25 +25,27 @@ def test_file_writer_in_context(tmpdir, lines, output):
context.step() context.step()
context.stop() context.stop()
assert file.read() == output assert fs.open(filename).read() == output
def test_file_writer_out_of_context(tmpdir): def test_file_writer_out_of_context(tmpdir):
file = tmpdir.join('output.txt') fs, filename = open_fs(tmpdir), 'output.txt'
writer = FileWriter(path=str(file))
with writer.open() as fp: writer = FileWriter(path=filename)
with writer.open(fs) as fp:
fp.write('Yosh!') fp.write('Yosh!')
assert file.read() == 'Yosh!' assert fs.open(filename).read() == 'Yosh!'
def test_file_reader_in_context(tmpdir): def test_file_reader_in_context(tmpdir):
file = tmpdir.join('input.txt') fs, filename = open_fs(tmpdir), 'input.txt'
file.write('Hello\nWorld\n')
reader = FileReader(path=str(file)) fs.open(filename, 'w').write('Hello\nWorld\n')
context = CapturingNodeExecutionContext(reader, None)
reader = FileReader(path=filename)
context = CapturingNodeExecutionContext(reader, services={'fs': fs})
context.start() context.start()
context.recv(BEGIN, Bag(), END) context.recv(BEGIN, Bag(), END)

View File

@ -1,22 +1,23 @@
import pytest import pytest
from bonobo import Bag, JsonReader, JsonWriter from bonobo import Bag, JsonReader, JsonWriter, open_fs
from bonobo.constants import BEGIN, END from bonobo.constants import BEGIN, END
from bonobo.execution.node import NodeExecutionContext from bonobo.execution.node import NodeExecutionContext
from bonobo.util.testing import CapturingNodeExecutionContext from bonobo.util.testing import CapturingNodeExecutionContext
def test_write_json_to_file(tmpdir): def test_write_json_to_file(tmpdir):
file = tmpdir.join('output.json') fs, filename = open_fs(tmpdir), 'output.json'
writer = JsonWriter(path=str(file))
context = NodeExecutionContext(writer, None) writer = JsonWriter(path=filename)
context = NodeExecutionContext(writer, services={'fs': fs})
context.start() context.start()
context.recv(BEGIN, Bag({'foo': 'bar'}), END) context.recv(BEGIN, Bag({'foo': 'bar'}), END)
context.step() context.step()
context.stop() context.stop()
assert file.read() == '[\n{"foo": "bar"}\n]' assert fs.open(filename).read() == '[\n{"foo": "bar"}\n]'
with pytest.raises(AttributeError): with pytest.raises(AttributeError):
getattr(context, 'file') getattr(context, 'file')
@ -26,11 +27,11 @@ def test_write_json_to_file(tmpdir):
def test_read_json_from_file(tmpdir): def test_read_json_from_file(tmpdir):
file = tmpdir.join('input.json') fs, filename = open_fs(tmpdir), 'input.json'
file.write('[{"x": "foo"},{"x": "bar"}]') fs.open(filename, 'w').write('[{"x": "foo"},{"x": "bar"}]')
reader = JsonReader(path=str(file)) reader = JsonReader(path=filename)
context = CapturingNodeExecutionContext(reader, None) context = CapturingNodeExecutionContext(reader, services={'fs': fs})
context.start() context.start()
context.recv(BEGIN, Bag(), END) context.recv(BEGIN, Bag(), END)

View File

@ -1,6 +1,7 @@
import pytest import pytest
from bonobo import Graph, BEGIN from bonobo.constants import BEGIN
from bonobo.structs import Graph
identity = lambda x: x identity = lambda x: x

View File

@ -1,4 +1,4 @@
from bonobo import Token from bonobo.structs import Token
def test_token_repr(): def test_token_repr():

View File

@ -1,7 +1,8 @@
from bonobo import Graph, NaiveStrategy, Bag
from bonobo.config.processors import contextual from bonobo.config.processors import contextual
from bonobo.constants import BEGIN, END from bonobo.constants import BEGIN, END
from bonobo.execution.graph import GraphExecutionContext from bonobo.execution.graph import GraphExecutionContext
from bonobo.strategies import NaiveStrategy
from bonobo.structs import Bag, Graph
def generate_integers(): def generate_integers():
@ -9,7 +10,7 @@ def generate_integers():
def square(i: int) -> int: def square(i: int) -> int:
return i**2 return i ** 2
@contextual @contextual

17
tests/test_publicapi.py Normal file
View File

@ -0,0 +1,17 @@
import types
def test_wildcard_import():
    """Check that the public API of ``bonobo`` is fully declared in ``__all__``.

    Every attribute of the package whose name does not start with an
    underscore and which is not a module must be listed in
    ``bonobo.__all__``. The package must also expose a truthy
    ``__version__``.
    """
    bonobo = __import__('bonobo')

    assert bonobo.__version__

    # Names starting with an underscore are private and not checked.
    candidate_names = (name for name in dir(bonobo) if not name.startswith('_'))

    for name in candidate_names:
        # Submodules show up as package attributes but are not expected in __all__.
        if not isinstance(getattr(bonobo, name), types.ModuleType):
            assert name in bonobo.__all__