[core] Still refactoring the core behaviour of bags, starting to be much simpler.
This commit is contained in:
committed by
Romain Dorgueil
parent
f18889830b
commit
9a54f7b4aa
@ -1,105 +1,34 @@
|
||||
import mimetypes
|
||||
import os
|
||||
|
||||
import bonobo
|
||||
from bonobo.commands.util.arguments import parse_variable_argument
|
||||
from bonobo.util import require
|
||||
from bonobo.util.iterators import tuplize
|
||||
from bonobo.util.python import WorkingDirectoryModulesRegistry
|
||||
|
||||
SHORTCUTS = {
|
||||
'csv': 'text/csv',
|
||||
'json': 'application/json',
|
||||
'pickle': 'pickle',
|
||||
'plain': 'text/plain',
|
||||
'text': 'text/plain',
|
||||
'txt': 'text/plain',
|
||||
}
|
||||
|
||||
REGISTRY = {
|
||||
'application/json': (bonobo.JsonReader, bonobo.JsonWriter),
|
||||
'pickle': (bonobo.PickleReader, bonobo.PickleWriter),
|
||||
'text/csv': (bonobo.CsvReader, bonobo.CsvWriter),
|
||||
'text/plain': (bonobo.FileReader, bonobo.FileWriter),
|
||||
}
|
||||
|
||||
READER = 'reader'
|
||||
WRITER = 'writer'
|
||||
|
||||
|
||||
def resolve_factory(name, filename, factory_type, options=None):
|
||||
"""
|
||||
Try to resolve which transformation factory to use for this filename. User eventually provided a name, which has
|
||||
priority, otherwise we try to detect it using the mimetype detection on filename.
|
||||
|
||||
"""
|
||||
if name is None:
|
||||
name = mimetypes.guess_type(filename)[0]
|
||||
|
||||
if name in SHORTCUTS:
|
||||
name = SHORTCUTS[name]
|
||||
|
||||
if name is None:
|
||||
_, _ext = os.path.splitext(filename)
|
||||
if _ext:
|
||||
_ext = _ext[1:]
|
||||
if _ext in SHORTCUTS:
|
||||
name = SHORTCUTS[_ext]
|
||||
|
||||
if options:
|
||||
options = dict(map(parse_variable_argument, options))
|
||||
else:
|
||||
options = dict()
|
||||
|
||||
if not name in REGISTRY:
|
||||
raise RuntimeError(
|
||||
'Could not resolve {factory_type} factory for {filename} ({name}). Try providing it explicitely using -{opt} <format>.'.
|
||||
format(name=name, filename=filename, factory_type=factory_type, opt=factory_type[0])
|
||||
)
|
||||
|
||||
if factory_type == READER:
|
||||
return REGISTRY[name][0], options
|
||||
elif factory_type == WRITER:
|
||||
return REGISTRY[name][1], options
|
||||
else:
|
||||
raise ValueError('Invalid factory type.')
|
||||
|
||||
|
||||
@tuplize
|
||||
def resolve_filters(filters):
|
||||
registry = WorkingDirectoryModulesRegistry()
|
||||
for f in filters:
|
||||
try:
|
||||
mod, attr = f.split(':', 1)
|
||||
yield getattr(registry.require(mod), attr)
|
||||
except ValueError:
|
||||
yield getattr(bonobo, f)
|
||||
from bonobo.registry import READER, WRITER, default_registry
|
||||
from bonobo.util.resolvers import _resolve_transformations, _resolve_options
|
||||
|
||||
|
||||
def execute(
|
||||
input,
|
||||
output,
|
||||
input_filename,
|
||||
output_filename,
|
||||
reader=None,
|
||||
reader_option=None,
|
||||
writer=None,
|
||||
writer_option=None,
|
||||
option=None,
|
||||
filter=None,
|
||||
transformation=None,
|
||||
):
|
||||
reader_factory, reader_option = resolve_factory(reader, input, READER, (option or []) + (reader_option or []))
|
||||
reader_factory = default_registry.get_reader_factory_for(input_filename, format=reader)
|
||||
reader_options = _resolve_options((option or []) + (reader_option or []))
|
||||
|
||||
if output == '-':
|
||||
writer_factory, writer_option = bonobo.PrettyPrinter, {}
|
||||
if output_filename == '-':
|
||||
writer_factory = bonobo.PrettyPrinter
|
||||
else:
|
||||
writer_factory, writer_option = resolve_factory(writer, output, WRITER, (option or []) + (writer_option or []))
|
||||
writer_factory = default_registry.get_writer_factory_for(output_filename, format=writer)
|
||||
writer_options = _resolve_options((option or []) + (writer_option or []))
|
||||
|
||||
filters = resolve_filters(filter)
|
||||
transformations = _resolve_transformations(transformation)
|
||||
|
||||
graph = bonobo.Graph()
|
||||
graph.add_chain(
|
||||
reader_factory(input, **reader_option),
|
||||
*filters,
|
||||
writer_factory(output, **writer_option),
|
||||
reader_factory(input_filename, **reader_options),
|
||||
*transformations,
|
||||
writer_factory(output_filename, **writer_options),
|
||||
)
|
||||
|
||||
return bonobo.run(
|
||||
@ -110,8 +39,8 @@ def execute(
|
||||
|
||||
|
||||
def register(parser):
|
||||
parser.add_argument('input', help='Input filename.')
|
||||
parser.add_argument('output', help='Output filename.')
|
||||
parser.add_argument('input-filename', help='Input filename.')
|
||||
parser.add_argument('output-filename', help='Output filename.')
|
||||
parser.add_argument(
|
||||
'--' + READER,
|
||||
'-r',
|
||||
@ -124,11 +53,11 @@ def register(parser):
|
||||
'Choose the writer factory if it cannot be detected from extension, or if detection is wrong (use - for console pretty print).'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--filter',
|
||||
'-f',
|
||||
dest='filter',
|
||||
'--transformation',
|
||||
'-t',
|
||||
dest='transformation',
|
||||
action='append',
|
||||
help='Add a filter between input and output',
|
||||
help='Add a transformation between input and output (can be used multiple times, order is preserved).',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--option',
|
||||
|
||||
@ -1,26 +0,0 @@
|
||||
import json
|
||||
|
||||
|
||||
def parse_variable_argument(arg):
|
||||
try:
|
||||
key, val = arg.split('=', 1)
|
||||
except ValueError:
|
||||
return arg, True
|
||||
|
||||
try:
|
||||
val = json.loads(val)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
return key, val
|
||||
|
||||
|
||||
def test_parse_variable_argument():
|
||||
assert parse_variable_argument('foo=bar') == ('foo', 'bar')
|
||||
assert parse_variable_argument('foo="bar"') == ('foo', 'bar')
|
||||
assert parse_variable_argument('sep=";"') == ('sep', ';')
|
||||
assert parse_variable_argument('foo') == ('foo', True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_parse_var()
|
||||
@ -3,13 +3,13 @@ from queue import Empty
|
||||
from time import sleep
|
||||
from types import GeneratorType
|
||||
|
||||
from bonobo import settings
|
||||
from bonobo.constants import INHERIT_INPUT, NOT_MODIFIED, BEGIN, END
|
||||
from bonobo.constants import NOT_MODIFIED, BEGIN, END
|
||||
from bonobo.errors import InactiveReadableError, UnrecoverableError
|
||||
from bonobo.execution.base import LoopingExecutionContext
|
||||
from bonobo.structs.bags import Bag
|
||||
from bonobo.structs.inputs import Input
|
||||
from bonobo.util import get_name, iserrorbag, isloopbackbag, isdict, istuple
|
||||
from bonobo.structs.tokens import Token
|
||||
from bonobo.util import get_name, iserrorbag, isloopbackbag
|
||||
from bonobo.util.compat import deprecated_alias
|
||||
from bonobo.util.statistics import WithStatistics
|
||||
|
||||
@ -49,7 +49,7 @@ class NodeExecutionContext(WithStatistics, LoopingExecutionContext):
|
||||
:param mixed value: message
|
||||
"""
|
||||
for message in messages:
|
||||
self.input.put(message)
|
||||
self.input.put(message if isinstance(message, (Bag, Token)) else Bag(message))
|
||||
|
||||
def write_sync(self, *messages):
|
||||
self.write(BEGIN, *messages, END)
|
||||
@ -145,21 +145,4 @@ def _resolve(input_bag, output):
|
||||
if iserrorbag(output):
|
||||
return output
|
||||
|
||||
# If it does not look like a bag, let's create one for easier manipulation
|
||||
if hasattr(output, 'apply'): # XXX TODO use isbag() ?
|
||||
# Already a bag? Check if we need to set parent.
|
||||
if INHERIT_INPUT in output.flags:
|
||||
output.set_parent(input_bag)
|
||||
return output
|
||||
|
||||
# If we're using kwargs ioformat, then a dict means kwargs.
|
||||
if settings.IOFORMAT == settings.IOFORMAT_KWARGS and isdict(output):
|
||||
return Bag(**output)
|
||||
|
||||
if istuple(output):
|
||||
if len(output) > 1 and isdict(output[-1]):
|
||||
return Bag(*output[0:-1], **output[-1])
|
||||
return Bag(*output)
|
||||
|
||||
# Either we use arg0 format, either it's "just" a value.
|
||||
return Bag(output)
|
||||
|
||||
@ -44,7 +44,7 @@ class ETLCommand(BaseCommand):
|
||||
return self.GraphType(not_implemented)
|
||||
|
||||
def get_services(self):
|
||||
return get_default_services(type(self).__file__)
|
||||
return {}
|
||||
|
||||
@property
|
||||
def logger(self):
|
||||
|
||||
@ -14,14 +14,14 @@ def path_str(path):
|
||||
|
||||
class OpenDataSoftAPI(Configurable):
|
||||
dataset = Option(str, positional=True)
|
||||
endpoint = Option(str, default='{scheme}://{netloc}{path}')
|
||||
scheme = Option(str, default='https')
|
||||
netloc = Option(str, default='data.opendatasoft.com')
|
||||
path = Option(path_str, default='/api/records/1.0/search/')
|
||||
rows = Option(int, default=500)
|
||||
endpoint = Option(str, required=False, default='{scheme}://{netloc}{path}')
|
||||
scheme = Option(str, required=False, default='https')
|
||||
netloc = Option(str, required=False, default='data.opendatasoft.com')
|
||||
path = Option(path_str, required=False, default='/api/records/1.0/search/')
|
||||
rows = Option(int, required=False, default=500)
|
||||
limit = Option(int, required=False)
|
||||
timezone = Option(str, default='Europe/Paris')
|
||||
kwargs = Option(dict, default=dict)
|
||||
timezone = Option(str, required=False, default='Europe/Paris')
|
||||
kwargs = Option(dict, required=False, default=dict)
|
||||
|
||||
@ContextProcessor
|
||||
def compute_path(self, context):
|
||||
@ -44,7 +44,11 @@ class OpenDataSoftAPI(Configurable):
|
||||
break
|
||||
|
||||
for row in records:
|
||||
yield {**row.get('fields', {}), 'geometry': row.get('geometry', {})}
|
||||
yield {
|
||||
**row.get('fields', {}),
|
||||
'geometry': row.get('geometry', {}),
|
||||
'recordid': row.get('recordid'),
|
||||
}
|
||||
|
||||
start += self.rows
|
||||
|
||||
|
||||
@ -75,24 +75,24 @@ class Cursor():
|
||||
self.item = item
|
||||
|
||||
@operation('dict')
|
||||
def dict(self, x):
|
||||
def as_dict(self, x):
|
||||
return x if isinstance(x, dict) else dict(x)
|
||||
|
||||
@operation('int')
|
||||
def int(self):
|
||||
pass
|
||||
def as_int(self, x):
|
||||
return x if isinstance(x, int) else int(x)
|
||||
|
||||
@operation('str')
|
||||
def str(self, x):
|
||||
def as_str(self, x):
|
||||
return x if isinstance(x, str) else str(x)
|
||||
|
||||
@operation('list')
|
||||
def list(self):
|
||||
pass
|
||||
def as_list(self, x):
|
||||
return x if isinstance(x, list) else list(x)
|
||||
|
||||
@operation('tuple')
|
||||
def tuple(self):
|
||||
pass
|
||||
def as_tuple(self, x):
|
||||
return x if isinstance(x, tuple) else tuple(x)
|
||||
|
||||
def __getattr__(self, item):
|
||||
"""
|
||||
@ -147,7 +147,7 @@ class Factory(Configurable):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
warnings.warn(
|
||||
__file__ +
|
||||
type(self).__name__ +
|
||||
' is experimental, API may change in the future, use it as a preview only and knowing the risks.',
|
||||
FutureWarning
|
||||
)
|
||||
@ -180,40 +180,9 @@ class Factory(Configurable):
|
||||
raise RuntimeError('Houston, we have a problem...')
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
print('factory call on', args, kwargs)
|
||||
for operation in self.operations:
|
||||
args, kwargs = operation.apply(*args, **kwargs)
|
||||
print(' ... after', operation, 'got', args, kwargs)
|
||||
return Bag(*args, **kwargs)
|
||||
|
||||
def __getitem__(self, item):
|
||||
return CURSOR_TYPES[self.default_cursor_type](self, item)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
f = Factory()
|
||||
|
||||
f[0].dict().map_keys({'foo': 'F00'})
|
||||
f['foo'].str().upper()
|
||||
|
||||
print('operations:', f.operations)
|
||||
print(f({'foo': 'bisou'}, foo='blah'))
|
||||
'''
|
||||
specs:
|
||||
|
||||
- rename keys of an input dict (in args, or kwargs) using a translation map.
|
||||
|
||||
|
||||
f = Factory()
|
||||
|
||||
f[0]
|
||||
f['xxx'] =
|
||||
|
||||
f[0].dict().get('foo.bar').move_to('foo.baz').apply(str.upper)
|
||||
f[0].get('foo.*').items().map(str.lower)
|
||||
|
||||
f['foo'].keys_map({
|
||||
'a': 'b'
|
||||
})
|
||||
|
||||
'''
|
||||
|
||||
90
bonobo/registry.py
Normal file
90
bonobo/registry.py
Normal file
@ -0,0 +1,90 @@
|
||||
import mimetypes
|
||||
|
||||
import os
|
||||
|
||||
from bonobo import JsonReader, CsvReader, PickleReader, FileReader, FileWriter, PickleWriter, CsvWriter, JsonWriter
|
||||
|
||||
FILETYPE_CSV = 'text/csv'
|
||||
FILETYPE_JSON = 'application/json'
|
||||
FILETYPE_PICKLE = 'pickle'
|
||||
FILETYPE_PLAIN = 'text/plain'
|
||||
|
||||
READER = 'reader'
|
||||
WRITER = 'writer'
|
||||
|
||||
|
||||
class Registry:
    """
    Lookup table associating file formats with the reader and writer factories that handle them.

    A format is either given explicitly (as a full identifier or as one of the short aliases), or guessed from a
    filename.

    """

    # Short names accepted in place of the format identifiers used as keys in FACTORIES.
    ALIASES = {
        'csv': FILETYPE_CSV,
        'json': FILETYPE_JSON,
        'pickle': FILETYPE_PICKLE,
        'plain': FILETYPE_PLAIN,
        'text': FILETYPE_PLAIN,
        'txt': FILETYPE_PLAIN,
    }

    # Known factories, indexed by kind (READER or WRITER), then by format identifier.
    FACTORIES = {
        READER: {
            FILETYPE_JSON: JsonReader,
            FILETYPE_CSV: CsvReader,
            FILETYPE_PICKLE: PickleReader,
            FILETYPE_PLAIN: FileReader,
        },
        WRITER: {
            FILETYPE_JSON: JsonWriter,
            FILETYPE_CSV: CsvWriter,
            FILETYPE_PICKLE: PickleWriter,
            FILETYPE_PLAIN: FileWriter,
        },
    }

    def get_factory_for(self, kind, name, *, format=None):
        """
        Find the factory of the requested kind for a filename and/or an explicit format.

        When no format is given, it is guessed from the filename: mimetype detection first, then the bare file
        extension. Aliases are resolved last, whether the format was given or guessed.

        :param kind: READER or WRITER
        :param name: filename, may be None if a format is provided
        :param format: format identifier or alias, takes precedence over filename detection
        :return: the factory registered for the resolved format
        :raises KeyError: if kind is not a known factory kind
        :raises RuntimeError: if neither name nor format is given, or if no factory matches

        """
        if kind not in self.FACTORIES:
            raise KeyError('Unknown factory kind {!r}.'.format(kind))
        candidates = self.FACTORIES[kind]

        if format is None:
            if name is None:
                raise RuntimeError('Cannot guess factory without at least a filename or a format.')

            # First attempt: mimetype detection on the filename.
            format, _encoding = mimetypes.guess_type(name)

            # Second attempt: the extension itself, without its leading dot.
            if format is None:
                extension = os.path.splitext(name)[1]
                if extension:
                    format = extension[1:]

        # Translate short names into full format identifiers, anything else is kept as is.
        format = self.ALIASES.get(format, format)

        if format is None or format not in candidates:
            raise RuntimeError(
                'Could not resolve {kind} factory for {name} ({format}).'.format(kind=kind, name=name, format=format)
            )

        return candidates[format]

    def get_reader_factory_for(self, name, *, format=None):
        """
        Find the reader factory for the provided filename, or for the provided format if one is forced.

        :param name: filename
        :param format: format
        :return: the matching reader factory

        """
        return self.get_factory_for(READER, name, format=format)

    def get_writer_factory_for(self, name, *, format=None):
        """
        Find the writer factory for the provided filename, or for the provided format if one is forced.

        :param name: filename
        :param format: format
        :return: the matching writer factory

        """
        return self.get_factory_for(WRITER, name, format=format)
|
||||
|
||||
|
||||
default_registry = Registry()
|
||||
@ -1,5 +1,6 @@
|
||||
import itertools
|
||||
|
||||
from bonobo.structs.tokens import Token
|
||||
from bonobo.constants import INHERIT_INPUT, LOOPBACK
|
||||
|
||||
__all__ = [
|
||||
@ -35,11 +36,55 @@ class Bag:
|
||||
|
||||
default_flags = ()
|
||||
|
||||
def __new__(cls, *args, _flags=None, _parent=None, **kwargs):
|
||||
# Handle the special case where we call Bag's constructor with only one bag or token as argument.
|
||||
if len(args) == 1 and len(kwargs) == 0:
|
||||
if isinstance(args[0], Bag):
|
||||
raise ValueError('Bag cannot be instanciated with a bag (for now ...).')
|
||||
|
||||
if isinstance(args[0], Token):
|
||||
return args[0]
|
||||
|
||||
# Otherwise, type will handle that for us.
|
||||
return super().__new__(cls)
|
||||
|
||||
def __init__(self, *args, _flags=None, _parent=None, **kwargs):
|
||||
self._flags = type(self).default_flags + (_flags or ())
|
||||
self._parent = _parent
|
||||
self._args = args
|
||||
self._kwargs = kwargs
|
||||
|
||||
if len(args) == 1 and len(kwargs) == 0:
|
||||
# If we only have one argument, that may be because we're using the shorthand syntax.
|
||||
mixed = args[0]
|
||||
|
||||
if isinstance(mixed, Bag):
|
||||
# Just duplicate the bag.
|
||||
self._args = mixed.args
|
||||
self._kwargs = mixed.kwargs
|
||||
elif isinstance(mixed, tuple):
|
||||
if not len(mixed):
|
||||
# Empty bag.
|
||||
self._args = ()
|
||||
self._kwargs = {}
|
||||
elif isinstance(mixed[-1], dict):
|
||||
# Args + Kwargs
|
||||
self._args = mixed[:-1]
|
||||
self._kwargs = mixed[-1]
|
||||
else:
|
||||
# Args only
|
||||
self._args = mixed
|
||||
self._kwargs = {}
|
||||
elif isinstance(mixed, dict):
|
||||
# Kwargs only
|
||||
self._args = ()
|
||||
self._kwargs = mixed
|
||||
else:
|
||||
self._args = args
|
||||
self._kwargs = {}
|
||||
|
||||
else:
|
||||
# Otherwise, lets get args/kwargs from the constructor.
|
||||
self._args = args
|
||||
self._kwargs = kwargs
|
||||
|
||||
@property
|
||||
def args(self):
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
from bonobo.util.collections import sortedlist
|
||||
from bonobo.util.iterators import ensure_tuple
|
||||
from bonobo.util.collections import sortedlist, ensure_tuple
|
||||
from bonobo.util.compat import deprecated, deprecated_alias
|
||||
from bonobo.util.inspect import (
|
||||
inspect_node,
|
||||
|
||||
@ -1,6 +1,48 @@
|
||||
import bisect
|
||||
import functools
|
||||
|
||||
|
||||
class sortedlist(list):
|
||||
def insort(self, x):
|
||||
bisect.insort(self, x)
|
||||
bisect.insort(self, x)
|
||||
|
||||
|
||||
def ensure_tuple(tuple_or_mixed):
    """
    Wrap the given value into a one-item tuple, unless it already is a tuple, in which case the very same object is
    returned.

    :param tuple_or_mixed: any value
    :return: tuple

    """
    return tuple_or_mixed if isinstance(tuple_or_mixed, tuple) else (tuple_or_mixed, )
|
||||
|
||||
|
||||
def tuplize(generator):
    """ Decorate a generator function (or any function returning an iterable)
    so that calling it returns a tuple holding everything it produced.

    >>> tuplized_lambda = tuplize(lambda: [1, 2, 3])
    >>> tuplized_lambda()
    (1, 2, 3)

    >>> @tuplize
    ... def my_generator():
    ...     yield 1
    ...     yield 2
    ...     yield 3
    ...
    >>> my_generator()
    (1, 2, 3)

    """

    def tuplized(*args, **kwargs):
        # Exhaust whatever the wrapped callable returns into a tuple.
        return tuple(generator(*args, **kwargs))

    # Carry over name, docstring and other metadata of the wrapped callable.
    return functools.wraps(generator)(tuplized)
|
||||
|
||||
@ -1,37 +0,0 @@
|
||||
""" Iterator utilities. """
|
||||
import functools
|
||||
|
||||
|
||||
def force_iterator(mixed):
|
||||
"""Sudo make me an iterator.
|
||||
|
||||
Deprecated?
|
||||
|
||||
:param mixed:
|
||||
:return: Iterator, baby.
|
||||
"""
|
||||
if isinstance(mixed, str):
|
||||
return [mixed]
|
||||
try:
|
||||
return iter(mixed)
|
||||
except TypeError:
|
||||
return [mixed] if mixed else []
|
||||
|
||||
|
||||
def ensure_tuple(tuple_or_mixed):
|
||||
if isinstance(tuple_or_mixed, tuple):
|
||||
return tuple_or_mixed
|
||||
return (tuple_or_mixed, )
|
||||
|
||||
|
||||
def tuplize(generator):
|
||||
""" Takes a generator and make it a tuple-returning function. As a side
|
||||
effect, it can also decorate any iterator-returning function to force
|
||||
return value to be a tuple.
|
||||
"""
|
||||
|
||||
@functools.wraps(generator)
|
||||
def tuplized(*args, **kwargs):
|
||||
return tuple(generator(*args, **kwargs))
|
||||
|
||||
return tuplized
|
||||
61
bonobo/util/resolvers.py
Normal file
61
bonobo/util/resolvers.py
Normal file
@ -0,0 +1,61 @@
|
||||
"""
|
||||
This module is considered private, and should only be used within bonobo.
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
import bonobo
|
||||
from bonobo.util.collections import tuplize
|
||||
from bonobo.util.python import WorkingDirectoryModulesRegistry
|
||||
|
||||
|
||||
def _parse_option(option):
    """
    Turn a 'key=val' option string into a python (key, val) pair.

    The value is decoded as JSON when possible, and kept as a raw string otherwise. An option without any '=' is
    considered a flag, and gets True as its value.

    :param option: str
    :return: tuple

    """
    key, separator, raw = option.partition('=')

    if not separator:
        # Bare flag, no value provided.
        return option, True

    try:
        return key, json.loads(raw)
    except json.JSONDecodeError:
        # Not valid JSON, keep the text untouched.
        return key, raw
|
||||
|
||||
|
||||
def _resolve_options(options=None):
    """
    Build a python dictionary out of a collection of option strings (as found, for example, on a command line).

    :param options: tuple[str]
    :return: dict

    """
    if not options:
        # Nothing provided (None or empty collection).
        return dict()

    return dict(_parse_option(option) for option in options)
|
||||
|
||||
|
||||
@tuplize
def _resolve_transformations(transformations):
    """
    Resolve a collection of strings into the matching python objects, defaulting to bonobo namespace if no package is
    provided.

    Syntax for each string is path.to.package:attribute

    None is accepted and treated as an empty collection, so that callers holding an unset option do not need any
    special casing.

    :param transformations: tuple(str), or None
    :return: tuple(object)

    """
    registry = WorkingDirectoryModulesRegistry()

    for t in transformations or ():
        mod, separator, attr = t.partition(':')

        if separator:
            # Explicit "module:attribute" reference. Errors raised while loading the module are not caught here, so
            # that they are not mistaken for a missing module part.
            yield getattr(registry.require(mod), attr)
        else:
            # No module part, look the name up in the bonobo namespace.
            yield getattr(bonobo, t)
|
||||
@ -28,9 +28,7 @@ def test_write_csv_to_file_kwargs(tmpdir, add_kwargs):
|
||||
fs, filename, services = csv_tester.get_services_for_writer(tmpdir)
|
||||
|
||||
with NodeExecutionContext(CsvWriter(filename, **add_kwargs), services=services) as context:
|
||||
context.write(BEGIN, Bag(**{'foo': 'bar'}), Bag(**{'foo': 'baz', 'ignore': 'this'}), END)
|
||||
context.step()
|
||||
context.step()
|
||||
context.write_sync({'foo': 'bar'}, {'foo': 'baz', 'ignore': 'this'})
|
||||
|
||||
with fs.open(filename) as fp:
|
||||
assert fp.read() == 'foo\nbar\nbaz\n'
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
import pytest
|
||||
|
||||
from bonobo import Bag, JsonReader, JsonWriter, settings
|
||||
from bonobo.constants import BEGIN, END
|
||||
from bonobo import JsonReader, JsonWriter, settings
|
||||
from bonobo.execution.node import NodeExecutionContext
|
||||
from bonobo.util.testing import FilesystemTester
|
||||
|
||||
@ -29,8 +28,7 @@ def test_write_json_kwargs(tmpdir, add_kwargs):
|
||||
fs, filename, services = json_tester.get_services_for_writer(tmpdir)
|
||||
|
||||
with NodeExecutionContext(JsonWriter(filename, **add_kwargs), services=services) as context:
|
||||
context.write(BEGIN, Bag(**{'foo': 'bar'}), END)
|
||||
context.step()
|
||||
context.write_sync({'foo': 'bar'})
|
||||
|
||||
with fs.open(filename) as fp:
|
||||
assert fp.read() == '[{"foo": "bar"}]'
|
||||
|
||||
@ -14,7 +14,7 @@ def test_write_pickled_dict_to_file(tmpdir):
|
||||
fs, filename, services = pickle_tester.get_services_for_writer(tmpdir)
|
||||
|
||||
with NodeExecutionContext(PickleWriter(filename), services=services) as context:
|
||||
context.write_sync(Bag({'foo': 'bar'}), Bag({'foo': 'baz', 'ignore': 'this'}))
|
||||
context.write_sync(Bag(({'foo': 'bar'}, {})), Bag(({'foo': 'baz', 'ignore': 'this'}, {})))
|
||||
|
||||
with fs.open(filename, 'rb') as fp:
|
||||
assert pickle.loads(fp.read()) == {'foo': 'bar'}
|
||||
@ -27,7 +27,7 @@ def test_read_pickled_list_from_file(tmpdir):
|
||||
fs, filename, services = pickle_tester.get_services_for_reader(tmpdir)
|
||||
|
||||
with BufferingNodeExecutionContext(PickleReader(filename), services=services) as context:
|
||||
context.write_sync(Bag())
|
||||
context.write_sync(())
|
||||
output = context.get_buffer()
|
||||
|
||||
assert len(output) == 2
|
||||
|
||||
66
tests/nodes/factory.py
Normal file
66
tests/nodes/factory.py
Normal file
@ -0,0 +1,66 @@
|
||||
from unittest import TestCase
|
||||
|
||||
import pytest
|
||||
|
||||
from bonobo import Bag
|
||||
from bonobo.nodes.factory import Factory
|
||||
from bonobo.util.testing import BufferingNodeExecutionContext
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:Factory')
class FactoryTypeTest(TestCase):
    """
    Checks type operations of Factory nodes, applied either on a positional or on a keyword argument.

    """

    def execute_node(self, node, *rows):
        """
        Run the node over the given rows, each wrapped in a Bag, and return what the buffering context collected.

        """
        with BufferingNodeExecutionContext(node) as context:
            context.write_sync(*(Bag(row) for row in rows))
        return context.get_buffer()

    def test_args_as_str(self):
        f = Factory()
        f[0].as_str().upper()

        output = self.execute_node(f, 'foo', 'bar', 'baz')

        assert len(output) == 3
        for index, expected in enumerate(('FOO', 'BAR', 'BAZ')):
            assert output[index] == expected

    def test_kwargs_as_str(self):
        f = Factory()
        f['foo'].as_str().upper()

        output = self.execute_node(f, {'foo': 'bar'}, {'foo': 'baz'})

        assert len(output) == 2
        for index, expected in enumerate(('BAR', 'BAZ')):
            assert output[index] == {'foo': expected}
|
||||
|
||||
|
||||
"""
|
||||
draft below.
|
||||
|
||||
if __name__ == '__main__':
|
||||
f = Factory()
|
||||
|
||||
f[0].dict().map_keys({'foo': 'F00'})
|
||||
|
||||
print('operations:', f.operations)
|
||||
print(f({'foo': 'bisou'}, foo='blah'))
|
||||
|
||||
specs:
|
||||
|
||||
- rename keys of an input dict (in args, or kwargs) using a translation map.
|
||||
|
||||
|
||||
f = Factory()
|
||||
|
||||
f[0]
|
||||
f['xxx'] =
|
||||
|
||||
f[0].dict().get('foo.bar').move_to('foo.baz').apply(str.upper)
|
||||
f[0].get('foo.*').items().map(str.lower)
|
||||
|
||||
f['foo'].keys_map({
|
||||
'a': 'b'
|
||||
})
|
||||
|
||||
"""
|
||||
@ -1,8 +1,10 @@
|
||||
import pickle
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from bonobo import Bag
|
||||
from bonobo.constants import INHERIT_INPUT
|
||||
from bonobo.constants import INHERIT_INPUT, BEGIN
|
||||
from bonobo.structs import Token
|
||||
|
||||
args = (
|
||||
@ -31,6 +33,32 @@ def test_basic():
|
||||
my_callable2.assert_called_once_with(*args, **kwargs)
|
||||
|
||||
|
||||
def test_constructor_empty():
    """Bags built without arguments are equal, and hold neither positional nor keyword arguments."""
    a, b = Bag(), Bag()
    assert a == b
    # Compare by value: "is" against a tuple literal relies on interpreter caching, and raises a SyntaxWarning on
    # recent python versions.
    assert isinstance(a.args, tuple)
    assert a.args == ()
    assert a.kwargs == {}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(('arg_in', 'arg_out'), (
    ((), ()),
    ({}, ()),
    (('a', 'b', 'c'), None),
))
def test_constructor_shorthand(arg_in, arg_out):
    """A bag built from a single value compares equal to arg_out, or to the input itself when arg_out is None."""
    expected = arg_in if arg_out is None else arg_out
    assert Bag(arg_in) == expected
|
||||
|
||||
|
||||
def test_constructor_kwargs_only():
    """A bag built with keyword arguments only compares equal to the matching dict."""
    bag = Bag(foo='bar')
    assert bag == {'foo': 'bar'}
|
||||
|
||||
|
||||
def test_constructor_identity():
    """Calling Bag on the BEGIN token gives back the token itself."""
    result = Bag(BEGIN)
    assert result is BEGIN
|
||||
|
||||
|
||||
def test_inherit():
|
||||
bag = Bag('a', a=1)
|
||||
bag2 = Bag.inherit('b', b=2, _parent=bag)
|
||||
|
||||
30
tests/util/test_collections.py
Normal file
30
tests/util/test_collections.py
Normal file
@ -0,0 +1,30 @@
|
||||
from bonobo.util import sortedlist, ensure_tuple
|
||||
from bonobo.util.collections import tuplize
|
||||
|
||||
|
||||
def test_sortedlist():
    """Items inserted out of order, duplicates included, are kept sorted."""
    items = sortedlist()
    for value in (2, 1, 3, 2):
        items.insort(value)
    assert items == [1, 2, 2, 3]
|
||||
|
||||
|
||||
def test_ensure_tuple():
    """Non-tuples get wrapped in a one-item tuple, tuples are given back untouched."""
    assert ensure_tuple('a') == ('a', )
    assert ensure_tuple(('a', )) == ('a', )

    # Check identity against a named object: "is" against a tuple literal relies on interpreter caching, and raises
    # a SyntaxWarning on recent python versions.
    empty = ()
    assert ensure_tuple(empty) is empty
|
||||
|
||||
|
||||
def test_tuplize():
    """tuplize works both as a decorator on generators and as a wrapper around list-returning callables."""

    @tuplize
    def some_generator():
        yield from ('c', 'b', 'a')

    assert some_generator() == ('c', 'b', 'a')

    tuplized_lambda = tuplize(lambda: [1, 2, 3])
    assert tuplized_lambda() == (1, 2, 3)
|
||||
@ -1,22 +0,0 @@
|
||||
import types
|
||||
|
||||
from bonobo.util.iterators import force_iterator
|
||||
|
||||
|
||||
def test_force_iterator_with_string():
|
||||
assert force_iterator('foo') == ['foo']
|
||||
|
||||
|
||||
def test_force_iterator_with_none():
|
||||
assert force_iterator(None) == []
|
||||
|
||||
|
||||
def test_force_iterator_with_generator():
|
||||
def generator():
|
||||
yield 'aaa'
|
||||
yield 'bbb'
|
||||
yield 'ccc'
|
||||
|
||||
iterator = force_iterator(generator())
|
||||
assert isinstance(iterator, types.GeneratorType)
|
||||
assert list(iterator) == ['aaa', 'bbb', 'ccc']
|
||||
18
tests/util/test_resolvers.py
Normal file
18
tests/util/test_resolvers.py
Normal file
@ -0,0 +1,18 @@
|
||||
import bonobo
|
||||
from bonobo.util.resolvers import _parse_option, _resolve_options, _resolve_transformations
|
||||
|
||||
|
||||
def test_parse_option():
    """Raw values, JSON-quoted values and bare flags are each parsed into the expected pair."""
    expectations = (
        ('foo=bar', ('foo', 'bar')),
        ('foo="bar"', ('foo', 'bar')),
        ('sep=";"', ('sep', ';')),
        ('foo', ('foo', True)),
    )
    for option, expected in expectations:
        assert _parse_option(option) == expected
|
||||
|
||||
|
||||
def test_resolve_options():
    """Option strings are merged into one dict, and no options at all gives an empty dict."""
    resolved = _resolve_options(('foo=bar', 'bar="baz"'))
    assert resolved == {'foo': 'bar', 'bar': 'baz'}

    nothing = _resolve_options()
    assert nothing == {}
|
||||
|
||||
|
||||
def test_resolve_transformations():
    """A name without a module part is looked up in the bonobo namespace."""
    names = ('PrettyPrinter', )
    assert _resolve_transformations(names) == (bonobo.PrettyPrinter, )
|
||||
Reference in New Issue
Block a user