Files
bonobo/bonobo/commands/convert.py
Romain Dorgueil 92cc400fe7 [core] Refactoring IOFormats so there is one and only obvious way to send it.
This is the commit where I admit that having more than one input/output
format for readers and writers was complicating the code too much for a
very small gain, and that it would be easier to only have one way to do
it.

So that one way is now:

- Returning (or yielding) a dict if you have key-value type collections.
- Returning (or yielding) a tuple if you have a list-type collection.
- Returning (or yielding) something else otherwise, which will continue
  to work like the old "arg0" format.

IOFORMAT options have been removed in favour of a RemovedOption, which
will complain if you are still trying to set it to anything other than the
one allowed value.
2017-10-15 21:37:22 +02:00

155 lines
4.3 KiB
Python

import mimetypes
import os
import bonobo
from bonobo.commands.util.arguments import parse_variable_argument
from bonobo.util import require
from bonobo.util.iterators import tuplize
from bonobo.util.python import WorkingDirectoryModulesRegistry
# Short, user-friendly format names (also used as file extensions), mapped to the format name used as REGISTRY key.
SHORTCUTS = dict(
    csv='text/csv',
    json='application/json',
    pickle='pickle',
    plain='text/plain',
    text='text/plain',
    txt='text/plain',
)
# Known formats: each format name maps to a (reader factory, writer factory) pair, in that order.
REGISTRY = {
    _format: (_reader, _writer)
    for _format, _reader, _writer in (
        ('application/json', bonobo.JsonReader, bonobo.JsonWriter),
        ('pickle', bonobo.PickleReader, bonobo.PickleWriter),
        ('text/csv', bonobo.CsvReader, bonobo.CsvWriter),
        ('text/plain', bonobo.FileReader, bonobo.FileWriter),
    )
}
# Factory type identifiers; they are also used to build the command line option names.
READER, WRITER = 'reader', 'writer'
def resolve_factory(name, filename, factory_type, options=None):
    """
    Resolve which transformation factory to use for this filename.

    A name explicitly provided by the user has priority. Otherwise, the format is detected from the filename: first
    using mimetype detection, then using the file extension as a SHORTCUTS key whenever mimetype detection gives
    nothing usable (no result at all, or a mimetype that has no entry in REGISTRY).

    :param name: format name (a SHORTCUTS key or a REGISTRY key), or None to detect it from ``filename``.
    :param filename: name of the file to read or write, used for detection and for error reporting.
    :param factory_type: either READER or WRITER.
    :param options: optional iterable of raw option strings, each one converted to a (key, value) pair by
        ``parse_variable_argument``.
    :return: a ``(factory, options)`` tuple, where ``options`` is a dict.
    :raises RuntimeError: if no registered format could be resolved.
    :raises ValueError: if ``factory_type`` is neither READER nor WRITER.
    """
    detected = name is None
    if detected:
        name = mimetypes.guess_type(filename)[0]

    if name in SHORTCUTS:
        name = SHORTCUTS[name]

    # Only second-guess a name we detected ourselves, never one the user asked for explicitly. The mimetype database
    # is platform dependent, so a well known extension can be guessed as a mimetype we have no factory for; in that
    # case (and when nothing was guessed at all) the extension itself is tried as a shortcut. The extension is
    # lowercased because mimetype detection is case-insensitive too.
    if detected and name not in REGISTRY:
        _ext = os.path.splitext(filename)[1][1:].lower()
        if _ext in SHORTCUTS:
            name = SHORTCUTS[_ext]

    if options:
        options = dict(map(parse_variable_argument, options))
    else:
        options = dict()

    if name not in REGISTRY:
        raise RuntimeError(
            'Could not resolve {factory_type} factory for {filename} ({name}). Try providing it explicitely using -{opt} <format>.'.
            format(name=name, filename=filename, factory_type=factory_type, opt=factory_type[0])
        )

    # REGISTRY values are (reader, writer) pairs.
    if factory_type == READER:
        return REGISTRY[name][0], options
    elif factory_type == WRITER:
        return REGISTRY[name][1], options
    else:
        raise ValueError('Invalid factory type.')
@tuplize
def resolve_filters(filters):
    """
    Resolve filter specifications into the objects they designate.

    Each specification is either ``module:attribute``, in which case the module is obtained from a
    ``WorkingDirectoryModulesRegistry`` and the attribute is read from it, or a bare name, which is looked up as an
    attribute of the ``bonobo`` package.

    :param filters: iterable of specification strings, or None (treated as "no filters").
    :return: the resolved objects, in the order given (the generator is wrapped by the ``tuplize`` decorator).
    :raises AttributeError: if the designated attribute does not exist.
    """
    registry = WorkingDirectoryModulesRegistry()
    # No filters at all comes in as None (default value of an argparse "append" action), hence the fallback.
    for spec in filters or ():
        # Split on the first colon only; an absent separator means this is a bare bonobo attribute name. Not relying
        # on a ValueError here avoids masking a ValueError raised while the module is being loaded.
        mod, sep, attr = spec.partition(':')
        if sep:
            yield getattr(registry.require(mod), attr)
        else:
            yield getattr(bonobo, spec)
def execute(
    input,
    output,
    reader=None,
    reader_option=None,
    writer=None,
    writer_option=None,
    option=None,
    filter=None,
):
    """
    Build and run a graph that reads ``input``, applies the filters in order and writes to ``output``.

    The parameter names (some of which shadow builtins) are part of the interface and are kept as is.

    :param input: input filename.
    :param output: output filename, or ``'-'`` to use ``bonobo.PrettyPrinter`` instead of a resolved writer.
    :param reader: reader format name, or None to detect it from ``input``.
    :param reader_option: list of raw option strings for the reader factory only, or None.
    :param writer: writer format name, or None to detect it from ``output``.
    :param writer_option: list of raw option strings for the writer factory only, or None.
    :param option: list of raw option strings given to both the reader and the writer factories, or None.
    :param filter: list of filter specifications (see ``resolve_filters``), or None for no filter.
    :return: whatever ``bonobo.run`` returns for the graph.
    """
    shared_option = option or []

    reader_factory, reader_option = resolve_factory(reader, input, READER, shared_option + (reader_option or []))

    if output == '-':
        # NOTE(review): the '-' placeholder is still passed below as first positional argument of the
        # PrettyPrinter, like a filename would be -- confirm this is intended.
        writer_factory, writer_option = bonobo.PrettyPrinter, {}
    else:
        writer_factory, writer_option = resolve_factory(writer, output, WRITER, shared_option + (writer_option or []))

    # ``filter`` defaults to None, which cannot be iterated over: use an empty sequence instead.
    filters = resolve_filters(filter or ())

    graph = bonobo.Graph()
    graph.add_chain(
        reader_factory(input, **reader_option),
        *filters,
        writer_factory(output, **writer_option),
    )

    return bonobo.run(
        graph, services={
            'fs': bonobo.open_fs(),
        }
    )
def register(parser):
    """
    Declare the command line arguments of this command on ``parser``.

    The destinations of the declared arguments match the parameter names of ``execute``.

    :param parser: argument parser (anything providing an argparse-like ``add_argument`` method).
    :return: the ``execute`` callable.
    """
    parser.add_argument('input', help='Input filename.')
    # The console pretty printer is selected by the output name (execute() checks for an output of "-"), not by the
    # writer option, so this is where the hint belongs.
    parser.add_argument('output', help='Output filename (use - for console pretty print).')
    parser.add_argument(
        '--' + READER,
        '-r',
        help='Choose the reader factory if it cannot be detected from extension, or if detection is wrong.'
    )
    parser.add_argument(
        '--' + WRITER,
        '-w',
        help='Choose the writer factory if it cannot be detected from extension, or if detection is wrong.'
    )
    parser.add_argument(
        '--filter',
        '-f',
        dest='filter',
        action='append',
        help='Add a filter between input and output',
    )
    parser.add_argument(
        '--option',
        '-O',
        dest='option',
        action='append',
        help='Add a named option to both reader and writer factories (i.e. foo="bar").',
    )
    # Short flags are the uppercased initial of the factory type: -R for reader options, -W for writer options.
    parser.add_argument(
        '--' + READER + '-option',
        '-' + READER[0].upper(),
        dest=READER + '_option',
        action='append',
        help='Add a named option to the reader factory.',
    )
    parser.add_argument(
        '--' + WRITER + '-option',
        '-' + WRITER[0].upper(),
        dest=WRITER + '_option',
        action='append',
        help='Add a named option to the writer factory.',
    )
    return execute