diff --git a/bonobo/config/configurables.py b/bonobo/config/configurables.py index 3e6f154..7428bf8 100644 --- a/bonobo/config/configurables.py +++ b/bonobo/config/configurables.py @@ -3,7 +3,6 @@ from bonobo.util import isoption, iscontextprocessor, sortedlist __all__ = [ 'Configurable', - 'Option', ] get_creation_counter = lambda v: v._creation_counter @@ -192,11 +191,7 @@ class Configurable(metaclass=ConfigurableMeta): position += 1 def __call__(self, *args, **kwargs): - raise AbstractError( - 'You must implement the __call__ method in your configurable class {} to actually use it.'.format( - type(self).__name__ - ) - ) + raise AbstractError(self.__call__) @property def __options__(self): diff --git a/bonobo/errors.py b/bonobo/errors.py index 53d0a5d..173ce40 100644 --- a/bonobo/errors.py +++ b/bonobo/errors.py @@ -1,31 +1,4 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2012-2014 Romain Dorgueil -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - - -class AbstractError(NotImplementedError): - """Abstract error is a convenient error to declare a method as "being left as an exercise for the reader".""" - - def __init__(self, method): - super().__init__( - 'Call to abstract method {class_name}.{method_name}(...): missing implementation.'.format( - class_name=method.__self__.__name__, - method_name=method.__name__, - ) - ) +from bonobo.util import get_name class InactiveIOError(IOError): @@ -63,6 +36,18 @@ class UnrecoverableError(Exception): because you know that your transformation has no point continuing runnning after a bad event.""" +class AbstractError(UnrecoverableError, NotImplementedError): + """Abstract error is a convenient error to declare a method as "being left as an exercise for the reader".""" + + def __init__(self, method): + super().__init__( + 'Call to abstract method {class_name}.{method_name}(...): missing implementation.'.format( + class_name=get_name(method.__self__), + method_name=get_name(method), + ) + ) + + class UnrecoverableTypeError(UnrecoverableError, TypeError): pass diff --git a/docs/tutorial/1-init.rst b/docs/tutorial/1-init.rst index c4f6b65..5a1bbd9 100644 --- a/docs/tutorial/1-init.rst +++ b/docs/tutorial/1-init.rst @@ -150,8 +150,8 @@ Transformations that take input and yields nothing are also called **loaders**. different types, to work with various external systems. Please note that as a convenience mean and because the cost is marginal, most builtin `loaders` will send their -inputs to their output, so you can easily chain more than one loader, or apply more transformations after a given -loader was applied. +inputs to their output unmodified, so you can easily chain more than one loader, or apply more transformations after a +given loader. Graph Factory @@ -255,4 +255,4 @@ You now know: * How to execute a job file. * How to read the console output. 
-**Next: :doc:`2-jobs`** +**Jump to** :doc:`2-jobs` diff --git a/docs/tutorial/2-jobs.rst b/docs/tutorial/2-jobs.rst index c3a6c8b..dd7e183 100644 --- a/docs/tutorial/2-jobs.rst +++ b/docs/tutorial/2-jobs.rst @@ -1,6 +1,38 @@ Part 2: Writing ETL Jobs ======================== +What's an ETL job ? +::::::::::::::::::: + +- data flow, stream processing +- each node, first in first out +- parallelism + +Each node has input rows, each row is one call, and each call has the input row passed as ``*args``. + +Each call can have outputs, sent either using return, or yield. + +Each output row is stored internally as a tuple (or a namedtuple-like structure), and each output row must have the same structure (same number of fields, same length for tuples). + +If you yield something which is not a tuple, bonobo will create a tuple of one element. + +By default, exceptions are not fatal in bonobo. If a call raises an error, then bonobo will display the stack trace, increment the "err" counter for this node and move to the next input row. + +Some errors are fatal, though. For example, if you pass a 2-element tuple to a node that takes 3 args, bonobo will raise an UnrecoverableTypeError, and exit the current execution. + +Let's write one +::::::::::::::: + +We'll create a job to do the following: + +* Extract all the FabLabs from an open data API +* Apply a bit of formatting +* Geocode the address and normalize it, if we can +* Display it (in the next step, we'll learn about writing the result to a file). + + + + Moving forward :::::::::::::: diff --git a/docs/tutorial/3-files.rst b/docs/tutorial/3-files.rst index adcc334..5430f69 100644 --- a/docs/tutorial/3-files.rst +++ b/docs/tutorial/3-files.rst @@ -1,6 +1,16 @@ Part 3: Working with Files ========================== +* Filesystems + +* Reading files + +* Writing files + +* Writing files to S3 + +* Atomic writes ??? 
+ Moving forward :::::::::::::: diff --git a/docs/tutorial/5-packaging.rst b/docs/tutorial/5-packaging.rst index bf4537b..7362311 100644 --- a/docs/tutorial/5-packaging.rst +++ b/docs/tutorial/5-packaging.rst @@ -1,6 +1,10 @@ Part 5: Projects and Packaging ============================== +Until now, we have worked with one file managing a job. But real life is about sets of jobs working together within a project. + +Let's see how to move from the current state to a package. + Moving forward ::::::::::::::