diff --git a/Makefile b/Makefile index d804dc7..a978826 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # This file has been auto-generated. # All changes will be lost, see Projectfile. # -# Updated at 2017-01-10 23:15:21.478899 +# Updated at 2017-01-19 12:12:07.294619 PYTHON ?= $(shell which python) PYTHON_BASENAME ?= $(shell basename $(PYTHON)) diff --git a/Projectfile b/Projectfile index 5717a51..2ef816b 100644 --- a/Projectfile +++ b/Projectfile @@ -40,6 +40,7 @@ extras_require = { 'pylint >=1.6,<1.7', 'pytest >=3,<4', 'pytest-cov >=2.4,<2.5', + 'pytest-timeout >=1.2,<1.3', 'sphinx', 'sphinx_rtd_theme', 'yapf', diff --git a/README.rst b/README.rst index 35d3bc3..2d7a1f1 100644 --- a/README.rst +++ b/README.rst @@ -68,6 +68,11 @@ Version 0.2 * Threaded does not terminate anymore * More tests +Bugs: + +- KeyboardInterrupt does not work anymore. +- ThreadPool does not stop anymore. + Configuration ............. @@ -119,6 +124,7 @@ Random thoughts and things to do def execute(graph: Graph, *, strategy: ExecutionStrategy, plugins: List[Plugin]) -> Execution: pass +* Handling console. Can we use a queue, and replace stdout / stderr ? diff --git a/bonobo/__init__.py b/bonobo/__init__.py index b2ff5ef..62df11d 100644 --- a/bonobo/__init__.py +++ b/bonobo/__init__.py @@ -1,30 +1,31 @@ -""" Bonobo data-processing toolkit. +# Bonobo data-processing toolkit. +# +# Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of data +# transformations using a simple directed graph of python callables. +# +# Licensed under Apache License 2.0, read the LICENSE file in the root of the source tree. - Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of data - transformations using a simple directed graph of python callables. 
+"""Bonobo data-processing toolkit main module.""" - Read more at http://docs.bonobo-project.org/ - - Copyright 2012-2014 Romain Dorgueil - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -""" import sys import warnings assert (sys.version_info >= (3, 5)), 'Python 3.5+ is required to use Bonobo.' from ._version import __version__ +from .config import __all__ as __all_config__ +from .context import __all__ as __all_context__ +from .core import __all__ as __all_core__ +from .io import __all__ as __all_io__ +from .util import __all__ as __all_util__ + +__all__ = __all_config__ + __all_context__ + __all_core__ + __all_io__ + __all_util__ + [ + '__version__', + 'create_strategy', + 'get_examples_path', + 'run', +] + from .config import * from .context import * from .core import * @@ -40,56 +41,43 @@ STRATEGIES = { } -def run(graph, *chain, strategy=None, plugins=None): +def get_examples_path(*pathsegments): + import os + import pathlib + return str(pathlib.Path(os.path.dirname(__file__), 'examples', *pathsegments)) + + +def create_strategy(name=None): from bonobo.core.strategies.base import Strategy + import logging + + if isinstance(name, Strategy): + return name + + if name is None: + name = DEFAULT_STRATEGY + + logging.debug('Creating strategy {}...'.format(name)) + + try: + factory = STRATEGIES[name] + except KeyError as exc: + raise RuntimeError('Invalid strategy {}. 
Available choices: {}.'.format(repr(name), ', '.join( + sorted(STRATEGIES.keys())))) from exc + + return factory() + + +def run(graph, *chain, strategy=None, plugins=None): + strategy = create_strategy(strategy) if len(chain): warnings.warn('DEPRECATED. You should pass a Graph instance instead of a chain.') from bonobo import Graph graph = Graph(graph, *chain) - if not isinstance(strategy, Strategy): - if strategy is None: - strategy = DEFAULT_STRATEGY - - try: - strategy = STRATEGIES[strategy] - except KeyError as exc: - raise RuntimeError('Invalid strategy {}.'.format(repr(strategy))) from exc - - strategy = strategy() - return strategy.execute(graph, plugins=plugins) -__all__ = [ - 'Bag', - 'Configurable', - 'ContextProcessor', - 'contextual', - 'CsvReader', - 'CsvWriter', - 'FileReader', - 'FileWriter', - 'Graph', - 'JsonReader', - 'JsonWriter', - 'NOT_MODIFIED', - 'NaiveStrategy', - 'Option', - 'ProcessPoolExecutorStrategy', - 'ThreadPoolExecutorStrategy', - '__version__', - 'console_run', - 'inject', - 'jupyter_run', - 'limit', - 'log', - 'noop', - 'pprint', - 'service', - 'tee', -] - -del warnings del sys +del warnings diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index bc61ebf..6525ab6 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -1,22 +1,25 @@ import argparse +import logging from stevedore import ExtensionManager -def entrypoint(): +def entrypoint(args=None): parser = argparse.ArgumentParser() subparsers = parser.add_subparsers(dest='command') subparsers.required = True - def register_extension(ext): - parser = subparsers.add_parser(ext.name) - command = ext.plugin(parser) - parser.set_defaults(command=command) + commands = {} + def register_extension(ext, commands=commands): + try: + parser = subparsers.add_parser(ext.name) + commands[ext.name] = ext.plugin(parser) + except Exception: + logging.exception('Error while loading command {}.'.format(ext.name)) mgr = 
ExtensionManager(namespace='bonobo.commands', ) mgr.map(register_extension) - args = parser.parse_args().__dict__ - command = args.pop('command') - command(**args) + args = parser.parse_args(args).__dict__ + commands[args.pop('command')](**args) diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index c0f9e14..93f81d9 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -1,13 +1,21 @@ import argparse -from bonobo import Graph, console_run +from bonobo import Graph, run -def execute(file): +def execute(file, quiet=False): with file: code = compile(file.read(), file.name, 'exec') - context = {} + # TODO: A few special variables should be set before running the file: + # + # See: + # - https://docs.python.org/3/reference/import.html#import-mod-attrs + # - https://docs.python.org/3/library/runpy.html#runpy.run_module + context = { + '__name__': '__bonobo__', + '__file__': file.name, + } try: exec(code, context) @@ -16,14 +24,19 @@ def execute(file): graphs = dict((k, v) for k, v in context.items() if isinstance(v, Graph)) - assert len(graphs) == 1, 'Having more than one graph definition in one file is unsupported for now, but it is ' \ - 'something that will be implemented in the future. 
' + assert len(graphs) == 1, ('Having zero or more than one graph definition in one file is unsupported for now, ' + 'but it is something that will be implemented in the future.\n\nExpected: 1, got: {}.').format( + len(graphs)) name, graph = list(graphs.items())[0] - return console_run(graph) + # todo if console and not quiet, then add the console plugin + # todo when better console plugin, add it if console and just disable display + + return run(graph) def register(parser): parser.add_argument('file', type=argparse.FileType()) + parser.add_argument('--quiet', action='store_true') return execute diff --git a/bonobo/config.py b/bonobo/config.py index e0e7514..1a0f3ca 100644 --- a/bonobo/config.py +++ b/bonobo/config.py @@ -1,3 +1,8 @@ +__all__ = [ + 'Configurable', + 'Option', +] + class Option: def __init__(self, type=None, *, required=False, default=None): self.name = None diff --git a/bonobo/context/execution.py b/bonobo/context/execution.py index 1f018f8..5e94922 100644 --- a/bonobo/context/execution.py +++ b/bonobo/context/execution.py @@ -23,7 +23,7 @@ class GraphExecutionContext: @property def alive(self): - return self.started and not self.stopped + return any(node.alive for node in self.nodes) def __init__(self, graph, plugins=None): self.graph = graph diff --git a/bonobo/core/graphs.py b/bonobo/core/graphs.py index 5e0d6a1..194c6b7 100644 --- a/bonobo/core/graphs.py +++ b/bonobo/core/graphs.py @@ -26,3 +26,6 @@ class Graph: _next = self.add_node(node) self.outputs_of(_input, create=True).add(_next) _input = _next + + def __len__(self): + return len(self.nodes) diff --git a/bonobo/core/strategies/executor.py b/bonobo/core/strategies/executor.py index 6be536b..eb45ab1 100644 --- a/bonobo/core/strategies/executor.py +++ b/bonobo/core/strategies/executor.py @@ -39,11 +39,9 @@ class ExecutorStrategy(Strategy): futures.append(executor.submit(_runner)) for node_context in context.nodes: - def _runner(node_context=node_context): node_context.start() 
node_context.loop() - futures.append(executor.submit(_runner)) while context.alive: diff --git a/bonobo/examples/__init__.py b/bonobo/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/examples/datasets/__init__.py b/bonobo/examples/datasets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/read_cheap_coffeeshops_in_paris.py b/bonobo/examples/datasets/coffeeshops.py similarity index 83% rename from examples/read_cheap_coffeeshops_in_paris.py rename to bonobo/examples/datasets/coffeeshops.py index 5c8f05c..0ce910f 100644 --- a/examples/read_cheap_coffeeshops_in_paris.py +++ b/bonobo/examples/datasets/coffeeshops.py @@ -4,7 +4,7 @@ from bonobo import console_run from bonobo.ext.opendatasoft import from_opendatasoft_api from bonobo.io.file import FileWriter -OUTPUT_FILENAME = realpath(join(dirname(__file__), 'datasets/cheap_coffeeshops_in_paris.txt')) +OUTPUT_FILENAME = realpath(join(dirname(__file__), 'coffeeshops.txt')) console_run( from_opendatasoft_api( diff --git a/examples/datasets/coffeeshops.txt b/bonobo/examples/datasets/coffeeshops.txt similarity index 100% rename from examples/datasets/coffeeshops.txt rename to bonobo/examples/datasets/coffeeshops.txt diff --git a/examples/opendata_fablabs.py b/bonobo/examples/datasets/fablabs.py similarity index 89% rename from examples/opendata_fablabs.py rename to bonobo/examples/datasets/fablabs.py index 21facd1..b04e378 100644 --- a/examples/opendata_fablabs.py +++ b/bonobo/examples/datasets/fablabs.py @@ -3,7 +3,7 @@ import os from blessings import Terminal -from bonobo import tee, JsonWriter, Graph +from bonobo import Tee, JsonWriter, Graph, get_examples_path from bonobo.ext.opendatasoft import from_opendatasoft_api try: @@ -63,11 +63,11 @@ graph = Graph( ), normalize, filter_france, - tee(display), - JsonWriter(path=os.path.join(__path__, 'datasets/coffeeshops.txt')), + Tee(display), + JsonWriter(path=get_examples_path('datasets/fablabs.txt')), ) if 
__name__ == '__main__': - import bonobo + from bonobo import run - bonobo.run(graph) + run(graph) diff --git a/bonobo/examples/files/__init__.py b/bonobo/examples/files/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/examples/files/csv.py b/bonobo/examples/files/csv.py new file mode 100644 index 0000000..f3315f7 --- /dev/null +++ b/bonobo/examples/files/csv.py @@ -0,0 +1,11 @@ +from bonobo import CsvReader, Graph, get_examples_path + +graph = Graph( + CsvReader(path=get_examples_path('datasets/coffeeshops.txt')), + print, +) + +if __name__ == '__main__': + import bonobo + + bonobo.run(graph) diff --git a/examples/basics_file.py b/bonobo/examples/files/text.py similarity index 100% rename from examples/basics_file.py rename to bonobo/examples/files/text.py diff --git a/bonobo/examples/tutorials/__init__.py b/bonobo/examples/tutorials/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/examples/tutorials/tut02_01_read.py b/bonobo/examples/tutorials/tut02_01_read.py new file mode 100644 index 0000000..f99a9ee --- /dev/null +++ b/bonobo/examples/tutorials/tut02_01_read.py @@ -0,0 +1,14 @@ +import os +import pathlib + +import bonobo + +workdir = pathlib.Path(os.path.dirname(__file__)) + +graph = bonobo.Graph( + bonobo.FileReader(path=workdir.joinpath('datasets/coffeeshops.txt')), + print, +) + +if __name__ == '__main__': + bonobo.run(graph) diff --git a/examples/tutorial_basics_firststeps.py b/bonobo/examples/tutorials/tutorial_basics_firststeps.py similarity index 100% rename from examples/tutorial_basics_firststeps.py rename to bonobo/examples/tutorials/tutorial_basics_firststeps.py diff --git a/examples/tutorial_basics_summary.py b/bonobo/examples/tutorials/tutorial_basics_summary.py similarity index 100% rename from examples/tutorial_basics_summary.py rename to bonobo/examples/tutorials/tutorial_basics_summary.py diff --git a/bonobo/examples/types/__init__.py b/bonobo/examples/types/__init__.py new file mode 100644 
index 0000000..a2c0ceb --- /dev/null +++ b/bonobo/examples/types/__init__.py @@ -0,0 +1,7 @@ +from . import bags, dicts, strings + +__all__ = [ + 'bags', + 'dicts', + 'strings', +] \ No newline at end of file diff --git a/bonobo/examples/types/bags.py b/bonobo/examples/types/bags.py new file mode 100644 index 0000000..e0609bf --- /dev/null +++ b/bonobo/examples/types/bags.py @@ -0,0 +1,45 @@ +""" +Example on how to use :class:`bonobo.Bag` instances to pass flexible args/kwargs to the next callable. + +.. graphviz:: + + digraph { + rankdir = LR; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "extract()" -> "transform(...)" -> "load(...)"; + } + +""" + + +from random import randint + +from bonobo import Bag, Graph + + +def extract(): + yield Bag(topic='foo') + yield Bag(topic='bar') + yield Bag(topic='baz') + + +def transform(topic: str): + return Bag.inherit( + title=topic.title(), + rand=randint(10, 99) + ) + + +def load(topic: str, title: str, rand: int): + print('{} ({}) wait={}'.format(title, topic, rand)) + + +graph = Graph() +graph.add_chain(extract, transform, load) + +if __name__ == '__main__': + from bonobo import run + + run(graph) diff --git a/bonobo/examples/types/dicts.py b/bonobo/examples/types/dicts.py new file mode 100644 index 0000000..0e45630 --- /dev/null +++ b/bonobo/examples/types/dicts.py @@ -0,0 +1,47 @@ +""" +Example on how to use symple python dictionaries to communicate between transformations. + +.. 
graphviz:: + + digraph { + rankdir = LR; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "extract()" -> "transform(row: dict)" -> "load(row: dict)"; + } + +""" + +from random import randint + +from bonobo import Graph + + +def extract(): + yield {'topic': 'foo'} + yield {'topic': 'bar'} + yield {'topic': 'baz'} + + +def transform(row: dict): + return { + 'topic': row['topic'].title(), + 'randint': randint(10, 99), + } + + +def load(row: dict): + print(row) + + +graph = Graph( + extract, + transform, + load +) + +if __name__ == '__main__': + from bonobo import run + + run(graph) diff --git a/bonobo/examples/types/strings.py b/bonobo/examples/types/strings.py new file mode 100644 index 0000000..75cfed7 --- /dev/null +++ b/bonobo/examples/types/strings.py @@ -0,0 +1,43 @@ +""" +Example on how to use symple python strings to communicate between transformations. + +.. graphviz:: + + digraph { + rankdir = LR; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "extract()" -> "transform(s: str)" -> "load(s: str)"; + } + +""" +from random import randint + +from bonobo import Graph + + +def extract(): + yield 'foo' + yield 'bar' + yield 'baz' + + +def transform(s: str): + return '{} ({})'.format(s.title(), randint(10, 99)) + + +def load(s: str): + print(s) + + +graph = Graph( + extract, + transform, + load +) + +if __name__ == '__main__': + from bonobo import run + + run(graph) diff --git a/bonobo/ext/edgy/__init__.py b/bonobo/ext/edgy/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/ext/edgy/project/__init__.py b/bonobo/ext/edgy/project/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/ext/edgy/project/feature.py b/bonobo/ext/edgy/project/feature.py new file mode 100644 index 0000000..ef75fea --- /dev/null +++ b/bonobo/ext/edgy/project/feature.py @@ -0,0 +1,26 @@ +try: + import edgy.project +except ImportError as e: + import logging + + logging.exception('You must 
install edgy.project to use this.') + +import os + +from edgy.project.events import subscribe +from edgy.project.feature import Feature, SUPPORT_PRIORITY + + +class BonoboFeature(Feature): + requires = {'python'} + + @subscribe('edgy.project.feature.make.on_generate', priority=SUPPORT_PRIORITY) + def on_make_generate(self, event): + makefile = event.makefile + + @subscribe('edgy.project.on_start', priority=SUPPORT_PRIORITY) + def on_start(self, event): + package_path = event.setup['name'].replace('.', os.sep) + + for file in ('example_graph'): + self.render_file(os.path.join(package_path, file + '.py'), os.path.join('tornado', file + '.py.j2')) diff --git a/bonobo/io/csv.py b/bonobo/io/csv.py index 431fc94..175737f 100644 --- a/bonobo/io/csv.py +++ b/bonobo/io/csv.py @@ -1,6 +1,7 @@ import csv -from bonobo import Option, ContextProcessor, contextual +from bonobo.config import Option +from bonobo.context import ContextProcessor, contextual from bonobo.util.objects import ValueHolder from .file import FileReader, FileWriter, FileHandler diff --git a/bonobo/io/json.py b/bonobo/io/json.py index 9c50932..04a3a0a 100644 --- a/bonobo/io/json.py +++ b/bonobo/io/json.py @@ -1,6 +1,6 @@ import json -from bonobo import ContextProcessor, contextual +from bonobo.context import ContextProcessor, contextual from .file import FileWriter, FileReader __all__ = ['JsonWriter', ] diff --git a/bonobo/util/__init__.py b/bonobo/util/__init__.py index 69fdebc..5a2ee26 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -9,14 +9,14 @@ from .helpers import console_run, jupyter_run from .tokens import NOT_MODIFIED __all__ = [ + 'Limit', 'NOT_MODIFIED', + 'PrettyPrint', + 'Tee', 'console_run', 'jupyter_run', - 'limit', - 'log', 'noop', 'pprint', - 'tee', ] @@ -24,7 +24,7 @@ def identity(x): return x -def limit(n=10): +def Limit(n=10): i = 0 def _limit(*args, **kwargs): @@ -37,7 +37,7 @@ def limit(n=10): return _limit -def tee(f): +def Tee(f): @functools.wraps(f) def 
wrapped(*args, **kwargs): nonlocal f @@ -47,10 +47,10 @@ def tee(f): return wrapped -log = tee(_pprint) +pprint = Tee(_pprint) -def pprint(title_keys=('title', 'name', 'id'), print_values=True, sort=True): +def PrettyPrint(title_keys=('title', 'name', 'id'), print_values=True, sort=True): term = blessings.Terminal() def _pprint(*args, **kwargs): @@ -78,6 +78,7 @@ def pprint(title_keys=('title', 'name', 'id'), print_values=True, sort=True): ''' + Old code from rdc.etl def writehr(self, label=None): width = t.width or 80 @@ -113,4 +114,4 @@ def pprint(title_keys=('title', 'name', 'id'), print_values=True, sort=True): def noop(*args, **kwargs): # pylint: disable=unused-argument - pass + return NOT_MODIFIED diff --git a/docs/_templates/index.html b/docs/_templates/index.html index 0c3bbed..60fdff4 100644 --- a/docs/_templates/index.html +++ b/docs/_templates/index.html @@ -38,14 +38,13 @@
|
- {% trans %}First steps{% endtrans %} {% trans %}First steps{% endtrans %} |
- {%- if hasdoc('search') %}
- {% trans %}
- Search{% endtrans %} {% trans %}
+ Search{% endtrans %} |
|
- {% trans %}
+ {% trans %}
Contribute{% endtrans %} |
diff --git a/docs/conf.py b/docs/conf.py
index 68e9d7a..c7970e5 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -181,4 +181,7 @@ epub_copyright = copyright
epub_exclude_files = ['search.html']
# Example configuration for intersphinx: refer to the Python standard library.
-intersphinx_mapping = {'https://docs.python.org/': None}
+intersphinx_mapping = {
+ 'python': ('https://docs.python.org/3', None)
+}
+
diff --git a/docs/contribute/index.rst b/docs/contribute/index.rst
new file mode 100644
index 0000000..fa46d0e
--- /dev/null
+++ b/docs/contribute/index.rst
@@ -0,0 +1,18 @@
+Contributing
+============
+
+Contributing to bonobo is simple. Although we don't have a complete guide on this topic for now, the best way is to fork
+the github repository and send pull requests.
+
+Keep the following points in mind:
+
+* Although we will ask for 100% backward compatibility starting from 1.0 (following semantic versioning principles),
+ pre-1.0 versions should do their best to keep compatibility between versions. When in doubt, open a github issue
+ to discuss things.
+* The core should stay as light as possible.
+* Coding standards are enforced using yapf. That means that you can code the way you want, we just ask you to run
+ `make format` before committing your changes so everybody follows the same conventions.
+* The general rule for anything you're not sure about is "open a github issue to discuss the point".
+* A more formal proposal process will come the day we feel the need for it.
+
+A very drafty roadmap is available in the readme.
\ No newline at end of file
diff --git a/docs/guide/ext/docker.rst b/docs/guide/ext/docker.rst
new file mode 100644
index 0000000..5937c02
--- /dev/null
+++ b/docs/guide/ext/docker.rst
@@ -0,0 +1,14 @@
+Bonobo with Docker
+==================
+
+.. todo:: The `bonobo-docker` package is at a very alpha stage, and things will change. This section is here to give a
+ brief overview but is neither complete nor definitive.
+
+Installation
+::::::::::::
+
+Overview
+::::::::
+
+Details
+:::::::
diff --git a/docs/guide/ext/jupyter.rst b/docs/guide/ext/jupyter.rst
new file mode 100644
index 0000000..98efa8c
--- /dev/null
+++ b/docs/guide/ext/jupyter.rst
@@ -0,0 +1,11 @@
+Bonobo with Jupyter
+===================
+
+Installation
+::::::::::::
+
+Overview
+::::::::
+
+Details
+:::::::
diff --git a/docs/guide/crawlers.rst b/docs/guide/ext/selenium.rst
similarity index 72%
rename from docs/guide/crawlers.rst
rename to docs/guide/ext/selenium.rst
index 7a9a181..e588cd6 100644
--- a/docs/guide/crawlers.rst
+++ b/docs/guide/ext/selenium.rst
@@ -1,8 +1,9 @@
-Web crawlers with Bonobo
-========================
+Bonobo with Selenium
+====================
+
+.. todo:: The `bonobo-selenium` package is at a very alpha stage, and things will change. This section is here to give a
+ brief overview but is neither complete nor definitive.
-.. todo:: Bonobo-Selenium is at a very alpha stage, and things will change. This section is here to give a brief
- overview but is neither complete nor definitive.
Writing web crawlers with Bonobo and Selenium is easy.
@@ -31,3 +32,11 @@ Where each step would do the following:
* `details()` extract the data you're interested in.
* ... and the writer saves it somewhere.
+Installation
+::::::::::::
+
+Overview
+::::::::
+
+Details
+:::::::
diff --git a/docs/guide/ext/sqlalchemy.rst b/docs/guide/ext/sqlalchemy.rst
new file mode 100644
index 0000000..0f9c549
--- /dev/null
+++ b/docs/guide/ext/sqlalchemy.rst
@@ -0,0 +1,15 @@
+Bonobo with SQLAlchemy
+======================
+
+.. todo:: The `bonobo-sqlalchemy` package is at a very alpha stage, and things will change. This section is here to
+ give a brief overview but is neither complete nor definitive.
+
+
+Installation
+::::::::::::
+
+Overview
+::::::::
+
+Details
+:::::::
diff --git a/docs/guide/index.rst b/docs/guide/index.rst
index 99ae56f..23cff3c 100644
--- a/docs/guide/index.rst
+++ b/docs/guide/index.rst
@@ -1,8 +1,25 @@
Guides
======
+Concepts and best practices
+:::::::::::::::::::::::::::
+
+There are a few things that you should know while writing transformation graphs with bonobo.
+
.. toctree::
:maxdepth: 2
purity
- crawlers
+
+Third party integrations
+::::::::::::::::::::::::
+
+There are a few **bonobo** extensions that ease the use of the library with third party tools. Each integration is
+available as an optional extra dependency, and the maturity stage of each extension varies.
+
+.. toctree::
+ :maxdepth: 2
+
+ ext/docker
+ ext/selenium
+ ext/sqlalchemy
diff --git a/docs/guide/purity.rst b/docs/guide/purity.rst
index 1995284..cf9d47f 100644
--- a/docs/guide/purity.rst
+++ b/docs/guide/purity.rst
@@ -1,5 +1,5 @@
-Pure components and space complexity
-====================================
+Pure transformations
+====================
The nature of components, and how the data flow from one to another, make them not so easy to write correctly.
Hopefully, with a few hints, you will be able to understand why and how they should be written.
@@ -14,7 +14,7 @@ returning) them. Numeric values, strings and tuples being immutable in python, m
type will already return a different instance.
Numbers
-=======
+:::::::
You can't be wrong with numbers. All of the following are correct.
@@ -45,7 +45,7 @@ You can't be wrong with numbers. All of the following are correct.
The same is true with other numeric types, so don't be shy. Operate like crazy, my friend.
Tuples
-======
+::::::
Tuples are immutable, so you risk nothing.
@@ -63,12 +63,12 @@ Tuples are immutable, so you risk nothing.
return t
Strings
-=======
+:::::::
You know the drill, strings are immutable, blablabla ... Examples left as an exercise for the reader.
Dicts
-=====
+:::::
So, now it gets interesting. Dicts are mutable. It means that you can mess things up badly here if you're not cautious.
diff --git a/docs/index.rst b/docs/index.rst
index 081b47d..f2ce068 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -8,6 +8,7 @@ Bonobo
tutorial/index
guide/index
reference/index
+ contribute/index
genindex
modindex
diff --git a/docs/reference/api.rst b/docs/reference/api.rst
new file mode 100644
index 0000000..13376ac
--- /dev/null
+++ b/docs/reference/api.rst
@@ -0,0 +1,56 @@
+Public API
+==========
+
+All the "public API" functions, classes and other callables are available under the root :mod:`bonobo` package, even if
+they are documented within their sub-namespace, for convenience.
+
+.. automodule:: bonobo
+ :members: create_strategy, get_examples_path, run
+ :undoc-members:
+ :show-inheritance:
+
+Config
+------
+
+.. automodule:: bonobo.config
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
+Context
+-------
+
+.. automodule:: bonobo.context
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
+Core
+----
+
+.. automodule:: bonobo.core
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
+IO
+--
+
+.. automodule:: bonobo.io
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
+Util
+----
+
+.. automodule:: bonobo.util
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
diff --git a/docs/reference/bonobo.compat.rst b/docs/reference/bonobo.compat.rst
deleted file mode 100644
index 83581bc..0000000
--- a/docs/reference/bonobo.compat.rst
+++ /dev/null
@@ -1,22 +0,0 @@
-bonobo.compat package
-=====================
-
-Submodules
-----------
-
-bonobo.compat.pandas module
----------------------------
-
-.. automodule:: bonobo.compat.pandas
- :members:
- :undoc-members:
- :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: bonobo.compat
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/docs/reference/bonobo.core.rst b/docs/reference/bonobo.core.rst
deleted file mode 100644
index 247e883..0000000
--- a/docs/reference/bonobo.core.rst
+++ /dev/null
@@ -1,85 +0,0 @@
-bonobo.core package
-===================
-
-Subpackages
------------
-
-.. toctree::
-
- bonobo.core.strategies
-
-Submodules
-----------
-
-bonobo.core.bags module
------------------------
-
-.. automodule:: bonobo.core.bags
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.core.contexts module
----------------------------
-
-.. automodule:: bonobo.core.contexts
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.core.errors module
--------------------------
-
-.. automodule:: bonobo.core.errors
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.core.graphs module
--------------------------
-
-.. automodule:: bonobo.core.graphs
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.core.inputs module
--------------------------
-
-.. automodule:: bonobo.core.inputs
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.core.plugins module
---------------------------
-
-.. automodule:: bonobo.core.plugins
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.core.services module
----------------------------
-
-.. automodule:: bonobo.core.services
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.core.stats module
-------------------------
-
-.. automodule:: bonobo.core.stats
- :members:
- :undoc-members:
- :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: bonobo.core
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/docs/reference/bonobo.core.strategies.rst b/docs/reference/bonobo.core.strategies.rst
deleted file mode 100644
index 0dfd138..0000000
--- a/docs/reference/bonobo.core.strategies.rst
+++ /dev/null
@@ -1,38 +0,0 @@
-bonobo.core.strategies package
-==============================
-
-Submodules
-----------
-
-bonobo.core.strategies.base module
-----------------------------------
-
-.. automodule:: bonobo.core.strategies.base
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.core.strategies.executor module
---------------------------------------
-
-.. automodule:: bonobo.core.strategies.executor
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.core.strategies.naive module
------------------------------------
-
-.. automodule:: bonobo.core.strategies.naive
- :members:
- :undoc-members:
- :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: bonobo.core.strategies
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/docs/reference/bonobo.io.rst b/docs/reference/bonobo.io.rst
deleted file mode 100644
index d930d25..0000000
--- a/docs/reference/bonobo.io.rst
+++ /dev/null
@@ -1,30 +0,0 @@
-bonobo.io package
-=================
-
-Submodules
-----------
-
-bonobo.io.file module
----------------------
-
-.. automodule:: bonobo.io.file
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.io.json module
----------------------
-
-.. automodule:: bonobo.io.json
- :members:
- :undoc-members:
- :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: bonobo.io
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/docs/reference/bonobo.rst b/docs/reference/bonobo.rst
deleted file mode 100644
index 4785ea3..0000000
--- a/docs/reference/bonobo.rst
+++ /dev/null
@@ -1,21 +0,0 @@
-bonobo package
-==============
-
-Subpackages
------------
-
-.. toctree::
-
- bonobo.compat
- bonobo.core
- bonobo.ext
- bonobo.io
- bonobo.util
-
-Module contents
----------------
-
-.. automodule:: bonobo
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/docs/reference/bonobo.util.rst b/docs/reference/bonobo.util.rst
deleted file mode 100644
index 4d73e2c..0000000
--- a/docs/reference/bonobo.util.rst
+++ /dev/null
@@ -1,62 +0,0 @@
-bonobo.util package
-===================
-
-Submodules
-----------
-
-bonobo.util.compat module
--------------------------
-
-.. automodule:: bonobo.util.compat
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.util.helpers module
---------------------------
-
-.. automodule:: bonobo.util.helpers
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.util.iterators module
-----------------------------
-
-.. automodule:: bonobo.util.iterators
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.util.lifecycle module
-----------------------------
-
-.. automodule:: bonobo.util.lifecycle
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.util.time module
------------------------
-
-.. automodule:: bonobo.util.time
- :members:
- :undoc-members:
- :show-inheritance:
-
-bonobo.util.tokens module
--------------------------
-
-.. automodule:: bonobo.util.tokens
- :members:
- :undoc-members:
- :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: bonobo.util
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/docs/reference/commands.rst b/docs/reference/commands.rst
new file mode 100644
index 0000000..8d79fb6
--- /dev/null
+++ b/docs/reference/commands.rst
@@ -0,0 +1,33 @@
+Commands Reference
+==================
+
+Bonobo Init
+:::::::::::
+
+Create an empty project, ready to use bonobo.
+
+Syntax: ``bonobo init``
+
+Requires ``edgy.project``.
+
+
+Bonobo Run
+::::::::::
+
+Run a transformation graph.
+
+Syntax: ``bonobo run [-c cmd | -m mod | file | -] [arg]``
+
+.. todo:: implement -m, check if -c is of any use and if yes, implement it too. Implement args, too.
+
+Bonobo RunC
+:::::::::::
+
+Run a transformation graph in a docker container.
+
+Syntax: ``bonobo runc [-c cmd | -m mod | file | -] [arg]``
+
+.. todo:: implement -m, check if -c is of any use and if yes, implement it too. Implement args, too.
+
+Requires ``bonobo-docker``; install it with the ``docker`` extra: ``pip install bonobo[docker]``.
+
diff --git a/docs/reference/examples.rst b/docs/reference/examples.rst
new file mode 100644
index 0000000..bddbe8b
--- /dev/null
+++ b/docs/reference/examples.rst
@@ -0,0 +1,36 @@
+Examples
+========
+
+There are a few examples bundled with **bonobo**. You'll find them under the :mod:`bonobo.examples` package.
+
+Types
+:::::
+
+bonobo.examples.types.strings
+-----------------------------
+
+.. automodule:: bonobo.examples.types.strings
+ :members: graph, extract, transform, load
+ :undoc-members:
+ :show-inheritance:
+
+
+bonobo.examples.types.dicts
+---------------------------
+
+.. automodule:: bonobo.examples.types.dicts
+ :members: graph, extract, transform, load
+ :undoc-members:
+ :show-inheritance:
+
+
+bonobo.examples.types.bags
+--------------------------
+
+.. automodule:: bonobo.examples.types.bags
+ :members: graph, extract, transform, load
+ :undoc-members:
+ :show-inheritance:
+
+
+
diff --git a/docs/reference/index.rst b/docs/reference/index.rst
index f9fa75c..56bddab 100644
--- a/docs/reference/index.rst
+++ b/docs/reference/index.rst
@@ -6,8 +6,5 @@ References
.. toctree::
:maxdepth: 4
- bonobo
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
+ commands
+ api
diff --git a/docs/tutorial/basics.rst b/docs/tutorial/basics.rst
deleted file mode 100644
index 3b1b989..0000000
--- a/docs/tutorial/basics.rst
+++ /dev/null
@@ -1,161 +0,0 @@
-Basic concepts
-==============
-
-To begin with Bonobo, you need to install it in a working python 3.5+ environment:
-
-.. code-block:: shell-session
-
- $ pip install bonobo
-
-See :doc:`/install` for more options.
-
-Let's write a first data transformation
-:::::::::::::::::::::::::::::::::::::::
-
-We'll start with the most simple components we can.
-
-In **Bonobo**, a component is a plain old python callable, not more, not less. Let's write one that takes a string and
-uppercase it.
-
-.. code-block:: python
-
- def uppercase(x: str):
- return x.upper()
-
-Pretty straightforward.
-
-You could even use :func:`str.upper` directly instead of writing a wrapper, as a type's method (unbound) will take an
-instance of this type as its first parameter (what you'd call `self` in your method).
-
-The type annotations written here are not used, but can make your code much more readable, and may very well be used as
-validators in the future.
-
-Let's write two more components: a generator to produce the data to be transformed, and something that outputs it,
-because, yeah, feedback is cool.
-
-.. code-block:: python
-
- def generate_data():
- yield 'foo'
- yield 'bar'
- yield 'baz'
-
- def output(x: str):
- print(x)
-
-Once again, you could have skipped the pain of writing this and simply use an iterable to generate the data and the
-builtin :func:`print` for the output, but we'll stick to writing our own components for now.
-
-Let's chain the three components together and run the transformation:
-
-.. code-block:: python
-
- from bonobo import run
-
- run(generate_data, uppercase, output)
-
-.. graphviz::
-
- digraph {
- rankdir = LR;
- stylesheet = "../_static/graphs.css";
-
- BEGIN [shape="point"];
- BEGIN -> "generate_data" -> "uppercase" -> "output";
- }
-
-We use the :func:`bonobo.run` helper that hides the underlying object composition necessary to actually run the
-components in parralel, because it's simpler.
-
-Depending on what you're doing, you may use the shorthand helper method, or the verbose one. Always favor the shorter,
-if you don't need to tune the graph or the execution strategy (see below).
-
-Diving in
-:::::::::
-
-Let's rewrite it using the builtin functions :func:`str.upper` and :func:`print` instead of our own wrappers, and expand
-the :func:`bonobo.run()` helper so you see what's inside...
-
-.. code-block:: python
-
- from bonobo import Graph, ThreadPoolExecutorStrategy
-
- # Represent our data processor as a simple directed graph of callables.
- graph = Graph()
- graph.add_chain(
- ('foo', 'bar', 'baz'),
- str.upper,
- print,
- )
-
- # Use a thread pool.
- executor = ThreadPoolExecutorStrategy()
-
- # Run the thing.
- executor.execute(graph)
-
-We also switched our generator for a tuple, **Bonobo** will wrap it as a generator itself if it's not callable but
-iterable.
-
-The shorthand version with builtins would look like this:
-
-.. code-block:: python
-
- from bonobo import run
-
- run(
- ('foo', 'bar', 'baz'),
- str.upper,
- print,
- )
-
-Both methods are strictly equivalent (see :func:`bonobo.run`). When in doubt, prefer the shorter version.
-
-Takeaways
-:::::::::
-
-① The :class:`bonobo.Graph` class is used to represent a data-processing pipeline.
-
-It can represent simple list-like linear graphs, like here, but it can also represent much more complex graphs, with
-branches and cycles.
-
-This is what the graph we defined looks like:
-
-.. graphviz::
-
- digraph {
- rankdir = LR;
- "iter(['foo', 'bar', 'baz'])" -> "str.upper" -> "print";
- }
-
-
-② `Components` are simple python callables. Whatever can be called can be used as a `component`. Callables can
-either `return` or `yield` data to send it to the next step. Regular functions (using `return`) should be prefered if
-each call is guaranteed to return exactly one result, while generators (using `yield`) should be prefered if the
-number of output lines for a given input varies.
-
-③ The `graph` is then executed using an `ExecutionStrategy`. In this tutorial, we'll only use
-:class:`bonobo.ThreadPoolExecutorStrategy`, which use an underlying `concurrent.futures.ThreadPoolExecutor` to
-schedule calls in a pool of threads, but basically this strategy is what determines the actual behaviour of execution.
-
-④ Before actually executing the `components`, the `ExecutorStrategy` instance will wrap each component in a `context`,
-whose responsibility is to hold the state, to keep the `components` stateless. We'll expand on this later.
-
-Concepts and definitions
-::::::::::::::::::::::::
-
-* Component
-* Graph
-* Executor
-
-.. todo:: Definitions, and substitute vague terms in the page by the exact term defined here
-
-
-Next
-::::
-
-You now know all the basic concepts necessary to build (batch-like) data processors.
-
-If you're confident with this part, let's get to a more real world example, using files and nice console output:
-:doc:`basics2`
-
diff --git a/docs/tutorial/basics2.rst b/docs/tutorial/basics2.rst
deleted file mode 100644
index f9e9608..0000000
--- a/docs/tutorial/basics2.rst
+++ /dev/null
@@ -1,46 +0,0 @@
-Working with files
-==================
-
-Bonobo would not be of any use if the aim was to uppercase small lists of strings. In fact, Bonobo should not be used
-if you don't expect any gain from parralelization of tasks.
-
-Let's take the following graph as an example:
-
-.. graphviz::
-
- digraph {
- rankdir = LR;
- "A" -> "B" -> "C";
- }
-
-The execution strategy does a bit of under the scene work, wrapping every component in a thread (assuming you're using
-the :class:`bonobo.ThreadPoolExecutorStrategy`), which allows to start running `B` as soon as `A` yielded the first line
-of data, and `C` as soon as `B` yielded the first line of data, even if `A` or `B` still have data to yield.
-
-The great thing is that you generally don't have to think about it. Just be aware that your components will be run in
-parralel, and don't worry too much about blocking components, as they won't block their siblings.
-
-That being said, let's try to write a more real-world like transformation.
-
-Reading a file
-::::::::::::::
-
-There are a few component builders available in **Bonobo** that let you read files. You should at least know about the following:
-
-* :class:`bonobo.FileReader` (aliased as :func:`bonobo.from_file`)
-* :class:`bonobo.JsonFileReader` (aliased as :func:`bonobo.from_json`)
-* :class:`bonobo.CsvFileReader` (aliased as :func:`bonobo.from_csv`)
-
-Reading a file is as simple as using one of those, and for the example, we'll use a text file that was generated using
-Bonobo from the "liste-des-cafes-a-un-euro" dataset made available by Mairie de Paris under the Open Database
-License (ODbL). You can `explore the original dataset