From 9dab39a47403d4dfac59bd1399d2d60aaa99b735 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Fri, 20 Jan 2017 20:45:16 +0100 Subject: [PATCH] Minor fixes and update documentation. Preparing the upcoming 0.2 release. --- Makefile | 2 +- Projectfile | 1 + README.rst | 6 + bonobo/__init__.py | 110 ++++++------ bonobo/commands/__init__.py | 19 ++- bonobo/commands/run.py | 25 ++- bonobo/config.py | 5 + bonobo/context/execution.py | 2 +- bonobo/core/graphs.py | 3 + bonobo/core/strategies/executor.py | 2 - bonobo/examples/__init__.py | 0 bonobo/examples/datasets/__init__.py | 0 .../examples/datasets/coffeeshops.py | 2 +- .../examples}/datasets/coffeeshops.txt | 0 .../examples/datasets/fablabs.py | 10 +- bonobo/examples/files/__init__.py | 0 bonobo/examples/files/csv.py | 11 ++ .../examples/files/text.py | 0 bonobo/examples/tutorials/__init__.py | 0 bonobo/examples/tutorials/tut02_01_read.py | 14 ++ .../tutorials}/tutorial_basics_firststeps.py | 0 .../tutorials}/tutorial_basics_summary.py | 0 bonobo/examples/types/__init__.py | 7 + bonobo/examples/types/bags.py | 45 +++++ bonobo/examples/types/dicts.py | 47 +++++ bonobo/examples/types/strings.py | 43 +++++ bonobo/ext/edgy/__init__.py | 0 bonobo/ext/edgy/project/__init__.py | 0 bonobo/ext/edgy/project/feature.py | 26 +++ bonobo/io/csv.py | 3 +- bonobo/io/json.py | 2 +- bonobo/util/__init__.py | 17 +- docs/_templates/index.html | 11 +- docs/conf.py | 5 +- docs/contribute/index.rst | 18 ++ docs/guide/ext/docker.rst | 14 ++ docs/guide/ext/jupyter.rst | 11 ++ docs/guide/{crawlers.rst => ext/selenium.rst} | 17 +- docs/guide/ext/sqlalchemy.rst | 15 ++ docs/guide/index.rst | 19 ++- docs/guide/purity.rst | 12 +- docs/index.rst | 1 + docs/reference/api.rst | 56 ++++++ docs/reference/bonobo.compat.rst | 22 --- docs/reference/bonobo.core.rst | 85 --------- docs/reference/bonobo.core.strategies.rst | 38 ----- docs/reference/bonobo.io.rst | 30 ---- docs/reference/bonobo.rst | 21 --- docs/reference/bonobo.util.rst | 62 ------- 
docs/reference/commands.rst | 33 ++++ docs/reference/examples.rst | 36 ++++ docs/reference/index.rst | 7 +- docs/tutorial/basics.rst | 161 ------------------ docs/tutorial/basics2.rst | 46 ----- docs/tutorial/index.rst | 30 +++- docs/tutorial/python.rst | 16 ++ docs/tutorial/tut01.rst | 132 ++++++++++++++ docs/tutorial/tut02.rst | 63 +++++++ examples | 1 + examples/basics_bags.py | 30 ---- examples/basics_dicts.py | 32 ---- examples/basics_file_csv.py | 21 --- examples/basics_strings.py | 29 ---- setup.py | 36 ++-- tests/test_basicusage.py | 14 ++ tests/test_commands.py | 32 ++++ tests/test_config.py | 1 + 67 files changed, 845 insertions(+), 714 deletions(-) create mode 100644 bonobo/examples/__init__.py create mode 100644 bonobo/examples/datasets/__init__.py rename examples/read_cheap_coffeeshops_in_paris.py => bonobo/examples/datasets/coffeeshops.py (83%) rename {examples => bonobo/examples}/datasets/coffeeshops.txt (100%) rename examples/opendata_fablabs.py => bonobo/examples/datasets/fablabs.py (89%) create mode 100644 bonobo/examples/files/__init__.py create mode 100644 bonobo/examples/files/csv.py rename examples/basics_file.py => bonobo/examples/files/text.py (100%) create mode 100644 bonobo/examples/tutorials/__init__.py create mode 100644 bonobo/examples/tutorials/tut02_01_read.py rename {examples => bonobo/examples/tutorials}/tutorial_basics_firststeps.py (100%) rename {examples => bonobo/examples/tutorials}/tutorial_basics_summary.py (100%) create mode 100644 bonobo/examples/types/__init__.py create mode 100644 bonobo/examples/types/bags.py create mode 100644 bonobo/examples/types/dicts.py create mode 100644 bonobo/examples/types/strings.py create mode 100644 bonobo/ext/edgy/__init__.py create mode 100644 bonobo/ext/edgy/project/__init__.py create mode 100644 bonobo/ext/edgy/project/feature.py create mode 100644 docs/contribute/index.rst create mode 100644 docs/guide/ext/docker.rst create mode 100644 docs/guide/ext/jupyter.rst rename 
docs/guide/{crawlers.rst => ext/selenium.rst} (72%) create mode 100644 docs/guide/ext/sqlalchemy.rst create mode 100644 docs/reference/api.rst delete mode 100644 docs/reference/bonobo.compat.rst delete mode 100644 docs/reference/bonobo.core.rst delete mode 100644 docs/reference/bonobo.core.strategies.rst delete mode 100644 docs/reference/bonobo.io.rst delete mode 100644 docs/reference/bonobo.rst delete mode 100644 docs/reference/bonobo.util.rst create mode 100644 docs/reference/commands.rst create mode 100644 docs/reference/examples.rst delete mode 100644 docs/tutorial/basics.rst delete mode 100644 docs/tutorial/basics2.rst create mode 100644 docs/tutorial/python.rst create mode 100644 docs/tutorial/tut01.rst create mode 100644 docs/tutorial/tut02.rst create mode 120000 examples delete mode 100644 examples/basics_bags.py delete mode 100644 examples/basics_dicts.py delete mode 100644 examples/basics_file_csv.py delete mode 100644 examples/basics_strings.py create mode 100644 tests/test_basicusage.py create mode 100644 tests/test_commands.py diff --git a/Makefile b/Makefile index d804dc7..a978826 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # This file has been auto-generated. # All changes will be lost, see Projectfile. # -# Updated at 2017-01-10 23:15:21.478899 +# Updated at 2017-01-19 12:12:07.294619 PYTHON ?= $(shell which python) PYTHON_BASENAME ?= $(shell basename $(PYTHON)) diff --git a/Projectfile b/Projectfile index 5717a51..2ef816b 100644 --- a/Projectfile +++ b/Projectfile @@ -40,6 +40,7 @@ extras_require = { 'pylint >=1.6,<1.7', 'pytest >=3,<4', 'pytest-cov >=2.4,<2.5', + 'pytest-timeout >=1.2,<1.3', 'sphinx', 'sphinx_rtd_theme', 'yapf', diff --git a/README.rst b/README.rst index 35d3bc3..2d7a1f1 100644 --- a/README.rst +++ b/README.rst @@ -68,6 +68,11 @@ Version 0.2 * Threaded does not terminate anymore * More tests +Bugs: + +- KeyboardInterrupt does not work anymore. +- ThreadPool does not stop anymore. + Configuration ............. 
@@ -119,6 +124,7 @@ Random thoughts and things to do def execute(graph: Graph, *, strategy: ExecutionStrategy, plugins: List[Plugin]) -> Execution: pass +* Handling console. Can we use a queue, and replace stdout / stderr ? diff --git a/bonobo/__init__.py b/bonobo/__init__.py index b2ff5ef..62df11d 100644 --- a/bonobo/__init__.py +++ b/bonobo/__init__.py @@ -1,30 +1,31 @@ -""" Bonobo data-processing toolkit. +# Bonobo data-processing toolkit. +# +# Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of data +# transformations using a simple directed graph of python callables. +# +# Licensed under Apache License 2.0, read the LICENSE file in the root of the source tree. - Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of data - transformations using a simple directed graph of python callables. +"""Bonobo data-processing toolkit main module.""" - Read more at http://docs.bonobo-project.org/ - - Copyright 2012-2014 Romain Dorgueil - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -""" import sys import warnings assert (sys.version_info >= (3, 5)), 'Python 3.5+ is required to use Bonobo.' 
from ._version import __version__ +from .config import __all__ as __all_config__ +from .context import __all__ as __all_context__ +from .core import __all__ as __all_core__ +from .io import __all__ as __all_io__ +from .util import __all__ as __all_util__ + +__all__ = __all_config__ + __all_context__ + __all_core__ + __all_io__ + __all_util__ + [ + '__version__', + 'create_strategy', + 'get_examples_path', + 'run', +] + from .config import * from .context import * from .core import * @@ -40,56 +41,43 @@ STRATEGIES = { } -def run(graph, *chain, strategy=None, plugins=None): +def get_examples_path(*pathsegments): + import os + import pathlib + return str(pathlib.Path(os.path.dirname(__file__), 'examples', *pathsegments)) + + +def create_strategy(name=None): from bonobo.core.strategies.base import Strategy + import logging + + if isinstance(name, Strategy): + return name + + if name is None: + name = DEFAULT_STRATEGY + + logging.debug('Creating strategy {}...'.format(name)) + + try: + factory = STRATEGIES[name] + except KeyError as exc: + raise RuntimeError('Invalid strategy {}. Available choices: {}.'.format(repr(name), ', '.join( + sorted(STRATEGIES.keys())))) from exc + + return factory() + + +def run(graph, *chain, strategy=None, plugins=None): + strategy = create_strategy(strategy) if len(chain): warnings.warn('DEPRECATED. 
You should pass a Graph instance instead of a chain.') from bonobo import Graph graph = Graph(graph, *chain) - if not isinstance(strategy, Strategy): - if strategy is None: - strategy = DEFAULT_STRATEGY - - try: - strategy = STRATEGIES[strategy] - except KeyError as exc: - raise RuntimeError('Invalid strategy {}.'.format(repr(strategy))) from exc - - strategy = strategy() - return strategy.execute(graph, plugins=plugins) -__all__ = [ - 'Bag', - 'Configurable', - 'ContextProcessor', - 'contextual', - 'CsvReader', - 'CsvWriter', - 'FileReader', - 'FileWriter', - 'Graph', - 'JsonReader', - 'JsonWriter', - 'NOT_MODIFIED', - 'NaiveStrategy', - 'Option', - 'ProcessPoolExecutorStrategy', - 'ThreadPoolExecutorStrategy', - '__version__', - 'console_run', - 'inject', - 'jupyter_run', - 'limit', - 'log', - 'noop', - 'pprint', - 'service', - 'tee', -] - -del warnings del sys +del warnings diff --git a/bonobo/commands/__init__.py b/bonobo/commands/__init__.py index bc61ebf..6525ab6 100644 --- a/bonobo/commands/__init__.py +++ b/bonobo/commands/__init__.py @@ -1,22 +1,25 @@ import argparse +import logging from stevedore import ExtensionManager -def entrypoint(): +def entrypoint(args=None): parser = argparse.ArgumentParser() subparsers = parser.add_subparsers(dest='command') subparsers.required = True - def register_extension(ext): - parser = subparsers.add_parser(ext.name) - command = ext.plugin(parser) - parser.set_defaults(command=command) + commands = {} + def register_extension(ext, commands=commands): + try: + parser = subparsers.add_parser(ext.name) + commands[ext.name] = ext.plugin(parser) + except Exception: + logging.exception('Error while loading command {}.'.format(ext.name)) mgr = ExtensionManager(namespace='bonobo.commands', ) mgr.map(register_extension) - args = parser.parse_args().__dict__ - command = args.pop('command') - command(**args) + args = parser.parse_args(args).__dict__ + commands[args.pop('command')](**args) diff --git a/bonobo/commands/run.py 
b/bonobo/commands/run.py index c0f9e14..93f81d9 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -1,13 +1,21 @@ import argparse -from bonobo import Graph, console_run +from bonobo import Graph, run -def execute(file): +def execute(file, quiet=False): with file: code = compile(file.read(), file.name, 'exec') - context = {} + # TODO: A few special variables should be set before running the file: + # + # See: + # - https://docs.python.org/3/reference/import.html#import-mod-attrs + # - https://docs.python.org/3/library/runpy.html#runpy.run_module + context = { + '__name__': '__bonobo__', + '__file__': file.name, + } try: exec(code, context) @@ -16,14 +24,19 @@ def execute(file): graphs = dict((k, v) for k, v in context.items() if isinstance(v, Graph)) - assert len(graphs) == 1, 'Having more than one graph definition in one file is unsupported for now, but it is ' \ - 'something that will be implemented in the future. ' + assert len(graphs) == 1, ('Having zero or more than one graph definition in one file is unsupported for now, ' + 'but it is something that will be implemented in the future.\n\nExpected: 1, got: {}.').format( + len(graphs)) name, graph = list(graphs.items())[0] - return console_run(graph) + # todo if console and not quiet, then add the console plugin + # todo when better console plugin, add it if console and just disable display + + return run(graph) def register(parser): parser.add_argument('file', type=argparse.FileType()) + parser.add_argument('--quiet', action='store_true') return execute diff --git a/bonobo/config.py b/bonobo/config.py index e0e7514..1a0f3ca 100644 --- a/bonobo/config.py +++ b/bonobo/config.py @@ -1,3 +1,8 @@ +__all__ = [ + 'Configurable', + 'Option', +] + class Option: def __init__(self, type=None, *, required=False, default=None): self.name = None diff --git a/bonobo/context/execution.py b/bonobo/context/execution.py index 1f018f8..5e94922 100644 --- a/bonobo/context/execution.py +++ 
b/bonobo/context/execution.py @@ -23,7 +23,7 @@ class GraphExecutionContext: @property def alive(self): - return self.started and not self.stopped + return any(node.alive for node in self.nodes) def __init__(self, graph, plugins=None): self.graph = graph diff --git a/bonobo/core/graphs.py b/bonobo/core/graphs.py index 5e0d6a1..194c6b7 100644 --- a/bonobo/core/graphs.py +++ b/bonobo/core/graphs.py @@ -26,3 +26,6 @@ class Graph: _next = self.add_node(node) self.outputs_of(_input, create=True).add(_next) _input = _next + + def __len__(self): + return len(self.nodes) diff --git a/bonobo/core/strategies/executor.py b/bonobo/core/strategies/executor.py index 6be536b..eb45ab1 100644 --- a/bonobo/core/strategies/executor.py +++ b/bonobo/core/strategies/executor.py @@ -39,11 +39,9 @@ class ExecutorStrategy(Strategy): futures.append(executor.submit(_runner)) for node_context in context.nodes: - def _runner(node_context=node_context): node_context.start() node_context.loop() - futures.append(executor.submit(_runner)) while context.alive: diff --git a/bonobo/examples/__init__.py b/bonobo/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/examples/datasets/__init__.py b/bonobo/examples/datasets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/read_cheap_coffeeshops_in_paris.py b/bonobo/examples/datasets/coffeeshops.py similarity index 83% rename from examples/read_cheap_coffeeshops_in_paris.py rename to bonobo/examples/datasets/coffeeshops.py index 5c8f05c..0ce910f 100644 --- a/examples/read_cheap_coffeeshops_in_paris.py +++ b/bonobo/examples/datasets/coffeeshops.py @@ -4,7 +4,7 @@ from bonobo import console_run from bonobo.ext.opendatasoft import from_opendatasoft_api from bonobo.io.file import FileWriter -OUTPUT_FILENAME = realpath(join(dirname(__file__), 'datasets/cheap_coffeeshops_in_paris.txt')) +OUTPUT_FILENAME = realpath(join(dirname(__file__), 'coffeeshops.txt')) console_run( from_opendatasoft_api( diff 
--git a/examples/datasets/coffeeshops.txt b/bonobo/examples/datasets/coffeeshops.txt similarity index 100% rename from examples/datasets/coffeeshops.txt rename to bonobo/examples/datasets/coffeeshops.txt diff --git a/examples/opendata_fablabs.py b/bonobo/examples/datasets/fablabs.py similarity index 89% rename from examples/opendata_fablabs.py rename to bonobo/examples/datasets/fablabs.py index 21facd1..b04e378 100644 --- a/examples/opendata_fablabs.py +++ b/bonobo/examples/datasets/fablabs.py @@ -3,7 +3,7 @@ import os from blessings import Terminal -from bonobo import tee, JsonWriter, Graph +from bonobo import Tee, JsonWriter, Graph, get_examples_path from bonobo.ext.opendatasoft import from_opendatasoft_api try: @@ -63,11 +63,11 @@ graph = Graph( ), normalize, filter_france, - tee(display), - JsonWriter(path=os.path.join(__path__, 'datasets/coffeeshops.txt')), + Tee(display), + JsonWriter(path=get_examples_path('datasets/fablabs.txt')), ) if __name__ == '__main__': - import bonobo + from bonobo import run - bonobo.run(graph) + run(graph) diff --git a/bonobo/examples/files/__init__.py b/bonobo/examples/files/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/examples/files/csv.py b/bonobo/examples/files/csv.py new file mode 100644 index 0000000..f3315f7 --- /dev/null +++ b/bonobo/examples/files/csv.py @@ -0,0 +1,11 @@ +from bonobo import CsvReader, Graph, get_examples_path + +graph = Graph( + CsvReader(path=get_examples_path('datasets/coffeeshops.txt')), + print, +) + +if __name__ == '__main__': + import bonobo + + bonobo.run(graph) diff --git a/examples/basics_file.py b/bonobo/examples/files/text.py similarity index 100% rename from examples/basics_file.py rename to bonobo/examples/files/text.py diff --git a/bonobo/examples/tutorials/__init__.py b/bonobo/examples/tutorials/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/examples/tutorials/tut02_01_read.py b/bonobo/examples/tutorials/tut02_01_read.py new file 
mode 100644 index 0000000..f99a9ee --- /dev/null +++ b/bonobo/examples/tutorials/tut02_01_read.py @@ -0,0 +1,14 @@ +import os +import pathlib + +import bonobo + +workdir = pathlib.Path(os.path.dirname(__file__)) + +graph = bonobo.Graph( + bonobo.FileReader(path=workdir.joinpath('datasets/coffeeshops.txt')), + print, +) + +if __name__ == '__main__': + bonobo.run(graph) diff --git a/examples/tutorial_basics_firststeps.py b/bonobo/examples/tutorials/tutorial_basics_firststeps.py similarity index 100% rename from examples/tutorial_basics_firststeps.py rename to bonobo/examples/tutorials/tutorial_basics_firststeps.py diff --git a/examples/tutorial_basics_summary.py b/bonobo/examples/tutorials/tutorial_basics_summary.py similarity index 100% rename from examples/tutorial_basics_summary.py rename to bonobo/examples/tutorials/tutorial_basics_summary.py diff --git a/bonobo/examples/types/__init__.py b/bonobo/examples/types/__init__.py new file mode 100644 index 0000000..a2c0ceb --- /dev/null +++ b/bonobo/examples/types/__init__.py @@ -0,0 +1,7 @@ +from . import bags, dicts, strings + +__all__ = [ + 'bags', + 'dicts', + 'strings', +] \ No newline at end of file diff --git a/bonobo/examples/types/bags.py b/bonobo/examples/types/bags.py new file mode 100644 index 0000000..e0609bf --- /dev/null +++ b/bonobo/examples/types/bags.py @@ -0,0 +1,45 @@ +""" +Example on how to use :class:`bonobo.Bag` instances to pass flexible args/kwargs to the next callable. + +.. 
graphviz:: + + digraph { + rankdir = LR; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "extract()" -> "transform(...)" -> "load(...)"; + } + +""" + + +from random import randint + +from bonobo import Bag, Graph + + +def extract(): + yield Bag(topic='foo') + yield Bag(topic='bar') + yield Bag(topic='baz') + + +def transform(topic: str): + return Bag.inherit( + title=topic.title(), + rand=randint(10, 99) + ) + + +def load(topic: str, title: str, rand: int): + print('{} ({}) wait={}'.format(title, topic, rand)) + + +graph = Graph() +graph.add_chain(extract, transform, load) + +if __name__ == '__main__': + from bonobo import run + + run(graph) diff --git a/bonobo/examples/types/dicts.py b/bonobo/examples/types/dicts.py new file mode 100644 index 0000000..0e45630 --- /dev/null +++ b/bonobo/examples/types/dicts.py @@ -0,0 +1,47 @@ +""" +Example on how to use simple python dictionaries to communicate between transformations. + +.. graphviz:: + + digraph { + rankdir = LR; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "extract()" -> "transform(row: dict)" -> "load(row: dict)"; + } + +""" + +from random import randint + +from bonobo import Graph + + +def extract(): + yield {'topic': 'foo'} + yield {'topic': 'bar'} + yield {'topic': 'baz'} + + +def transform(row: dict): + return { + 'topic': row['topic'].title(), + 'randint': randint(10, 99), + } + + +def load(row: dict): + print(row) + + +graph = Graph( + extract, + transform, + load +) + +if __name__ == '__main__': + from bonobo import run + + run(graph) diff --git a/bonobo/examples/types/strings.py b/bonobo/examples/types/strings.py new file mode 100644 index 0000000..75cfed7 --- /dev/null +++ b/bonobo/examples/types/strings.py @@ -0,0 +1,43 @@ +""" +Example on how to use simple python strings to communicate between transformations. + +.. 
graphviz:: + + digraph { + rankdir = LR; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "extract()" -> "transform(s: str)" -> "load(s: str)"; + } + +""" +from random import randint + +from bonobo import Graph + + +def extract(): + yield 'foo' + yield 'bar' + yield 'baz' + + +def transform(s: str): + return '{} ({})'.format(s.title(), randint(10, 99)) + + +def load(s: str): + print(s) + + +graph = Graph( + extract, + transform, + load +) + +if __name__ == '__main__': + from bonobo import run + + run(graph) diff --git a/bonobo/ext/edgy/__init__.py b/bonobo/ext/edgy/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/ext/edgy/project/__init__.py b/bonobo/ext/edgy/project/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/ext/edgy/project/feature.py b/bonobo/ext/edgy/project/feature.py new file mode 100644 index 0000000..ef75fea --- /dev/null +++ b/bonobo/ext/edgy/project/feature.py @@ -0,0 +1,26 @@ +try: + import edgy.project +except ImportError as e: + import logging + + logging.exception('You must install edgy.project to use this.') + +import os + +from edgy.project.events import subscribe +from edgy.project.feature import Feature, SUPPORT_PRIORITY + + +class BonoboFeature(Feature): + requires = {'python'} + + @subscribe('edgy.project.feature.make.on_generate', priority=SUPPORT_PRIORITY) + def on_make_generate(self, event): + makefile = event.makefile + + @subscribe('edgy.project.on_start', priority=SUPPORT_PRIORITY) + def on_start(self, event): + package_path = event.setup['name'].replace('.', os.sep) + + for file in ('example_graph'): + self.render_file(os.path.join(package_path, file + '.py'), os.path.join('tornado', file + '.py.j2')) diff --git a/bonobo/io/csv.py b/bonobo/io/csv.py index 431fc94..175737f 100644 --- a/bonobo/io/csv.py +++ b/bonobo/io/csv.py @@ -1,6 +1,7 @@ import csv -from bonobo import Option, ContextProcessor, contextual +from bonobo.config import Option +from 
bonobo.context import ContextProcessor, contextual from bonobo.util.objects import ValueHolder from .file import FileReader, FileWriter, FileHandler diff --git a/bonobo/io/json.py b/bonobo/io/json.py index 9c50932..04a3a0a 100644 --- a/bonobo/io/json.py +++ b/bonobo/io/json.py @@ -1,6 +1,6 @@ import json -from bonobo import ContextProcessor, contextual +from bonobo.context import ContextProcessor, contextual from .file import FileWriter, FileReader __all__ = ['JsonWriter', ] diff --git a/bonobo/util/__init__.py b/bonobo/util/__init__.py index 69fdebc..5a2ee26 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -9,14 +9,14 @@ from .helpers import console_run, jupyter_run from .tokens import NOT_MODIFIED __all__ = [ + 'Limit', 'NOT_MODIFIED', + 'PrettyPrint', + 'Tee', 'console_run', 'jupyter_run', - 'limit', - 'log', 'noop', 'pprint', - 'tee', ] @@ -24,7 +24,7 @@ def identity(x): return x -def limit(n=10): +def Limit(n=10): i = 0 def _limit(*args, **kwargs): @@ -37,7 +37,7 @@ def limit(n=10): return _limit -def tee(f): +def Tee(f): @functools.wraps(f) def wrapped(*args, **kwargs): nonlocal f @@ -47,10 +47,10 @@ def tee(f): return wrapped -log = tee(_pprint) +pprint = Tee(_pprint) -def pprint(title_keys=('title', 'name', 'id'), print_values=True, sort=True): +def PrettyPrint(title_keys=('title', 'name', 'id'), print_values=True, sort=True): term = blessings.Terminal() def _pprint(*args, **kwargs): @@ -78,6 +78,7 @@ def pprint(title_keys=('title', 'name', 'id'), print_values=True, sort=True): ''' + Old code from rdc.etl def writehr(self, label=None): width = t.width or 80 @@ -113,4 +114,4 @@ def pprint(title_keys=('title', 'name', 'id'), print_values=True, sort=True): def noop(*args, **kwargs): # pylint: disable=unused-argument - pass + return NOT_MODIFIED diff --git a/docs/_templates/index.html b/docs/_templates/index.html index 0c3bbed..60fdff4 100644 --- a/docs/_templates/index.html +++ b/docs/_templates/index.html @@ -38,14 +38,13 @@ @@ -69,7 
+68,7 @@ {% trans %}examples and recipes{% endtrans %}

diff --git a/docs/conf.py b/docs/conf.py index 68e9d7a..c7970e5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -181,4 +181,7 @@ epub_copyright = copyright epub_exclude_files = ['search.html'] # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = { + 'python': ('https://docs.python.org/3', None) +} + diff --git a/docs/contribute/index.rst b/docs/contribute/index.rst new file mode 100644 index 0000000..fa46d0e --- /dev/null +++ b/docs/contribute/index.rst @@ -0,0 +1,18 @@ +Contributing +============ + +Contributing to bonobo is simple. Although we don't have a complete guide on this topic for now, the best way is to fork +the github repository and send pull requests. + +Keep the following points in mind: + +* Although we will ask for 100% backward compatibility starting from 1.0 (following semantic versioning principles), + pre-1.0 versions should do their best to keep compatibility between versions. When in doubt, open a github issue + to discuss things. +* The core should stay as light as possible. +* Coding standards are enforced using yapf. That means that you can code the way you want, we just ask you to run + `make format` before committing your changes so everybody follows the same conventions. +* General rule for anything you're not sure about is "open a github issue to discuss the point". +* More formal proposal process will come the day we feel the need for it. + +A very drafty roadmap is available in the readme. \ No newline at end of file diff --git a/docs/guide/ext/docker.rst b/docs/guide/ext/docker.rst new file mode 100644 index 0000000..5937c02 --- /dev/null +++ b/docs/guide/ext/docker.rst @@ -0,0 +1,14 @@ +Bonobo with Docker +================== + +.. todo:: The `bonobo-docker` package is at a very alpha stage, and things will change. This section is here to give a + brief overview but is neither complete nor definitive. 
+ +Installation +:::::::::::: + +Overview +:::::::: + +Details +::::::: diff --git a/docs/guide/ext/jupyter.rst b/docs/guide/ext/jupyter.rst new file mode 100644 index 0000000..98efa8c --- /dev/null +++ b/docs/guide/ext/jupyter.rst @@ -0,0 +1,11 @@ +Bonobo with Jupyter +=================== + +Installation +:::::::::::: + +Overview +:::::::: + +Details +::::::: diff --git a/docs/guide/crawlers.rst b/docs/guide/ext/selenium.rst similarity index 72% rename from docs/guide/crawlers.rst rename to docs/guide/ext/selenium.rst index 7a9a181..e588cd6 100644 --- a/docs/guide/crawlers.rst +++ b/docs/guide/ext/selenium.rst @@ -1,8 +1,9 @@ -Web crawlers with Bonobo -======================== +Bonobo with Selenium +==================== + +.. todo:: The `bonobo-selenium` package is at a very alpha stage, and things will change. This section is here to give a + brief overview but is neither complete nor definitive. -.. todo:: Bonobo-Selenium is at a very alpha stage, and things will change. This section is here to give a brief - overview but is neither complete nor definitive. Writing web crawlers with Bonobo and Selenium is easy. @@ -31,3 +32,11 @@ Where each step would do the following: * `details()` extract the data you're interested in. * ... and the writer saves it somewhere. +Installation +:::::::::::: + +Overview +:::::::: + +Details +::::::: diff --git a/docs/guide/ext/sqlalchemy.rst b/docs/guide/ext/sqlalchemy.rst new file mode 100644 index 0000000..0f9c549 --- /dev/null +++ b/docs/guide/ext/sqlalchemy.rst @@ -0,0 +1,15 @@ +Bonobo with SQLAlchemy +====================== + +.. todo:: The `bonobo-sqlalchemy` package is at a very alpha stage, and things will change. This section is here to + give a brief overview but is neither complete nor definitive. 
+ + +Installation +:::::::::::: + +Overview +:::::::: + +Details +::::::: diff --git a/docs/guide/index.rst b/docs/guide/index.rst index 99ae56f..23cff3c 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -1,8 +1,25 @@ Guides ====== +Concepts and best practices +::::::::::::::::::::::::::: + +There are a few things that you should know while writing transformation graphs with bonobo. + .. toctree:: :maxdepth: 2 purity - crawlers + +Third party integrations +:::::::::::::::::::::::: + +There are a few **bonobo** extensions that ease the use of the library with third party tools. Each integration is +available as an optional extra dependency, and the maturity stage of each extension varies. + +.. toctree:: + :maxdepth: 2 + + ext/docker + ext/selenium + ext/sqlalchemy diff --git a/docs/guide/purity.rst b/docs/guide/purity.rst index 1995284..cf9d47f 100644 --- a/docs/guide/purity.rst +++ b/docs/guide/purity.rst @@ -1,5 +1,5 @@ -Pure components and space complexity -==================================== +Pure transformations +==================== The nature of components, and how the data flow from one to another, make them not so easy to write correctly. Hopefully, with a few hints, you will be able to understand why and how they should be written. @@ -14,7 +14,7 @@ returning) them. Numeric values, strings and tuples being immutable in python, m type will already return a different instance. Numbers -======= +::::::: You can't be wrong with numbers. All of the following are correct. @@ -45,7 +45,7 @@ You can't be wrong with numbers. All of the following are correct. The same is true with other numeric types, so don't be shy. Operate like crazy, my friend. Tuples -====== +:::::: Tuples are immutable, so you risk nothing. @@ -63,12 +63,12 @@ Tuples are immutable, so you risk nothing. return t Strings -======= +::::::: You know the drill, strings are immutable, blablabla ... Examples left as an exercise for the reader. 
Dicts -===== +::::: So, now it gets interesting. Dicts are mutable. It means that you can mess things up badly here if you're not cautious. diff --git a/docs/index.rst b/docs/index.rst index 081b47d..f2ce068 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,6 +8,7 @@ Bonobo tutorial/index guide/index reference/index + contribute/index genindex modindex diff --git a/docs/reference/api.rst b/docs/reference/api.rst new file mode 100644 index 0000000..13376ac --- /dev/null +++ b/docs/reference/api.rst @@ -0,0 +1,56 @@ +Public API +========== + +All the "public api" callables, classes and other callables are available under the root :mod:`bonobo` package, even if +they are documented within their sub-namespace, for convenience. + +.. automodule:: bonobo + :members: create_strategy, get_examples_path, run + :undoc-members: + :show-inheritance: + +Config +------ + +.. automodule:: bonobo.config + :members: + :undoc-members: + :show-inheritance: + + +Context +------- + +.. automodule:: bonobo.context + :members: + :undoc-members: + :show-inheritance: + + +Core +---- + +.. automodule:: bonobo.core + :members: + :undoc-members: + :show-inheritance: + + +IO +-- + +.. automodule:: bonobo.io + :members: + :undoc-members: + :show-inheritance: + + +Util +---- + +.. automodule:: bonobo.util + :members: + :undoc-members: + :show-inheritance: + + diff --git a/docs/reference/bonobo.compat.rst b/docs/reference/bonobo.compat.rst deleted file mode 100644 index 83581bc..0000000 --- a/docs/reference/bonobo.compat.rst +++ /dev/null @@ -1,22 +0,0 @@ -bonobo.compat package -===================== - -Submodules ----------- - -bonobo.compat.pandas module ---------------------------- - -.. automodule:: bonobo.compat.pandas - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: bonobo.compat - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/reference/bonobo.core.rst b/docs/reference/bonobo.core.rst deleted file mode 100644 index 247e883..0000000 --- a/docs/reference/bonobo.core.rst +++ /dev/null @@ -1,85 +0,0 @@ -bonobo.core package -=================== - -Subpackages ------------ - -.. toctree:: - - bonobo.core.strategies - -Submodules ----------- - -bonobo.core.bags module ------------------------ - -.. automodule:: bonobo.core.bags - :members: - :undoc-members: - :show-inheritance: - -bonobo.core.contexts module ---------------------------- - -.. automodule:: bonobo.core.contexts - :members: - :undoc-members: - :show-inheritance: - -bonobo.core.errors module -------------------------- - -.. automodule:: bonobo.core.errors - :members: - :undoc-members: - :show-inheritance: - -bonobo.core.graphs module -------------------------- - -.. automodule:: bonobo.core.graphs - :members: - :undoc-members: - :show-inheritance: - -bonobo.core.inputs module -------------------------- - -.. automodule:: bonobo.core.inputs - :members: - :undoc-members: - :show-inheritance: - -bonobo.core.plugins module --------------------------- - -.. automodule:: bonobo.core.plugins - :members: - :undoc-members: - :show-inheritance: - -bonobo.core.services module ---------------------------- - -.. automodule:: bonobo.core.services - :members: - :undoc-members: - :show-inheritance: - -bonobo.core.stats module ------------------------- - -.. automodule:: bonobo.core.stats - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: bonobo.core - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/reference/bonobo.core.strategies.rst b/docs/reference/bonobo.core.strategies.rst deleted file mode 100644 index 0dfd138..0000000 --- a/docs/reference/bonobo.core.strategies.rst +++ /dev/null @@ -1,38 +0,0 @@ -bonobo.core.strategies package -============================== - -Submodules ----------- - -bonobo.core.strategies.base module ----------------------------------- - -.. automodule:: bonobo.core.strategies.base - :members: - :undoc-members: - :show-inheritance: - -bonobo.core.strategies.executor module --------------------------------------- - -.. automodule:: bonobo.core.strategies.executor - :members: - :undoc-members: - :show-inheritance: - -bonobo.core.strategies.naive module ------------------------------------ - -.. automodule:: bonobo.core.strategies.naive - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: bonobo.core.strategies - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/reference/bonobo.io.rst b/docs/reference/bonobo.io.rst deleted file mode 100644 index d930d25..0000000 --- a/docs/reference/bonobo.io.rst +++ /dev/null @@ -1,30 +0,0 @@ -bonobo.io package -================= - -Submodules ----------- - -bonobo.io.file module ---------------------- - -.. automodule:: bonobo.io.file - :members: - :undoc-members: - :show-inheritance: - -bonobo.io.json module ---------------------- - -.. automodule:: bonobo.io.json - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: bonobo.io - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/reference/bonobo.rst b/docs/reference/bonobo.rst deleted file mode 100644 index 4785ea3..0000000 --- a/docs/reference/bonobo.rst +++ /dev/null @@ -1,21 +0,0 @@ -bonobo package -============== - -Subpackages ------------ - -.. 
toctree:: - - bonobo.compat - bonobo.core - bonobo.ext - bonobo.io - bonobo.util - -Module contents ---------------- - -.. automodule:: bonobo - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/reference/bonobo.util.rst b/docs/reference/bonobo.util.rst deleted file mode 100644 index 4d73e2c..0000000 --- a/docs/reference/bonobo.util.rst +++ /dev/null @@ -1,62 +0,0 @@ -bonobo.util package -=================== - -Submodules ----------- - -bonobo.util.compat module -------------------------- - -.. automodule:: bonobo.util.compat - :members: - :undoc-members: - :show-inheritance: - -bonobo.util.helpers module --------------------------- - -.. automodule:: bonobo.util.helpers - :members: - :undoc-members: - :show-inheritance: - -bonobo.util.iterators module ----------------------------- - -.. automodule:: bonobo.util.iterators - :members: - :undoc-members: - :show-inheritance: - -bonobo.util.lifecycle module ----------------------------- - -.. automodule:: bonobo.util.lifecycle - :members: - :undoc-members: - :show-inheritance: - -bonobo.util.time module ------------------------ - -.. automodule:: bonobo.util.time - :members: - :undoc-members: - :show-inheritance: - -bonobo.util.tokens module -------------------------- - -.. automodule:: bonobo.util.tokens - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: bonobo.util - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/reference/commands.rst b/docs/reference/commands.rst new file mode 100644 index 0000000..8d79fb6 --- /dev/null +++ b/docs/reference/commands.rst @@ -0,0 +1,33 @@ +Commands Reference +================== + +Bonobo Init +::::::::::: + +Create an empty project, ready to use bonobo. + +Syntax: `bonobo init` + +Requires `edgy.project`. + + +Bonobo Run +:::::::::: + +Run a transformation graph. + +Syntax: `bonobo run [-c cmd | -m mod | file | -] [arg]` + +.. 
todo:: implement -m, check if -c is of any use and if yes, implement it too. Implement args, too. + +Bonobo RunC +::::::::::: + +Run a transformation graph in a docker container. + +Syntax: `bonobo runc [-c cmd | -m mod | file | -] [arg]` + +.. todo:: implement -m, check if -c is of any use and if yes, implement it too. Implement args, too. + +Requires `bonobo-docker`, install with `docker` extra: `pip install bonobo[docker]`. + diff --git a/docs/reference/examples.rst b/docs/reference/examples.rst new file mode 100644 index 0000000..bddbe8b --- /dev/null +++ b/docs/reference/examples.rst @@ -0,0 +1,36 @@ +Examples +======== + +There are a few examples bundled with **bonobo**. You'll find them under the :mod:`bonobo.examples` package. + +Types +::::: + +bonobo.examples.types.strings +----------------------------- + +.. automodule:: bonobo.examples.types.strings + :members: graph, extract, transform, load + :undoc-members: + :show-inheritance: + + +bonobo.examples.types.dicts +--------------------------- + +.. automodule:: bonobo.examples.types.dicts + :members: graph, extract, transform, load + :undoc-members: + :show-inheritance: + + +bonobo.examples.types.bags +-------------------------- + +.. automodule:: bonobo.examples.types.bags + :members: graph, extract, transform, load + :undoc-members: + :show-inheritance: + + + diff --git a/docs/reference/index.rst b/docs/reference/index.rst index f9fa75c..56bddab 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -6,8 +6,5 @@ References .. toctree:: :maxdepth: 4 - bonobo - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` + commands + api diff --git a/docs/tutorial/basics.rst b/docs/tutorial/basics.rst deleted file mode 100644 index 3b1b989..0000000 --- a/docs/tutorial/basics.rst +++ /dev/null @@ -1,161 +0,0 @@ -Basic concepts -============== - -To begin with Bonobo, you need to install it in a working python 3.5+ environment: - -.. 
code-block:: shell-session - - $ pip install bonobo - -See :doc:`/install` for more options. - -Let's write a first data transformation -::::::::::::::::::::::::::::::::::::::: - -We'll start with the most simple components we can. - -In **Bonobo**, a component is a plain old python callable, not more, not less. Let's write one that takes a string and -uppercase it. - -.. code-block:: python - - def uppercase(x: str): - return x.upper() - -Pretty straightforward. - -You could even use :func:`str.upper` directly instead of writing a wrapper, as a type's method (unbound) will take an -instance of this type as its first parameter (what you'd call `self` in your method). - -The type annotations written here are not used, but can make your code much more readable, and may very well be used as -validators in the future. - -Let's write two more components: a generator to produce the data to be transformed, and something that outputs it, -because, yeah, feedback is cool. - -.. code-block:: python - - def generate_data(): - yield 'foo' - yield 'bar' - yield 'baz' - - def output(x: str): - print(x) - -Once again, you could have skipped the pain of writing this and simply use an iterable to generate the data and the -builtin :func:`print` for the output, but we'll stick to writing our own components for now. - -Let's chain the three components together and run the transformation: - -.. code-block:: python - - from bonobo import run - - run(generate_data, uppercase, output) - -.. graphviz:: - - digraph { - rankdir = LR; - stylesheet = "../_static/graphs.css"; - - BEGIN [shape="point"]; - BEGIN -> "generate_data" -> "uppercase" -> "output"; - } - -We use the :func:`bonobo.run` helper that hides the underlying object composition necessary to actually run the -components in parralel, because it's simpler. - -Depending on what you're doing, you may use the shorthand helper method, or the verbose one. 
Always favor the shorter, -if you don't need to tune the graph or the execution strategy (see below). - -Diving in -::::::::: - -Let's rewrite it using the builtin functions :func:`str.upper` and :func:`print` instead of our own wrappers, and expand -the :func:`bonobo.run()` helper so you see what's inside... - -.. code-block:: python - - from bonobo import Graph, ThreadPoolExecutorStrategy - - # Represent our data processor as a simple directed graph of callables. - graph = Graph() - graph.add_chain( - ('foo', 'bar', 'baz'), - str.upper, - print, - ) - - # Use a thread pool. - executor = ThreadPoolExecutorStrategy() - - # Run the thing. - executor.execute(graph) - -We also switched our generator for a tuple, **Bonobo** will wrap it as a generator itself if it's not callable but -iterable. - -The shorthand version with builtins would look like this: - -.. code-block:: python - - from bonobo import run - - run( - ('foo', 'bar', 'baz'), - str.upper, - print, - ) - -Both methods are strictly equivalent (see :func:`bonobo.run`). When in doubt, prefer the shorter version. - -Takeaways -::::::::: - -① The :class:`bonobo.Graph` class is used to represent a data-processing pipeline. - -It can represent simple list-like linear graphs, like here, but it can also represent much more complex graphs, with -branches and cycles. - -This is what the graph we defined looks like: - -.. graphviz:: - - digraph { - rankdir = LR; - "iter(['foo', 'bar', 'baz'])" -> "str.upper" -> "print"; - } - - -② `Components` are simple python callables. Whatever can be called can be used as a `component`. Callables can -either `return` or `yield` data to send it to the next step. Regular functions (using `return`) should be prefered if -each call is guaranteed to return exactly one result, while generators (using `yield`) should be prefered if the -number of output lines for a given input varies. - -③ The `graph` is then executed using an `ExecutionStrategy`. 
In this tutorial, we'll only use -:class:`bonobo.ThreadPoolExecutorStrategy`, which use an underlying `concurrent.futures.ThreadPoolExecutor` to -schedule calls in a pool of threads, but basically this strategy is what determines the actual behaviour of execution. - -④ Before actually executing the `components`, the `ExecutorStrategy` instance will wrap each component in a `context`, -whose responsibility is to hold the state, to keep the `components` stateless. We'll expand on this later. - -Concepts and definitions -:::::::::::::::::::::::: - -* Component -* Graph -* Executor - -.. todo:: Definitions, and substitute vague terms in the page by the exact term defined here - - -Next -:::: - -You now know all the basic concepts necessary to build (batch-like) data processors. - -If you're confident with this part, let's get to a more real world example, using files and nice console output: -:doc:`basics2` - diff --git a/docs/tutorial/basics2.rst b/docs/tutorial/basics2.rst deleted file mode 100644 index f9e9608..0000000 --- a/docs/tutorial/basics2.rst +++ /dev/null @@ -1,46 +0,0 @@ -Working with files -================== - -Bonobo would not be of any use if the aim was to uppercase small lists of strings. In fact, Bonobo should not be used -if you don't expect any gain from parralelization of tasks. - -Let's take the following graph as an example: - -.. graphviz:: - - digraph { - rankdir = LR; - "A" -> "B" -> "C"; - } - -The execution strategy does a bit of under the scene work, wrapping every component in a thread (assuming you're using -the :class:`bonobo.ThreadPoolExecutorStrategy`), which allows to start running `B` as soon as `A` yielded the first line -of data, and `C` as soon as `B` yielded the first line of data, even if `A` or `B` still have data to yield. - -The great thing is that you generally don't have to think about it. 
Just be aware that your components will be run in -parralel, and don't worry too much about blocking components, as they won't block their siblings. - -That being said, let's try to write a more real-world like transformation. - -Reading a file -:::::::::::::: - -There are a few component builders available in **Bonobo** that let you read files. You should at least know about the following: - -* :class:`bonobo.FileReader` (aliased as :func:`bonobo.from_file`) -* :class:`bonobo.JsonFileReader` (aliased as :func:`bonobo.from_json`) -* :class:`bonobo.CsvFileReader` (aliased as :func:`bonobo.from_csv`) - -Reading a file is as simple as using one of those, and for the example, we'll use a text file that was generated using -Bonobo from the "liste-des-cafes-a-un-euro" dataset made available by Mairie de Paris under the Open Database -License (ODbL). You can `explore the original dataset `_. -You'll need the example dataset, available in **Bonobo**'s repository. - -.. code-block:: python - - from bonobo import FileReader, run - - run( - FileReader('examples/datasets/cheap_coffeeshops_in_paris.txt'), - print, - ) diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst index 70049fc..8fccab4 100644 --- a/docs/tutorial/index.rst +++ b/docs/tutorial/index.rst @@ -3,12 +3,38 @@ First steps We tried hard to make **Bonobo** simple. We use simple python, and we believe it should be simple to learn. +Tutorial +:::::::: + We strongly advice that even if you're an advanced python developper, you go through the whole tutorial for two reasons: that should be sufficient to do anything possible with **Bonobo** and that's a good moment to learn the few concepts you'll see everywhere in the software. +If you're not familiar with python, you should first read :doc:`./python`. + .. toctree:: :maxdepth: 2 - basics - basics2 + tut01 + tut02 + +Where to go next? 
+::::::::::::::::: + +When you're done with the tutorial, you may be interested in the following next steps: + +Read the :doc:`../reference/examples` + +Read about best development practices +------------------------------------- + +* :doc:`../guide/index` +* :doc:`../guide/purity` + +Read about integrating external tools with bonobo +------------------------------------------------- + +* :doc:`../guide/ext/docker`: run transformation graphs in isolated containers. +* :doc:`../guide/ext/jupyter`: run transformations within jupyter notebooks. +* :doc:`../guide/ext/selenium`: run +* :doc:`../guide/ext/sqlalchemy`: everything you need to interact with SQL databases. diff --git a/docs/tutorial/python.rst b/docs/tutorial/python.rst new file mode 100644 index 0000000..13c26a7 --- /dev/null +++ b/docs/tutorial/python.rst @@ -0,0 +1,16 @@ +Just enough Python for Bonobo +============================= + +This guide is intended to help programmers or enthusiasts to grasp the python basics necessary to use Bonobo. It should +definitely not be considered as a general python introduction, nor a deep dive into details. + +.. toctree:: + :maxdepth: 2 + + python01 + python02 + python03 + python04 + python05 + + diff --git a/docs/tutorial/tut01.rst b/docs/tutorial/tut01.rst new file mode 100644 index 0000000..6504298 --- /dev/null +++ b/docs/tutorial/tut01.rst @@ -0,0 +1,132 @@ +Basic concepts +============== + +To begin with Bonobo, you need to install it in a working python 3.5+ environment: + +.. code-block:: shell-session + + $ pip install bonobo + +See :doc:`/install` for more options. + +Let's write a first data transformation +::::::::::::::::::::::::::::::::::::::: + +We'll start with the simplest transformation possible. + +In **Bonobo**, a transformation is a plain old python callable, not more, not less. Let's write one that takes a string +and uppercases it. + +.. code-block:: python + + def uppercase(x: str): + return x.upper() + +Pretty straightforward.
+ +You could even use :func:`str.upper` directly instead of writing a wrapper, as a type's method (unbound) will take an +instance of this type as its first parameter (what you'd call `self` in your method). + +The type annotations written here are not used, but can make your code much more readable, and may very well be used as +validators in the future. + +Let's write two more transformations: a generator to produce the data to be transformed, and something that outputs it, +because, yeah, feedback is cool. + +.. code-block:: python + + def generate_data(): + yield 'foo' + yield 'bar' + yield 'baz' + + def output(x: str): + print(x) + +Once again, you could have skipped the pain of writing this and simply used an iterable to generate the data and the +builtin :func:`print` for the output, but we'll stick to writing our own transformations for now. + +Let's chain the three transformations together and run the transformation graph: + +.. code-block:: python + + import bonobo + + graph = bonobo.Graph(generate_data, uppercase, output) + + if __name__ == '__main__': + bonobo.run(graph) + +.. graphviz:: + + digraph { + rankdir = LR; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "generate_data" -> "uppercase" -> "output"; + } + +We use the :func:`bonobo.run` helper that hides the underlying object composition necessary to actually run the +transformations in parallel, because it's simpler. + +Depending on what you're doing, you may use the shorthand helper method, or the verbose one. Always favor the shorter, +if you don't need to tune the graph or the execution strategy (see below). + +Takeaways +::::::::: + +① The :class:`bonobo.Graph` class is used to represent a data-processing pipeline. + +It can represent simple list-like linear graphs, like here, but it can also represent much more complex graphs, with +branches and cycles. + +This is what the graph we defined looks like: + +.. 
graphviz:: + + digraph { + rankdir = LR; + BEGIN [shape="point"]; + BEGIN -> "iter(['foo', 'bar', 'baz'])" -> "str.upper" -> "print"; + } + + +② `Transformations` are simple python callables. Whatever can be called can be used as a `transformation`. Callables can +either `return` or `yield` data to send it to the next step. Regular functions (using `return`) should be preferred if +each call is guaranteed to return exactly one result, while generators (using `yield`) should be preferred if the +number of output lines for a given input varies. + +③ The `Graph` instance, or `transformation graph` is then executed using an `ExecutionStrategy`. You did not use it +directly in this tutorial, but :func:`bonobo.run` created an instance of :class:`bonobo.ThreadPoolExecutorStrategy` +under the hood (which is the default strategy). Actual behavior of an execution will depend on the strategy chosen, but +the default should be fine in most of the basic cases. + +④ Before actually executing the `transformations`, the `ExecutionStrategy` instance will wrap each component in an +`execution context`, whose responsibility is to hold the state of the transformation. It enables keeping the +`transformations` stateless, while allowing an external state to be added if required. We'll expand on this later. + +Concepts and definitions +:::::::::::::::::::::::: + +* Transformation: a callable that takes input (as call parameters) and returns output(s), either as its return value or + by yielding values (a.k.a. returning a generator). +* Transformation graph (or Graph): a set of transformations tied together in a :class:`bonobo.Graph` instance, which is a simple + directed acyclic graph (also referred to as a DAG, sometimes). +* Node: a transformation within the context of a transformation graph. The node defines what to do with a + transformation's output, and especially what other node to feed with the output. +* Execution strategy (or strategy): a way to run a transformation graph.
Its responsibility is mainly to parallelize + (or not) the transformations, on one or more processes and/or computers, and to set up the right queuing mechanism for + transformations' inputs and outputs. +* Execution context (or context): a wrapper around a node that holds the state for it. If the node needs the state, there + are tools available in bonobo to feed it to the transformation using additional call parameters, and so every + transformation will be atomic. + +Next +:::: + +You now know all the basic concepts necessary to build (batch-like) data processors. + +If you're confident with this part, let's get to a more real world example, using files and nice console output: +:doc:`tut02` + diff --git a/docs/tutorial/tut02.rst b/docs/tutorial/tut02.rst new file mode 100644 index 0000000..2ceeb55 --- /dev/null +++ b/docs/tutorial/tut02.rst @@ -0,0 +1,63 @@ +Working with files +================== + +Bonobo would not be of any use if the aim was to uppercase small lists of strings. In fact, Bonobo should not be used +if you don't expect any gain from parallelization/distribution of tasks. + +Let's take the following graph as an example: + +.. graphviz:: + + digraph { + rankdir = LR; + BEGIN [shape="point"]; + BEGIN -> "A" -> "B" -> "C"; + } + +The execution strategy does a bit of behind-the-scenes work, wrapping every component in a thread (assuming you're using +the :class:`bonobo.ThreadPoolExecutorStrategy`), which allows to start running `B` as soon as `A` yielded the first line +of data, and `C` as soon as `B` yielded the first line of data, even if `A` or `B` still have data to yield. + +The great thing is that you generally don't have to think about it. Just be aware that your components will be run in +parallel (with the default strategy), and don't worry too much about blocking components, as they won't block their +siblings when run in bonobo. + +That being said, let's try to write a more real-world like transformation.
+ +Reading a file +:::::::::::::: + +There are a few component builders available in **Bonobo** that let you read files. You should at least know about the +following: + +* :class:`bonobo.io.FileReader` +* :class:`bonobo.io.JsonReader` +* :class:`bonobo.io.CsvReader` + +Reading a file is as simple as using one of those, and for the example, we'll use a text file that was generated using +Bonobo from the "liste-des-cafes-a-un-euro" dataset made available by Mairie de Paris under the Open Database +License (ODbL). You can `explore the original dataset `_. +You'll need the example dataset, available in **Bonobo**'s repository. + +.. literalinclude:: ../../examples/tutorials/tut02_01_read.py + :language: python + +Until now, we ran the file directly using our python interpreter, but there are other options, one of them being +`bonobo run`. This command allows to run a graph defined by a python file, and is replacing the :func:`bonobo.run` +helper. It's the exact reason why we call :func:`bonobo.run` in the `if __name__ == '__main__'` block, to only +instantiate it if it is run directly. + +Using bonobo command line has a few advantages. It will look for one and only one :class:`bonobo.Graph` instance defined +in the file given as argument, configure an execution strategy, optionally plugins, and execute it. It has the benefit +of allowing to tune the "artifacts" surrounding the transformation graph on command line (verbosity, plugins ...), and +it will also ease the transition to run transformation graphs in containers, as the syntax will be the same. Of course, +it is not required, and the containerization capabilities are provided by an optional and separate python package. + +.. 
code-block:: shell-session + + $ bonobo run examples/tutorials/tut02_01_read.py + + + + + diff --git a/examples b/examples new file mode 120000 index 0000000..7a35da1 --- /dev/null +++ b/examples @@ -0,0 +1 @@ +bonobo/examples \ No newline at end of file diff --git a/examples/basics_bags.py b/examples/basics_bags.py deleted file mode 100644 index 67f0087..0000000 --- a/examples/basics_bags.py +++ /dev/null @@ -1,30 +0,0 @@ -import time -from random import randint - -from bonobo import Bag -from bonobo.core.graphs import Graph - - -def extract(): - yield Bag(topic='foo') - yield Bag(topic='bar') - yield Bag(topic='baz') - - -def transform(topic: str): - wait = randint(0, 1) - time.sleep(wait) - return Bag.inherit(title=topic.title(), wait=wait) - - -def load(topic: str, title: str, wait: int): - print('{} ({}) wait={}'.format(title, topic, wait)) - - -graph = Graph() -graph.add_chain(extract, transform, load) - -if __name__ == '__main__': - from bonobo.util.helpers import run - - run(graph) diff --git a/examples/basics_dicts.py b/examples/basics_dicts.py deleted file mode 100644 index 76e117d..0000000 --- a/examples/basics_dicts.py +++ /dev/null @@ -1,32 +0,0 @@ -import time -from random import randint - -from bonobo.core.graphs import Graph - - -def extract(): - yield {'topic': 'foo'} - yield {'topic': 'bar'} - yield {'topic': 'baz'} - - -def transform(row): - wait = randint(0, 1) - time.sleep(wait) - return { - 'topic': row['topic'].title(), - 'wait': wait, - } - - -def load(s): - print(s) - - -graph = Graph() -graph.add_chain(extract, transform, load) - -if __name__ == '__main__': - from bonobo import run - - run(graph) diff --git a/examples/basics_file_csv.py b/examples/basics_file_csv.py deleted file mode 100644 index c13bfcb..0000000 --- a/examples/basics_file_csv.py +++ /dev/null @@ -1,21 +0,0 @@ -import os - -from bonobo import CsvReader, Graph - -__path__ = os.path.dirname(__file__) - - -def skip_comments(line): - if not line.startswith('#'): - yield line - - -graph
= Graph( - CsvReader(path=os.path.join(__path__, 'datasets/coffeeshops.txt')), - print, -) - -if __name__ == '__main__': - import bonobo - - bonobo.run(graph) diff --git a/examples/basics_strings.py b/examples/basics_strings.py deleted file mode 100644 index 1147c7f..0000000 --- a/examples/basics_strings.py +++ /dev/null @@ -1,29 +0,0 @@ -import time -from random import randint - -from bonobo.core.graphs import Graph - - -def extract(): - yield 'foo' - yield 'bar' - yield 'baz' - - -def transform(s): - wait = randint(0, 1) - time.sleep(wait) - return s.title() + ' ' + str(wait) - - -def load(s): - print(s) - - -graph = Graph() -graph.add_chain(extract, transform, load) - -if __name__ == '__main__': - from bonobo import run - - run(graph) diff --git a/setup.py b/setup.py index f72d5ad..2e923d8 100644 --- a/setup.py +++ b/setup.py @@ -34,35 +34,37 @@ setup( description='Bonobo', license='Apache License, Version 2.0', install_requires=[ - 'blessings >=1.6,<1.7', 'psutil >=5.0,<5.1', 'requests >=2.12,<2.13', 'stevedore >=1.19,<1.20', - 'toolz >=0.8,<0.9' + 'blessings >=1.6,<1.7', 'psutil >=5.0,<5.1', 'requests >=2.12,<2.13', + 'stevedore >=1.19,<1.20', 'toolz >=0.8,<0.9' ], version=version, long_description=read('README.rst'), classifiers=read('classifiers.txt', tolines), packages=find_packages(exclude=['ez_setup', 'example', 'test']), include_package_data=True, - data_files=[ - ( - 'share/jupyter/nbextensions/bonobo-jupyter', [ - 'bonobo/ext/jupyter/static/extension.js', 'bonobo/ext/jupyter/static/index.js', - 'bonobo/ext/jupyter/static/index.js.map' - ] - ) - ], + data_files=[('share/jupyter/nbextensions/bonobo-jupyter', [ + 'bonobo/ext/jupyter/static/extension.js', + 'bonobo/ext/jupyter/static/index.js', + 'bonobo/ext/jupyter/static/index.js.map' + ])], extras_require={ 'dev': [ - 'coverage >=4.3,<4.4', 'mock >=2.0,<2.1', 'nose >=1.3,<1.4', 'pylint >=1.6,<1.7', 'pytest >=3,<4', - 'pytest-cov >=2.4,<2.5', 'sphinx', 'sphinx_rtd_theme', 'yapf' + 'coverage >=4.3,<4.4', 
'mock >=2.0,<2.1', 'nose >=1.3,<1.4', + 'pylint >=1.6,<1.7', 'pytest >=3,<4', 'pytest-cov >=2.4,<2.5', + 'pytest-timeout >=1.2,<1.3', 'sphinx', 'sphinx_rtd_theme', 'yapf' ], 'jupyter': ['jupyter >=1.0,<1.1', 'ipywidgets >=6.0.0.beta5'] }, entry_points={ - 'bonobo.commands': ['init = bonobo.commands.init:register', 'run = bonobo.commands.run:register'], + 'bonobo.commands': [ + 'init = bonobo.commands.init:register', + 'run = bonobo.commands.run:register' + ], 'console_scripts': ['bonobo = bonobo.commands:entrypoint'], - 'edgy.project.features': ['bonobo = ' - 'bonobo.ext.edgy.project.feature:BonoboFeature'] + 'edgy.project.features': + ['bonobo = ' + 'bonobo.ext.edgy.project.feature:BonoboFeature'] }, url='https://bonobo-project.org/', - download_url='https://github.com/python-bonobo/bonobo/tarball/{version}'.format(version=version), -) + download_url='https://github.com/python-bonobo/bonobo/tarball/{version}'. + format(version=version), ) diff --git a/tests/test_basicusage.py b/tests/test_basicusage.py new file mode 100644 index 0000000..5db6f9d --- /dev/null +++ b/tests/test_basicusage.py @@ -0,0 +1,14 @@ +import pytest + +import bonobo as bb + + +@pytest.mark.timeout(2) +def test_run_graph_noop(): + graph = bb.Graph( + bb.noop + ) + assert len(graph) == 1 + + result = bb.run(graph, strategy='threadpool') + assert result diff --git a/tests/test_commands.py b/tests/test_commands.py new file mode 100644 index 0000000..eb6a96a --- /dev/null +++ b/tests/test_commands.py @@ -0,0 +1,32 @@ +import pkg_resources +import pytest + +from bonobo import get_examples_path +from bonobo.commands import entrypoint + + +def test_entrypoint(): + commands = {} + + for command in pkg_resources.iter_entry_points('bonobo.commands'): + commands[command.name] = command + + assert 'init' in commands + assert 'run' in commands + +def test_no_command(capsys): + with pytest.raises(SystemExit): + entrypoint([]) + out, err = capsys.readouterr() + assert 'error: the following arguments are 
required: command' in err + +def test_init(): + pass # need ext dir + +def test_run(capsys): + entrypoint(['run', '--quiet', get_examples_path('types/strings.py')]) + out, err = capsys.readouterr() + out = out.split('\n') + assert out[0].startswith('Foo ') + assert out[1].startswith('Bar ') + assert out[2].startswith('Baz ') diff --git a/tests/test_config.py b/tests/test_config.py index 45cc326..daa08d8 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,4 +1,5 @@ import pytest + from bonobo import Configurable, Option
- - {%- if hasdoc('search') %} - {%- endif %} +
-