From 25ad2849355dc20222759e25ad05b388018b0cea Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 27 Dec 2016 13:31:38 +0100 Subject: [PATCH 1/5] starting to write docs, taking decisions on public api --- .coveragerc | 2 +- bonobo/__init__.py | 8 +- bonobo/compat/__init__.py | 0 bonobo/compat/pandas.py | 9 + bonobo/core/bags.py | 4 +- bonobo/core/contexts.py | 2 +- bonobo/core/graphs.py | 3 +- bonobo/ext/console/__init__.py | 6 +- bonobo/ext/console/helpers.py | 9 - bonobo/ext/jupyter/__init__.py | 7 +- bonobo/ext/jupyter/helpers.py | 8 - bonobo/ext/{ods.py => opendatasoft.py} | 11 +- bonobo/io/__init__.py | 8 +- bonobo/io/file.py | 35 ++++ bonobo/io/json.py | 33 +--- bonobo/util/__init__.py | 4 + bonobo/util/helpers.py | 20 ++ docs/_templates/index.html | 23 +-- docs/conf.py | 12 +- docs/install.rst | 34 ++++ docs/tutorial/basics.rst | 146 ++++++++++++++ docs/tutorial/basics2.rst | 46 +++++ .../datasets/cheap_coffeeshops_in_paris.txt | 182 ++++++++++++++++++ examples/opendata_fablabs.py | 27 +-- examples/read_cheap_coffeeshops_in_paris.py | 15 ++ examples/tutorial_basics_firststeps.py | 18 ++ examples/tutorial_basics_summary.py | 18 ++ tests/ext/test_ods.py | 4 +- tests/io/test_json.py | 6 +- 29 files changed, 604 insertions(+), 96 deletions(-) create mode 100644 bonobo/compat/__init__.py create mode 100644 bonobo/compat/pandas.py delete mode 100644 bonobo/ext/console/helpers.py rename bonobo/ext/{ods.py => opendatasoft.py} (60%) create mode 100644 bonobo/io/file.py create mode 100644 bonobo/util/helpers.py create mode 100644 docs/install.rst create mode 100644 docs/tutorial/basics.rst create mode 100644 docs/tutorial/basics2.rst create mode 100644 examples/datasets/cheap_coffeeshops_in_paris.txt create mode 100644 examples/read_cheap_coffeeshops_in_paris.py create mode 100644 examples/tutorial_basics_firststeps.py create mode 100644 examples/tutorial_basics_summary.py diff --git a/.coveragerc b/.coveragerc index ce96e75..1d76a1f 100644 --- a/.coveragerc +++ 
b/.coveragerc @@ -23,4 +23,4 @@ exclude_lines = ignore_errors = True [html] -directory = doc/_build/html/coverage +directory = docs/_build/html/coverage diff --git a/bonobo/__init__.py b/bonobo/__init__.py index 0d331c6..b66e2d8 100644 --- a/bonobo/__init__.py +++ b/bonobo/__init__.py @@ -36,16 +36,20 @@ with open(os.path.realpath(os.path.join(os.path.dirname(__file__), '../version.t __all__ = [ 'Bag', + 'FileWriter', 'Graph', - 'NaiveStrategy', + 'JsonFileWriter', 'NOT_MODIFIED', + 'NaiveStrategy', 'ProcessPoolExecutorStrategy', 'ThreadPoolExecutorStrategy', + 'console_run', 'head', 'inject', + 'jupyter_run', 'log', 'noop', + 'run', 'service', 'tee', - 'to_json', ] diff --git a/bonobo/compat/__init__.py b/bonobo/compat/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/compat/pandas.py b/bonobo/compat/pandas.py new file mode 100644 index 0000000..aab0dbd --- /dev/null +++ b/bonobo/compat/pandas.py @@ -0,0 +1,9 @@ +from bonobo import FileWriter, JsonFileWriter + +to_file = FileWriter +to_json = JsonFileWriter + +__all__ = [ + 'to_json', + 'to_file', +] diff --git a/bonobo/core/bags.py b/bonobo/core/bags.py index 2ec728d..c4f7c7b 100644 --- a/bonobo/core/bags.py +++ b/bonobo/core/bags.py @@ -33,8 +33,8 @@ class Bag: def flags(self): return self._flags - def apply(self, func, *args, **kwargs): - return func(*args, *self.args, **kwargs, **self.kwargs) + def apply(self, func_or_iter, *args, **kwargs): + return func_or_iter(*args, *self.args, **kwargs, **self.kwargs) def extend(self, *args, **kwargs): return type(self)(*args, _parent=self, **kwargs) diff --git a/bonobo/core/contexts.py b/bonobo/core/contexts.py index 7b0ec26..0e9d127 100644 --- a/bonobo/core/contexts.py +++ b/bonobo/core/contexts.py @@ -128,7 +128,7 @@ class ComponentExecutionContext(WithStatistics, AbstractLoopContext): @property def name(self): - return self.component.__name__ + return getattr(self.component, '__name__', getattr(type(self.component), '__name__', 
repr(self.component))) def __init__(self, component, parent): self.parent = parent diff --git a/bonobo/core/graphs.py b/bonobo/core/graphs.py index e9ffeb5..ba31930 100644 --- a/bonobo/core/graphs.py +++ b/bonobo/core/graphs.py @@ -6,9 +6,10 @@ class Graph: Represents a coherent directed acyclic graph (DAG) of components. """ - def __init__(self): + def __init__(self, *chain): self.components = [] self.graph = {BEGIN: set()} + self.add_chain(*chain) def outputs_of(self, idx, create=False): if create and not idx in self.graph: diff --git a/bonobo/ext/console/__init__.py b/bonobo/ext/console/__init__.py index 7d5c3f5..2fffb8f 100644 --- a/bonobo/ext/console/__init__.py +++ b/bonobo/ext/console/__init__.py @@ -1,7 +1,3 @@ -from .helpers import console_run from .plugin import ConsoleOutputPlugin -__all__ = [ - 'ConsoleOutputPlugin', - 'console_run', -] +__all__ = ['ConsoleOutputPlugin', ] diff --git a/bonobo/ext/console/helpers.py b/bonobo/ext/console/helpers.py deleted file mode 100644 index f55d016..0000000 --- a/bonobo/ext/console/helpers.py +++ /dev/null @@ -1,9 +0,0 @@ -from bonobo import Graph, ThreadPoolExecutorStrategy -from .plugin import ConsoleOutputPlugin - - -def console_run(*chain, output=True, plugins=None): - graph = Graph() - executor = ThreadPoolExecutorStrategy() - graph.add_chain(*chain) - return executor.execute(graph, plugins=(plugins or []) + [ConsoleOutputPlugin()] if output else []) diff --git a/bonobo/ext/jupyter/__init__.py b/bonobo/ext/jupyter/__init__.py index 2e04e8a..7dd1300 100644 --- a/bonobo/ext/jupyter/__init__.py +++ b/bonobo/ext/jupyter/__init__.py @@ -1,4 +1,4 @@ -from .helpers import jupyter_run +from bonobo.util.helpers import jupyter_run from .plugin import JupyterOutputPlugin @@ -6,7 +6,4 @@ def _jupyter_nbextension_paths(): return [{'section': 'notebook', 'src': 'static', 'dest': 'bonobo-jupyter', 'require': 'bonobo-jupyter/extension'}] -__all__ = [ - 'JupyterOutputPlugin', - 'jupyter_run', -] +__all__ = 
['JupyterOutputPlugin', ] diff --git a/bonobo/ext/jupyter/helpers.py b/bonobo/ext/jupyter/helpers.py index c62e3dd..8b13789 100644 --- a/bonobo/ext/jupyter/helpers.py +++ b/bonobo/ext/jupyter/helpers.py @@ -1,9 +1 @@ -from bonobo import Graph, ThreadPoolExecutorStrategy -from .plugin import JupyterOutputPlugin - -def jupyter_run(*chain, plugins=None): - graph = Graph() - executor = ThreadPoolExecutorStrategy() - graph.add_chain(*chain) - return executor.execute(graph, plugins=(plugins or []) + [JupyterOutputPlugin()]) diff --git a/bonobo/ext/ods.py b/bonobo/ext/opendatasoft.py similarity index 60% rename from bonobo/ext/ods.py rename to bonobo/ext/opendatasoft.py index 1a8cf4f..9fb8d61 100644 --- a/bonobo/ext/ods.py +++ b/bonobo/ext/opendatasoft.py @@ -3,11 +3,18 @@ from urllib.parse import urlencode import requests # todo: make this a service so we can substitute it ? -def extract_ods(url, dataset, rows=100, **kwargs): +def from_opendatasoft_api(dataset=None, + endpoint='{scheme}://{netloc}{path}', + scheme='https', + netloc='data.opendatasoft.com', + path='/api/records/1.0/search/', + rows=100, + **kwargs): + path = path if path.startswith('/') else '/' + path params = ( ('dataset', dataset), ('rows', rows), ) + tuple(sorted(kwargs.items())) - base_url = url + '?' + urlencode(params) + base_url = endpoint.format(scheme=scheme, netloc=netloc, path=path) + '?' + urlencode(params) def _extract_ods(): nonlocal base_url, rows diff --git a/bonobo/io/__init__.py b/bonobo/io/__init__.py index 3338e9d..ca10d08 100644 --- a/bonobo/io/__init__.py +++ b/bonobo/io/__init__.py @@ -1,5 +1,9 @@ """ Readers and writers for common file formats. 
""" -from .json import * +from .file import FileWriter +from .json import JsonFileWriter -__all__ = ['to_json', ] +__all__ = [ + 'FileWriter', + 'JsonFileWriter', +] diff --git a/bonobo/io/file.py b/bonobo/io/file.py new file mode 100644 index 0000000..b30e515 --- /dev/null +++ b/bonobo/io/file.py @@ -0,0 +1,35 @@ +from bonobo.util.lifecycle import with_context + +__all__ = ['FileWriter', ] + + +@with_context +class FileWriter: + # XXX TODO implement @with_context like this ? Pros and cons ? + class Meta: + contextual = True + + def __init__(self, path_or_buf, eol='\n'): + self.path_or_buf = path_or_buf + self.eol = eol + + def initialize(self, ctx): + """ todo add lock file ? optional maybe ? """ + assert not hasattr(ctx, 'fp'), 'One at a time, baby.' + ctx.fp = open(self.path_or_buf, 'w+') + ctx.first = True + + def write(self, fp, line, prefix=''): + fp.write(prefix + line) + + def __call__(self, ctx, row): + if ctx.first: + prefix, ctx.first = '', False + else: + prefix = self.eol + + self.write(ctx.fp, row, prefix=prefix) + + def finalize(self, ctx): + ctx.fp.close() + del ctx.fp, ctx.first diff --git a/bonobo/io/json.py b/bonobo/io/json.py index 2df1538..98712a7 100644 --- a/bonobo/io/json.py +++ b/bonobo/io/json.py @@ -1,40 +1,23 @@ import json +from .file import FileWriter from bonobo.util.lifecycle import with_context -__all__ = [ - 'from_json', - 'to_json', -] +__all__ = ['JsonFileWriter', ] @with_context -class JsonWriter: +class JsonFileWriter(FileWriter): def __init__(self, path_or_buf): - self.path_or_buf = path_or_buf + super().__init__(path_or_buf, eol=',\n') def initialize(self, ctx): - assert not hasattr(ctx, 'fp'), 'One at a time, baby.' 
- ctx.fp = open(self.path_or_buf, 'w+') + super().initialize(ctx) ctx.fp.write('[\n') - ctx.first = True - def __call__(self, ctx, row): - if ctx.first: - prefix = '' - ctx.first = False - else: - prefix = ',\n' - ctx.fp.write(prefix + json.dumps(row)) + def write(self, fp, line, prefix=''): + fp.write(prefix + json.dumps(line)) def finalize(self, ctx): ctx.fp.write('\n]') - ctx.fp.close() - del ctx.fp, ctx.first - - -def from_json(path_or_buf): - pass - - -to_json = JsonWriter + super().finalize(ctx) diff --git a/bonobo/util/__init__.py b/bonobo/util/__init__.py index 5de330f..4a15b40 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -4,12 +4,16 @@ import functools import pprint from .tokens import NOT_MODIFIED +from .helpers import run, console_run, jupyter_run __all__ = [ 'NOT_MODIFIED', + 'console_run', 'head', + 'jupyter_run', 'log', 'noop', + 'run', 'tee', ] diff --git a/bonobo/util/helpers.py b/bonobo/util/helpers.py new file mode 100644 index 0000000..5e3538f --- /dev/null +++ b/bonobo/util/helpers.py @@ -0,0 +1,20 @@ +def run(*chain, plugins=None): + from bonobo import Graph, ThreadPoolExecutorStrategy + + graph = Graph() + graph.add_chain(*chain) + + executor = ThreadPoolExecutorStrategy() + return executor.execute(graph, plugins=plugins or []) + + +def console_run(*chain, output=True, plugins=None): + from bonobo.ext.console import ConsoleOutputPlugin + + return run(*chain, plugins=(plugins or []) + [ConsoleOutputPlugin()] if output else []) + + +def jupyter_run(*chain, plugins=None): + from bonobo.ext.jupyter import JupyterOutputPlugin + + return run(*chain, plugins=(plugins or []) + [JupyterOutputPlugin()]) diff --git a/docs/_templates/index.html b/docs/_templates/index.html index 4894573..9b9ae48 100644 --- a/docs/_templates/index.html +++ b/docs/_templates/index.html @@ -1,22 +1,20 @@ {% extends "layout.html" %} -{% set title = _('Overview') %} +{% set title = _('Bonobo — Data processing for humans') %} {% block body %} -
- Migration in progress, things may be broken for now. Please give us some time to finish painting the walls. +
+ Rewrite in progress, things may be broken for now. Please give us some time to finish painting the walls.
-

{{ _('Welcome to Bonobo\'s Documentation') }}

- -
- + -
+

{% trans %} - Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of - data transformations using a simple directed graph of python callables. + Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simple and + atomic data transformations defined using a directed graph of plain old python callables. {% endtrans %}

@@ -71,9 +69,8 @@
- + {%- if hasdoc('search') %} diff --git a/docs/conf.py b/docs/conf.py index 604417a..16307fd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,8 +12,14 @@ import bonobo # -- General configuration ------------------------------------------------ extensions = [ - 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', - 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode' + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.ifconfig', + 'sphinx.ext.viewcode', + 'sphinx.ext.graphviz', ] # Add any paths that contain templates here, relative to this directory. @@ -95,6 +101,8 @@ html_additional_pages = {'index': 'index.html'} html_static_path = ['_static'] html_show_sphinx = False +graphviz_output_format = 'svg' + # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. diff --git a/docs/install.rst b/docs/install.rst new file mode 100644 index 0000000..5b8488a --- /dev/null +++ b/docs/install.rst @@ -0,0 +1,34 @@ +Installation +============ + + +.. todo:: + + better install docs, especially on how to use different fork, etc. + +Install with pip +:::::::::::::::: + +.. code-block:: shell-session + + $ pip install bonobo + +Install from source +::::::::::::::::::: + +.. code-block:: shell-session + + $ pip install git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo + +Editable install +:::::::::::::::: + +If you plan on making patches to Bonobo, you should install it as an "editable" package. + + +.. code-block:: shell-session + + $ pip install --editable git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo + +Note: `-e` is the shorthand version of `--editable`. 
+ diff --git a/docs/tutorial/basics.rst b/docs/tutorial/basics.rst new file mode 100644 index 0000000..1197d3a --- /dev/null +++ b/docs/tutorial/basics.rst @@ -0,0 +1,146 @@ +First steps - Basic concepts +============================ + +To begin with Bonobo, you should first install it: + +.. code-block:: shell-session + + $ pip install bonobo + +See :doc:`install` if you're looking for more options. + +Let's write a first data transformation +::::::::::::::::::::::::::::::::::::::: + +We'll write a simple component that just uppercases everything. In **Bonobo**, a component is a plain old python +callable, not more, not less. + +.. code-block:: python + + def uppercase(x: str): + return x.upper() + +Ok, this is kind of simple, and you can even use `str.upper` directly instead of writing a wrapper. The type annotations +are not used, but can make your code much more readable (and may be used as validators in the future). + +To run this, we need two more things: a generator that feeds data, and something that outputs it. + +.. code-block:: python + + def generate_data(): + yield 'foo' + yield 'bar' + yield 'baz' + + def output(x: str): + print(x) + +That should do the job. Now, let's chain the three callables together and run them. + +.. code-block:: python + + from bonobo import run + + run(generate_data, uppercase, output) + +This is the simplest data transformation possible, and we run it using the `run` helper that hides the underlying object +composition necessary to actually run the callables in parallel. The more flexible, but a bit more verbose way to do the +same thing would be: + +.. code-block:: python + + from bonobo import Graph, ThreadPoolExecutorStrategy + + graph = Graph() + graph.add_chain(generate_data, uppercase, output) + + executor = ThreadPoolExecutorStrategy() + executor.execute(graph) + +Depending on what you're doing, you may use the shorthand helper method, or the verbose one.
Always favor the shorter, +if you don't need to tune the graph or the execution strategy. + +Definitions +::::::::::: + +* Graph +* Component +* Executor + +.. todo:: Definitions, and substitute vague terms in the page by the exact term defined here + +Summary +::::::: + +Let's rewrite this using builtin functions and methods, then explain the few concepts available here: + +.. code-block:: python + + from bonobo import Graph, ThreadPoolExecutorStrategy + + # Represent our data processor as a simple directed graph of callables. + graph = Graph( + (x for x in ('foo', 'bar', 'baz')), + str.upper, + print, + ) + + # Use a thread pool. + executor = ThreadPoolExecutorStrategy() + + # Run the thing. + executor.execute(graph) + +Or the shorthand version, that you should prefer if you don't need fine tuning: + +.. code-block:: python + + from bonobo import run + + run( + iter(['foo', 'bar', 'baz']), + str.upper, + print, + ) + +Both methods are strictly equivalent (see :func:`bonobo.run`). When in doubt, favour the shorter. + +Takeaways +::::::::: + +① The :class:`bonobo.Graph` class is used to represent a data-processing pipeline. + +It can represent simple list-like linear graphs, like here, but it can also represent much more complex graphs, with +branches and cycles. + +This is what the graph we defined looks like: + +.. graphviz:: + + digraph { + rankdir = LR; + "iter(['foo', 'bar', 'baz'])" -> "str.upper" -> "print"; + } + + +② Transformations are simple python callables. Whatever can be called can be used as a transformation. Callables can +either `return` or `yield` data to send it to the next step. Regular functions (using `return`) should be preferred if +each call is guaranteed to return exactly one result, while generators (using `yield`) should be preferred if the +number of output lines for a given input varies. + +③ The graph is then executed using an `ExecutionStrategy`.
For now, let's focus only on +:class:`bonobo.ThreadPoolExecutorStrategy`, which uses an underlying `concurrent.futures.ThreadPoolExecutor` to +schedule calls in a pool of threads, but basically this strategy is what determines the actual behaviour of execution. + +④ Before actually executing the callables, the `ExecutorStrategy` instance will wrap each component in a `context`, +whose responsibility is to hold the state, to keep the components stateless. We'll expand on this later. + + +Next +:::: + +You now know all the basic concepts necessary to build (batch-like) data processors. + +If you're confident with this part, let's get to a more real world example, using files and nice console output. + +.. todo:: link to next page diff --git a/docs/tutorial/basics2.rst b/docs/tutorial/basics2.rst new file mode 100644 index 0000000..8542ec2 --- /dev/null +++ b/docs/tutorial/basics2.rst @@ -0,0 +1,46 @@ +First steps - Working with files +================================ + +Bonobo would not be of any use if the aim was to uppercase small lists of strings. In fact, Bonobo should not be used +if you don't expect any gain from parallelization of tasks. + +Let's take the following graph as an example: + +.. graphviz:: + + digraph { + rankdir = LR; + "A" -> "B" -> "C"; + } + +The execution strategy does a bit of behind-the-scenes work, wrapping every component in a thread (assuming you're using +the :class:`bonobo.ThreadPoolExecutorStrategy`), which allows it to start running `B` as soon as `A` yielded the first line +of data, and `C` as soon as `B` yielded the first line of data, even if `A` or `B` still have data to yield. + +The great thing is that you generally don't have to think about it. Just be aware that your components will be run in +parallel, and don't worry too much about blocking components, as they won't block their siblings. + +That being said, let's try to write a more real-world like transformation.
+ +Reading a file +:::::::::::::: + +There are a few component builders available in **Bonobo** that let you read files. You should at least know about the following: + +* :class:`bonobo.FileReader` (aliased as :func:`bonobo.from_file`) +* :class:`bonobo.JsonFileReader` (aliased as :func:`bonobo.from_json`) +* :class:`bonobo.CsvFileReader` (aliased as :func:`bonobo.from_csv`) + +Reading a file is as simple as using one of those, and for the example, we'll use a text file that was generated using +Bonobo from the "liste-des-cafes-a-un-euro" dataset made available by Mairie de Paris under the Open Database +License (ODbL). You can `explore the original dataset `_. +You'll need the example dataset, available in **Bonobo**'s repository. + +.. code-block:: python + + from bonobo import FileReader, run + + run( + FileReader('examples/datasets/cheap_coffeeshops_in_paris.txt'), + print, + ) diff --git a/examples/datasets/cheap_coffeeshops_in_paris.txt b/examples/datasets/cheap_coffeeshops_in_paris.txt new file mode 100644 index 0000000..b43e377 --- /dev/null +++ b/examples/datasets/cheap_coffeeshops_in_paris.txt @@ -0,0 +1,182 @@ +Extérieur Quai, 5, rue d'Alsace, 75010 Paris, France +Le Sully, 6 Bd henri IV, 75004 Paris, France +O q de poule, 53 rue du ruisseau, 75018 Paris, France +Le Pas Sage, 1 Passage du Grand Cerf, 75002 Paris, France +Le Dunois, 77 rue Dunois, 75013 Paris, France +La Renaissance, 112 Rue Championnet, 75018 Paris, France +Le chantereine, 51 Rue Victoire, 75009 Paris, France +Le Müller, 11 rue Feutrier, 75018 Paris, France +Le drapeau de la fidelité, 21 rue Copreaux, 75015 Paris, France +Le café des amis, 125 rue Blomet, 75015 Paris, France +Le Café Livres, 10 rue Saint Martin, 75004 Paris, France +Le Bosquet, 46 avenue Bosquet, 75007 Paris, France +Le Brio, 216, rue Marcadet, 75018 Paris, France +Le Kleemend's, 34 avenue Pierre Mendès-France, 75013 Paris, France +Café Pierre, 202 rue du faubourg st antoine, 75012 Paris, France +Les Arcades, 61 rue de 
Ponthieu, 75008 Paris, France +Le Square, 31 rue Saint-Dominique, 75007 Paris, France +Assaporare Dix sur Dix, 75, avenue Ledru-Rollin, 75012 Paris, France +Au cerceau d'or, 129 boulevard sebastopol, 75002 Paris, France +Café antoine, 17 rue Jean de la Fontaine, 75016 Paris, France +Café Lea, 5 rue Claude Bernard, 75005 Paris, France +Cardinal Saint-Germain, 11 boulevard Saint-Germain, 75005 Paris, France +Dédé la frite, 52 rue Notre-Dame des Victoires, 75002 Paris, France +La Bauloise, 36 rue du hameau, 75015 Paris, France +Le Bellerive, 71 quai de Seine, 75019 Paris, France +Le bistrot de Maëlle et Augustin, 42 rue coquillère, 75001 Paris, France +Le Dellac, 14 rue Rougemont, 75009 Paris, France +Le Felteu, 1 rue Pecquay, 75004 Paris, France +Le Saint Jean, 23 rue des abbesses, 75018 Paris, France +les montparnos, 65 boulevard Pasteur, 75015 Paris, France +Drole d'endroit pour une rencontre, 58 rue de Montorgueil, 75002 Paris, France +Le pari's café, 104 rue caulaincourt, 75018 Paris, France +Le Poulailler, 60 rue saint-sabin, 75011 Paris, France +L'Assassin, 99 rue Jean-Pierre Timbaud, 75011 Paris, France +l'Usine, 1 rue d'Avron, 75020 Paris, France +La Bricole, 52 rue Liebniz, 75018 Paris, France +le ronsard, place maubert, 75005 Paris, France +Face Bar, 82 rue des archives, 75003 Paris, France +American Kitchen, 49 rue bichat, 75010 Paris, France +La Marine, 55 bis quai de valmy, 75010 Paris, France +Le Bloc, 21 avenue Brochant, 75017 Paris, France +La Recoleta au Manoir, 229 avenue Gambetta, 75020 Paris, France +Le Pareloup, 80 Rue Saint-Charles, 75015 Paris, France +La Brasserie Gaité, 3 rue de la Gaité, 75014 Paris, France +Café Zen, 46 rue Victoire, 75009 Paris, France +O'Breizh, 27 rue de Penthièvre, 75008 Paris, France +Le Petit Choiseul, 23 rue saint augustin, 75002 Paris, France +Invitez vous chez nous, 7 rue Epée de Bois, 75005 Paris, France +La Cordonnerie, 142 Rue Saint-Denis 75002 Paris, 75002 Paris, France +Le Supercoin, 3, rue Baudelique, 75018 
Paris, France +Populettes, 86 bis rue Riquet, 75018 Paris, France +Au bon coin, 49 rue des Cloys, 75018 Paris, France +Le Couvent, 69 rue Broca, 75013 Paris, France +La Brûlerie des Ternes, 111 rue mouffetard, 75005 Paris, France +L'Écir, 59 Boulevard Saint-Jacques, 75014 Paris, France +Le Chat bossu, 126, rue du Faubourg Saint Antoine, 75012 Paris, France +Denfert café, 58 boulvevard Saint Jacques, 75014 Paris, France +Le Café frappé, 95 rue Montmartre, 75002 Paris, France +La Perle, 78 rue vieille du temple, 75003 Paris, France +Le Descartes, 1 rue Thouin, 75005 Paris, France +Bagels & Coffee Corner, Place de Clichy, 75017 Paris, France +Le petit club, 55 rue de la tombe Issoire, 75014 Paris, France +Le Plein soleil, 90 avenue Parmentier, 75011 Paris, France +Le Relais Haussmann, 146, boulevard Haussmann, 75008 Paris, France +Le Malar, 88 rue Saint-Dominique, 75007 Paris, France +Au panini de la place, 47 rue Belgrand, 75020 Paris, France +Le Village, 182 rue de Courcelles, 75017 Paris, France +Pause Café, 41 rue de Charonne, 75011 Paris, France +Le Pure café, 14 rue Jean Macé, 75011 Paris, France +Extra old café, 307 fg saint Antoine, 75011 Paris, France +Chez Fafa, 44 rue Vinaigriers, 75010 Paris, France +En attendant l'or, 3 rue Faidherbe, 75011 Paris, France +Aux cadrans, 21 ter boulevard Diderot, 75012 Paris, France +Brûlerie San José, 30 rue des Petits-Champs, 75002 Paris, France +Etienne, 14 rue Turbigo, Paris, 75001 Paris, France +L'ingénu, 184 bd Voltaire, 75011 Paris, France +L'Olive, 8 rue L'Olive, 75018 Paris, France +Le Biz, 18 rue Favart, 75002 Paris, France +Le Cap Bourbon, 1 rue Louis le Grand, 75002 Paris, France +Le General Beuret, 9 Place du General Beuret, 75015 Paris, France +Le Germinal, 95 avenue Emile Zola, 75015 Paris, France +Le Ragueneau, 202 rue Saint-Honoré, 75001 Paris, France +Le refuge, 72 rue lamarck, 75018 Paris, France +Le sully, 13 rue du Faubourg Saint Denis, 75010 Paris, France +L'antre d'eux, 16 rue DE MEZIERES, 75006 Paris, 
France +Le bal du pirate, 60 rue des bergers, 75015 Paris, France +zic zinc, 95 rue claude decaen, 75012 Paris, France +l'orillon bar, 35 rue de l'orillon, 75011 Paris, France +Le Zazabar, 116 Rue de Ménilmontant, 75020 Paris, France +L'Inévitable, 22 rue Linné, 75005 Paris, France +Ragueneau, 202 rue Saint Honoré, 75001 Paris, France +Le Caminito, 48 rue du Dessous des Berges, 75013 Paris, France +Epicerie Musicale, 55bis quai de Valmy, 75010 Paris, France +Le petit Bretonneau, Le petit Bretonneau - à l'intérieur de l'Hôpital, 75018 Paris, France +Le Centenaire, 104 rue amelot, 75011 Paris, France +La Montagne Sans Geneviève, 13 Rue du Pot de Fer, 75005 Paris, France +Les Pères Populaires, 46 rue de Buzenval, 75020 Paris, France +Cafe de grenelle, 188 rue de Grenelle, 75007 Paris, France +Le relais de la victoire, 73 rue de la Victoire, 75009 Paris, France +La chaumière gourmande, Route de la Muette à Neuilly +Club hippique du Jardin d’Acclimatation, 75016 Paris, France +Caves populaires, 22 rue des Dames, 75017 Paris, France +Caprice café, 12 avenue Jean Moulin, 75014 Paris, France +Tamm Bara, 7 rue Clisson, 75013 Paris, France +L'anjou, 1 rue de Montholon, 75009 Paris, France +Café dans l'aerogare Air France Invalides, 2 rue Robert Esnault Pelterie, 75007 Paris, France +Waikiki, 10 rue d"Ulm, 75005 Paris, France +Chez Prune, 36 rue Beaurepaire, 75010 Paris, France +Au Vin Des Rues, 21 rue Boulard, 75014 Paris, France +bistrot les timbrés, 14 rue d'alleray, 75015 Paris, France +Café beauveau, 9 rue de Miromesnil, 75008 Paris, France +Café de la Mairie (du VIII), rue de Lisbonne, 75008 Paris, France +Café Pistache, 9 rue des petits champs, 75001 Paris, France +La Cagnotte, 13 Rue Jean-Baptiste Dumay, 75020 Paris, France +le 1 cinq, 172 rue de vaugirard, 75015 Paris, France +Le Killy Jen, 28 bis boulevard Diderot, 75012 Paris, France +Les Artisans, 106 rue Lecourbe, 75015 Paris, France +Peperoni, 83 avenue de Wagram, 75001 Paris, France +le lutece, 380 rue de 
vaugirard, 75015 Paris, France +Brasiloja, 16 rue Ganneron, 75018 Paris, France +Rivolux, 16 rue de Rivoli, 75004 Paris, France +Chai 33, 33 Cour Saint Emilion, 75012 Paris, France +L'européen, 21 Bis Boulevard Diderot, 75012 Paris, France +NoMa, 39 rue Notre Dame de Nazareth, 75003 Paris, France +O'Paris, 1 Rue des Envierges, 75020 Paris, France +Café Clochette, 16 avenue Richerand, 75010 Paris, France +La cantoche de Paname, 40 Boulevard Beaumarchais, 75011 Paris, France +Le Saint René, 148 Boulevard de Charonne, 75020 Paris, France +La Liberté, 196 rue du faubourg saint-antoine, 75012 Paris, France +Chez Rutabaga, 16 rue des Petits Champs, 75002 Paris, France +Le BB (Bouchon des Batignolles), 2 rue Lemercier, 75017 Paris, France +La Brocante, 10 rue Rossini, 75009 Paris, France +Le Plomb du cantal, 3 rue Gaîté, 75014 Paris, France +Les caves populaires, 22 rue des Dames, 75017 Paris, France +Chez Luna, 108 rue de Ménilmontant, 75020 Paris, France +Le bar Fleuri, 1 rue du Plateau, 75019 Paris, France +Le Chaumontois, 12 rue Armand Carrel, 75018 Paris, France +Trois pièces cuisine, 101 rue des dames, 75017 Paris, France +Le Zinc, 61 avenue de la Motte Picquet, 75015 Paris, France +La cantine de Zoé, 136 rue du Faubourg poissonnière, 75010 Paris, France +Les Vendangeurs, 6/8 rue Stanislas, 75006 Paris, France +L'avant comptoir, 3 carrefour de l'Odéon, 75006 Paris, France +Botak cafe, 1 rue Paul albert, 75018 Paris, France +le chateau d'eau, 67 rue du Château d'eau, 75010 Paris, France +Bistrot Saint-Antoine, 58 rue du Fbg Saint-Antoine, 75012 Paris, France +Chez Oscar, 11/13 boulevard Beaumarchais, 75004 Paris, France +Le Fronton, 63 rue de Ponthieu, 75008 Paris, France +Le Piquet, 48 avenue de la Motte Picquet, 75015 Paris, France +Le Tournebride, 104 rue Mouffetard, 75005 Paris, France +maison du vin, 52 rue des plantes, 75014 Paris, France +Coffee Chope, 344Vrue Vaugirard, 75015 Paris, France +L'entrepôt, 157 rue Bercy 75012 Paris, 75012 Paris, France +Le café 
Monde et Médias, Place de la République, 75003 Paris, France +Café rallye tournelles, 11 Quai de la Tournelle, 75005 Paris, France +Brasserie le Morvan, 61 rue du château d'eau, 75010 Paris, France +Chez Miamophile, 6 rue Mélingue, 75019 Paris, France +La Caravane, Rue de la Fontaine au Roi, 75011 Paris, France +Panem, 18 rue de Crussol, 75011 Paris, France +Petits Freres des Pauvres, 47 rue de Batignolles, 75017 Paris, France +Café Dupont, 198 rue de la Convention, 75015 Paris, France +L'Angle, 28 rue de Ponthieu, 75008 Paris, France +Institut des Cultures d'Islam, 19-23 rue Léon, 75018 Paris, France +Canopy Café associatif, 19 rue Pajol, 75018 Paris, France +L'Entracte, place de l'opera, 75002 Paris, France +Le Sévigné, 15 rue du Parc Royal, 75003 Paris, France +Le Café d'avant, 35 rue Claude Bernard, 75005 Paris, France +Le Lucernaire, 53 rue Notre-Dame des Champs, 75006 Paris, France +Le Brigadier, 12 rue Blanche, 75009 Paris, France +L'âge d'or, 26 rue du Docteur Magnan, 75013 Paris, France +Café Victor, 10 boulevard Victor, 75015 Paris, France +L'empreinte, 54, avenue Daumesnil, 75012 Paris, France +L'horizon, 93, rue de la Roquette, 75011 Paris, France +Au pays de Vannes, 34 bis rue de Wattignies, 75012 Paris, France +Café Martin, 2 place Martin Nadaud, 75001 Paris, France +Café Varenne, 36 rue de Varenne, 75007 Paris, France +l'Eléphant du nil, 125 Rue Saint-Antoine, 75004 Paris, France +Le Comptoir, 354 bis rue Vaugirard, 75015 Paris, France +Le Parc Vaugirard, 358 rue de Vaugirard, 75015 Paris, France +Le Reynou, 2 bis quai de la mégisserie, 75001 Paris, France +le Zango, 58 rue Daguerre, 75014 Paris, France +Melting Pot, 3 rue de Lagny, 75020 Paris, France +Pari's Café, 174 avenue de Clichy, 75017 Paris, France \ No newline at end of file diff --git a/examples/opendata_fablabs.py b/examples/opendata_fablabs.py index 811fe86..72adc9c 100644 --- a/examples/opendata_fablabs.py +++ b/examples/opendata_fablabs.py @@ -1,16 +1,17 @@ import json from blessings 
import Terminal -from pycountry import countries -from bonobo.ext.console import console_run -from bonobo.ext.ods import extract_ods -from bonobo.util import tee -from bonobo.io.json import to_json +from bonobo import console_run, tee, JsonFileWriter +from bonobo.ext.opendatasoft import from_opendatasoft_api -DATASET = 'fablabs-in-the-world' -SEARCH_URL = 'https://datanova.laposte.fr/api/records/1.0/search/' -URL = SEARCH_URL + '?dataset=' + DATASET +try: + import pycountry +except ImportError as exc: + raise ImportError('You must install package "pycountry" to run this example.') from exc + +API_DATASET = 'fablabs-in-the-world' +API_NETLOC = 'datanova.laposte.fr' ROWS = 100 t = Terminal() @@ -25,7 +26,7 @@ def normalize(row): ** row, 'links': list(filter(None, map(_getlink, json.loads(row.get('links'))))), - 'country': countries.get(alpha_2=row.get('country_code', '').upper()).name, + 'country': pycountry.countries.get(alpha_2=row.get('country_code', '').upper()).name, } return result @@ -47,15 +48,15 @@ def display(row): print(' - {}: {address}'.format(t.blue('address'), address=', '.join(address))) print(' - {}: {links}'.format(t.blue('links'), links=', '.join(row['links']))) print(' - {}: {geometry}'.format(t.blue('geometry'), **row)) - print(' - {}: {source}'.format(t.blue('source'), source='datanova/' + DATASET)) + print(' - {}: {source}'.format(t.blue('source'), source='datanova/' + API_DATASET)) if __name__ == '__main__': console_run( - extract_ods( - SEARCH_URL, DATASET, timezone='Europe/Paris'), + from_opendatasoft_api( + API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris'), normalize, filter_france, tee(display), - to_json('fablabs.json'), + JsonFileWriter('fablabs.json'), output=True, ) diff --git a/examples/read_cheap_coffeeshops_in_paris.py b/examples/read_cheap_coffeeshops_in_paris.py new file mode 100644 index 0000000..8e701e3 --- /dev/null +++ b/examples/read_cheap_coffeeshops_in_paris.py @@ -0,0 +1,15 @@ +from os.path import dirname, realpath, 
join + +from bonobo import console_run +from bonobo.ext.opendatasoft import from_opendatasoft_api +from bonobo.io.file import FileWriter + +OUTPUT_FILENAME = realpath(join(dirname(__file__), 'datasets/cheap_coffeeshops_in_paris.txt')) + +console_run( + from_opendatasoft_api( + 'liste-des-cafes-a-un-euro', netloc='opendata.paris.fr'), + lambda row: '{nom_du_cafe}, {adresse}, {arrondissement} Paris, France'.format(**row), + FileWriter(OUTPUT_FILENAME), ) + +print('Import done, read {} for results.'.format(OUTPUT_FILENAME)) diff --git a/examples/tutorial_basics_firststeps.py b/examples/tutorial_basics_firststeps.py new file mode 100644 index 0000000..4fb3f52 --- /dev/null +++ b/examples/tutorial_basics_firststeps.py @@ -0,0 +1,18 @@ +from bonobo import run + + +def generate_data(): + yield 'foo' + yield 'bar' + yield 'baz' + + +def uppercase(x: str): + return x.upper() + + +def output(x: str): + print(x) + + +run(generate_data, uppercase, output) diff --git a/examples/tutorial_basics_summary.py b/examples/tutorial_basics_summary.py new file mode 100644 index 0000000..60a5452 --- /dev/null +++ b/examples/tutorial_basics_summary.py @@ -0,0 +1,18 @@ +from bonobo import Graph, ThreadPoolExecutorStrategy + + +def yield_from(*args): + yield from args + + +# Represent our data processor as a simple directed graph of callables. +graph = Graph( + lambda: (x for x in ('foo', 'bar', 'baz')), + str.upper, + print, ) + +# Use a thread pool. +executor = ThreadPoolExecutorStrategy() + +# Run the thing. 
+executor.execute(graph) diff --git a/tests/ext/test_ods.py b/tests/ext/test_ods.py index e585637..eef0db7 100644 --- a/tests/ext/test_ods.py +++ b/tests/ext/test_ods.py @@ -1,6 +1,6 @@ from mock import patch -from bonobo.ext.ods import extract_ods +from bonobo.ext.opendatasoft import from_opendatasoft_api class ResponseMock: @@ -17,7 +17,7 @@ class ResponseMock: def test_read_from_opendatasoft_api(): - extract = extract_ods('http://example.com/', 'test-a-set') + extract = from_opendatasoft_api('http://example.com/', 'test-a-set') with patch( 'requests.get', return_value=ResponseMock([ { diff --git a/tests/io/test_json.py b/tests/io/test_json.py index af7f2f5..15cc91d 100644 --- a/tests/io/test_json.py +++ b/tests/io/test_json.py @@ -1,13 +1,13 @@ import pytest -from bonobo import to_json, Bag +from bonobo import Bag, JsonFileWriter from bonobo.core.contexts import ComponentExecutionContext from bonobo.util.tokens import BEGIN, END def test_write_json_to_file(tmpdir): file = tmpdir.join('output.json') - json_writer = to_json(str(file)) + json_writer = JsonFileWriter(str(file)) context = ComponentExecutionContext(json_writer, None) context.initialize() @@ -28,7 +28,7 @@ def test_write_json_to_file(tmpdir): def test_write_json_without_initializer_should_not_work(tmpdir): file = tmpdir.join('output.json') - json_writer = to_json(str(file)) + json_writer = JsonFileWriter(str(file)) context = ComponentExecutionContext(json_writer, None) with pytest.raises(AttributeError): From ad36f9368a45d71f6cd58fed8ef342995596c24c Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 27 Dec 2016 22:05:21 +0100 Subject: [PATCH 2/5] more docs, still wip --- docs/_templates/index.html | 101 +++++++++++---------- docs/_templates/sidebarintro.html | 21 +++++ docs/conf.py | 16 +++- docs/guide/index.rst | 4 + docs/history.rst | 22 +++++ docs/index.rst | 10 ++- docs/reference/bonobo.compat.rst | 22 +++++ docs/reference/bonobo.core.rst | 85 ++++++++++++++++++ 
docs/reference/bonobo.core.strategies.rst | 38 ++++++++ docs/reference/bonobo.ext.console.rst | 22 +++++ docs/reference/bonobo.ext.jupyter.rst | 30 +++++++ docs/reference/bonobo.ext.rst | 46 ++++++++++ docs/reference/bonobo.io.rst | 30 +++++++ docs/reference/bonobo.rst | 21 +++++ docs/reference/bonobo.util.rst | 62 +++++++++++++ docs/reference/index.rst | 13 +++ docs/reference/modules.rst | 3 + docs/tutorial/basics.rst | 102 ++++++++++++---------- docs/tutorial/basics2.rst | 4 +- docs/tutorial/index.rst | 14 +++ 20 files changed, 560 insertions(+), 106 deletions(-) create mode 100644 docs/_templates/sidebarintro.html create mode 100644 docs/guide/index.rst create mode 100644 docs/history.rst create mode 100644 docs/reference/bonobo.compat.rst create mode 100644 docs/reference/bonobo.core.rst create mode 100644 docs/reference/bonobo.core.strategies.rst create mode 100644 docs/reference/bonobo.ext.console.rst create mode 100644 docs/reference/bonobo.ext.jupyter.rst create mode 100644 docs/reference/bonobo.ext.rst create mode 100644 docs/reference/bonobo.io.rst create mode 100644 docs/reference/bonobo.rst create mode 100644 docs/reference/bonobo.util.rst create mode 100644 docs/reference/index.rst create mode 100644 docs/reference/modules.rst create mode 100644 docs/tutorial/index.rst diff --git a/docs/_templates/index.html b/docs/_templates/index.html index 9b9ae48..640db28 100644 --- a/docs/_templates/index.html +++ b/docs/_templates/index.html @@ -19,22 +19,64 @@

- {% trans %} - It was originally created as a programmatic ETL (extract transform load) python 2.7+ library called rdc.etl, - to process tenth of millions of retail stock informations, and served this purpose for years. + {% trans history_url=pathto('history') %} + It was originally created as a programmatic ETL (extract transform load) for python 2.7+ (see + history) , but is now much more than that. Of course you can still write ETL jobs within minutes, but + you can also write web crawlers, twitter bots, web crawlers, streaming API endpoints... {% endtrans %}

{% trans %} - Bonobo is a clean full-rewrite of rdc.etl, for python 3.5+, and is now used for many ETL as well as non-ETL - use cases. For examples, it's pretty easy to write selenium based web crawlers, or twitter bots. As long as - a use case can be represented as a graph of callables interracting, Bonobo can be used. + As long as your use case can be represented as a graph of callables interracting, Bonobo can be used. {% endtrans %}

-

Features

+

{% trans %}Documentation{% endtrans %}

+ + + + + + + + + + + + + + +
+ + + {%- if hasdoc('search') %} + {%- endif %} +
+ + + +
+ + + +
+ +

Features

  • @@ -63,51 +105,6 @@
- -

{% trans %}Documentation{% endtrans %}

- - - - - - - - - - - - - - -
- - - {%- if hasdoc('search') %} - {%- endif %} -
- - - {%- if hasdoc('genindex') %} - {%- endif %} -
- - - -
-

{% trans %} You can also download PDF/EPUB versions of the Bonobo documentation: PDF version, diff --git a/docs/_templates/sidebarintro.html b/docs/_templates/sidebarintro.html new file mode 100644 index 0000000..5828cae --- /dev/null +++ b/docs/_templates/sidebarintro.html @@ -0,0 +1,21 @@ +

About Bonobo

+

+ Bonobo is a data-processing toolkit for python 3.5+, with emphasis on simplicity, atomicity and testability. Oh, + and performance, too! +

+ +

Other Formats

+

+ You can download the documentation in other formats as well: +

+ + +

Useful Links

+ \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 16307fd..6992e8a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -4,7 +4,7 @@ import sys import os -sys.path.insert(0, os.path.abspath('..')) +sys.path.insert(0, os.path.abspath('.')) sys.path.insert(0, os.path.abspath('_themes')) import bonobo @@ -82,13 +82,23 @@ html_theme_options = { } html_sidebars = { - '**': [ + 'index': [ 'sidebarlogo.html', + 'navigation.html', 'localtoc.html', - 'relations.html', + 'sidebarintro.html', + 'sourcelink.html', 'searchbox.html', 'sidebarinfos.html', + ], + '**': [ + 'sidebarlogo.html', + 'navigation.html', + 'localtoc.html', + 'relations.html', 'sourcelink.html', + 'searchbox.html', + 'sidebarinfos.html', ] } diff --git a/docs/guide/index.rst b/docs/guide/index.rst new file mode 100644 index 0000000..ebd0d17 --- /dev/null +++ b/docs/guide/index.rst @@ -0,0 +1,4 @@ +Guides +====== + +.. todo:: write the fucking doc! diff --git a/docs/history.rst b/docs/history.rst new file mode 100644 index 0000000..70eea0a --- /dev/null +++ b/docs/history.rst @@ -0,0 +1,22 @@ +History +======= + +**Bonobo** is a full rewrite of **rdc.etl**. + +**rdc.etl** is a full python 2.7+ ETL library for which development started in 2012, and was opensourced in 2013 (see +`first commit `_). + +Although the first commit in **Bonobo** happened late 2016, it's based on a lot of code, learnings and experience that +happened because of **rdc.etl**. + +It would have been counterproductive to migrate the same codebase: + + * a lot of mistakes were impossible to fix in a backward compatible way (for example, transformations were stateful, + making them more complicated to write and impossible to reuse, a lot of effort was used to make the components have + multi-inputs and multi-outputs, although in 99% of cases it's useless, etc.). + * we also wanted to develop something that took advantage of modern python versions, hence the choice of 3.5+. 
+ +**rdc.etl** still runs data transformation jobs, in both python 2.7 and 3, and we reuse whatever is possible to +build Bonobo. + +You can read diff --git a/docs/index.rst b/docs/index.rst index c2b725f..081b47d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,9 +3,11 @@ Bonobo .. toctree:: :maxdepth: 2 - :caption: Contents: + install + tutorial/index + guide/index + reference/index + genindex + modindex -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/reference/bonobo.compat.rst b/docs/reference/bonobo.compat.rst new file mode 100644 index 0000000..83581bc --- /dev/null +++ b/docs/reference/bonobo.compat.rst @@ -0,0 +1,22 @@ +bonobo.compat package +===================== + +Submodules +---------- + +bonobo.compat.pandas module +--------------------------- + +.. automodule:: bonobo.compat.pandas + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: bonobo.compat + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/bonobo.core.rst b/docs/reference/bonobo.core.rst new file mode 100644 index 0000000..247e883 --- /dev/null +++ b/docs/reference/bonobo.core.rst @@ -0,0 +1,85 @@ +bonobo.core package +=================== + +Subpackages +----------- + +.. toctree:: + + bonobo.core.strategies + +Submodules +---------- + +bonobo.core.bags module +----------------------- + +.. automodule:: bonobo.core.bags + :members: + :undoc-members: + :show-inheritance: + +bonobo.core.contexts module +--------------------------- + +.. automodule:: bonobo.core.contexts + :members: + :undoc-members: + :show-inheritance: + +bonobo.core.errors module +------------------------- + +.. automodule:: bonobo.core.errors + :members: + :undoc-members: + :show-inheritance: + +bonobo.core.graphs module +------------------------- + +.. automodule:: bonobo.core.graphs + :members: + :undoc-members: + :show-inheritance: + +bonobo.core.inputs module +------------------------- + +.. 
automodule:: bonobo.core.inputs + :members: + :undoc-members: + :show-inheritance: + +bonobo.core.plugins module +-------------------------- + +.. automodule:: bonobo.core.plugins + :members: + :undoc-members: + :show-inheritance: + +bonobo.core.services module +--------------------------- + +.. automodule:: bonobo.core.services + :members: + :undoc-members: + :show-inheritance: + +bonobo.core.stats module +------------------------ + +.. automodule:: bonobo.core.stats + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: bonobo.core + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/bonobo.core.strategies.rst b/docs/reference/bonobo.core.strategies.rst new file mode 100644 index 0000000..0dfd138 --- /dev/null +++ b/docs/reference/bonobo.core.strategies.rst @@ -0,0 +1,38 @@ +bonobo.core.strategies package +============================== + +Submodules +---------- + +bonobo.core.strategies.base module +---------------------------------- + +.. automodule:: bonobo.core.strategies.base + :members: + :undoc-members: + :show-inheritance: + +bonobo.core.strategies.executor module +-------------------------------------- + +.. automodule:: bonobo.core.strategies.executor + :members: + :undoc-members: + :show-inheritance: + +bonobo.core.strategies.naive module +----------------------------------- + +.. automodule:: bonobo.core.strategies.naive + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: bonobo.core.strategies + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/bonobo.ext.console.rst b/docs/reference/bonobo.ext.console.rst new file mode 100644 index 0000000..4385466 --- /dev/null +++ b/docs/reference/bonobo.ext.console.rst @@ -0,0 +1,22 @@ +bonobo.ext.console package +========================== + +Submodules +---------- + +bonobo.ext.console.plugin module +-------------------------------- + +.. 
automodule:: bonobo.ext.console.plugin + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: bonobo.ext.console + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/bonobo.ext.jupyter.rst b/docs/reference/bonobo.ext.jupyter.rst new file mode 100644 index 0000000..c695678 --- /dev/null +++ b/docs/reference/bonobo.ext.jupyter.rst @@ -0,0 +1,30 @@ +bonobo.ext.jupyter package +========================== + +Submodules +---------- + +bonobo.ext.jupyter.plugin module +-------------------------------- + +.. automodule:: bonobo.ext.jupyter.plugin + :members: + :undoc-members: + :show-inheritance: + +bonobo.ext.jupyter.widget module +-------------------------------- + +.. automodule:: bonobo.ext.jupyter.widget + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: bonobo.ext.jupyter + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/bonobo.ext.rst b/docs/reference/bonobo.ext.rst new file mode 100644 index 0000000..63327ef --- /dev/null +++ b/docs/reference/bonobo.ext.rst @@ -0,0 +1,46 @@ +bonobo.ext package +================== + +Subpackages +----------- + +.. toctree:: + + bonobo.ext.console + bonobo.ext.jupyter + +Submodules +---------- + +bonobo.ext.couchdb_ module +-------------------------- + +.. automodule:: bonobo.ext.couchdb_ + :members: + :undoc-members: + :show-inheritance: + +bonobo.ext.opendatasoft module +------------------------------ + +.. automodule:: bonobo.ext.opendatasoft + :members: + :undoc-members: + :show-inheritance: + +bonobo.ext.selenium module +-------------------------- + +.. automodule:: bonobo.ext.selenium + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. 
automodule:: bonobo.ext + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/bonobo.io.rst b/docs/reference/bonobo.io.rst new file mode 100644 index 0000000..d930d25 --- /dev/null +++ b/docs/reference/bonobo.io.rst @@ -0,0 +1,30 @@ +bonobo.io package +================= + +Submodules +---------- + +bonobo.io.file module +--------------------- + +.. automodule:: bonobo.io.file + :members: + :undoc-members: + :show-inheritance: + +bonobo.io.json module +--------------------- + +.. automodule:: bonobo.io.json + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: bonobo.io + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/bonobo.rst b/docs/reference/bonobo.rst new file mode 100644 index 0000000..4785ea3 --- /dev/null +++ b/docs/reference/bonobo.rst @@ -0,0 +1,21 @@ +bonobo package +============== + +Subpackages +----------- + +.. toctree:: + + bonobo.compat + bonobo.core + bonobo.ext + bonobo.io + bonobo.util + +Module contents +--------------- + +.. automodule:: bonobo + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/bonobo.util.rst b/docs/reference/bonobo.util.rst new file mode 100644 index 0000000..4d73e2c --- /dev/null +++ b/docs/reference/bonobo.util.rst @@ -0,0 +1,62 @@ +bonobo.util package +=================== + +Submodules +---------- + +bonobo.util.compat module +------------------------- + +.. automodule:: bonobo.util.compat + :members: + :undoc-members: + :show-inheritance: + +bonobo.util.helpers module +-------------------------- + +.. automodule:: bonobo.util.helpers + :members: + :undoc-members: + :show-inheritance: + +bonobo.util.iterators module +---------------------------- + +.. automodule:: bonobo.util.iterators + :members: + :undoc-members: + :show-inheritance: + +bonobo.util.lifecycle module +---------------------------- + +.. 
automodule:: bonobo.util.lifecycle + :members: + :undoc-members: + :show-inheritance: + +bonobo.util.time module +----------------------- + +.. automodule:: bonobo.util.time + :members: + :undoc-members: + :show-inheritance: + +bonobo.util.tokens module +------------------------- + +.. automodule:: bonobo.util.tokens + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: bonobo.util + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/index.rst b/docs/reference/index.rst new file mode 100644 index 0000000..f9fa75c --- /dev/null +++ b/docs/reference/index.rst @@ -0,0 +1,13 @@ +References +========== + +.. todo:: write the fucking doc! + +.. toctree:: + :maxdepth: 4 + + bonobo + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/reference/modules.rst b/docs/reference/modules.rst new file mode 100644 index 0000000..e29cd78 --- /dev/null +++ b/docs/reference/modules.rst @@ -0,0 +1,3 @@ +bonobo +====== + diff --git a/docs/tutorial/basics.rst b/docs/tutorial/basics.rst index 1197d3a..ad507b7 100644 --- a/docs/tutorial/basics.rst +++ b/docs/tutorial/basics.rst @@ -1,29 +1,37 @@ -First steps - Basic concepts -============================ +Basic concepts +============== -To begin with Bonobo, you should first install it: +To begin with Bonobo, you need to install it in a working python 3.5+ environment: .. code-block:: shell-session $ pip install bonobo -See :doc:`install` if you're looking for more options. +See :doc:`/install` for more options. Let's write a first data transformation ::::::::::::::::::::::::::::::::::::::: -We'll write a simple component that just uppercase everything. In **Bonobo**, a component is a plain old python -callable, not more, not less. +We'll start with the most simple components we can. + +In **Bonobo**, a component is a plain old python callable, not more, not less. Let's write one that takes a string and +uppercase it. .. 
code-block:: python def uppercase(x: str): return x.upper() -Ok, this is kind of simple, and you can even use `str.upper` directly instead of writing a wrapper. The type annotations -are not used, but can make your code much more readable (and may be used as validators in the future). +Pretty straightforward. -To run this, we need two more things: a generator that feeds data, and something that outputs it. +You could even use :func:`str.upper` directly instead of writing a wrapper, as a type's method (unbound) will take an +instance of this type as its first parameter (what you'd call `self` in your method). + +The type annotations written here are not used, but can make your code much more readable, and may very well be used as +validators in the future. + +Let's write two more components: a generator to produce the data to be transformed, and something that outputs it, +because, yeah, feedback is cool. .. code-block:: python @@ -35,7 +43,10 @@ To run this, we need two more things: a generator that feeds data, and something def output(x: str): print(x) -That should do the job. Now, let's chain the three callables together and run them. +Once again, you could have skipped the pain of writing this and simply use an iterable to generate the data and the +builtin :func:`print` for the output, but we'll stick to writing our own components for now. + +Let's chain the three components together and run the transformation: .. code-block:: python @@ -43,44 +54,33 @@ That should do the job. Now, let's chain the three callables together and run th run(generate_data, uppercase, output) -This is the simplest data transormation possible, and we run it using the `run` helper that hides the underlying object -composition necessary to actually run the callables in parralel. The more flexible, but a bit more verbose to do the -same thing would be: +.. graphviz:: -.. 
code-block:: python + digraph { + rankdir = LR; + "generate_data" -> "uppercase" -> "output"; + } - from bonobo import Graph, ThreadPoolExecutorStrategy - - graph = Graph() - graph.add_chain(generate_data, uppercase, output) - - executor = ThreadPoolExecutorStrategy() - executor.execute(graph) +We use the :func:`bonobo.run` helper that hides the underlying object composition necessary to actually run the +components in parallel, because it's simpler. Depending on what you're doing, you may use the shorthand helper method, or the verbose one. Always favor the shorter, -if you don't need to tune the graph or the execution strategy. +if you don't need to tune the graph or the execution strategy (see below). -Definitions -::::::::::: +Diving in +::::::::: -* Graph -* Component -* Executor - -.. todo:: Definitions, and substitute vague terms in the page by the exact term defined here - -Summary -::::::: - -Let's rewrite this using builtin functions and methods, then explain the few concepts available here: +Let's rewrite it using the builtin functions :func:`str.upper` and :func:`print` instead of our own wrappers, and expand +the :func:`bonobo.run()` helper so you see what's inside... .. code-block:: python from bonobo import Graph, ThreadPoolExecutorStrategy # Represent our data processor as a simple directed graph of callables. - graph = Graph( - (x for x in 'foo', 'bar', 'baz'), + graph = Graph() + graph.add_chain( + ('foo', 'bar', 'baz'), str.upper, print, ) @@ -91,19 +91,22 @@ Let's rewrite this using builtin functions and methods, then explain the few con # Run the thing. executor.execute(graph) -Or the shorthand version, that you should prefer if you don't need fine tuning: +We also switched our generator for a tuple, **Bonobo** will wrap it as a generator itself if it's not callable but +iterable. + +The shorthand version with builtins would look like this: .. 
code-block:: python from bonobo import run run( - iter(['foo', 'bar', 'baz']), + ('foo', 'bar', 'baz'), str.upper, print, ) -Both methods are strictly equivalent (see :func:`bonobo.run`). When in doubt, favour the shorter. +Both methods are strictly equivalent (see :func:`bonobo.run`). When in doubt, prefer the shorter version. Takeaways ::::::::: @@ -123,17 +126,26 @@ This is what the graph we defined looks like: } -② Transformations are simple python callables. Whatever can be called can be used as a transformation. Callables can +② `Components` are simple python callables. Whatever can be called can be used as a `component`. Callables can either `return` or `yield` data to send it to the next step. Regular functions (using `return`) should be prefered if each call is guaranteed to return exactly one result, while generators (using `yield`) should be prefered if the number of output lines for a given input varies. -③ The graph is then executed using an `ExecutionStrategy`. For now, let's focus only on +③ The `graph` is then executed using an `ExecutionStrategy`. In this tutorial, we'll only use :class:`bonobo.ThreadPoolExecutorStrategy`, which use an underlying `concurrent.futures.ThreadPoolExecutor` to schedule calls in a pool of threads, but basically this strategy is what determines the actual behaviour of execution. -④ Before actually executing the callables, the `ExecutorStrategy` instance will wrap each component in a `context`, -whose responsibility is to hold the state, to keep the components stateless. We'll expand on this later. +④ Before actually executing the `components`, the `ExecutorStrategy` instance will wrap each component in a `context`, +whose responsibility is to hold the state, to keep the `components` stateless. We'll expand on this later. + +Concepts and definitions +:::::::::::::::::::::::: + +* Component +* Graph +* Executor + +.. 
todo:: Definitions, and substitute vague terms in the page by the exact term defined here Next @@ -141,6 +153,6 @@ Next You now know all the basic concepts necessary to build (batch-like) data processors. -If you're confident with this part, let's get to a more real world example, using files and nice console output. +If you're confident with this part, let's get to a more real world example, using files and nice console output: +:doc:`basics2` -.. todo:: link to next page diff --git a/docs/tutorial/basics2.rst b/docs/tutorial/basics2.rst index 8542ec2..f9e9608 100644 --- a/docs/tutorial/basics2.rst +++ b/docs/tutorial/basics2.rst @@ -1,5 +1,5 @@ -First steps - Working with files -================================ +Working with files +================== Bonobo would not be of any use if the aim was to uppercase small lists of strings. In fact, Bonobo should not be used if you don't expect any gain from parralelization of tasks. diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst new file mode 100644 index 0000000..70049fc --- /dev/null +++ b/docs/tutorial/index.rst @@ -0,0 +1,14 @@ +First steps +=========== + +We tried hard to make **Bonobo** simple. We use simple python, and we believe it should be simple to learn. + +We strongly advise that even if you're an advanced python developer, you go through the whole tutorial for two +reasons: that should be sufficient to do anything possible with **Bonobo** and that's a good moment to learn the few +concepts you'll see everywhere in the software. + +.. 
toctree:: + :maxdepth: 2 + + basics + basics2 From 6b57e4680fc3246d886597cec116b97e629c9f06 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 27 Dec 2016 22:06:51 +0100 Subject: [PATCH 3/5] fixing erroneous path in doc config --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 6992e8a..80d0660 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -4,7 +4,7 @@ import sys import os -sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, os.path.abspath('..')) sys.path.insert(0, os.path.abspath('_themes')) import bonobo From 36bbbd92c3f6568ed16c37bc819a0e2b039450e0 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 27 Dec 2016 22:14:36 +0100 Subject: [PATCH 4/5] fix download links --- docs/_templates/sidebarintro.html | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/_templates/sidebarintro.html b/docs/_templates/sidebarintro.html index 5828cae..fc8acf2 100644 --- a/docs/_templates/sidebarintro.html +++ b/docs/_templates/sidebarintro.html @@ -9,8 +9,9 @@ You can download the documentation in other formats as well:

Useful Links

From b409391666b48f866bcaa6c347d5dab92d67b3d3 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 27 Dec 2016 22:17:46 +0100 Subject: [PATCH 5/5] more marketing --- docs/_templates/index.html | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/_templates/index.html b/docs/_templates/index.html index 640db28..0c3bbed 100644 --- a/docs/_templates/index.html +++ b/docs/_templates/index.html @@ -19,16 +19,16 @@

- {% trans history_url=pathto('history') %} - It was originally created as a programmatic ETL (extract transform load) for python 2.7+ (see - history) , but is now much more than that. Of course you can still write ETL jobs within minutes, but - you can also write web crawlers, twitter bots, web crawlers, streaming API endpoints... + {% trans %} + Bonobo is a full-featured Extract-Transform-Load library that won't force you to use an + ugly IDE. {% endtrans %}

{% trans %} - As long as your use case can be represented as a graph of callables interracting, Bonobo can be used. + Bonobo is your own data-monkey army. Tedious and repetitive data-processing incoming? Give + it a try! {% endtrans %}