starting to write docs, taking decisions on public api

This commit is contained in:
Romain Dorgueil
2016-12-27 13:31:38 +01:00
parent 512e2ab46d
commit 25ad284935
29 changed files with 604 additions and 96 deletions

View File

@ -23,4 +23,4 @@ exclude_lines =
ignore_errors = True ignore_errors = True
[html] [html]
directory = doc/_build/html/coverage directory = docs/_build/html/coverage

View File

@ -36,16 +36,20 @@ with open(os.path.realpath(os.path.join(os.path.dirname(__file__), '../version.t
__all__ = [ __all__ = [
'Bag', 'Bag',
'FileWriter',
'Graph', 'Graph',
'NaiveStrategy', 'JsonFileWriter',
'NOT_MODIFIED', 'NOT_MODIFIED',
'NaiveStrategy',
'ProcessPoolExecutorStrategy', 'ProcessPoolExecutorStrategy',
'ThreadPoolExecutorStrategy', 'ThreadPoolExecutorStrategy',
'console_run',
'head', 'head',
'inject', 'inject',
'jupyter_run',
'log', 'log',
'noop', 'noop',
'run',
'service', 'service',
'tee', 'tee',
'to_json',
] ]

View File

9
bonobo/compat/pandas.py Normal file
View File

@ -0,0 +1,9 @@
from bonobo import FileWriter, JsonFileWriter
# Short function-style aliases for the two bonobo writer classes imported above.
to_file = FileWriter
to_json = JsonFileWriter
# Only the alias names are exported by this module.
__all__ = [
'to_json',
'to_file',
]

View File

@ -33,8 +33,8 @@ class Bag:
def flags(self): def flags(self):
return self._flags return self._flags
def apply(self, func, *args, **kwargs): def apply(self, func_or_iter, *args, **kwargs):
return func(*args, *self.args, **kwargs, **self.kwargs) return func_or_iter(*args, *self.args, **kwargs, **self.kwargs)
def extend(self, *args, **kwargs): def extend(self, *args, **kwargs):
return type(self)(*args, _parent=self, **kwargs) return type(self)(*args, _parent=self, **kwargs)

View File

@ -128,7 +128,7 @@ class ComponentExecutionContext(WithStatistics, AbstractLoopContext):
@property @property
def name(self): def name(self):
return self.component.__name__ return getattr(self.component, '__name__', getattr(type(self.component), '__name__', repr(self.component)))
def __init__(self, component, parent): def __init__(self, component, parent):
self.parent = parent self.parent = parent

View File

@ -6,9 +6,10 @@ class Graph:
Represents a coherent directed acyclic graph (DAG) of components. Represents a coherent directed acyclic graph (DAG) of components.
""" """
def __init__(self): def __init__(self, *chain):
self.components = [] self.components = []
self.graph = {BEGIN: set()} self.graph = {BEGIN: set()}
self.add_chain(*chain)
def outputs_of(self, idx, create=False): def outputs_of(self, idx, create=False):
if create and not idx in self.graph: if create and not idx in self.graph:

View File

@ -1,7 +1,3 @@
from .helpers import console_run
from .plugin import ConsoleOutputPlugin from .plugin import ConsoleOutputPlugin
__all__ = [ __all__ = ['ConsoleOutputPlugin', ]
'ConsoleOutputPlugin',
'console_run',
]

View File

@ -1,9 +0,0 @@
from bonobo import Graph, ThreadPoolExecutorStrategy
from .plugin import ConsoleOutputPlugin
def console_run(*chain, output=True, plugins=None):
    """Chain the given components into a graph and execute it in a thread pool.

    :param chain: components to chain together, in order.
    :param output: when true, a ``ConsoleOutputPlugin`` is appended to the plugins.
    :param plugins: optional iterable of extra plugins; they are passed to the
        executor whatever the value of ``output``.
    :return: the value returned by ``ThreadPoolExecutorStrategy.execute``.
    """
    graph = Graph()
    executor = ThreadPoolExecutorStrategy()
    graph.add_chain(*chain)
    # The conditional must be parenthesised: ``a + b if output else []`` parses
    # as ``(a + b) if output else []`` and would discard user plugins when
    # ``output`` is false.
    console_plugins = [ConsoleOutputPlugin()] if output else []
    return executor.execute(graph, plugins=list(plugins or []) + console_plugins)

View File

@ -1,4 +1,4 @@
from .helpers import jupyter_run from bonobo.util.helpers import jupyter_run
from .plugin import JupyterOutputPlugin from .plugin import JupyterOutputPlugin
@ -6,7 +6,4 @@ def _jupyter_nbextension_paths():
return [{'section': 'notebook', 'src': 'static', 'dest': 'bonobo-jupyter', 'require': 'bonobo-jupyter/extension'}] return [{'section': 'notebook', 'src': 'static', 'dest': 'bonobo-jupyter', 'require': 'bonobo-jupyter/extension'}]
__all__ = [ __all__ = ['JupyterOutputPlugin', ]
'JupyterOutputPlugin',
'jupyter_run',
]

View File

@ -1,9 +1 @@
from bonobo import Graph, ThreadPoolExecutorStrategy
from .plugin import JupyterOutputPlugin
def jupyter_run(*chain, plugins=None):
    """Chain the given components into a graph and run it with Jupyter output.

    :param chain: components to chain together, in order.
    :param plugins: optional list of extra plugins; a ``JupyterOutputPlugin``
        is always appended to it.
    :return: the value returned by ``ThreadPoolExecutorStrategy.execute``.
    """
    pipeline = Graph()
    strategy = ThreadPoolExecutorStrategy()
    pipeline.add_chain(*chain)
    all_plugins = (plugins or []) + [JupyterOutputPlugin()]
    return strategy.execute(pipeline, plugins=all_plugins)

View File

@ -3,11 +3,18 @@ from urllib.parse import urlencode
import requests # todo: make this a service so we can substitute it ? import requests # todo: make this a service so we can substitute it ?
def extract_ods(url, dataset, rows=100, **kwargs): def from_opendatasoft_api(dataset=None,
endpoint='{scheme}://{netloc}{path}',
scheme='https',
netloc='data.opendatasoft.com',
path='/api/records/1.0/search/',
rows=100,
**kwargs):
path = path if path.startswith('/') else '/' + path
params = ( params = (
('dataset', dataset), ('dataset', dataset),
('rows', rows), ) + tuple(sorted(kwargs.items())) ('rows', rows), ) + tuple(sorted(kwargs.items()))
base_url = url + '?' + urlencode(params) base_url = endpoint.format(scheme=scheme, netloc=netloc, path=path) + '?' + urlencode(params)
def _extract_ods(): def _extract_ods():
nonlocal base_url, rows nonlocal base_url, rows

View File

@ -1,5 +1,9 @@
""" Readers and writers for common file formats. """ """ Readers and writers for common file formats. """
from .json import * from .file import FileWriter
from .json import JsonFileWriter
__all__ = ['to_json', ] __all__ = [
'FileWriter',
'JsonFileWriter',
]

35
bonobo/io/file.py Normal file
View File

@ -0,0 +1,35 @@
from bonobo.util.lifecycle import with_context
__all__ = ['FileWriter', ]
@with_context
class FileWriter:
    """Component that writes every row it receives to a file.

    Rows are separated by ``eol``; no separator is written before the first
    row nor after the last one. The open file and the "first row" flag are
    stored on the context object handed to the lifecycle methods, not on the
    writer itself.
    """

    # XXX TODO implement @with_context like this ? Pros and cons ?
    class Meta:
        contextual = True

    def __init__(self, path_or_buf, eol='\n'):
        """
        :param path_or_buf: passed as-is to ``open()`` by :meth:`initialize`.
        :param eol: separator written between two consecutive rows.
        """
        self.path_or_buf = path_or_buf
        self.eol = eol

    def initialize(self, ctx):
        """Open the output file and attach it to ``ctx``.

        todo add lock file ? optional maybe ?
        """
        assert not hasattr(ctx, 'fp'), 'One at a time, baby.'
        ctx.fp = open(self.path_or_buf, 'w+')
        ctx.first = True

    def write(self, fp, line, prefix=''):
        """Write ``prefix`` followed by ``line`` to ``fp``."""
        fp.write(prefix + line)

    def __call__(self, ctx, row):
        """Write one row, preceded by ``eol`` unless it is the first row."""
        if ctx.first:
            prefix, ctx.first = '', False
        else:
            prefix = self.eol
        self.write(ctx.fp, row, prefix=prefix)

    def finalize(self, ctx):
        """Close the file and remove the writer's state from ``ctx``."""
        try:
            ctx.fp.close()
        finally:
            # Always drop the state, even if close() fails, so that a later
            # initialize() on the same context is not blocked by a stale ``fp``.
            del ctx.fp, ctx.first

View File

@ -1,40 +1,23 @@
import json import json
from .file import FileWriter
from bonobo.util.lifecycle import with_context from bonobo.util.lifecycle import with_context
__all__ = [ __all__ = ['JsonFileWriter', ]
'from_json',
'to_json',
]
@with_context @with_context
class JsonWriter: class JsonFileWriter(FileWriter):
def __init__(self, path_or_buf): def __init__(self, path_or_buf):
self.path_or_buf = path_or_buf super().__init__(path_or_buf, eol=',\n')
def initialize(self, ctx): def initialize(self, ctx):
assert not hasattr(ctx, 'fp'), 'One at a time, baby.' super().initialize(ctx)
ctx.fp = open(self.path_or_buf, 'w+')
ctx.fp.write('[\n') ctx.fp.write('[\n')
ctx.first = True
def __call__(self, ctx, row): def write(self, fp, line, prefix=''):
if ctx.first: fp.write(prefix + json.dumps(line))
prefix = ''
ctx.first = False
else:
prefix = ',\n'
ctx.fp.write(prefix + json.dumps(row))
def finalize(self, ctx): def finalize(self, ctx):
ctx.fp.write('\n]') ctx.fp.write('\n]')
ctx.fp.close() super().finalize(ctx)
del ctx.fp, ctx.first
def from_json(path_or_buf):
pass
to_json = JsonWriter

View File

@ -4,12 +4,16 @@ import functools
import pprint import pprint
from .tokens import NOT_MODIFIED from .tokens import NOT_MODIFIED
from .helpers import run, console_run, jupyter_run
__all__ = [ __all__ = [
'NOT_MODIFIED', 'NOT_MODIFIED',
'console_run',
'head', 'head',
'jupyter_run',
'log', 'log',
'noop', 'noop',
'run',
'tee', 'tee',
] ]

20
bonobo/util/helpers.py Normal file
View File

@ -0,0 +1,20 @@
def run(*chain, plugins=None):
    """Chain the given components into a graph and execute it in a thread pool.

    :param chain: components to chain together, in order.
    :param plugins: optional plugins forwarded to the executor; an empty list
        is used when none (or a falsy value) is given.
    :return: the value returned by ``ThreadPoolExecutorStrategy.execute``.
    """
    # Imported at call time rather than module level -- presumably to avoid a
    # circular import with the ``bonobo`` package; confirm before moving it.
    from bonobo import Graph, ThreadPoolExecutorStrategy

    pipeline = Graph()
    pipeline.add_chain(*chain)
    strategy = ThreadPoolExecutorStrategy()
    active_plugins = plugins if plugins else []
    return strategy.execute(pipeline, plugins=active_plugins)
def console_run(*chain, output=True, plugins=None):
    """Run the given chain of components, optionally with console output.

    :param chain: components to chain together, in order.
    :param output: when true, a ``ConsoleOutputPlugin`` is appended to the plugins.
    :param plugins: optional iterable of extra plugins; they are forwarded to
        ``run`` whatever the value of ``output``.
    :return: the value returned by ``run``.
    """
    from bonobo.ext.console import ConsoleOutputPlugin

    # The conditional must be parenthesised: ``a + b if output else []`` parses
    # as ``(a + b) if output else []`` and would discard user plugins when
    # ``output`` is false.
    console_plugins = [ConsoleOutputPlugin()] if output else []
    return run(*chain, plugins=list(plugins or []) + console_plugins)
def jupyter_run(*chain, plugins=None):
    """Run the given chain of components with the Jupyter output plugin.

    :param chain: components to chain together, in order.
    :param plugins: optional list of extra plugins; a ``JupyterOutputPlugin``
        is always appended to it.
    :return: the value returned by ``run``.
    """
    from bonobo.ext.jupyter import JupyterOutputPlugin

    all_plugins = (plugins or []) + [JupyterOutputPlugin()]
    return run(*chain, plugins=all_plugins)

View File

@ -1,22 +1,20 @@
{% extends "layout.html" %} {% extends "layout.html" %}
{% set title = _('Overview') %} {% set title = _('Bonobo — Data processing for humans') %}
{% block body %} {% block body %}
<div style="border: 2px solid red; font-weight: bold;"> <div style="border: 2px solid red; font-weight: bold; margin: 1em; padding: 1em">
Migration in progress, things may be broken for now. Please give us some time to finish painting the walls. Rewrite in progress, things may be broken for now. Please give us some time to finish painting the walls.
</div> </div>
<h1>{{ _('Welcome to Bonobo\'s Documentation') }}</h1> <h1 style="text-align: center">
<img class="logo" src="{{ pathto('_static/bonobo.png', 1) }}" title="Bonobo" alt="Bonobo"
<div style="text-align: center;">
<img class="logo" src="{{ pathto('_static/bonobo.png', 1) }}" title="Bonobo"
style=" width: 128px; height: 128px;"/> style=" width: 128px; height: 128px;"/>
</div> </h1>
<p> <p>
{% trans %} {% trans %}
Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of <strong>Bonobo</strong> is a line-by-line data-processing toolkit for python 3.5+ emphasizing simple and
data transformations using a simple directed graph of python callables. atomic data transformations defined using a directed graph of plain old python callables.
{% endtrans %} {% endtrans %}
</p> </p>
@ -71,9 +69,8 @@
<table class="contentstable"> <table class="contentstable">
<tr> <tr>
<td> <td>
<p class="biglink"><a class="biglink" href="{{ pathto("tutorial") }}">{% trans %}First steps with <p class="biglink"><a class="biglink" href="{{ pathto("tutorial/basics") }}">{% trans %}First steps{% endtrans %}</a><br/>
Bonobo{% endtrans %}</a><br/> <span class="linkdescr">{% trans %}quick overview of basic features{% endtrans %}</span></p>
<span class="linkdescr">{% trans %}overview of basic features{% endtrans %}</span></p>
</td> </td>
<td> <td>
{%- if hasdoc('search') %} {%- if hasdoc('search') %}

View File

@ -12,8 +12,14 @@ import bonobo
# -- General configuration ------------------------------------------------ # -- General configuration ------------------------------------------------
extensions = [ extensions = [
'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.autodoc',
'sphinx.ext.ifconfig', 'sphinx.ext.viewcode' 'sphinx.ext.doctest',
'sphinx.ext.intersphinx',
'sphinx.ext.todo',
'sphinx.ext.coverage',
'sphinx.ext.ifconfig',
'sphinx.ext.viewcode',
'sphinx.ext.graphviz',
] ]
# Add any paths that contain templates here, relative to this directory. # Add any paths that contain templates here, relative to this directory.
@ -95,6 +101,8 @@ html_additional_pages = {'index': 'index.html'}
html_static_path = ['_static'] html_static_path = ['_static']
html_show_sphinx = False html_show_sphinx = False
graphviz_output_format = 'svg'
# -- Options for HTMLHelp output ------------------------------------------ # -- Options for HTMLHelp output ------------------------------------------
# Output file base name for HTML help builder. # Output file base name for HTML help builder.

34
docs/install.rst Normal file
View File

@ -0,0 +1,34 @@
Installation
============
.. todo::
better install docs, especially on how to use different fork, etc.
Install with pip
::::::::::::::::
.. code-block:: shell-session
$ pip install bonobo
Install from source
:::::::::::::::::::
.. code-block:: shell-session
$ pip install git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo
Editable install
::::::::::::::::
If you plan on making patches to Bonobo, you should install it as an "editable" package.
.. code-block:: shell-session
$ pip install --editable git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo
Note: `-e` is the shorthand version of `--editable`.

146
docs/tutorial/basics.rst Normal file
View File

@ -0,0 +1,146 @@
First steps - Basic concepts
============================
To begin with Bonobo, you should first install it:
.. code-block:: shell-session
$ pip install bonobo
See :doc:`install` if you're looking for more options.
Let's write a first data transformation
:::::::::::::::::::::::::::::::::::::::
We'll write a simple component that just uppercases everything. In **Bonobo**, a component is a plain old python
callable, no more, no less.
.. code-block:: python
def uppercase(x: str):
return x.upper()
Ok, this is kind of simple, and you can even use `str.upper` directly instead of writing a wrapper. The type annotations
are not used, but can make your code much more readable (and may be used as validators in the future).
To run this, we need two more things: a generator that feeds data, and something that outputs it.
.. code-block:: python
def generate_data():
yield 'foo'
yield 'bar'
yield 'baz'
def output(x: str):
print(x)
That should do the job. Now, let's chain the three callables together and run them.
.. code-block:: python
from bonobo import run
run(generate_data, uppercase, output)
This is the simplest data transformation possible, and we run it using the `run` helper that hides the underlying object
composition necessary to actually run the callables in parallel. A more flexible, but a bit more verbose, way to do the
same thing would be:
.. code-block:: python
from bonobo import Graph, ThreadPoolExecutorStrategy
graph = Graph()
graph.add_chain(generate_data, uppercase, output)
executor = ThreadPoolExecutorStrategy()
executor.execute(graph)
Depending on what you're doing, you may use the shorthand helper method, or the verbose one. Always favor the shorter,
if you don't need to tune the graph or the execution strategy.
Definitions
:::::::::::
* Graph
* Component
* Executor
.. todo:: Definitions, and substitute vague terms in the page by the exact term defined here
Summary
:::::::
Let's rewrite this using builtin functions and methods, then explain the few concepts available here:
.. code-block:: python
from bonobo import Graph, ThreadPoolExecutorStrategy
# Represent our data processor as a simple directed graph of callables.
graph = Graph(
(x for x in ('foo', 'bar', 'baz')),
str.upper,
print,
)
# Use a thread pool.
executor = ThreadPoolExecutorStrategy()
# Run the thing.
executor.execute(graph)
Or the shorthand version, that you should prefer if you don't need fine tuning:
.. code-block:: python
from bonobo import run
run(
iter(['foo', 'bar', 'baz']),
str.upper,
print,
)
Both methods are strictly equivalent (see :func:`bonobo.run`). When in doubt, favour the shorter.
Takeaways
:::::::::
① The :class:`bonobo.Graph` class is used to represent a data-processing pipeline.
It can represent simple list-like linear graphs, like here, but it can also represent much more complex graphs, with
branches and cycles.
This is what the graph we defined looks like:
.. graphviz::
digraph {
rankdir = LR;
"iter(['foo', 'bar', 'baz'])" -> "str.upper" -> "print";
}
② Transformations are simple python callables. Whatever can be called can be used as a transformation. Callables can
either `return` or `yield` data to send it to the next step. Regular functions (using `return`) should be preferred if
each call is guaranteed to return exactly one result, while generators (using `yield`) should be preferred if the
number of output lines for a given input varies.
③ The graph is then executed using an `ExecutionStrategy`. For now, let's focus only on
:class:`bonobo.ThreadPoolExecutorStrategy`, which uses an underlying `concurrent.futures.ThreadPoolExecutor` to
schedule calls in a pool of threads, but basically this strategy is what determines the actual behaviour of execution.
④ Before actually executing the callables, the `ExecutionStrategy` instance will wrap each component in a `context`,
whose responsibility is to hold the state, to keep the components stateless. We'll expand on this later.
Next
::::
You now know all the basic concepts necessary to build (batch-like) data processors.
If you're confident with this part, let's get to a more real world example, using files and nice console output.
.. todo:: link to next page

46
docs/tutorial/basics2.rst Normal file
View File

@ -0,0 +1,46 @@
First steps - Working with files
================================
Bonobo would not be of any use if the aim was to uppercase small lists of strings. In fact, Bonobo should not be used
if you don't expect any gain from parallelization of tasks.
Let's take the following graph as an example:
.. graphviz::
digraph {
rankdir = LR;
"A" -> "B" -> "C";
}
The execution strategy does a bit of behind-the-scenes work, wrapping every component in a thread (assuming you're using
the :class:`bonobo.ThreadPoolExecutorStrategy`), which allows `B` to start running as soon as `A` has yielded the first line
of data, and `C` as soon as `B` has yielded the first line of data, even if `A` or `B` still have data to yield.
The great thing is that you generally don't have to think about it. Just be aware that your components will be run in
parallel, and don't worry too much about blocking components, as they won't block their siblings.
That being said, let's try to write a more real-world like transformation.
Reading a file
::::::::::::::
There are a few component builders available in **Bonobo** that let you read files. You should at least know about the following:
* :class:`bonobo.FileReader` (aliased as :func:`bonobo.from_file`)
* :class:`bonobo.JsonFileReader` (aliased as :func:`bonobo.from_json`)
* :class:`bonobo.CsvFileReader` (aliased as :func:`bonobo.from_csv`)
Reading a file is as simple as using one of those, and for the example, we'll use a text file that was generated using
Bonobo from the "liste-des-cafes-a-un-euro" dataset made available by Mairie de Paris under the Open Database
License (ODbL). You can `explore the original dataset <https://opendata.paris.fr/explore/dataset/liste-des-cafes-a-un-euro/information/>`_.
You'll need the example dataset, available in **Bonobo**'s repository.
.. code-block:: python
from bonobo import FileReader, run
run(
FileReader('examples/datasets/cheap_coffeeshops_in_paris.txt'),
print,
)

View File

@ -0,0 +1,182 @@
Extérieur Quai, 5, rue d'Alsace, 75010 Paris, France
Le Sully, 6 Bd henri IV, 75004 Paris, France
O q de poule, 53 rue du ruisseau, 75018 Paris, France
Le Pas Sage, 1 Passage du Grand Cerf, 75002 Paris, France
Le Dunois, 77 rue Dunois, 75013 Paris, France
La Renaissance, 112 Rue Championnet, 75018 Paris, France
Le chantereine, 51 Rue Victoire, 75009 Paris, France
Le Müller, 11 rue Feutrier, 75018 Paris, France
Le drapeau de la fidelité, 21 rue Copreaux, 75015 Paris, France
Le café des amis, 125 rue Blomet, 75015 Paris, France
Le Café Livres, 10 rue Saint Martin, 75004 Paris, France
Le Bosquet, 46 avenue Bosquet, 75007 Paris, France
Le Brio, 216, rue Marcadet, 75018 Paris, France
Le Kleemend's, 34 avenue Pierre Mendès-France, 75013 Paris, France
Café Pierre, 202 rue du faubourg st antoine, 75012 Paris, France
Les Arcades, 61 rue de Ponthieu, 75008 Paris, France
Le Square, 31 rue Saint-Dominique, 75007 Paris, France
Assaporare Dix sur Dix, 75, avenue Ledru-Rollin, 75012 Paris, France
Au cerceau d'or, 129 boulevard sebastopol, 75002 Paris, France
Café antoine, 17 rue Jean de la Fontaine, 75016 Paris, France
Café Lea, 5 rue Claude Bernard, 75005 Paris, France
Cardinal Saint-Germain, 11 boulevard Saint-Germain, 75005 Paris, France
Dédé la frite, 52 rue Notre-Dame des Victoires, 75002 Paris, France
La Bauloise, 36 rue du hameau, 75015 Paris, France
Le Bellerive, 71 quai de Seine, 75019 Paris, France
Le bistrot de Maëlle et Augustin, 42 rue coquillère, 75001 Paris, France
Le Dellac, 14 rue Rougemont, 75009 Paris, France
Le Felteu, 1 rue Pecquay, 75004 Paris, France
Le Saint Jean, 23 rue des abbesses, 75018 Paris, France
les montparnos, 65 boulevard Pasteur, 75015 Paris, France
Drole d'endroit pour une rencontre, 58 rue de Montorgueil, 75002 Paris, France
Le pari's café, 104 rue caulaincourt, 75018 Paris, France
Le Poulailler, 60 rue saint-sabin, 75011 Paris, France
L'Assassin, 99 rue Jean-Pierre Timbaud, 75011 Paris, France
l'Usine, 1 rue d'Avron, 75020 Paris, France
La Bricole, 52 rue Liebniz, 75018 Paris, France
le ronsard, place maubert, 75005 Paris, France
Face Bar, 82 rue des archives, 75003 Paris, France
American Kitchen, 49 rue bichat, 75010 Paris, France
La Marine, 55 bis quai de valmy, 75010 Paris, France
Le Bloc, 21 avenue Brochant, 75017 Paris, France
La Recoleta au Manoir, 229 avenue Gambetta, 75020 Paris, France
Le Pareloup, 80 Rue Saint-Charles, 75015 Paris, France
La Brasserie Gaité, 3 rue de la Gaité, 75014 Paris, France
Café Zen, 46 rue Victoire, 75009 Paris, France
O'Breizh, 27 rue de Penthièvre, 75008 Paris, France
Le Petit Choiseul, 23 rue saint augustin, 75002 Paris, France
Invitez vous chez nous, 7 rue Epée de Bois, 75005 Paris, France
La Cordonnerie, 142 Rue Saint-Denis 75002 Paris, 75002 Paris, France
Le Supercoin, 3, rue Baudelique, 75018 Paris, France
Populettes, 86 bis rue Riquet, 75018 Paris, France
Au bon coin, 49 rue des Cloys, 75018 Paris, France
Le Couvent, 69 rue Broca, 75013 Paris, France
La Brûlerie des Ternes, 111 rue mouffetard, 75005 Paris, France
L'Écir, 59 Boulevard Saint-Jacques, 75014 Paris, France
Le Chat bossu, 126, rue du Faubourg Saint Antoine, 75012 Paris, France
Denfert café, 58 boulvevard Saint Jacques, 75014 Paris, France
Le Café frappé, 95 rue Montmartre, 75002 Paris, France
La Perle, 78 rue vieille du temple, 75003 Paris, France
Le Descartes, 1 rue Thouin, 75005 Paris, France
Bagels & Coffee Corner, Place de Clichy, 75017 Paris, France
Le petit club, 55 rue de la tombe Issoire, 75014 Paris, France
Le Plein soleil, 90 avenue Parmentier, 75011 Paris, France
Le Relais Haussmann, 146, boulevard Haussmann, 75008 Paris, France
Le Malar, 88 rue Saint-Dominique, 75007 Paris, France
Au panini de la place, 47 rue Belgrand, 75020 Paris, France
Le Village, 182 rue de Courcelles, 75017 Paris, France
Pause Café, 41 rue de Charonne, 75011 Paris, France
Le Pure café, 14 rue Jean Macé, 75011 Paris, France
Extra old café, 307 fg saint Antoine, 75011 Paris, France
Chez Fafa, 44 rue Vinaigriers, 75010 Paris, France
En attendant l'or, 3 rue Faidherbe, 75011 Paris, France
Aux cadrans, 21 ter boulevard Diderot, 75012 Paris, France
Brûlerie San José, 30 rue des Petits-Champs, 75002 Paris, France
Etienne, 14 rue Turbigo, Paris, 75001 Paris, France
L'ingénu, 184 bd Voltaire, 75011 Paris, France
L'Olive, 8 rue L'Olive, 75018 Paris, France
Le Biz, 18 rue Favart, 75002 Paris, France
Le Cap Bourbon, 1 rue Louis le Grand, 75002 Paris, France
Le General Beuret, 9 Place du General Beuret, 75015 Paris, France
Le Germinal, 95 avenue Emile Zola, 75015 Paris, France
Le Ragueneau, 202 rue Saint-Honoré, 75001 Paris, France
Le refuge, 72 rue lamarck, 75018 Paris, France
Le sully, 13 rue du Faubourg Saint Denis, 75010 Paris, France
L'antre d'eux, 16 rue DE MEZIERES, 75006 Paris, France
Le bal du pirate, 60 rue des bergers, 75015 Paris, France
zic zinc, 95 rue claude decaen, 75012 Paris, France
l'orillon bar, 35 rue de l'orillon, 75011 Paris, France
Le Zazabar, 116 Rue de Ménilmontant, 75020 Paris, France
L'Inévitable, 22 rue Linné, 75005 Paris, France
Ragueneau, 202 rue Saint Honoré, 75001 Paris, France
Le Caminito, 48 rue du Dessous des Berges, 75013 Paris, France
Epicerie Musicale, 55bis quai de Valmy, 75010 Paris, France
Le petit Bretonneau, Le petit Bretonneau - à l'intérieur de l'Hôpital, 75018 Paris, France
Le Centenaire, 104 rue amelot, 75011 Paris, France
La Montagne Sans Geneviève, 13 Rue du Pot de Fer, 75005 Paris, France
Les Pères Populaires, 46 rue de Buzenval, 75020 Paris, France
Cafe de grenelle, 188 rue de Grenelle, 75007 Paris, France
Le relais de la victoire, 73 rue de la Victoire, 75009 Paris, France
La chaumière gourmande, Route de la Muette à Neuilly
Club hippique du Jardin dAcclimatation, 75016 Paris, France
Caves populaires, 22 rue des Dames, 75017 Paris, France
Caprice café, 12 avenue Jean Moulin, 75014 Paris, France
Tamm Bara, 7 rue Clisson, 75013 Paris, France
L'anjou, 1 rue de Montholon, 75009 Paris, France
Café dans l'aerogare Air France Invalides, 2 rue Robert Esnault Pelterie, 75007 Paris, France
Waikiki, 10 rue d"Ulm, 75005 Paris, France
Chez Prune, 36 rue Beaurepaire, 75010 Paris, France
Au Vin Des Rues, 21 rue Boulard, 75014 Paris, France
bistrot les timbrés, 14 rue d'alleray, 75015 Paris, France
Café beauveau, 9 rue de Miromesnil, 75008 Paris, France
Café de la Mairie (du VIII), rue de Lisbonne, 75008 Paris, France
Café Pistache, 9 rue des petits champs, 75001 Paris, France
La Cagnotte, 13 Rue Jean-Baptiste Dumay, 75020 Paris, France
le 1 cinq, 172 rue de vaugirard, 75015 Paris, France
Le Killy Jen, 28 bis boulevard Diderot, 75012 Paris, France
Les Artisans, 106 rue Lecourbe, 75015 Paris, France
Peperoni, 83 avenue de Wagram, 75001 Paris, France
le lutece, 380 rue de vaugirard, 75015 Paris, France
Brasiloja, 16 rue Ganneron, 75018 Paris, France
Rivolux, 16 rue de Rivoli, 75004 Paris, France
Chai 33, 33 Cour Saint Emilion, 75012 Paris, France
L'européen, 21 Bis Boulevard Diderot, 75012 Paris, France
NoMa, 39 rue Notre Dame de Nazareth, 75003 Paris, France
O'Paris, 1 Rue des Envierges, 75020 Paris, France
Café Clochette, 16 avenue Richerand, 75010 Paris, France
La cantoche de Paname, 40 Boulevard Beaumarchais, 75011 Paris, France
Le Saint René, 148 Boulevard de Charonne, 75020 Paris, France
La Liberté, 196 rue du faubourg saint-antoine, 75012 Paris, France
Chez Rutabaga, 16 rue des Petits Champs, 75002 Paris, France
Le BB (Bouchon des Batignolles), 2 rue Lemercier, 75017 Paris, France
La Brocante, 10 rue Rossini, 75009 Paris, France
Le Plomb du cantal, 3 rue Gaîté, 75014 Paris, France
Les caves populaires, 22 rue des Dames, 75017 Paris, France
Chez Luna, 108 rue de Ménilmontant, 75020 Paris, France
Le bar Fleuri, 1 rue du Plateau, 75019 Paris, France
Le Chaumontois, 12 rue Armand Carrel, 75018 Paris, France
Trois pièces cuisine, 101 rue des dames, 75017 Paris, France
Le Zinc, 61 avenue de la Motte Picquet, 75015 Paris, France
La cantine de Zoé, 136 rue du Faubourg poissonnière, 75010 Paris, France
Les Vendangeurs, 6/8 rue Stanislas, 75006 Paris, France
L'avant comptoir, 3 carrefour de l'Odéon, 75006 Paris, France
Botak cafe, 1 rue Paul albert, 75018 Paris, France
le chateau d'eau, 67 rue du Château d'eau, 75010 Paris, France
Bistrot Saint-Antoine, 58 rue du Fbg Saint-Antoine, 75012 Paris, France
Chez Oscar, 11/13 boulevard Beaumarchais, 75004 Paris, France
Le Fronton, 63 rue de Ponthieu, 75008 Paris, France
Le Piquet, 48 avenue de la Motte Picquet, 75015 Paris, France
Le Tournebride, 104 rue Mouffetard, 75005 Paris, France
maison du vin, 52 rue des plantes, 75014 Paris, France
Coffee Chope, 344Vrue Vaugirard, 75015 Paris, France
L'entrepôt, 157 rue Bercy 75012 Paris, 75012 Paris, France
Le café Monde et Médias, Place de la République, 75003 Paris, France
Café rallye tournelles, 11 Quai de la Tournelle, 75005 Paris, France
Brasserie le Morvan, 61 rue du château d'eau, 75010 Paris, France
Chez Miamophile, 6 rue Mélingue, 75019 Paris, France
La Caravane, Rue de la Fontaine au Roi, 75011 Paris, France
Panem, 18 rue de Crussol, 75011 Paris, France
Petits Freres des Pauvres, 47 rue de Batignolles, 75017 Paris, France
Café Dupont, 198 rue de la Convention, 75015 Paris, France
L'Angle, 28 rue de Ponthieu, 75008 Paris, France
Institut des Cultures d'Islam, 19-23 rue Léon, 75018 Paris, France
Canopy Café associatif, 19 rue Pajol, 75018 Paris, France
L'Entracte, place de l'opera, 75002 Paris, France
Le Sévigné, 15 rue du Parc Royal, 75003 Paris, France
Le Café d'avant, 35 rue Claude Bernard, 75005 Paris, France
Le Lucernaire, 53 rue Notre-Dame des Champs, 75006 Paris, France
Le Brigadier, 12 rue Blanche, 75009 Paris, France
L'âge d'or, 26 rue du Docteur Magnan, 75013 Paris, France
Café Victor, 10 boulevard Victor, 75015 Paris, France
L'empreinte, 54, avenue Daumesnil, 75012 Paris, France
L'horizon, 93, rue de la Roquette, 75011 Paris, France
Au pays de Vannes, 34 bis rue de Wattignies, 75012 Paris, France
Café Martin, 2 place Martin Nadaud, 75001 Paris, France
Café Varenne, 36 rue de Varenne, 75007 Paris, France
l'Eléphant du nil, 125 Rue Saint-Antoine, 75004 Paris, France
Le Comptoir, 354 bis rue Vaugirard, 75015 Paris, France
Le Parc Vaugirard, 358 rue de Vaugirard, 75015 Paris, France
Le Reynou, 2 bis quai de la mégisserie, 75001 Paris, France
le Zango, 58 rue Daguerre, 75014 Paris, France
Melting Pot, 3 rue de Lagny, 75020 Paris, France
Pari's Café, 174 avenue de Clichy, 75017 Paris, France

View File

@ -1,16 +1,17 @@
import json import json
from blessings import Terminal from blessings import Terminal
from pycountry import countries
from bonobo.ext.console import console_run from bonobo import console_run, tee, JsonFileWriter
from bonobo.ext.ods import extract_ods from bonobo.ext.opendatasoft import from_opendatasoft_api
from bonobo.util import tee
from bonobo.io.json import to_json
DATASET = 'fablabs-in-the-world' try:
SEARCH_URL = 'https://datanova.laposte.fr/api/records/1.0/search/' import pycountry
URL = SEARCH_URL + '?dataset=' + DATASET except ImportError as exc:
raise ImportError('You must install package "pycountry" to run this example.') from exc
API_DATASET = 'fablabs-in-the-world'
API_NETLOC = 'datanova.laposte.fr'
ROWS = 100 ROWS = 100
t = Terminal() t = Terminal()
@ -25,7 +26,7 @@ def normalize(row):
** **
row, row,
'links': list(filter(None, map(_getlink, json.loads(row.get('links'))))), 'links': list(filter(None, map(_getlink, json.loads(row.get('links'))))),
'country': countries.get(alpha_2=row.get('country_code', '').upper()).name, 'country': pycountry.countries.get(alpha_2=row.get('country_code', '').upper()).name,
} }
return result return result
@ -47,15 +48,15 @@ def display(row):
print(' - {}: {address}'.format(t.blue('address'), address=', '.join(address))) print(' - {}: {address}'.format(t.blue('address'), address=', '.join(address)))
print(' - {}: {links}'.format(t.blue('links'), links=', '.join(row['links']))) print(' - {}: {links}'.format(t.blue('links'), links=', '.join(row['links'])))
print(' - {}: {geometry}'.format(t.blue('geometry'), **row)) print(' - {}: {geometry}'.format(t.blue('geometry'), **row))
print(' - {}: {source}'.format(t.blue('source'), source='datanova/' + DATASET)) print(' - {}: {source}'.format(t.blue('source'), source='datanova/' + API_DATASET))
if __name__ == '__main__': if __name__ == '__main__':
console_run( console_run(
extract_ods( from_opendatasoft_api(
SEARCH_URL, DATASET, timezone='Europe/Paris'), API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris'),
normalize, normalize,
filter_france, filter_france,
tee(display), tee(display),
to_json('fablabs.json'), JsonFileWriter('fablabs.json'),
output=True, ) output=True, )

View File

@ -0,0 +1,15 @@
from os.path import dirname, realpath, join
from bonobo import console_run
from bonobo.ext.opendatasoft import from_opendatasoft_api
from bonobo.io.file import FileWriter
# Absolute path of the generated text file, resolved relative to this script.
OUTPUT_FILENAME = realpath(join(dirname(__file__), 'datasets/cheap_coffeeshops_in_paris.txt'))
# Three-step pipeline: fetch the dataset records, format each one as a single
# line of text, then write the lines to OUTPUT_FILENAME.
console_run(
from_opendatasoft_api(
'liste-des-cafes-a-un-euro', netloc='opendata.paris.fr'),
lambda row: '{nom_du_cafe}, {adresse}, {arrondissement} Paris, France'.format(**row),
FileWriter(OUTPUT_FILENAME), )
# Tell the user where the result was written.
print('Import done, read {} for results.'.format(OUTPUT_FILENAME))

View File

@ -0,0 +1,18 @@
from bonobo import run
def generate_data():
    """Yield the three sample strings that feed the pipeline."""
    yield from ('foo', 'bar', 'baz')
def uppercase(x: str):
    """Return ``x`` converted to upper case."""
    shouted = x.upper()
    return shouted
def output(x: str):
    """Print ``x`` to standard output; used as the last step of the chain."""
    print(x)
run(generate_data, uppercase, output)

View File

@ -0,0 +1,18 @@
from bonobo import Graph, ThreadPoolExecutorStrategy
def yield_from(*args):
    """Yield each positional argument in turn, in the order given."""
    for item in args:
        yield item
# Represent our data processor as a simple directed graph of callables.
graph = Graph(
lambda: (x for x in ('foo', 'bar', 'baz')),
str.upper,
print, )
# Use a thread pool.
executor = ThreadPoolExecutorStrategy()
# Run the thing.
executor.execute(graph)

View File

@ -1,6 +1,6 @@
from mock import patch from mock import patch
from bonobo.ext.ods import extract_ods from bonobo.ext.opendatasoft import from_opendatasoft_api
class ResponseMock: class ResponseMock:
@ -17,7 +17,7 @@ class ResponseMock:
def test_read_from_opendatasoft_api(): def test_read_from_opendatasoft_api():
extract = extract_ods('http://example.com/', 'test-a-set') extract = from_opendatasoft_api('http://example.com/', 'test-a-set')
with patch( with patch(
'requests.get', return_value=ResponseMock([ 'requests.get', return_value=ResponseMock([
{ {

View File

@ -1,13 +1,13 @@
import pytest import pytest
from bonobo import to_json, Bag from bonobo import Bag, JsonFileWriter
from bonobo.core.contexts import ComponentExecutionContext from bonobo.core.contexts import ComponentExecutionContext
from bonobo.util.tokens import BEGIN, END from bonobo.util.tokens import BEGIN, END
def test_write_json_to_file(tmpdir): def test_write_json_to_file(tmpdir):
file = tmpdir.join('output.json') file = tmpdir.join('output.json')
json_writer = to_json(str(file)) json_writer = JsonFileWriter(str(file))
context = ComponentExecutionContext(json_writer, None) context = ComponentExecutionContext(json_writer, None)
context.initialize() context.initialize()
@ -28,7 +28,7 @@ def test_write_json_to_file(tmpdir):
def test_write_json_without_initializer_should_not_work(tmpdir): def test_write_json_without_initializer_should_not_work(tmpdir):
file = tmpdir.join('output.json') file = tmpdir.join('output.json')
json_writer = to_json(str(file)) json_writer = JsonFileWriter(str(file))
context = ComponentExecutionContext(json_writer, None) context = ComponentExecutionContext(json_writer, None)
with pytest.raises(AttributeError): with pytest.raises(AttributeError):