diff --git a/.coveragerc b/.coveragerc index ce96e75..1d76a1f 100644 --- a/.coveragerc +++ b/.coveragerc @@ -23,4 +23,4 @@ exclude_lines = ignore_errors = True [html] -directory = doc/_build/html/coverage +directory = docs/_build/html/coverage diff --git a/bonobo/__init__.py b/bonobo/__init__.py index 0d331c6..b66e2d8 100644 --- a/bonobo/__init__.py +++ b/bonobo/__init__.py @@ -36,16 +36,20 @@ with open(os.path.realpath(os.path.join(os.path.dirname(__file__), '../version.t __all__ = [ 'Bag', + 'FileWriter', 'Graph', - 'NaiveStrategy', + 'JsonFileWriter', 'NOT_MODIFIED', + 'NaiveStrategy', 'ProcessPoolExecutorStrategy', 'ThreadPoolExecutorStrategy', + 'console_run', 'head', 'inject', + 'jupyter_run', 'log', 'noop', + 'run', 'service', 'tee', - 'to_json', ] diff --git a/bonobo/compat/__init__.py b/bonobo/compat/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/compat/pandas.py b/bonobo/compat/pandas.py new file mode 100644 index 0000000..aab0dbd --- /dev/null +++ b/bonobo/compat/pandas.py @@ -0,0 +1,9 @@ +from bonobo import FileWriter, JsonFileWriter + +to_file = FileWriter +to_json = JsonFileWriter + +__all__ = [ + 'to_json', + 'to_file', +] diff --git a/bonobo/core/bags.py b/bonobo/core/bags.py index 2ec728d..c4f7c7b 100644 --- a/bonobo/core/bags.py +++ b/bonobo/core/bags.py @@ -33,8 +33,8 @@ class Bag: def flags(self): return self._flags - def apply(self, func, *args, **kwargs): - return func(*args, *self.args, **kwargs, **self.kwargs) + def apply(self, func_or_iter, *args, **kwargs): + return func_or_iter(*args, *self.args, **kwargs, **self.kwargs) def extend(self, *args, **kwargs): return type(self)(*args, _parent=self, **kwargs) diff --git a/bonobo/core/contexts.py b/bonobo/core/contexts.py index 7b0ec26..0e9d127 100644 --- a/bonobo/core/contexts.py +++ b/bonobo/core/contexts.py @@ -128,7 +128,7 @@ class ComponentExecutionContext(WithStatistics, AbstractLoopContext): @property def name(self): - return self.component.__name__ + return 
getattr(self.component, '__name__', getattr(type(self.component), '__name__', repr(self.component))) def __init__(self, component, parent): self.parent = parent diff --git a/bonobo/core/graphs.py b/bonobo/core/graphs.py index e9ffeb5..ba31930 100644 --- a/bonobo/core/graphs.py +++ b/bonobo/core/graphs.py @@ -6,9 +6,10 @@ class Graph: Represents a coherent directed acyclic graph (DAG) of components. """ - def __init__(self): + def __init__(self, *chain): self.components = [] self.graph = {BEGIN: set()} + self.add_chain(*chain) def outputs_of(self, idx, create=False): if create and not idx in self.graph: diff --git a/bonobo/ext/console/__init__.py b/bonobo/ext/console/__init__.py index 7d5c3f5..2fffb8f 100644 --- a/bonobo/ext/console/__init__.py +++ b/bonobo/ext/console/__init__.py @@ -1,7 +1,3 @@ -from .helpers import console_run from .plugin import ConsoleOutputPlugin -__all__ = [ - 'ConsoleOutputPlugin', - 'console_run', -] +__all__ = ['ConsoleOutputPlugin', ] diff --git a/bonobo/ext/console/helpers.py b/bonobo/ext/console/helpers.py deleted file mode 100644 index f55d016..0000000 --- a/bonobo/ext/console/helpers.py +++ /dev/null @@ -1,9 +0,0 @@ -from bonobo import Graph, ThreadPoolExecutorStrategy -from .plugin import ConsoleOutputPlugin - - -def console_run(*chain, output=True, plugins=None): - graph = Graph() - executor = ThreadPoolExecutorStrategy() - graph.add_chain(*chain) - return executor.execute(graph, plugins=(plugins or []) + [ConsoleOutputPlugin()] if output else []) diff --git a/bonobo/ext/jupyter/__init__.py b/bonobo/ext/jupyter/__init__.py index 2e04e8a..7dd1300 100644 --- a/bonobo/ext/jupyter/__init__.py +++ b/bonobo/ext/jupyter/__init__.py @@ -1,4 +1,4 @@ -from .helpers import jupyter_run +from bonobo.util.helpers import jupyter_run from .plugin import JupyterOutputPlugin @@ -6,7 +6,4 @@ def _jupyter_nbextension_paths(): return [{'section': 'notebook', 'src': 'static', 'dest': 'bonobo-jupyter', 'require': 'bonobo-jupyter/extension'}] -__all__ = [ 
- 'JupyterOutputPlugin', - 'jupyter_run', -] +__all__ = ['JupyterOutputPlugin', ] diff --git a/bonobo/ext/jupyter/helpers.py b/bonobo/ext/jupyter/helpers.py index c62e3dd..8b13789 100644 --- a/bonobo/ext/jupyter/helpers.py +++ b/bonobo/ext/jupyter/helpers.py @@ -1,9 +1 @@ -from bonobo import Graph, ThreadPoolExecutorStrategy -from .plugin import JupyterOutputPlugin - -def jupyter_run(*chain, plugins=None): - graph = Graph() - executor = ThreadPoolExecutorStrategy() - graph.add_chain(*chain) - return executor.execute(graph, plugins=(plugins or []) + [JupyterOutputPlugin()]) diff --git a/bonobo/ext/ods.py b/bonobo/ext/opendatasoft.py similarity index 60% rename from bonobo/ext/ods.py rename to bonobo/ext/opendatasoft.py index 1a8cf4f..9fb8d61 100644 --- a/bonobo/ext/ods.py +++ b/bonobo/ext/opendatasoft.py @@ -3,11 +3,18 @@ from urllib.parse import urlencode import requests # todo: make this a service so we can substitute it ? -def extract_ods(url, dataset, rows=100, **kwargs): +def from_opendatasoft_api(dataset=None, + endpoint='{scheme}://{netloc}{path}', + scheme='https', + netloc='data.opendatasoft.com', + path='/api/records/1.0/search/', + rows=100, + **kwargs): + path = path if path.startswith('/') else '/' + path params = ( ('dataset', dataset), ('rows', rows), ) + tuple(sorted(kwargs.items())) - base_url = url + '?' + urlencode(params) + base_url = endpoint.format(scheme=scheme, netloc=netloc, path=path) + '?' + urlencode(params) def _extract_ods(): nonlocal base_url, rows diff --git a/bonobo/io/__init__.py b/bonobo/io/__init__.py index 3338e9d..ca10d08 100644 --- a/bonobo/io/__init__.py +++ b/bonobo/io/__init__.py @@ -1,5 +1,9 @@ """ Readers and writers for common file formats. 
""" -from .json import * +from .file import FileWriter +from .json import JsonFileWriter -__all__ = ['to_json', ] +__all__ = [ + 'FileWriter', + 'JsonFileWriter', +] diff --git a/bonobo/io/file.py b/bonobo/io/file.py new file mode 100644 index 0000000..b30e515 --- /dev/null +++ b/bonobo/io/file.py @@ -0,0 +1,35 @@ +from bonobo.util.lifecycle import with_context + +__all__ = ['FileWriter', ] + + +@with_context +class FileWriter: + # XXX TODO implement @with_context like this ? Pros and cons ? + class Meta: + contextual = True + + def __init__(self, path_or_buf, eol='\n'): + self.path_or_buf = path_or_buf + self.eol = eol + + def initialize(self, ctx): + """ todo add lock file ? optional maybe ? """ + assert not hasattr(ctx, 'fp'), 'One at a time, baby.' + ctx.fp = open(self.path_or_buf, 'w+') + ctx.first = True + + def write(self, fp, line, prefix=''): + fp.write(prefix + line) + + def __call__(self, ctx, row): + if ctx.first: + prefix, ctx.first = '', False + else: + prefix = self.eol + + self.write(ctx.fp, row, prefix=prefix) + + def finalize(self, ctx): + ctx.fp.close() + del ctx.fp, ctx.first diff --git a/bonobo/io/json.py b/bonobo/io/json.py index 2df1538..98712a7 100644 --- a/bonobo/io/json.py +++ b/bonobo/io/json.py @@ -1,40 +1,23 @@ import json +from .file import FileWriter from bonobo.util.lifecycle import with_context -__all__ = [ - 'from_json', - 'to_json', -] +__all__ = ['JsonFileWriter', ] @with_context -class JsonWriter: +class JsonFileWriter(FileWriter): def __init__(self, path_or_buf): - self.path_or_buf = path_or_buf + super().__init__(path_or_buf, eol=',\n') def initialize(self, ctx): - assert not hasattr(ctx, 'fp'), 'One at a time, baby.' 
- ctx.fp = open(self.path_or_buf, 'w+') + super().initialize(ctx) ctx.fp.write('[\n') - ctx.first = True - def __call__(self, ctx, row): - if ctx.first: - prefix = '' - ctx.first = False - else: - prefix = ',\n' - ctx.fp.write(prefix + json.dumps(row)) + def write(self, fp, line, prefix=''): + fp.write(prefix + json.dumps(line)) def finalize(self, ctx): ctx.fp.write('\n]') - ctx.fp.close() - del ctx.fp, ctx.first - - -def from_json(path_or_buf): - pass - - -to_json = JsonWriter + super().finalize(ctx) diff --git a/bonobo/util/__init__.py b/bonobo/util/__init__.py index 5de330f..4a15b40 100644 --- a/bonobo/util/__init__.py +++ b/bonobo/util/__init__.py @@ -4,12 +4,16 @@ import functools import pprint from .tokens import NOT_MODIFIED +from .helpers import run, console_run, jupyter_run __all__ = [ 'NOT_MODIFIED', + 'console_run', 'head', + 'jupyter_run', 'log', 'noop', + 'run', 'tee', ] diff --git a/bonobo/util/helpers.py b/bonobo/util/helpers.py new file mode 100644 index 0000000..5e3538f --- /dev/null +++ b/bonobo/util/helpers.py @@ -0,0 +1,20 @@ +def run(*chain, plugins=None): + from bonobo import Graph, ThreadPoolExecutorStrategy + + graph = Graph() + graph.add_chain(*chain) + + executor = ThreadPoolExecutorStrategy() + return executor.execute(graph, plugins=plugins or []) + + +def console_run(*chain, output=True, plugins=None): + from bonobo.ext.console import ConsoleOutputPlugin + + return run(*chain, plugins=(plugins or []) + [ConsoleOutputPlugin()] if output else []) + + +def jupyter_run(*chain, plugins=None): + from bonobo.ext.jupyter import JupyterOutputPlugin + + return run(*chain, plugins=(plugins or []) + [JupyterOutputPlugin()]) diff --git a/docs/_templates/index.html b/docs/_templates/index.html index 4894573..9b9ae48 100644 --- a/docs/_templates/index.html +++ b/docs/_templates/index.html @@ -1,22 +1,20 @@ {% extends "layout.html" %} -{% set title = _('Overview') %} +{% set title = _('Bonobo — Data processing for humans') %} {% block body %} -
- Migration in progress, things may be broken for now. Please give us some time to finish painting the walls. +
+ Rewrite in progress, things may be broken for now. Please give us some time to finish painting the walls.
-

{{ _('Welcome to Bonobo\'s Documentation') }}

- -
- + -
+

{% trans %} - Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of - data transformations using a simple directed graph of python callables. + Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simple and + atomic data transformations defined using a directed graph of plain old python callables. {% endtrans %}

@@ -71,9 +69,8 @@
- + {%- if hasdoc('search') %} diff --git a/docs/conf.py b/docs/conf.py index 604417a..16307fd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,8 +12,14 @@ import bonobo # -- General configuration ------------------------------------------------ extensions = [ - 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', - 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode' + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.ifconfig', + 'sphinx.ext.viewcode', + 'sphinx.ext.graphviz', ] # Add any paths that contain templates here, relative to this directory. @@ -95,6 +101,8 @@ html_additional_pages = {'index': 'index.html'} html_static_path = ['_static'] html_show_sphinx = False +graphviz_output_format = 'svg' + # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. diff --git a/docs/install.rst b/docs/install.rst new file mode 100644 index 0000000..5b8488a --- /dev/null +++ b/docs/install.rst @@ -0,0 +1,34 @@ +Installation +============ + + +.. todo:: + + better install docs, especially on how to use different fork, etc. + +Install with pip +:::::::::::::::: + +.. code-block:: shell-session + + $ pip install bonobo + +Install from source +::::::::::::::::::: + +.. code-block:: shell-session + + $ pip install git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo + +Editable install +:::::::::::::::: + +If you plan on making patches to Bonobo, you should install it as an "editable" package. + + +.. code-block:: shell-session + + $ pip install --editable git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo + +Note: `-e` is the shorthand version of `--editable`. 
+ diff --git a/docs/tutorial/basics.rst b/docs/tutorial/basics.rst new file mode 100644 index 0000000..1197d3a --- /dev/null +++ b/docs/tutorial/basics.rst @@ -0,0 +1,146 @@ +First steps - Basic concepts +============================ + +To begin with Bonobo, you should first install it: + +.. code-block:: shell-session + + $ pip install bonobo + +See :doc:`install` if you're looking for more options. + +Let's write a first data transformation +::::::::::::::::::::::::::::::::::::::: + +We'll write a simple component that just uppercases everything. In **Bonobo**, a component is a plain old python +callable, not more, not less. + +.. code-block:: python + + def uppercase(x: str): + return x.upper() + +Ok, this is kind of simple, and you can even use `str.upper` directly instead of writing a wrapper. The type annotations +are not used, but can make your code much more readable (and may be used as validators in the future). + +To run this, we need two more things: a generator that feeds data, and something that outputs it. + +.. code-block:: python + + def generate_data(): + yield 'foo' + yield 'bar' + yield 'baz' + + def output(x: str): + print(x) + +That should do the job. Now, let's chain the three callables together and run them. + +.. code-block:: python + + from bonobo import run + + run(generate_data, uppercase, output) + +This is the simplest data transformation possible, and we run it using the `run` helper that hides the underlying object +composition necessary to actually run the callables in parallel. A more flexible, but a bit more verbose, way to do the +same thing would be: + +.. code-block:: python + + from bonobo import Graph, ThreadPoolExecutorStrategy + + graph = Graph() + graph.add_chain(generate_data, uppercase, output) + + executor = ThreadPoolExecutorStrategy() + executor.execute(graph) + +Depending on what you're doing, you may use the shorthand helper method, or the verbose one. 
Always favor the shorter, +if you don't need to tune the graph or the execution strategy. + +Definitions +::::::::::: + +* Graph +* Component +* Executor + +.. todo:: Definitions, and substitute vague terms in the page by the exact term defined here + +Summary +::::::: + +Let's rewrite this using builtin functions and methods, then explain the few concepts available here: + +.. code-block:: python + + from bonobo import Graph, ThreadPoolExecutorStrategy + + # Represent our data processor as a simple directed graph of callables. + graph = Graph( + (x for x in ('foo', 'bar', 'baz')), + str.upper, + print, + ) + + # Use a thread pool. + executor = ThreadPoolExecutorStrategy() + + # Run the thing. + executor.execute(graph) + +Or the shorthand version, that you should prefer if you don't need fine tuning: + +.. code-block:: python + + from bonobo import run + + run( + iter(['foo', 'bar', 'baz']), + str.upper, + print, + ) + +Both methods are strictly equivalent (see :func:`bonobo.run`). When in doubt, favour the shorter. + +Takeaways +::::::::: + +① The :class:`bonobo.Graph` class is used to represent a data-processing pipeline. + +It can represent simple list-like linear graphs, like here, but it can also represent much more complex graphs, with +branches and cycles. + +This is what the graph we defined looks like: + +.. graphviz:: + + digraph { + rankdir = LR; + "iter(['foo', 'bar', 'baz'])" -> "str.upper" -> "print"; + } + + +② Transformations are simple python callables. Whatever can be called can be used as a transformation. Callables can +either `return` or `yield` data to send it to the next step. Regular functions (using `return`) should be preferred if +each call is guaranteed to return exactly one result, while generators (using `yield`) should be preferred if the +number of output lines for a given input varies. + +③ The graph is then executed using an `ExecutionStrategy`. 
For now, let's focus only on +:class:`bonobo.ThreadPoolExecutorStrategy`, which uses an underlying `concurrent.futures.ThreadPoolExecutor` to +schedule calls in a pool of threads, but basically this strategy is what determines the actual behaviour of execution. + +④ Before actually executing the callables, the `ExecutionStrategy` instance will wrap each component in a `context`, +whose responsibility is to hold the state, to keep the components stateless. We'll expand on this later. + + +Next +:::: + +You now know all the basic concepts necessary to build (batch-like) data processors. + +If you're confident with this part, let's get to a more real world example, using files and nice console output. + +.. todo:: link to next page diff --git a/docs/tutorial/basics2.rst b/docs/tutorial/basics2.rst new file mode 100644 index 0000000..8542ec2 --- /dev/null +++ b/docs/tutorial/basics2.rst @@ -0,0 +1,46 @@ +First steps - Working with files +================================ + +Bonobo would not be of any use if the aim was to uppercase small lists of strings. In fact, Bonobo should not be used +if you don't expect any gain from parallelization of tasks. + +Let's take the following graph as an example: + +.. graphviz:: + + digraph { + rankdir = LR; + "A" -> "B" -> "C"; + } + +The execution strategy does a bit of behind-the-scenes work, wrapping every component in a thread (assuming you're using +the :class:`bonobo.ThreadPoolExecutorStrategy`), which allows `B` to start running as soon as `A` yielded the first line +of data, and `C` as soon as `B` yielded the first line of data, even if `A` or `B` still have data to yield. + +The great thing is that you generally don't have to think about it. Just be aware that your components will be run in +parallel, and don't worry too much about blocking components, as they won't block their siblings. + +That being said, let's try to write a more real-world like transformation. 
+ +Reading a file +:::::::::::::: + +There are a few component builders available in **Bonobo** that let you read files. You should at least know about the following: + +* :class:`bonobo.FileReader` (aliased as :func:`bonobo.from_file`) +* :class:`bonobo.JsonFileReader` (aliased as :func:`bonobo.from_json`) +* :class:`bonobo.CsvFileReader` (aliased as :func:`bonobo.from_csv`) + +Reading a file is as simple as using one of those, and for the example, we'll use a text file that was generated using +Bonobo from the "liste-des-cafes-a-un-euro" dataset made available by Mairie de Paris under the Open Database +License (ODbL). You can `explore the original dataset `_. +You'll need the example dataset, available in **Bonobo**'s repository. + +.. code-block:: python + + from bonobo import FileReader, run + + run( + FileReader('examples/datasets/cheap_coffeeshops_in_paris.txt'), + print, + ) diff --git a/examples/datasets/cheap_coffeeshops_in_paris.txt b/examples/datasets/cheap_coffeeshops_in_paris.txt new file mode 100644 index 0000000..b43e377 --- /dev/null +++ b/examples/datasets/cheap_coffeeshops_in_paris.txt @@ -0,0 +1,182 @@ +Extérieur Quai, 5, rue d'Alsace, 75010 Paris, France +Le Sully, 6 Bd henri IV, 75004 Paris, France +O q de poule, 53 rue du ruisseau, 75018 Paris, France +Le Pas Sage, 1 Passage du Grand Cerf, 75002 Paris, France +Le Dunois, 77 rue Dunois, 75013 Paris, France +La Renaissance, 112 Rue Championnet, 75018 Paris, France +Le chantereine, 51 Rue Victoire, 75009 Paris, France +Le Müller, 11 rue Feutrier, 75018 Paris, France +Le drapeau de la fidelité, 21 rue Copreaux, 75015 Paris, France +Le café des amis, 125 rue Blomet, 75015 Paris, France +Le Café Livres, 10 rue Saint Martin, 75004 Paris, France +Le Bosquet, 46 avenue Bosquet, 75007 Paris, France +Le Brio, 216, rue Marcadet, 75018 Paris, France +Le Kleemend's, 34 avenue Pierre Mendès-France, 75013 Paris, France +Café Pierre, 202 rue du faubourg st antoine, 75012 Paris, France +Les Arcades, 61 rue de 
Ponthieu, 75008 Paris, France +Le Square, 31 rue Saint-Dominique, 75007 Paris, France +Assaporare Dix sur Dix, 75, avenue Ledru-Rollin, 75012 Paris, France +Au cerceau d'or, 129 boulevard sebastopol, 75002 Paris, France +Café antoine, 17 rue Jean de la Fontaine, 75016 Paris, France +Café Lea, 5 rue Claude Bernard, 75005 Paris, France +Cardinal Saint-Germain, 11 boulevard Saint-Germain, 75005 Paris, France +Dédé la frite, 52 rue Notre-Dame des Victoires, 75002 Paris, France +La Bauloise, 36 rue du hameau, 75015 Paris, France +Le Bellerive, 71 quai de Seine, 75019 Paris, France +Le bistrot de Maëlle et Augustin, 42 rue coquillère, 75001 Paris, France +Le Dellac, 14 rue Rougemont, 75009 Paris, France +Le Felteu, 1 rue Pecquay, 75004 Paris, France +Le Saint Jean, 23 rue des abbesses, 75018 Paris, France +les montparnos, 65 boulevard Pasteur, 75015 Paris, France +Drole d'endroit pour une rencontre, 58 rue de Montorgueil, 75002 Paris, France +Le pari's café, 104 rue caulaincourt, 75018 Paris, France +Le Poulailler, 60 rue saint-sabin, 75011 Paris, France +L'Assassin, 99 rue Jean-Pierre Timbaud, 75011 Paris, France +l'Usine, 1 rue d'Avron, 75020 Paris, France +La Bricole, 52 rue Liebniz, 75018 Paris, France +le ronsard, place maubert, 75005 Paris, France +Face Bar, 82 rue des archives, 75003 Paris, France +American Kitchen, 49 rue bichat, 75010 Paris, France +La Marine, 55 bis quai de valmy, 75010 Paris, France +Le Bloc, 21 avenue Brochant, 75017 Paris, France +La Recoleta au Manoir, 229 avenue Gambetta, 75020 Paris, France +Le Pareloup, 80 Rue Saint-Charles, 75015 Paris, France +La Brasserie Gaité, 3 rue de la Gaité, 75014 Paris, France +Café Zen, 46 rue Victoire, 75009 Paris, France +O'Breizh, 27 rue de Penthièvre, 75008 Paris, France +Le Petit Choiseul, 23 rue saint augustin, 75002 Paris, France +Invitez vous chez nous, 7 rue Epée de Bois, 75005 Paris, France +La Cordonnerie, 142 Rue Saint-Denis 75002 Paris, 75002 Paris, France +Le Supercoin, 3, rue Baudelique, 75018 
Paris, France +Populettes, 86 bis rue Riquet, 75018 Paris, France +Au bon coin, 49 rue des Cloys, 75018 Paris, France +Le Couvent, 69 rue Broca, 75013 Paris, France +La Brûlerie des Ternes, 111 rue mouffetard, 75005 Paris, France +L'Écir, 59 Boulevard Saint-Jacques, 75014 Paris, France +Le Chat bossu, 126, rue du Faubourg Saint Antoine, 75012 Paris, France +Denfert café, 58 boulvevard Saint Jacques, 75014 Paris, France +Le Café frappé, 95 rue Montmartre, 75002 Paris, France +La Perle, 78 rue vieille du temple, 75003 Paris, France +Le Descartes, 1 rue Thouin, 75005 Paris, France +Bagels & Coffee Corner, Place de Clichy, 75017 Paris, France +Le petit club, 55 rue de la tombe Issoire, 75014 Paris, France +Le Plein soleil, 90 avenue Parmentier, 75011 Paris, France +Le Relais Haussmann, 146, boulevard Haussmann, 75008 Paris, France +Le Malar, 88 rue Saint-Dominique, 75007 Paris, France +Au panini de la place, 47 rue Belgrand, 75020 Paris, France +Le Village, 182 rue de Courcelles, 75017 Paris, France +Pause Café, 41 rue de Charonne, 75011 Paris, France +Le Pure café, 14 rue Jean Macé, 75011 Paris, France +Extra old café, 307 fg saint Antoine, 75011 Paris, France +Chez Fafa, 44 rue Vinaigriers, 75010 Paris, France +En attendant l'or, 3 rue Faidherbe, 75011 Paris, France +Aux cadrans, 21 ter boulevard Diderot, 75012 Paris, France +Brûlerie San José, 30 rue des Petits-Champs, 75002 Paris, France +Etienne, 14 rue Turbigo, Paris, 75001 Paris, France +L'ingénu, 184 bd Voltaire, 75011 Paris, France +L'Olive, 8 rue L'Olive, 75018 Paris, France +Le Biz, 18 rue Favart, 75002 Paris, France +Le Cap Bourbon, 1 rue Louis le Grand, 75002 Paris, France +Le General Beuret, 9 Place du General Beuret, 75015 Paris, France +Le Germinal, 95 avenue Emile Zola, 75015 Paris, France +Le Ragueneau, 202 rue Saint-Honoré, 75001 Paris, France +Le refuge, 72 rue lamarck, 75018 Paris, France +Le sully, 13 rue du Faubourg Saint Denis, 75010 Paris, France +L'antre d'eux, 16 rue DE MEZIERES, 75006 Paris, 
France +Le bal du pirate, 60 rue des bergers, 75015 Paris, France +zic zinc, 95 rue claude decaen, 75012 Paris, France +l'orillon bar, 35 rue de l'orillon, 75011 Paris, France +Le Zazabar, 116 Rue de Ménilmontant, 75020 Paris, France +L'Inévitable, 22 rue Linné, 75005 Paris, France +Ragueneau, 202 rue Saint Honoré, 75001 Paris, France +Le Caminito, 48 rue du Dessous des Berges, 75013 Paris, France +Epicerie Musicale, 55bis quai de Valmy, 75010 Paris, France +Le petit Bretonneau, Le petit Bretonneau - à l'intérieur de l'Hôpital, 75018 Paris, France +Le Centenaire, 104 rue amelot, 75011 Paris, France +La Montagne Sans Geneviève, 13 Rue du Pot de Fer, 75005 Paris, France +Les Pères Populaires, 46 rue de Buzenval, 75020 Paris, France +Cafe de grenelle, 188 rue de Grenelle, 75007 Paris, France +Le relais de la victoire, 73 rue de la Victoire, 75009 Paris, France +La chaumière gourmande, Route de la Muette à Neuilly +Club hippique du Jardin d’Acclimatation, 75016 Paris, France +Caves populaires, 22 rue des Dames, 75017 Paris, France +Caprice café, 12 avenue Jean Moulin, 75014 Paris, France +Tamm Bara, 7 rue Clisson, 75013 Paris, France +L'anjou, 1 rue de Montholon, 75009 Paris, France +Café dans l'aerogare Air France Invalides, 2 rue Robert Esnault Pelterie, 75007 Paris, France +Waikiki, 10 rue d"Ulm, 75005 Paris, France +Chez Prune, 36 rue Beaurepaire, 75010 Paris, France +Au Vin Des Rues, 21 rue Boulard, 75014 Paris, France +bistrot les timbrés, 14 rue d'alleray, 75015 Paris, France +Café beauveau, 9 rue de Miromesnil, 75008 Paris, France +Café de la Mairie (du VIII), rue de Lisbonne, 75008 Paris, France +Café Pistache, 9 rue des petits champs, 75001 Paris, France +La Cagnotte, 13 Rue Jean-Baptiste Dumay, 75020 Paris, France +le 1 cinq, 172 rue de vaugirard, 75015 Paris, France +Le Killy Jen, 28 bis boulevard Diderot, 75012 Paris, France +Les Artisans, 106 rue Lecourbe, 75015 Paris, France +Peperoni, 83 avenue de Wagram, 75001 Paris, France +le lutece, 380 rue de 
vaugirard, 75015 Paris, France +Brasiloja, 16 rue Ganneron, 75018 Paris, France +Rivolux, 16 rue de Rivoli, 75004 Paris, France +Chai 33, 33 Cour Saint Emilion, 75012 Paris, France +L'européen, 21 Bis Boulevard Diderot, 75012 Paris, France +NoMa, 39 rue Notre Dame de Nazareth, 75003 Paris, France +O'Paris, 1 Rue des Envierges, 75020 Paris, France +Café Clochette, 16 avenue Richerand, 75010 Paris, France +La cantoche de Paname, 40 Boulevard Beaumarchais, 75011 Paris, France +Le Saint René, 148 Boulevard de Charonne, 75020 Paris, France +La Liberté, 196 rue du faubourg saint-antoine, 75012 Paris, France +Chez Rutabaga, 16 rue des Petits Champs, 75002 Paris, France +Le BB (Bouchon des Batignolles), 2 rue Lemercier, 75017 Paris, France +La Brocante, 10 rue Rossini, 75009 Paris, France +Le Plomb du cantal, 3 rue Gaîté, 75014 Paris, France +Les caves populaires, 22 rue des Dames, 75017 Paris, France +Chez Luna, 108 rue de Ménilmontant, 75020 Paris, France +Le bar Fleuri, 1 rue du Plateau, 75019 Paris, France +Le Chaumontois, 12 rue Armand Carrel, 75018 Paris, France +Trois pièces cuisine, 101 rue des dames, 75017 Paris, France +Le Zinc, 61 avenue de la Motte Picquet, 75015 Paris, France +La cantine de Zoé, 136 rue du Faubourg poissonnière, 75010 Paris, France +Les Vendangeurs, 6/8 rue Stanislas, 75006 Paris, France +L'avant comptoir, 3 carrefour de l'Odéon, 75006 Paris, France +Botak cafe, 1 rue Paul albert, 75018 Paris, France +le chateau d'eau, 67 rue du Château d'eau, 75010 Paris, France +Bistrot Saint-Antoine, 58 rue du Fbg Saint-Antoine, 75012 Paris, France +Chez Oscar, 11/13 boulevard Beaumarchais, 75004 Paris, France +Le Fronton, 63 rue de Ponthieu, 75008 Paris, France +Le Piquet, 48 avenue de la Motte Picquet, 75015 Paris, France +Le Tournebride, 104 rue Mouffetard, 75005 Paris, France +maison du vin, 52 rue des plantes, 75014 Paris, France +Coffee Chope, 344Vrue Vaugirard, 75015 Paris, France +L'entrepôt, 157 rue Bercy 75012 Paris, 75012 Paris, France +Le café 
Monde et Médias, Place de la République, 75003 Paris, France +Café rallye tournelles, 11 Quai de la Tournelle, 75005 Paris, France +Brasserie le Morvan, 61 rue du château d'eau, 75010 Paris, France +Chez Miamophile, 6 rue Mélingue, 75019 Paris, France +La Caravane, Rue de la Fontaine au Roi, 75011 Paris, France +Panem, 18 rue de Crussol, 75011 Paris, France +Petits Freres des Pauvres, 47 rue de Batignolles, 75017 Paris, France +Café Dupont, 198 rue de la Convention, 75015 Paris, France +L'Angle, 28 rue de Ponthieu, 75008 Paris, France +Institut des Cultures d'Islam, 19-23 rue Léon, 75018 Paris, France +Canopy Café associatif, 19 rue Pajol, 75018 Paris, France +L'Entracte, place de l'opera, 75002 Paris, France +Le Sévigné, 15 rue du Parc Royal, 75003 Paris, France +Le Café d'avant, 35 rue Claude Bernard, 75005 Paris, France +Le Lucernaire, 53 rue Notre-Dame des Champs, 75006 Paris, France +Le Brigadier, 12 rue Blanche, 75009 Paris, France +L'âge d'or, 26 rue du Docteur Magnan, 75013 Paris, France +Café Victor, 10 boulevard Victor, 75015 Paris, France +L'empreinte, 54, avenue Daumesnil, 75012 Paris, France +L'horizon, 93, rue de la Roquette, 75011 Paris, France +Au pays de Vannes, 34 bis rue de Wattignies, 75012 Paris, France +Café Martin, 2 place Martin Nadaud, 75001 Paris, France +Café Varenne, 36 rue de Varenne, 75007 Paris, France +l'Eléphant du nil, 125 Rue Saint-Antoine, 75004 Paris, France +Le Comptoir, 354 bis rue Vaugirard, 75015 Paris, France +Le Parc Vaugirard, 358 rue de Vaugirard, 75015 Paris, France +Le Reynou, 2 bis quai de la mégisserie, 75001 Paris, France +le Zango, 58 rue Daguerre, 75014 Paris, France +Melting Pot, 3 rue de Lagny, 75020 Paris, France +Pari's Café, 174 avenue de Clichy, 75017 Paris, France \ No newline at end of file diff --git a/examples/opendata_fablabs.py b/examples/opendata_fablabs.py index 811fe86..72adc9c 100644 --- a/examples/opendata_fablabs.py +++ b/examples/opendata_fablabs.py @@ -1,16 +1,17 @@ import json from blessings 
import Terminal -from pycountry import countries -from bonobo.ext.console import console_run -from bonobo.ext.ods import extract_ods -from bonobo.util import tee -from bonobo.io.json import to_json +from bonobo import console_run, tee, JsonFileWriter +from bonobo.ext.opendatasoft import from_opendatasoft_api -DATASET = 'fablabs-in-the-world' -SEARCH_URL = 'https://datanova.laposte.fr/api/records/1.0/search/' -URL = SEARCH_URL + '?dataset=' + DATASET +try: + import pycountry +except ImportError as exc: + raise ImportError('You must install package "pycountry" to run this example.') from exc + +API_DATASET = 'fablabs-in-the-world' +API_NETLOC = 'datanova.laposte.fr' ROWS = 100 t = Terminal() @@ -25,7 +26,7 @@ def normalize(row): ** row, 'links': list(filter(None, map(_getlink, json.loads(row.get('links'))))), - 'country': countries.get(alpha_2=row.get('country_code', '').upper()).name, + 'country': pycountry.countries.get(alpha_2=row.get('country_code', '').upper()).name, } return result @@ -47,15 +48,15 @@ def display(row): print(' - {}: {address}'.format(t.blue('address'), address=', '.join(address))) print(' - {}: {links}'.format(t.blue('links'), links=', '.join(row['links']))) print(' - {}: {geometry}'.format(t.blue('geometry'), **row)) - print(' - {}: {source}'.format(t.blue('source'), source='datanova/' + DATASET)) + print(' - {}: {source}'.format(t.blue('source'), source='datanova/' + API_DATASET)) if __name__ == '__main__': console_run( - extract_ods( - SEARCH_URL, DATASET, timezone='Europe/Paris'), + from_opendatasoft_api( + API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris'), normalize, filter_france, tee(display), - to_json('fablabs.json'), + JsonFileWriter('fablabs.json'), output=True, ) diff --git a/examples/read_cheap_coffeeshops_in_paris.py b/examples/read_cheap_coffeeshops_in_paris.py new file mode 100644 index 0000000..8e701e3 --- /dev/null +++ b/examples/read_cheap_coffeeshops_in_paris.py @@ -0,0 +1,15 @@ +from os.path import dirname, realpath, 
join + +from bonobo import console_run +from bonobo.ext.opendatasoft import from_opendatasoft_api +from bonobo.io.file import FileWriter + +OUTPUT_FILENAME = realpath(join(dirname(__file__), 'datasets/cheap_coffeeshops_in_paris.txt')) + +console_run( + from_opendatasoft_api( + 'liste-des-cafes-a-un-euro', netloc='opendata.paris.fr'), + lambda row: '{nom_du_cafe}, {adresse}, {arrondissement} Paris, France'.format(**row), + FileWriter(OUTPUT_FILENAME), ) + +print('Import done, read {} for results.'.format(OUTPUT_FILENAME)) diff --git a/examples/tutorial_basics_firststeps.py b/examples/tutorial_basics_firststeps.py new file mode 100644 index 0000000..4fb3f52 --- /dev/null +++ b/examples/tutorial_basics_firststeps.py @@ -0,0 +1,18 @@ +from bonobo import run + + +def generate_data(): + yield 'foo' + yield 'bar' + yield 'baz' + + +def uppercase(x: str): + return x.upper() + + +def output(x: str): + print(x) + + +run(generate_data, uppercase, output) diff --git a/examples/tutorial_basics_summary.py b/examples/tutorial_basics_summary.py new file mode 100644 index 0000000..60a5452 --- /dev/null +++ b/examples/tutorial_basics_summary.py @@ -0,0 +1,18 @@ +from bonobo import Graph, ThreadPoolExecutorStrategy + + +def yield_from(*args): + yield from args + + +# Represent our data processor as a simple directed graph of callables. +graph = Graph( + lambda: (x for x in ('foo', 'bar', 'baz')), + str.upper, + print, ) + +# Use a thread pool. +executor = ThreadPoolExecutorStrategy() + +# Run the thing. 
+executor.execute(graph) diff --git a/tests/ext/test_ods.py b/tests/ext/test_ods.py index e585637..eef0db7 100644 --- a/tests/ext/test_ods.py +++ b/tests/ext/test_ods.py @@ -1,6 +1,6 @@ from mock import patch -from bonobo.ext.ods import extract_ods +from bonobo.ext.opendatasoft import from_opendatasoft_api class ResponseMock: @@ -17,7 +17,7 @@ class ResponseMock: def test_read_from_opendatasoft_api(): - extract = extract_ods('http://example.com/', 'test-a-set') + extract = from_opendatasoft_api('http://example.com/', 'test-a-set') with patch( 'requests.get', return_value=ResponseMock([ { diff --git a/tests/io/test_json.py b/tests/io/test_json.py index af7f2f5..15cc91d 100644 --- a/tests/io/test_json.py +++ b/tests/io/test_json.py @@ -1,13 +1,13 @@ import pytest -from bonobo import to_json, Bag +from bonobo import Bag, JsonFileWriter from bonobo.core.contexts import ComponentExecutionContext from bonobo.util.tokens import BEGIN, END def test_write_json_to_file(tmpdir): file = tmpdir.join('output.json') - json_writer = to_json(str(file)) + json_writer = JsonFileWriter(str(file)) context = ComponentExecutionContext(json_writer, None) context.initialize() @@ -28,7 +28,7 @@ def test_write_json_to_file(tmpdir): def test_write_json_without_initializer_should_not_work(tmpdir): file = tmpdir.join('output.json') - json_writer = to_json(str(file)) + json_writer = JsonFileWriter(str(file)) context = ComponentExecutionContext(json_writer, None) with pytest.raises(AttributeError):