starting to write docs, taking decisions on public api
This commit is contained in:
@ -23,4 +23,4 @@ exclude_lines =
|
|||||||
ignore_errors = True
|
ignore_errors = True
|
||||||
|
|
||||||
[html]
|
[html]
|
||||||
directory = doc/_build/html/coverage
|
directory = docs/_build/html/coverage
|
||||||
|
|||||||
@ -36,16 +36,20 @@ with open(os.path.realpath(os.path.join(os.path.dirname(__file__), '../version.t
|
|||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'Bag',
|
'Bag',
|
||||||
|
'FileWriter',
|
||||||
'Graph',
|
'Graph',
|
||||||
'NaiveStrategy',
|
'JsonFileWriter',
|
||||||
'NOT_MODIFIED',
|
'NOT_MODIFIED',
|
||||||
|
'NaiveStrategy',
|
||||||
'ProcessPoolExecutorStrategy',
|
'ProcessPoolExecutorStrategy',
|
||||||
'ThreadPoolExecutorStrategy',
|
'ThreadPoolExecutorStrategy',
|
||||||
|
'console_run',
|
||||||
'head',
|
'head',
|
||||||
'inject',
|
'inject',
|
||||||
|
'jupyter_run',
|
||||||
'log',
|
'log',
|
||||||
'noop',
|
'noop',
|
||||||
|
'run',
|
||||||
'service',
|
'service',
|
||||||
'tee',
|
'tee',
|
||||||
'to_json',
|
|
||||||
]
|
]
|
||||||
|
|||||||
0
bonobo/compat/__init__.py
Normal file
0
bonobo/compat/__init__.py
Normal file
9
bonobo/compat/pandas.py
Normal file
9
bonobo/compat/pandas.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
from bonobo import FileWriter, JsonFileWriter
|
||||||
|
|
||||||
|
to_file = FileWriter
|
||||||
|
to_json = JsonFileWriter
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
'to_json',
|
||||||
|
'to_file',
|
||||||
|
]
|
||||||
@ -33,8 +33,8 @@ class Bag:
|
|||||||
def flags(self):
|
def flags(self):
|
||||||
return self._flags
|
return self._flags
|
||||||
|
|
||||||
def apply(self, func, *args, **kwargs):
|
def apply(self, func_or_iter, *args, **kwargs):
|
||||||
return func(*args, *self.args, **kwargs, **self.kwargs)
|
return func_or_iter(*args, *self.args, **kwargs, **self.kwargs)
|
||||||
|
|
||||||
def extend(self, *args, **kwargs):
|
def extend(self, *args, **kwargs):
|
||||||
return type(self)(*args, _parent=self, **kwargs)
|
return type(self)(*args, _parent=self, **kwargs)
|
||||||
|
|||||||
@ -128,7 +128,7 @@ class ComponentExecutionContext(WithStatistics, AbstractLoopContext):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self):
|
def name(self):
|
||||||
return self.component.__name__
|
return getattr(self.component, '__name__', getattr(type(self.component), '__name__', repr(self.component)))
|
||||||
|
|
||||||
def __init__(self, component, parent):
|
def __init__(self, component, parent):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
|
|||||||
@ -6,9 +6,10 @@ class Graph:
|
|||||||
Represents a coherent directed acyclic graph (DAG) of components.
|
Represents a coherent directed acyclic graph (DAG) of components.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, *chain):
|
||||||
self.components = []
|
self.components = []
|
||||||
self.graph = {BEGIN: set()}
|
self.graph = {BEGIN: set()}
|
||||||
|
self.add_chain(*chain)
|
||||||
|
|
||||||
def outputs_of(self, idx, create=False):
|
def outputs_of(self, idx, create=False):
|
||||||
if create and not idx in self.graph:
|
if create and not idx in self.graph:
|
||||||
|
|||||||
@ -1,7 +1,3 @@
|
|||||||
from .helpers import console_run
|
|
||||||
from .plugin import ConsoleOutputPlugin
|
from .plugin import ConsoleOutputPlugin
|
||||||
|
|
||||||
__all__ = [
|
__all__ = ['ConsoleOutputPlugin', ]
|
||||||
'ConsoleOutputPlugin',
|
|
||||||
'console_run',
|
|
||||||
]
|
|
||||||
|
|||||||
@ -1,9 +0,0 @@
|
|||||||
from bonobo import Graph, ThreadPoolExecutorStrategy
|
|
||||||
from .plugin import ConsoleOutputPlugin
|
|
||||||
|
|
||||||
|
|
||||||
def console_run(*chain, output=True, plugins=None):
|
|
||||||
graph = Graph()
|
|
||||||
executor = ThreadPoolExecutorStrategy()
|
|
||||||
graph.add_chain(*chain)
|
|
||||||
return executor.execute(graph, plugins=(plugins or []) + [ConsoleOutputPlugin()] if output else [])
|
|
||||||
@ -1,4 +1,4 @@
|
|||||||
from .helpers import jupyter_run
|
from bonobo.util.helpers import jupyter_run
|
||||||
from .plugin import JupyterOutputPlugin
|
from .plugin import JupyterOutputPlugin
|
||||||
|
|
||||||
|
|
||||||
@ -6,7 +6,4 @@ def _jupyter_nbextension_paths():
|
|||||||
return [{'section': 'notebook', 'src': 'static', 'dest': 'bonobo-jupyter', 'require': 'bonobo-jupyter/extension'}]
|
return [{'section': 'notebook', 'src': 'static', 'dest': 'bonobo-jupyter', 'require': 'bonobo-jupyter/extension'}]
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = ['JupyterOutputPlugin', ]
|
||||||
'JupyterOutputPlugin',
|
|
||||||
'jupyter_run',
|
|
||||||
]
|
|
||||||
|
|||||||
@ -1,9 +1 @@
|
|||||||
from bonobo import Graph, ThreadPoolExecutorStrategy
|
|
||||||
from .plugin import JupyterOutputPlugin
|
|
||||||
|
|
||||||
|
|
||||||
def jupyter_run(*chain, plugins=None):
|
|
||||||
graph = Graph()
|
|
||||||
executor = ThreadPoolExecutorStrategy()
|
|
||||||
graph.add_chain(*chain)
|
|
||||||
return executor.execute(graph, plugins=(plugins or []) + [JupyterOutputPlugin()])
|
|
||||||
|
|||||||
@ -3,11 +3,18 @@ from urllib.parse import urlencode
|
|||||||
import requests # todo: make this a service so we can substitute it ?
|
import requests # todo: make this a service so we can substitute it ?
|
||||||
|
|
||||||
|
|
||||||
def extract_ods(url, dataset, rows=100, **kwargs):
|
def from_opendatasoft_api(dataset=None,
|
||||||
|
endpoint='{scheme}://{netloc}{path}',
|
||||||
|
scheme='https',
|
||||||
|
netloc='data.opendatasoft.com',
|
||||||
|
path='/api/records/1.0/search/',
|
||||||
|
rows=100,
|
||||||
|
**kwargs):
|
||||||
|
path = path if path.startswith('/') else '/' + path
|
||||||
params = (
|
params = (
|
||||||
('dataset', dataset),
|
('dataset', dataset),
|
||||||
('rows', rows), ) + tuple(sorted(kwargs.items()))
|
('rows', rows), ) + tuple(sorted(kwargs.items()))
|
||||||
base_url = url + '?' + urlencode(params)
|
base_url = endpoint.format(scheme=scheme, netloc=netloc, path=path) + '?' + urlencode(params)
|
||||||
|
|
||||||
def _extract_ods():
|
def _extract_ods():
|
||||||
nonlocal base_url, rows
|
nonlocal base_url, rows
|
||||||
@ -1,5 +1,9 @@
|
|||||||
""" Readers and writers for common file formats. """
|
""" Readers and writers for common file formats. """
|
||||||
|
|
||||||
from .json import *
|
from .file import FileWriter
|
||||||
|
from .json import JsonFileWriter
|
||||||
|
|
||||||
__all__ = ['to_json', ]
|
__all__ = [
|
||||||
|
'FileWriter',
|
||||||
|
'JsonFileWriter',
|
||||||
|
]
|
||||||
|
|||||||
35
bonobo/io/file.py
Normal file
35
bonobo/io/file.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
from bonobo.util.lifecycle import with_context
|
||||||
|
|
||||||
|
__all__ = ['FileWriter', ]
|
||||||
|
|
||||||
|
|
||||||
|
@with_context
class FileWriter:
    """Component that writes every row it receives to a text file.

    Rows are separated (not terminated) by ``eol``: the first row is written
    as-is and each following row is prefixed with ``eol``, so the file does
    not end with a trailing separator.

    The open file handle and the "first row" flag are stored on the context
    object passed to ``initialize`` / ``__call__`` / ``finalize``, not on the
    writer itself.

    :param path_or_buf: value handed to the builtin ``open()`` in
        ``initialize``.  NOTE(review): despite the name, a file-like buffer
        is not supported by this implementation -- confirm intent.
    :param eol: separator inserted between two consecutive rows.
    """

    # XXX TODO implement @with_context like this ? Pros and cons ?
    class Meta:
        contextual = True

    def __init__(self, path_or_buf, eol='\n'):
        self.path_or_buf = path_or_buf
        self.eol = eol

    def initialize(self, ctx):
        """Open the target file and set up the per-context state.

        todo add lock file ? optional maybe ?
        """
        assert not hasattr(ctx, 'fp'), 'One at a time, baby.'
        ctx.fp = open(self.path_or_buf, 'w+')
        ctx.first = True

    def write(self, fp, line, prefix=''):
        """Write one row to ``fp``, preceded by ``prefix``.

        Kept as a separate method so subclasses can change how a row is
        serialized without touching the separator handling in ``__call__``.
        """
        fp.write(prefix + line)

    def __call__(self, ctx, row):
        """Write ``row``, inserting ``eol`` before every row but the first."""
        if ctx.first:
            prefix, ctx.first = '', False
        else:
            prefix = self.eol

        self.write(ctx.fp, row, prefix=prefix)

    def finalize(self, ctx):
        """Close the file and remove the per-context state.

        The state is cleared even when ``close()`` raises (e.g. a failed
        flush), so the context is never left holding a stale ``fp`` that
        would make a later ``initialize`` fail its "one at a time" check.
        """
        try:
            ctx.fp.close()
        finally:
            del ctx.fp, ctx.first
|
||||||
@ -1,40 +1,23 @@
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
|
from .file import FileWriter
|
||||||
from bonobo.util.lifecycle import with_context
|
from bonobo.util.lifecycle import with_context
|
||||||
|
|
||||||
__all__ = [
|
__all__ = ['JsonFileWriter', ]
|
||||||
'from_json',
|
|
||||||
'to_json',
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
@with_context
|
@with_context
|
||||||
class JsonWriter:
|
class JsonFileWriter(FileWriter):
|
||||||
def __init__(self, path_or_buf):
|
def __init__(self, path_or_buf):
|
||||||
self.path_or_buf = path_or_buf
|
super().__init__(path_or_buf, eol=',\n')
|
||||||
|
|
||||||
def initialize(self, ctx):
|
def initialize(self, ctx):
|
||||||
assert not hasattr(ctx, 'fp'), 'One at a time, baby.'
|
super().initialize(ctx)
|
||||||
ctx.fp = open(self.path_or_buf, 'w+')
|
|
||||||
ctx.fp.write('[\n')
|
ctx.fp.write('[\n')
|
||||||
ctx.first = True
|
|
||||||
|
|
||||||
def __call__(self, ctx, row):
|
def write(self, fp, line, prefix=''):
|
||||||
if ctx.first:
|
fp.write(prefix + json.dumps(line))
|
||||||
prefix = ''
|
|
||||||
ctx.first = False
|
|
||||||
else:
|
|
||||||
prefix = ',\n'
|
|
||||||
ctx.fp.write(prefix + json.dumps(row))
|
|
||||||
|
|
||||||
def finalize(self, ctx):
|
def finalize(self, ctx):
|
||||||
ctx.fp.write('\n]')
|
ctx.fp.write('\n]')
|
||||||
ctx.fp.close()
|
super().finalize(ctx)
|
||||||
del ctx.fp, ctx.first
|
|
||||||
|
|
||||||
|
|
||||||
def from_json(path_or_buf):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
to_json = JsonWriter
|
|
||||||
|
|||||||
@ -4,12 +4,16 @@ import functools
|
|||||||
import pprint
|
import pprint
|
||||||
|
|
||||||
from .tokens import NOT_MODIFIED
|
from .tokens import NOT_MODIFIED
|
||||||
|
from .helpers import run, console_run, jupyter_run
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'NOT_MODIFIED',
|
'NOT_MODIFIED',
|
||||||
|
'console_run',
|
||||||
'head',
|
'head',
|
||||||
|
'jupyter_run',
|
||||||
'log',
|
'log',
|
||||||
'noop',
|
'noop',
|
||||||
|
'run',
|
||||||
'tee',
|
'tee',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
20
bonobo/util/helpers.py
Normal file
20
bonobo/util/helpers.py
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
def run(*chain, plugins=None):
    """Build a graph from ``chain`` and execute it in a thread pool.

    :param chain: components, added to a fresh ``Graph`` as a single chain.
    :param plugins: optional plugins forwarded to the executor; a falsy
        value is replaced by an empty list.
    :return: whatever ``ThreadPoolExecutorStrategy.execute`` returns.
    """
    # Imported lazily -- presumably to avoid a circular import, since the
    # top-level ``bonobo`` package re-exports this helper (TODO confirm).
    from bonobo import Graph, ThreadPoolExecutorStrategy

    pipeline = Graph()
    pipeline.add_chain(*chain)

    active_plugins = plugins if plugins else []
    strategy = ThreadPoolExecutorStrategy()
    return strategy.execute(pipeline, plugins=active_plugins)
|
||||||
|
|
||||||
|
|
||||||
|
def console_run(*chain, output=True, plugins=None):
    """Run ``chain`` through ``run``, optionally with console output.

    :param chain: components forwarded to ``run``.
    :param output: when true, a ``ConsoleOutputPlugin`` is appended to the
        plugin list; when false, only the caller-supplied plugins are used.
    :param plugins: optional iterable of extra plugins (``None`` means none).
    :return: the value returned by ``run``.
    """
    from bonobo.ext.console import ConsoleOutputPlugin

    # The previous one-liner parsed as
    #     ((plugins or []) + [ConsoleOutputPlugin()]) if output else []
    # so ``output=False`` silently discarded the caller's plugins.  Build
    # the list explicitly; copying also avoids mutating the caller's list
    # and accepts any iterable, not just ``list``.
    active_plugins = list(plugins or [])
    if output:
        active_plugins.append(ConsoleOutputPlugin())

    return run(*chain, plugins=active_plugins)
|
||||||
|
|
||||||
|
|
||||||
|
def jupyter_run(*chain, plugins=None):
    """Run ``chain`` through ``run`` with a ``JupyterOutputPlugin`` added.

    :param chain: components forwarded to ``run``.
    :param plugins: optional list of extra plugins; the Jupyter output
        plugin is appended after them.
    :return: the value returned by ``run``.
    """
    from bonobo.ext.jupyter import JupyterOutputPlugin

    extra_plugins = plugins if plugins else []
    return run(*chain, plugins=extra_plugins + [JupyterOutputPlugin()])
|
||||||
23
docs/_templates/index.html
vendored
23
docs/_templates/index.html
vendored
@ -1,22 +1,20 @@
|
|||||||
{% extends "layout.html" %}
|
{% extends "layout.html" %}
|
||||||
{% set title = _('Overview') %}
|
{% set title = _('Bonobo — Data processing for humans') %}
|
||||||
{% block body %}
|
{% block body %}
|
||||||
|
|
||||||
<div style="border: 2px solid red; font-weight: bold;">
|
<div style="border: 2px solid red; font-weight: bold; margin: 1em; padding: 1em">
|
||||||
Migration in progress, things may be broken for now. Please give us some time to finish painting the walls.
|
Rewrite in progress, things may be broken for now. Please give us some time to finish painting the walls.
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<h1>{{ _('Welcome to Bonobo\'s Documentation') }}</h1>
|
<h1 style="text-align: center">
|
||||||
|
<img class="logo" src="{{ pathto('_static/bonobo.png', 1) }}" title="Bonobo" alt="Bonobo"
|
||||||
<div style="text-align: center;">
|
|
||||||
<img class="logo" src="{{ pathto('_static/bonobo.png', 1) }}" title="Bonobo"
|
|
||||||
style=" width: 128px; height: 128px;"/>
|
style=" width: 128px; height: 128px;"/>
|
||||||
</div>
|
</h1>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
{% trans %}
|
{% trans %}
|
||||||
Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of
|
<strong>Bonobo</strong> is a line-by-line data-processing toolkit for python 3.5+ emphasizing simple and
|
||||||
data transformations using a simple directed graph of python callables.
|
atomic data transformations defined using a directed graph of plain old python callables.
|
||||||
{% endtrans %}
|
{% endtrans %}
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
@ -71,9 +69,8 @@
|
|||||||
<table class="contentstable">
|
<table class="contentstable">
|
||||||
<tr>
|
<tr>
|
||||||
<td>
|
<td>
|
||||||
<p class="biglink"><a class="biglink" href="{{ pathto("tutorial") }}">{% trans %}First steps with
|
<p class="biglink"><a class="biglink" href="{{ pathto("tutorial/basics") }}">{% trans %}First steps{% endtrans %}</a><br/>
|
||||||
Bonobo{% endtrans %}</a><br/>
|
<span class="linkdescr">{% trans %}quick overview of basic features{% endtrans %}</span></p>
|
||||||
<span class="linkdescr">{% trans %}overview of basic features{% endtrans %}</span></p>
|
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
{%- if hasdoc('search') %}
|
{%- if hasdoc('search') %}
|
||||||
|
|||||||
12
docs/conf.py
12
docs/conf.py
@ -12,8 +12,14 @@ import bonobo
|
|||||||
# -- General configuration ------------------------------------------------
|
# -- General configuration ------------------------------------------------
|
||||||
|
|
||||||
extensions = [
|
extensions = [
|
||||||
'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage',
|
'sphinx.ext.autodoc',
|
||||||
'sphinx.ext.ifconfig', 'sphinx.ext.viewcode'
|
'sphinx.ext.doctest',
|
||||||
|
'sphinx.ext.intersphinx',
|
||||||
|
'sphinx.ext.todo',
|
||||||
|
'sphinx.ext.coverage',
|
||||||
|
'sphinx.ext.ifconfig',
|
||||||
|
'sphinx.ext.viewcode',
|
||||||
|
'sphinx.ext.graphviz',
|
||||||
]
|
]
|
||||||
|
|
||||||
# Add any paths that contain templates here, relative to this directory.
|
# Add any paths that contain templates here, relative to this directory.
|
||||||
@ -95,6 +101,8 @@ html_additional_pages = {'index': 'index.html'}
|
|||||||
html_static_path = ['_static']
|
html_static_path = ['_static']
|
||||||
html_show_sphinx = False
|
html_show_sphinx = False
|
||||||
|
|
||||||
|
graphviz_output_format = 'svg'
|
||||||
|
|
||||||
# -- Options for HTMLHelp output ------------------------------------------
|
# -- Options for HTMLHelp output ------------------------------------------
|
||||||
|
|
||||||
# Output file base name for HTML help builder.
|
# Output file base name for HTML help builder.
|
||||||
|
|||||||
34
docs/install.rst
Normal file
34
docs/install.rst
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
Installation
|
||||||
|
============
|
||||||
|
|
||||||
|
|
||||||
|
.. todo::
|
||||||
|
|
||||||
|
better install docs, especially on how to use different fork, etc.
|
||||||
|
|
||||||
|
Install with pip
|
||||||
|
::::::::::::::::
|
||||||
|
|
||||||
|
.. code-block:: shell-session
|
||||||
|
|
||||||
|
$ pip install bonobo
|
||||||
|
|
||||||
|
Install from source
|
||||||
|
:::::::::::::::::::
|
||||||
|
|
||||||
|
.. code-block:: shell-session
|
||||||
|
|
||||||
|
$ pip install git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo
|
||||||
|
|
||||||
|
Editable install
|
||||||
|
::::::::::::::::
|
||||||
|
|
||||||
|
If you plan on making patches to Bonobo, you should install it as an "editable" package.
|
||||||
|
|
||||||
|
|
||||||
|
.. code-block:: shell-session
|
||||||
|
|
||||||
|
$ pip install --editable git+https://github.com/python-bonobo/bonobo.git@master#egg=bonobo
|
||||||
|
|
||||||
|
Note: `-e` is the shorthand version of `--editable`.
|
||||||
|
|
||||||
146
docs/tutorial/basics.rst
Normal file
146
docs/tutorial/basics.rst
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
First steps - Basic concepts
|
||||||
|
============================
|
||||||
|
|
||||||
|
To begin with Bonobo, you should first install it:
|
||||||
|
|
||||||
|
.. code-block:: shell-session
|
||||||
|
|
||||||
|
$ pip install bonobo
|
||||||
|
|
||||||
|
See :doc:`install` if you're looking for more options.
|
||||||
|
|
||||||
|
Let's write a first data transformation
|
||||||
|
:::::::::::::::::::::::::::::::::::::::
|
||||||
|
|
||||||
|
We'll write a simple component that just uppercases everything. In **Bonobo**, a component is a plain old python
|
||||||
|
callable, not more, not less.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
def uppercase(x: str):
|
||||||
|
return x.upper()
|
||||||
|
|
||||||
|
Ok, this is kind of simple, and you can even use `str.upper` directly instead of writing a wrapper. The type annotations
|
||||||
|
are not used, but can make your code much more readable (and may be used as validators in the future).
|
||||||
|
|
||||||
|
To run this, we need two more things: a generator that feeds data, and something that outputs it.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
def generate_data():
|
||||||
|
yield 'foo'
|
||||||
|
yield 'bar'
|
||||||
|
yield 'baz'
|
||||||
|
|
||||||
|
def output(x: str):
|
||||||
|
print(x)
|
||||||
|
|
||||||
|
That should do the job. Now, let's chain the three callables together and run them.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo import run
|
||||||
|
|
||||||
|
run(generate_data, uppercase, output)
|
||||||
|
|
||||||
|
This is the simplest data transformation possible, and we run it using the `run` helper that hides the underlying object
|
||||||
|
composition necessary to actually run the callables in parallel. The more flexible, but a bit more verbose, way to do the
|
||||||
|
same thing would be:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo import Graph, ThreadPoolExecutorStrategy
|
||||||
|
|
||||||
|
graph = Graph()
|
||||||
|
graph.add_chain(generate_data, uppercase, output)
|
||||||
|
|
||||||
|
executor = ThreadPoolExecutorStrategy()
|
||||||
|
executor.execute(graph)
|
||||||
|
|
||||||
|
Depending on what you're doing, you may use the shorthand helper method, or the verbose one. Always favor the shorter,
|
||||||
|
if you don't need to tune the graph or the execution strategy.
|
||||||
|
|
||||||
|
Definitions
|
||||||
|
:::::::::::
|
||||||
|
|
||||||
|
* Graph
|
||||||
|
* Component
|
||||||
|
* Executor
|
||||||
|
|
||||||
|
.. todo:: Definitions, and substitute vague terms in the page by the exact term defined here
|
||||||
|
|
||||||
|
Summary
|
||||||
|
:::::::
|
||||||
|
|
||||||
|
Let's rewrite this using builtin functions and methods, then explain the few concepts available here:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo import Graph, ThreadPoolExecutorStrategy
|
||||||
|
|
||||||
|
# Represent our data processor as a simple directed graph of callables.
|
||||||
|
graph = Graph(
|
||||||
|
(x for x in ('foo', 'bar', 'baz')),
|
||||||
|
str.upper,
|
||||||
|
print,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use a thread pool.
|
||||||
|
executor = ThreadPoolExecutorStrategy()
|
||||||
|
|
||||||
|
# Run the thing.
|
||||||
|
executor.execute(graph)
|
||||||
|
|
||||||
|
Or the shorthand version, that you should prefer if you don't need fine tuning:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo import run
|
||||||
|
|
||||||
|
run(
|
||||||
|
iter(['foo', 'bar', 'baz']),
|
||||||
|
str.upper,
|
||||||
|
print,
|
||||||
|
)
|
||||||
|
|
||||||
|
Both methods are strictly equivalent (see :func:`bonobo.run`). When in doubt, favour the shorter.
|
||||||
|
|
||||||
|
Takeaways
|
||||||
|
:::::::::
|
||||||
|
|
||||||
|
① The :class:`bonobo.Graph` class is used to represent a data-processing pipeline.
|
||||||
|
|
||||||
|
It can represent simple list-like linear graphs, like here, but it can also represent much more complex graphs, with
|
||||||
|
branches and cycles.
|
||||||
|
|
||||||
|
This is what the graph we defined looks like:
|
||||||
|
|
||||||
|
.. graphviz::
|
||||||
|
|
||||||
|
digraph {
|
||||||
|
rankdir = LR;
|
||||||
|
"iter(['foo', 'bar', 'baz'])" -> "str.upper" -> "print";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
② Transformations are simple python callables. Whatever can be called can be used as a transformation. Callables can
|
||||||
|
either `return` or `yield` data to send it to the next step. Regular functions (using `return`) should be preferred if
|
||||||
|
each call is guaranteed to return exactly one result, while generators (using `yield`) should be preferred if the
|
||||||
|
number of output lines for a given input varies.
|
||||||
|
|
||||||
|
③ The graph is then executed using an `ExecutionStrategy`. For now, let's focus only on
|
||||||
|
:class:`bonobo.ThreadPoolExecutorStrategy`, which uses an underlying `concurrent.futures.ThreadPoolExecutor` to
|
||||||
|
schedule calls in a pool of threads, but basically this strategy is what determines the actual behaviour of execution.
|
||||||
|
|
||||||
|
④ Before actually executing the callables, the `ExecutorStrategy` instance will wrap each component in a `context`,
|
||||||
|
whose responsibility is to hold the state, to keep the components stateless. We'll expand on this later.
|
||||||
|
|
||||||
|
|
||||||
|
Next
|
||||||
|
::::
|
||||||
|
|
||||||
|
You now know all the basic concepts necessary to build (batch-like) data processors.
|
||||||
|
|
||||||
|
If you're confident with this part, let's get to a more real world example, using files and nice console output.
|
||||||
|
|
||||||
|
.. todo:: link to next page
|
||||||
46
docs/tutorial/basics2.rst
Normal file
46
docs/tutorial/basics2.rst
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
First steps - Working with files
|
||||||
|
================================
|
||||||
|
|
||||||
|
Bonobo would not be of any use if the aim was to uppercase small lists of strings. In fact, Bonobo should not be used
|
||||||
|
if you don't expect any gain from parallelization of tasks.
|
||||||
|
|
||||||
|
Let's take the following graph as an example:
|
||||||
|
|
||||||
|
.. graphviz::
|
||||||
|
|
||||||
|
digraph {
|
||||||
|
rankdir = LR;
|
||||||
|
"A" -> "B" -> "C";
|
||||||
|
}
|
||||||
|
|
||||||
|
The execution strategy does a bit of behind-the-scenes work, wrapping every component in a thread (assuming you're using
|
||||||
|
the :class:`bonobo.ThreadPoolExecutorStrategy`), which allows to start running `B` as soon as `A` yielded the first line
|
||||||
|
of data, and `C` as soon as `B` yielded the first line of data, even if `A` or `B` still have data to yield.
|
||||||
|
|
||||||
|
The great thing is that you generally don't have to think about it. Just be aware that your components will be run in
|
||||||
|
parallel, and don't worry too much about blocking components, as they won't block their siblings.
|
||||||
|
|
||||||
|
That being said, let's try to write a more real-world like transformation.
|
||||||
|
|
||||||
|
Reading a file
|
||||||
|
::::::::::::::
|
||||||
|
|
||||||
|
There are a few component builders available in **Bonobo** that let you read files. You should at least know about the following:
|
||||||
|
|
||||||
|
* :class:`bonobo.FileReader` (aliased as :func:`bonobo.from_file`)
|
||||||
|
* :class:`bonobo.JsonFileReader` (aliased as :func:`bonobo.from_json`)
|
||||||
|
* :class:`bonobo.CsvFileReader` (aliased as :func:`bonobo.from_csv`)
|
||||||
|
|
||||||
|
Reading a file is as simple as using one of those, and for the example, we'll use a text file that was generated using
|
||||||
|
Bonobo from the "liste-des-cafes-a-un-euro" dataset made available by Mairie de Paris under the Open Database
|
||||||
|
License (ODbL). You can `explore the original dataset <https://opendata.paris.fr/explore/dataset/liste-des-cafes-a-un-euro/information/>`_.
|
||||||
|
You'll need the example dataset, available in **Bonobo**'s repository.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from bonobo import FileReader, run
|
||||||
|
|
||||||
|
run(
|
||||||
|
FileReader('examples/datasets/cheap_coffeeshops_in_paris.txt'),
|
||||||
|
print,
|
||||||
|
)
|
||||||
182
examples/datasets/cheap_coffeeshops_in_paris.txt
Normal file
182
examples/datasets/cheap_coffeeshops_in_paris.txt
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
Extérieur Quai, 5, rue d'Alsace, 75010 Paris, France
|
||||||
|
Le Sully, 6 Bd henri IV, 75004 Paris, France
|
||||||
|
O q de poule, 53 rue du ruisseau, 75018 Paris, France
|
||||||
|
Le Pas Sage, 1 Passage du Grand Cerf, 75002 Paris, France
|
||||||
|
Le Dunois, 77 rue Dunois, 75013 Paris, France
|
||||||
|
La Renaissance, 112 Rue Championnet, 75018 Paris, France
|
||||||
|
Le chantereine, 51 Rue Victoire, 75009 Paris, France
|
||||||
|
Le Müller, 11 rue Feutrier, 75018 Paris, France
|
||||||
|
Le drapeau de la fidelité, 21 rue Copreaux, 75015 Paris, France
|
||||||
|
Le café des amis, 125 rue Blomet, 75015 Paris, France
|
||||||
|
Le Café Livres, 10 rue Saint Martin, 75004 Paris, France
|
||||||
|
Le Bosquet, 46 avenue Bosquet, 75007 Paris, France
|
||||||
|
Le Brio, 216, rue Marcadet, 75018 Paris, France
|
||||||
|
Le Kleemend's, 34 avenue Pierre Mendès-France, 75013 Paris, France
|
||||||
|
Café Pierre, 202 rue du faubourg st antoine, 75012 Paris, France
|
||||||
|
Les Arcades, 61 rue de Ponthieu, 75008 Paris, France
|
||||||
|
Le Square, 31 rue Saint-Dominique, 75007 Paris, France
|
||||||
|
Assaporare Dix sur Dix, 75, avenue Ledru-Rollin, 75012 Paris, France
|
||||||
|
Au cerceau d'or, 129 boulevard sebastopol, 75002 Paris, France
|
||||||
|
Café antoine, 17 rue Jean de la Fontaine, 75016 Paris, France
|
||||||
|
Café Lea, 5 rue Claude Bernard, 75005 Paris, France
|
||||||
|
Cardinal Saint-Germain, 11 boulevard Saint-Germain, 75005 Paris, France
|
||||||
|
Dédé la frite, 52 rue Notre-Dame des Victoires, 75002 Paris, France
|
||||||
|
La Bauloise, 36 rue du hameau, 75015 Paris, France
|
||||||
|
Le Bellerive, 71 quai de Seine, 75019 Paris, France
|
||||||
|
Le bistrot de Maëlle et Augustin, 42 rue coquillère, 75001 Paris, France
|
||||||
|
Le Dellac, 14 rue Rougemont, 75009 Paris, France
|
||||||
|
Le Felteu, 1 rue Pecquay, 75004 Paris, France
|
||||||
|
Le Saint Jean, 23 rue des abbesses, 75018 Paris, France
|
||||||
|
les montparnos, 65 boulevard Pasteur, 75015 Paris, France
|
||||||
|
Drole d'endroit pour une rencontre, 58 rue de Montorgueil, 75002 Paris, France
|
||||||
|
Le pari's café, 104 rue caulaincourt, 75018 Paris, France
|
||||||
|
Le Poulailler, 60 rue saint-sabin, 75011 Paris, France
|
||||||
|
L'Assassin, 99 rue Jean-Pierre Timbaud, 75011 Paris, France
|
||||||
|
l'Usine, 1 rue d'Avron, 75020 Paris, France
|
||||||
|
La Bricole, 52 rue Liebniz, 75018 Paris, France
|
||||||
|
le ronsard, place maubert, 75005 Paris, France
|
||||||
|
Face Bar, 82 rue des archives, 75003 Paris, France
|
||||||
|
American Kitchen, 49 rue bichat, 75010 Paris, France
|
||||||
|
La Marine, 55 bis quai de valmy, 75010 Paris, France
|
||||||
|
Le Bloc, 21 avenue Brochant, 75017 Paris, France
|
||||||
|
La Recoleta au Manoir, 229 avenue Gambetta, 75020 Paris, France
|
||||||
|
Le Pareloup, 80 Rue Saint-Charles, 75015 Paris, France
|
||||||
|
La Brasserie Gaité, 3 rue de la Gaité, 75014 Paris, France
|
||||||
|
Café Zen, 46 rue Victoire, 75009 Paris, France
|
||||||
|
O'Breizh, 27 rue de Penthièvre, 75008 Paris, France
|
||||||
|
Le Petit Choiseul, 23 rue saint augustin, 75002 Paris, France
|
||||||
|
Invitez vous chez nous, 7 rue Epée de Bois, 75005 Paris, France
|
||||||
|
La Cordonnerie, 142 Rue Saint-Denis 75002 Paris, 75002 Paris, France
|
||||||
|
Le Supercoin, 3, rue Baudelique, 75018 Paris, France
|
||||||
|
Populettes, 86 bis rue Riquet, 75018 Paris, France
|
||||||
|
Au bon coin, 49 rue des Cloys, 75018 Paris, France
|
||||||
|
Le Couvent, 69 rue Broca, 75013 Paris, France
|
||||||
|
La Brûlerie des Ternes, 111 rue mouffetard, 75005 Paris, France
|
||||||
|
L'Écir, 59 Boulevard Saint-Jacques, 75014 Paris, France
|
||||||
|
Le Chat bossu, 126, rue du Faubourg Saint Antoine, 75012 Paris, France
|
||||||
|
Denfert café, 58 boulvevard Saint Jacques, 75014 Paris, France
|
||||||
|
Le Café frappé, 95 rue Montmartre, 75002 Paris, France
|
||||||
|
La Perle, 78 rue vieille du temple, 75003 Paris, France
|
||||||
|
Le Descartes, 1 rue Thouin, 75005 Paris, France
|
||||||
|
Bagels & Coffee Corner, Place de Clichy, 75017 Paris, France
|
||||||
|
Le petit club, 55 rue de la tombe Issoire, 75014 Paris, France
|
||||||
|
Le Plein soleil, 90 avenue Parmentier, 75011 Paris, France
|
||||||
|
Le Relais Haussmann, 146, boulevard Haussmann, 75008 Paris, France
|
||||||
|
Le Malar, 88 rue Saint-Dominique, 75007 Paris, France
|
||||||
|
Au panini de la place, 47 rue Belgrand, 75020 Paris, France
|
||||||
|
Le Village, 182 rue de Courcelles, 75017 Paris, France
|
||||||
|
Pause Café, 41 rue de Charonne, 75011 Paris, France
|
||||||
|
Le Pure café, 14 rue Jean Macé, 75011 Paris, France
|
||||||
|
Extra old café, 307 fg saint Antoine, 75011 Paris, France
|
||||||
|
Chez Fafa, 44 rue Vinaigriers, 75010 Paris, France
|
||||||
|
En attendant l'or, 3 rue Faidherbe, 75011 Paris, France
|
||||||
|
Aux cadrans, 21 ter boulevard Diderot, 75012 Paris, France
|
||||||
|
Brûlerie San José, 30 rue des Petits-Champs, 75002 Paris, France
|
||||||
|
Etienne, 14 rue Turbigo, Paris, 75001 Paris, France
|
||||||
|
L'ingénu, 184 bd Voltaire, 75011 Paris, France
|
||||||
|
L'Olive, 8 rue L'Olive, 75018 Paris, France
|
||||||
|
Le Biz, 18 rue Favart, 75002 Paris, France
|
||||||
|
Le Cap Bourbon, 1 rue Louis le Grand, 75002 Paris, France
|
||||||
|
Le General Beuret, 9 Place du General Beuret, 75015 Paris, France
|
||||||
|
Le Germinal, 95 avenue Emile Zola, 75015 Paris, France
|
||||||
|
Le Ragueneau, 202 rue Saint-Honoré, 75001 Paris, France
|
||||||
|
Le refuge, 72 rue lamarck, 75018 Paris, France
|
||||||
|
Le sully, 13 rue du Faubourg Saint Denis, 75010 Paris, France
|
||||||
|
L'antre d'eux, 16 rue DE MEZIERES, 75006 Paris, France
|
||||||
|
Le bal du pirate, 60 rue des bergers, 75015 Paris, France
|
||||||
|
zic zinc, 95 rue claude decaen, 75012 Paris, France
|
||||||
|
l'orillon bar, 35 rue de l'orillon, 75011 Paris, France
|
||||||
|
Le Zazabar, 116 Rue de Ménilmontant, 75020 Paris, France
|
||||||
|
L'Inévitable, 22 rue Linné, 75005 Paris, France
|
||||||
|
Ragueneau, 202 rue Saint Honoré, 75001 Paris, France
|
||||||
|
Le Caminito, 48 rue du Dessous des Berges, 75013 Paris, France
|
||||||
|
Epicerie Musicale, 55bis quai de Valmy, 75010 Paris, France
|
||||||
|
Le petit Bretonneau, Le petit Bretonneau - à l'intérieur de l'Hôpital, 75018 Paris, France
|
||||||
|
Le Centenaire, 104 rue amelot, 75011 Paris, France
|
||||||
|
La Montagne Sans Geneviève, 13 Rue du Pot de Fer, 75005 Paris, France
|
||||||
|
Les Pères Populaires, 46 rue de Buzenval, 75020 Paris, France
|
||||||
|
Cafe de grenelle, 188 rue de Grenelle, 75007 Paris, France
|
||||||
|
Le relais de la victoire, 73 rue de la Victoire, 75009 Paris, France
|
||||||
|
La chaumière gourmande, Route de la Muette à Neuilly
|
||||||
|
Club hippique du Jardin d’Acclimatation, 75016 Paris, France
|
||||||
|
Caves populaires, 22 rue des Dames, 75017 Paris, France
|
||||||
|
Caprice café, 12 avenue Jean Moulin, 75014 Paris, France
|
||||||
|
Tamm Bara, 7 rue Clisson, 75013 Paris, France
|
||||||
|
L'anjou, 1 rue de Montholon, 75009 Paris, France
|
||||||
|
Café dans l'aerogare Air France Invalides, 2 rue Robert Esnault Pelterie, 75007 Paris, France
|
||||||
|
Waikiki, 10 rue d"Ulm, 75005 Paris, France
|
||||||
|
Chez Prune, 36 rue Beaurepaire, 75010 Paris, France
|
||||||
|
Au Vin Des Rues, 21 rue Boulard, 75014 Paris, France
|
||||||
|
bistrot les timbrés, 14 rue d'alleray, 75015 Paris, France
|
||||||
|
Café beauveau, 9 rue de Miromesnil, 75008 Paris, France
|
||||||
|
Café de la Mairie (du VIII), rue de Lisbonne, 75008 Paris, France
|
||||||
|
Café Pistache, 9 rue des petits champs, 75001 Paris, France
|
||||||
|
La Cagnotte, 13 Rue Jean-Baptiste Dumay, 75020 Paris, France
|
||||||
|
le 1 cinq, 172 rue de vaugirard, 75015 Paris, France
|
||||||
|
Le Killy Jen, 28 bis boulevard Diderot, 75012 Paris, France
|
||||||
|
Les Artisans, 106 rue Lecourbe, 75015 Paris, France
|
||||||
|
Peperoni, 83 avenue de Wagram, 75001 Paris, France
|
||||||
|
le lutece, 380 rue de vaugirard, 75015 Paris, France
|
||||||
|
Brasiloja, 16 rue Ganneron, 75018 Paris, France
|
||||||
|
Rivolux, 16 rue de Rivoli, 75004 Paris, France
|
||||||
|
Chai 33, 33 Cour Saint Emilion, 75012 Paris, France
|
||||||
|
L'européen, 21 Bis Boulevard Diderot, 75012 Paris, France
|
||||||
|
NoMa, 39 rue Notre Dame de Nazareth, 75003 Paris, France
|
||||||
|
O'Paris, 1 Rue des Envierges, 75020 Paris, France
|
||||||
|
Café Clochette, 16 avenue Richerand, 75010 Paris, France
|
||||||
|
La cantoche de Paname, 40 Boulevard Beaumarchais, 75011 Paris, France
|
||||||
|
Le Saint René, 148 Boulevard de Charonne, 75020 Paris, France
|
||||||
|
La Liberté, 196 rue du faubourg saint-antoine, 75012 Paris, France
|
||||||
|
Chez Rutabaga, 16 rue des Petits Champs, 75002 Paris, France
|
||||||
|
Le BB (Bouchon des Batignolles), 2 rue Lemercier, 75017 Paris, France
|
||||||
|
La Brocante, 10 rue Rossini, 75009 Paris, France
|
||||||
|
Le Plomb du cantal, 3 rue Gaîté, 75014 Paris, France
|
||||||
|
Les caves populaires, 22 rue des Dames, 75017 Paris, France
|
||||||
|
Chez Luna, 108 rue de Ménilmontant, 75020 Paris, France
|
||||||
|
Le bar Fleuri, 1 rue du Plateau, 75019 Paris, France
|
||||||
|
Le Chaumontois, 12 rue Armand Carrel, 75018 Paris, France
|
||||||
|
Trois pièces cuisine, 101 rue des dames, 75017 Paris, France
|
||||||
|
Le Zinc, 61 avenue de la Motte Picquet, 75015 Paris, France
|
||||||
|
La cantine de Zoé, 136 rue du Faubourg poissonnière, 75010 Paris, France
|
||||||
|
Les Vendangeurs, 6/8 rue Stanislas, 75006 Paris, France
|
||||||
|
L'avant comptoir, 3 carrefour de l'Odéon, 75006 Paris, France
|
||||||
|
Botak cafe, 1 rue Paul albert, 75018 Paris, France
|
||||||
|
le chateau d'eau, 67 rue du Château d'eau, 75010 Paris, France
|
||||||
|
Bistrot Saint-Antoine, 58 rue du Fbg Saint-Antoine, 75012 Paris, France
|
||||||
|
Chez Oscar, 11/13 boulevard Beaumarchais, 75004 Paris, France
|
||||||
|
Le Fronton, 63 rue de Ponthieu, 75008 Paris, France
|
||||||
|
Le Piquet, 48 avenue de la Motte Picquet, 75015 Paris, France
|
||||||
|
Le Tournebride, 104 rue Mouffetard, 75005 Paris, France
|
||||||
|
maison du vin, 52 rue des plantes, 75014 Paris, France
|
||||||
|
Coffee Chope, 344Vrue Vaugirard, 75015 Paris, France
|
||||||
|
L'entrepôt, 157 rue Bercy 75012 Paris, 75012 Paris, France
|
||||||
|
Le café Monde et Médias, Place de la République, 75003 Paris, France
|
||||||
|
Café rallye tournelles, 11 Quai de la Tournelle, 75005 Paris, France
|
||||||
|
Brasserie le Morvan, 61 rue du château d'eau, 75010 Paris, France
|
||||||
|
Chez Miamophile, 6 rue Mélingue, 75019 Paris, France
|
||||||
|
La Caravane, Rue de la Fontaine au Roi, 75011 Paris, France
|
||||||
|
Panem, 18 rue de Crussol, 75011 Paris, France
|
||||||
|
Petits Freres des Pauvres, 47 rue de Batignolles, 75017 Paris, France
|
||||||
|
Café Dupont, 198 rue de la Convention, 75015 Paris, France
|
||||||
|
L'Angle, 28 rue de Ponthieu, 75008 Paris, France
|
||||||
|
Institut des Cultures d'Islam, 19-23 rue Léon, 75018 Paris, France
|
||||||
|
Canopy Café associatif, 19 rue Pajol, 75018 Paris, France
|
||||||
|
L'Entracte, place de l'opera, 75002 Paris, France
|
||||||
|
Le Sévigné, 15 rue du Parc Royal, 75003 Paris, France
|
||||||
|
Le Café d'avant, 35 rue Claude Bernard, 75005 Paris, France
|
||||||
|
Le Lucernaire, 53 rue Notre-Dame des Champs, 75006 Paris, France
|
||||||
|
Le Brigadier, 12 rue Blanche, 75009 Paris, France
|
||||||
|
L'âge d'or, 26 rue du Docteur Magnan, 75013 Paris, France
|
||||||
|
Café Victor, 10 boulevard Victor, 75015 Paris, France
|
||||||
|
L'empreinte, 54, avenue Daumesnil, 75012 Paris, France
|
||||||
|
L'horizon, 93, rue de la Roquette, 75011 Paris, France
|
||||||
|
Au pays de Vannes, 34 bis rue de Wattignies, 75012 Paris, France
|
||||||
|
Café Martin, 2 place Martin Nadaud, 75001 Paris, France
|
||||||
|
Café Varenne, 36 rue de Varenne, 75007 Paris, France
|
||||||
|
l'Eléphant du nil, 125 Rue Saint-Antoine, 75004 Paris, France
|
||||||
|
Le Comptoir, 354 bis rue Vaugirard, 75015 Paris, France
|
||||||
|
Le Parc Vaugirard, 358 rue de Vaugirard, 75015 Paris, France
|
||||||
|
Le Reynou, 2 bis quai de la mégisserie, 75001 Paris, France
|
||||||
|
le Zango, 58 rue Daguerre, 75014 Paris, France
|
||||||
|
Melting Pot, 3 rue de Lagny, 75020 Paris, France
|
||||||
|
Pari's Café, 174 avenue de Clichy, 75017 Paris, France
|
||||||
@ -1,16 +1,17 @@
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from blessings import Terminal
|
from blessings import Terminal
|
||||||
from pycountry import countries
|
|
||||||
|
|
||||||
from bonobo.ext.console import console_run
|
from bonobo import console_run, tee, JsonFileWriter
|
||||||
from bonobo.ext.ods import extract_ods
|
from bonobo.ext.opendatasoft import from_opendatasoft_api
|
||||||
from bonobo.util import tee
|
|
||||||
from bonobo.io.json import to_json
|
|
||||||
|
|
||||||
DATASET = 'fablabs-in-the-world'
|
try:
|
||||||
SEARCH_URL = 'https://datanova.laposte.fr/api/records/1.0/search/'
|
import pycountry
|
||||||
URL = SEARCH_URL + '?dataset=' + DATASET
|
except ImportError as exc:
|
||||||
|
raise ImportError('You must install package "pycountry" to run this example.') from exc
|
||||||
|
|
||||||
|
API_DATASET = 'fablabs-in-the-world'
|
||||||
|
API_NETLOC = 'datanova.laposte.fr'
|
||||||
ROWS = 100
|
ROWS = 100
|
||||||
|
|
||||||
t = Terminal()
|
t = Terminal()
|
||||||
@ -25,7 +26,7 @@ def normalize(row):
|
|||||||
**
|
**
|
||||||
row,
|
row,
|
||||||
'links': list(filter(None, map(_getlink, json.loads(row.get('links'))))),
|
'links': list(filter(None, map(_getlink, json.loads(row.get('links'))))),
|
||||||
'country': countries.get(alpha_2=row.get('country_code', '').upper()).name,
|
'country': pycountry.countries.get(alpha_2=row.get('country_code', '').upper()).name,
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@ -47,15 +48,15 @@ def display(row):
|
|||||||
print(' - {}: {address}'.format(t.blue('address'), address=', '.join(address)))
|
print(' - {}: {address}'.format(t.blue('address'), address=', '.join(address)))
|
||||||
print(' - {}: {links}'.format(t.blue('links'), links=', '.join(row['links'])))
|
print(' - {}: {links}'.format(t.blue('links'), links=', '.join(row['links'])))
|
||||||
print(' - {}: {geometry}'.format(t.blue('geometry'), **row))
|
print(' - {}: {geometry}'.format(t.blue('geometry'), **row))
|
||||||
print(' - {}: {source}'.format(t.blue('source'), source='datanova/' + DATASET))
|
print(' - {}: {source}'.format(t.blue('source'), source='datanova/' + API_DATASET))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
console_run(
|
console_run(
|
||||||
extract_ods(
|
from_opendatasoft_api(
|
||||||
SEARCH_URL, DATASET, timezone='Europe/Paris'),
|
API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris'),
|
||||||
normalize,
|
normalize,
|
||||||
filter_france,
|
filter_france,
|
||||||
tee(display),
|
tee(display),
|
||||||
to_json('fablabs.json'),
|
JsonFileWriter('fablabs.json'),
|
||||||
output=True, )
|
output=True, )
|
||||||
|
|||||||
15
examples/read_cheap_coffeeshops_in_paris.py
Normal file
15
examples/read_cheap_coffeeshops_in_paris.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
from os.path import dirname, realpath, join
|
||||||
|
|
||||||
|
from bonobo import console_run
|
||||||
|
from bonobo.ext.opendatasoft import from_opendatasoft_api
|
||||||
|
from bonobo.io.file import FileWriter
|
||||||
|
|
||||||
|
OUTPUT_FILENAME = realpath(join(dirname(__file__), 'datasets/cheap_coffeeshops_in_paris.txt'))
|
||||||
|
|
||||||
|
console_run(
|
||||||
|
from_opendatasoft_api(
|
||||||
|
'liste-des-cafes-a-un-euro', netloc='opendata.paris.fr'),
|
||||||
|
lambda row: '{nom_du_cafe}, {adresse}, {arrondissement} Paris, France'.format(**row),
|
||||||
|
FileWriter(OUTPUT_FILENAME), )
|
||||||
|
|
||||||
|
print('Import done, read {} for results.'.format(OUTPUT_FILENAME))
|
||||||
18
examples/tutorial_basics_firststeps.py
Normal file
18
examples/tutorial_basics_firststeps.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
from bonobo import run
|
||||||
|
|
||||||
|
|
||||||
|
def generate_data():
|
||||||
|
yield 'foo'
|
||||||
|
yield 'bar'
|
||||||
|
yield 'baz'
|
||||||
|
|
||||||
|
|
||||||
|
def uppercase(x: str):
|
||||||
|
return x.upper()
|
||||||
|
|
||||||
|
|
||||||
|
def output(x: str):
|
||||||
|
print(x)
|
||||||
|
|
||||||
|
|
||||||
|
run(generate_data, uppercase, output)
|
||||||
18
examples/tutorial_basics_summary.py
Normal file
18
examples/tutorial_basics_summary.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
from bonobo import Graph, ThreadPoolExecutorStrategy
|
||||||
|
|
||||||
|
|
||||||
|
def yield_from(*args):
|
||||||
|
yield from args
|
||||||
|
|
||||||
|
|
||||||
|
# Represent our data processor as a simple directed graph of callables.
|
||||||
|
graph = Graph(
|
||||||
|
lambda: (x for x in ('foo', 'bar', 'baz')),
|
||||||
|
str.upper,
|
||||||
|
print, )
|
||||||
|
|
||||||
|
# Use a thread pool.
|
||||||
|
executor = ThreadPoolExecutorStrategy()
|
||||||
|
|
||||||
|
# Run the thing.
|
||||||
|
executor.execute(graph)
|
||||||
@ -1,6 +1,6 @@
|
|||||||
from mock import patch
|
from mock import patch
|
||||||
|
|
||||||
from bonobo.ext.ods import extract_ods
|
from bonobo.ext.opendatasoft import from_opendatasoft_api
|
||||||
|
|
||||||
|
|
||||||
class ResponseMock:
|
class ResponseMock:
|
||||||
@ -17,7 +17,7 @@ class ResponseMock:
|
|||||||
|
|
||||||
|
|
||||||
def test_read_from_opendatasoft_api():
|
def test_read_from_opendatasoft_api():
|
||||||
extract = extract_ods('http://example.com/', 'test-a-set')
|
extract = from_opendatasoft_api('http://example.com/', 'test-a-set')
|
||||||
with patch(
|
with patch(
|
||||||
'requests.get', return_value=ResponseMock([
|
'requests.get', return_value=ResponseMock([
|
||||||
{
|
{
|
||||||
|
|||||||
@ -1,13 +1,13 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from bonobo import to_json, Bag
|
from bonobo import Bag, JsonFileWriter
|
||||||
from bonobo.core.contexts import ComponentExecutionContext
|
from bonobo.core.contexts import ComponentExecutionContext
|
||||||
from bonobo.util.tokens import BEGIN, END
|
from bonobo.util.tokens import BEGIN, END
|
||||||
|
|
||||||
|
|
||||||
def test_write_json_to_file(tmpdir):
|
def test_write_json_to_file(tmpdir):
|
||||||
file = tmpdir.join('output.json')
|
file = tmpdir.join('output.json')
|
||||||
json_writer = to_json(str(file))
|
json_writer = JsonFileWriter(str(file))
|
||||||
context = ComponentExecutionContext(json_writer, None)
|
context = ComponentExecutionContext(json_writer, None)
|
||||||
|
|
||||||
context.initialize()
|
context.initialize()
|
||||||
@ -28,7 +28,7 @@ def test_write_json_to_file(tmpdir):
|
|||||||
|
|
||||||
def test_write_json_without_initializer_should_not_work(tmpdir):
|
def test_write_json_without_initializer_should_not_work(tmpdir):
|
||||||
file = tmpdir.join('output.json')
|
file = tmpdir.join('output.json')
|
||||||
json_writer = to_json(str(file))
|
json_writer = JsonFileWriter(str(file))
|
||||||
|
|
||||||
context = ComponentExecutionContext(json_writer, None)
|
context = ComponentExecutionContext(json_writer, None)
|
||||||
with pytest.raises(AttributeError):
|
with pytest.raises(AttributeError):
|
||||||
|
|||||||
Reference in New Issue
Block a user