From 7f34737c3ad5767685f82609730f1a82d6e22d27 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 7 Nov 2017 11:52:26 +0100 Subject: [PATCH 01/11] wip documentation --- bonobo/commands/run.py | 5 +---- bonobo/config/configurables.py | 22 ++++++++++++++++++- bonobo/config/options.py | 9 +++++++- bonobo/nodes/io/base.py | 31 +++++++++++++++++--------- bonobo/nodes/io/csv.py | 40 +++++++++++++--------------------- bonobo/nodes/io/file.py | 8 +++++-- bonobo/structs/tokens.py | 3 ++- docs/reference/api.rst | 3 --- 8 files changed, 74 insertions(+), 47 deletions(-) diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index 0a11577..cc82022 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -3,10 +3,7 @@ import os import bonobo from bonobo.constants import DEFAULT_SERVICES_ATTR, DEFAULT_SERVICES_FILENAME -DEFAULT_GRAPH_FILENAMES = ( - '__main__.py', - 'main.py', -) +DEFAULT_GRAPH_FILENAMES = ('__main__.py', 'main.py',) DEFAULT_GRAPH_ATTR = 'get_graph' diff --git a/bonobo/config/configurables.py b/bonobo/config/configurables.py index 85ecdde..47c87e2 100644 --- a/bonobo/config/configurables.py +++ b/bonobo/config/configurables.py @@ -1,5 +1,5 @@ -from bonobo.util import isoption, iscontextprocessor, sortedlist from bonobo.errors import AbstractError +from bonobo.util import isoption, iscontextprocessor, sortedlist, get_name __all__ = [ 'Configurable', @@ -37,6 +37,26 @@ class ConfigurableMeta(type): cls.__names.add(name) cls.__options.insort((not value.positional, value._creation_counter, name, value)) + # Docstring formating + _options_doc = [] + for _positional, _counter, _name, _value in cls.__options: + _param = _name + if _value.type: + _param = get_name(_value.type) + ' ' + _param + + prefix = ':param {}: '.format(_param) + for lineno, line in enumerate((_value.__doc__ or '').split('\n')): + _options_doc.append((' ' * len(prefix) if lineno else prefix) + line) + cls.__doc__ = '\n\n'.join( + map( + str.strip, + filter(None, ( + 
cls.__doc__, + '\n'.join(_options_doc) + )) + ) + ) + @property def __options__(cls): return ((name, option) for _, _, name, option in cls.__options) diff --git a/bonobo/config/options.py b/bonobo/config/options.py index 2fd9491..2b92f47 100644 --- a/bonobo/config/options.py +++ b/bonobo/config/options.py @@ -1,3 +1,5 @@ +from textwrap import dedent + from bonobo.util.inspect import istype @@ -60,7 +62,12 @@ class Option: self.positional = positional self.default = default - self.__doc__ = __doc__ or self.__doc__ + # Docstring formating + self.__doc__ = __doc__ or None + if self.__doc__: + self.__doc__ = dedent(self.__doc__.strip('\n')).strip() + if default: + self.__doc__ += '\nDefault: {!r}'.format(default) # This hack is necessary for python3.5 self._creation_counter = Option._creation_counter diff --git a/bonobo/nodes/io/base.py b/bonobo/nodes/io/base.py index 496a0e8..c59195a 100644 --- a/bonobo/nodes/io/base.py +++ b/bonobo/nodes/io/base.py @@ -5,14 +5,16 @@ from bonobo.structs.bags import Bag class IOFormatEnabled(Configurable): - ioformat = Option(default=settings.IOFORMAT.get) + ioformat = Option(default=settings.IOFORMAT.get, __doc__=''' + Input/output format for rows. This will be removed in 0.6, so please use the kwargs format. + ''') def get_input(self, *args, **kwargs): if self.ioformat == settings.IOFORMAT_ARG0: if len(args) != 1 or len(kwargs): raise UnrecoverableValueError( 'Wrong input formating: IOFORMAT=ARG0 implies one arg and no kwargs, got args={!r} and kwargs={!r}.'. - format(args, kwargs) + format(args, kwargs) ) return args[0] @@ -20,7 +22,7 @@ class IOFormatEnabled(Configurable): if len(args) or not len(kwargs): raise UnrecoverableValueError( 'Wrong input formating: IOFORMAT=KWARGS ioformat implies no arg, got args={!r} and kwargs={!r}.'. - format(args, kwargs) + format(args, kwargs) ) return kwargs @@ -40,17 +42,26 @@ class FileHandler(Configurable): """Abstract component factory for file-related components. 
Args: - path (str): which path to use within the provided filesystem. - eol (str): which character to use to separate lines. + eol (str): which mode (str): which mode to use when opening the file. fs (str): service name to use for filesystem. """ - path = Option(str, required=True, positional=True) # type: str - eol = Option(str, default='\n') # type: str - mode = Option(str) # type: str - encoding = Option(str, default='utf-8') # type: str - fs = Service('fs') # type: str + path = Option(str, required=True, positional=True, __doc__=''' + Path to use within the provided filesystem. + ''') # type: str + eol = Option(str, default='\n', __doc__=''' + Character to use as line separator. + ''') # type: str + mode = Option(str, __doc__=''' + What mode to use for open() call. + ''') # type: str + encoding = Option(str, default='utf-8', __doc__=''' + Encoding. + ''') # type: str + fs = Service('fs', __doc__=''' + The filesystem instance to use. + ''') # type: str @ContextProcessor def file(self, context, fs): diff --git a/bonobo/nodes/io/csv.py b/bonobo/nodes/io/csv.py index 31222fb..e141af5 100644 --- a/bonobo/nodes/io/csv.py +++ b/bonobo/nodes/io/csv.py @@ -3,43 +3,33 @@ import csv from bonobo.config import Option from bonobo.config.processors import ContextProcessor from bonobo.constants import NOT_MODIFIED -from bonobo.nodes.io.file import FileReader, FileWriter from bonobo.nodes.io.base import FileHandler, IOFormatEnabled +from bonobo.nodes.io.file import FileReader, FileWriter from bonobo.util.objects import ValueHolder class CsvHandler(FileHandler): - """ - - .. attribute:: delimiter - - The CSV delimiter. - - .. attribute:: quotechar - - The CSV quote character. - - .. attribute:: headers - - The list of column names, if the CSV does not contain it as its first line. 
- - """ - delimiter = Option(str, default=';') - quotechar = Option(str, default='"') - headers = Option(tuple, required=False) + delimiter = Option(str, default=';', __doc__=''' + Delimiter used between values. + ''') + quotechar = Option(str, default='"', __doc__=''' + Character used for quoting values. + ''') + headers = Option(tuple, required=False, __doc__=''' + Tuple of headers to use, if provided. + Readers will try to guess that from first line, unless this option is provided. + Writers will guess from kwargs keys, unless this option is provided. + ''') class CsvReader(IOFormatEnabled, FileReader, CsvHandler): """ Reads a CSV and yield the values as dicts. - - .. attribute:: skip - - The amount of lines to skip before it actually yield output. - """ - skip = Option(int, default=0) + skip = Option(int, default=0, __doc__=''' + If set and greater than zero, the reader will skip this amount of lines. + ''') @ContextProcessor def csv_headers(self, context, fs, file): diff --git a/bonobo/nodes/io/file.py b/bonobo/nodes/io/file.py index e49d6de..34a585f 100644 --- a/bonobo/nodes/io/file.py +++ b/bonobo/nodes/io/file.py @@ -12,7 +12,9 @@ class FileReader(Reader, FileHandler): present. Extending it is usually the right way to create more specific file readers (like json, csv, etc.) """ - mode = Option(str, default='r') + mode = Option(str, default='r', __doc__=''' + What mode to use for open() call. + ''') # type: str def read(self, fs, file): """ @@ -30,7 +32,9 @@ class FileWriter(Writer, FileHandler): usually the right way to create more specific file writers (like json, csv, etc.) """ - mode = Option(str, default='w+') + mode = Option(str, default='w+', __doc__=''' + What mode to use for open() call. 
+ ''') # type: str @ContextProcessor def lineno(self, context, fs, file): diff --git a/bonobo/structs/tokens.py b/bonobo/structs/tokens.py index 325a3b8..9ef6f64 100644 --- a/bonobo/structs/tokens.py +++ b/bonobo/structs/tokens.py @@ -1,8 +1,9 @@ class Token: - """Factory for signal oriented queue messages or other token types.""" + """Token factory.""" def __init__(self, name): self.__name__ = name + self.__doc__ = 'The {!r} token.'.format(name) def __repr__(self): return '<{}>'.format(self.__name__) diff --git a/docs/reference/api.rst b/docs/reference/api.rst index e401b9c..3d6f7c7 100644 --- a/docs/reference/api.rst +++ b/docs/reference/api.rst @@ -4,9 +4,6 @@ Bonobo API The Bonobo API, available directly under the :mod:`bonobo` package, contains all the tools you need to get started with bonobo. -The :mod:`bonobo` package -::::::::::::::::::::::::: - .. automodule:: bonobo :members: :undoc-members: From 5490609ed563e231a92cfb2d4da16f6524604c1f Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Fri, 8 Dec 2017 07:59:15 +0100 Subject: [PATCH 02/11] Update dependencies, add sitemap. --- Makefile | 10 +++++++--- Projectfile | 9 +++++++++ config/conda.yml | 1 + docs/conf.py | 3 +++ requirements-dev.txt | 20 ++++++++++++-------- requirements-docker.txt | 12 +++++------- requirements-jupyter.txt | 20 ++++++++++---------- requirements.txt | 10 +++++----- setup.py | 10 +++++++++- 9 files changed, 61 insertions(+), 34 deletions(-) diff --git a/Makefile b/Makefile index 777e014..5c95522 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Generated by Medikit 0.4a5 on 2017-10-30. +# Generated by Medikit 0.4.3 on 2017-12-08. # All changes will be overriden. 
PACKAGE ?= bonobo @@ -10,6 +10,7 @@ PYTHON_REQUIREMENTS_DEV_FILE ?= requirements-dev.txt QUICK ?= PIP ?= $(PYTHON_DIRNAME)/pip PIP_INSTALL_OPTIONS ?= +VERSION ?= $(shell git describe 2>/dev/null || git rev-parse --short HEAD) PYTEST ?= $(PYTHON_DIRNAME)/pytest PYTEST_OPTIONS ?= --capture=no --cov=$(PACKAGE) --cov-report html SPHINX_BUILD ?= $(PYTHON_DIRNAME)/sphinx-build @@ -18,9 +19,9 @@ SPHINX_SOURCEDIR ?= docs SPHINX_BUILDDIR ?= $(SPHINX_SOURCEDIR)/_build YAPF ?= $(PYTHON) -m yapf YAPF_OPTIONS ?= -rip -VERSION ?= $(shell git describe 2>/dev/null || echo dev) +SPHINX_AUTOBUILD ?= $(PYTHON_DIRNAME)/sphinx-autobuild -.PHONY: $(SPHINX_SOURCEDIR) clean format install install-dev test update update-requirements +.PHONY: $(SPHINX_SOURCEDIR) clean format install install-dev test update update-requirements watch-$(SPHINX_SOURCEDIR) # Installs the local project dependencies. install: @@ -57,3 +58,6 @@ $(SPHINX_SOURCEDIR): install-dev format: install-dev $(YAPF) $(YAPF_OPTIONS) . $(YAPF) $(YAPF_OPTIONS) Projectfile + +watch-$(SPHINX_SOURCEDIR): + $(SPHINX_AUTOBUILD) $(SPHINX_SOURCEDIR) $(shell mktemp -d) diff --git a/Projectfile b/Projectfile index c812fc1..1145c9d 100644 --- a/Projectfile +++ b/Projectfile @@ -2,6 +2,7 @@ from medikit import require +make = require('make') pytest = require('pytest') python = require('python') sphinx = require('sphinx') @@ -49,6 +50,7 @@ python.add_requirements( 'cookiecutter >=1.5,<1.6', 'pytest-sugar >=0.8,<0.9', 'pytest-timeout >=1,<2', + 'sphinx-sitemap >=0.2,<0.3', ], docker=[ 'bonobo-docker', @@ -59,4 +61,11 @@ python.add_requirements( ] ) +@listen(make.on_generate) +def on_make_generate(event): + event.makefile['SPHINX_AUTOBUILD'] = '$(PYTHON_DIRNAME)/sphinx-autobuild' + event.makefile.add_target('watch-$(SPHINX_SOURCEDIR)', ''' + $(SPHINX_AUTOBUILD) $(SPHINX_SOURCEDIR) $(shell mktemp -d) + ''', phony=True) + # vim: ft=python: diff --git a/config/conda.yml b/config/conda.yml index 09b92de..610064e 100644 --- a/config/conda.yml +++ 
b/config/conda.yml @@ -9,6 +9,7 @@ dependencies: - fs ==2.0.3 - psutil ==5.2.2 - requests ==2.13.0 + - sphinx-sitemap==0.2 - stevedore ==1.21.0 # for examples - pycountry ==17.9.23 diff --git a/docs/conf.py b/docs/conf.py index afbbe83..50b1c5c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,8 +20,11 @@ extensions = [ 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', 'sphinx.ext.graphviz', + 'sphinx_sitemap', ] +site_url = 'http://docs.bonobo-project.org/en/master/' + # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/requirements-dev.txt b/requirements-dev.txt index 553fefc..4e491e3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,34 +1,38 @@ -e .[dev] alabaster==0.7.10 -arrow==0.10.0 +arrow==0.12.0 +attrs==17.3.0 babel==2.5.1 binaryornot==0.4.4 -certifi==2017.7.27.1 +certifi==2017.11.5 chardet==3.0.4 click==6.7 cookiecutter==1.5.1 -coverage==4.4.1 +coverage==4.4.2 docutils==0.14 future==0.16.0 idna==2.6 imagesize==0.7.1 jinja2-time==0.2.0 -jinja2==2.9.6 +jinja2==2.10 markupsafe==1.0 +pluggy==0.6.0 poyo==0.4.1 -py==1.4.34 +py==1.5.2 pygments==2.2.0 pytest-cov==2.5.1 pytest-sugar==0.8.0 -pytest-timeout==1.2.0 -pytest==3.2.3 +pytest-timeout==1.2.1 +pytest==3.3.1 python-dateutil==2.6.1 -pytz==2017.2 +pytz==2017.3 requests==2.18.4 six==1.11.0 snowballstemmer==1.2.1 +sphinx-sitemap==0.2 sphinx==1.6.5 sphinxcontrib-websupport==1.0.1 termcolor==1.1.0 urllib3==1.22 whichcraft==0.4.1 +yapf==0.20.0 diff --git a/requirements-docker.txt b/requirements-docker.txt index d6f1160..1748f0b 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -1,22 +1,20 @@ -e .[docker] appdirs==1.4.3 bonobo-docker==0.5.0 -certifi==2017.7.27.1 +certifi==2017.11.5 chardet==3.0.4 -click==6.7 colorama==0.3.9 docker-pycreds==0.2.1 docker==2.3.0 -fs==2.0.12 +fs==2.0.17 idna==2.6 packaging==16.8 pbr==3.1.1 -psutil==5.4.0 +psutil==5.4.2 pyparsing==2.2.0 -python-dotenv==0.7.1 -pytz==2017.2 +pytz==2017.3 
requests==2.18.4 six==1.11.0 -stevedore==1.27.1 +stevedore==1.28.0 urllib3==1.22 websocket-client==0.44.0 diff --git a/requirements-jupyter.txt b/requirements-jupyter.txt index 4e1d024..1ed0eb1 100644 --- a/requirements-jupyter.txt +++ b/requirements-jupyter.txt @@ -1,38 +1,38 @@ -e .[jupyter] appnope==0.1.0 -bleach==2.1.1 +bleach==2.1.2 decorator==4.1.2 entrypoints==0.2.3 -html5lib==0.999999999 -ipykernel==4.6.1 +html5lib==1.0.1 +ipykernel==4.7.0 ipython-genutils==0.2.0 ipython==6.2.1 ipywidgets==6.0.1 jedi==0.11.0 -jinja2==2.9.6 +jinja2==2.10 jsonschema==2.6.0 jupyter-client==5.1.0 jupyter-console==5.2.0 -jupyter-core==4.3.0 +jupyter-core==4.4.0 jupyter==1.0.0 markupsafe==1.0 -mistune==0.8 +mistune==0.8.3 nbconvert==5.3.1 nbformat==4.4.0 -notebook==5.2.0 +notebook==5.2.2 pandocfilters==1.4.2 parso==0.1.0 -pexpect==4.2.1 +pexpect==4.3.0 pickleshare==0.7.4 prompt-toolkit==1.0.15 ptyprocess==0.5.2 pygments==2.2.0 python-dateutil==2.6.1 -pyzmq==16.0.2 +pyzmq==16.0.3 qtconsole==4.3.1 simplegeneric==0.8.1 six==1.11.0 -terminado==0.6 +terminado==0.8.1 testpath==0.3.1 tornado==4.5.2 traitlets==4.3.2 diff --git a/requirements.txt b/requirements.txt index 13d5113..b882953 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,16 @@ -e . 
appdirs==1.4.3 -certifi==2017.7.27.1 +certifi==2017.11.5 chardet==3.0.4 colorama==0.3.9 -fs==2.0.12 +fs==2.0.17 idna==2.6 packaging==16.8 pbr==3.1.1 -psutil==5.4.0 +psutil==5.4.2 pyparsing==2.2.0 -pytz==2017.2 +pytz==2017.3 requests==2.18.4 six==1.11.0 -stevedore==1.27.1 +stevedore==1.28.0 urllib3==1.22 diff --git a/setup.py b/setup.py index 7b513dc..359513d 100644 --- a/setup.py +++ b/setup.py @@ -43,6 +43,14 @@ else: setup( author='Romain Dorgueil', author_email='romain@dorgueil.net', + data_files=[ + ( + 'share/jupyter/nbextensions/bonobo-jupyter', [ + 'bonobo/ext/jupyter/static/extension.js', 'bonobo/ext/jupyter/static/index.js', + 'bonobo/ext/jupyter/static/index.js.map' + ] + ) + ], description=('Bonobo, a simple, modern and atomic extract-transform-load toolkit for ' 'python 3.5+.'), license='Apache License, Version 2.0', @@ -60,7 +68,7 @@ setup( 'dev': [ 'cookiecutter (>= 1.5, < 1.6)', 'coverage (>= 4.4, < 5.0)', 'pytest (>= 3.1, < 4.0)', 'pytest-cov (>= 2.5, < 3.0)', 'pytest-sugar (>= 0.8, < 0.9)', 'pytest-timeout (>= 1, < 2)', - 'sphinx (>= 1.6, < 2.0)' + 'sphinx (>= 1.6, < 2.0)', 'sphinx-sitemap (>= 0.2, < 0.3)', 'yapf' ], 'docker': ['bonobo-docker'], 'jupyter': ['ipywidgets (>= 6.0.0, < 7)', 'jupyter (>= 1.0, < 1.1)'] From 6b83d623ac52f88b3c58833257d9704a174f7da7 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Fri, 8 Dec 2017 07:59:15 +0100 Subject: [PATCH 03/11] Update dependencies, add sitemap. --- Makefile | 10 +++++++--- Projectfile | 9 +++++++++ config/conda.yml | 1 + docs/conf.py | 3 +++ requirements-dev.txt | 20 ++++++++++++-------- requirements-docker.txt | 12 +++++------- requirements-jupyter.txt | 20 ++++++++++---------- requirements.txt | 10 +++++----- setup.py | 10 +++++++++- 9 files changed, 61 insertions(+), 34 deletions(-) diff --git a/Makefile b/Makefile index 777e014..5c95522 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Generated by Medikit 0.4a5 on 2017-10-30. +# Generated by Medikit 0.4.3 on 2017-12-08. 
# All changes will be overriden. PACKAGE ?= bonobo @@ -10,6 +10,7 @@ PYTHON_REQUIREMENTS_DEV_FILE ?= requirements-dev.txt QUICK ?= PIP ?= $(PYTHON_DIRNAME)/pip PIP_INSTALL_OPTIONS ?= +VERSION ?= $(shell git describe 2>/dev/null || git rev-parse --short HEAD) PYTEST ?= $(PYTHON_DIRNAME)/pytest PYTEST_OPTIONS ?= --capture=no --cov=$(PACKAGE) --cov-report html SPHINX_BUILD ?= $(PYTHON_DIRNAME)/sphinx-build @@ -18,9 +19,9 @@ SPHINX_SOURCEDIR ?= docs SPHINX_BUILDDIR ?= $(SPHINX_SOURCEDIR)/_build YAPF ?= $(PYTHON) -m yapf YAPF_OPTIONS ?= -rip -VERSION ?= $(shell git describe 2>/dev/null || echo dev) +SPHINX_AUTOBUILD ?= $(PYTHON_DIRNAME)/sphinx-autobuild -.PHONY: $(SPHINX_SOURCEDIR) clean format install install-dev test update update-requirements +.PHONY: $(SPHINX_SOURCEDIR) clean format install install-dev test update update-requirements watch-$(SPHINX_SOURCEDIR) # Installs the local project dependencies. install: @@ -57,3 +58,6 @@ $(SPHINX_SOURCEDIR): install-dev format: install-dev $(YAPF) $(YAPF_OPTIONS) . 
$(YAPF) $(YAPF_OPTIONS) Projectfile + +watch-$(SPHINX_SOURCEDIR): + $(SPHINX_AUTOBUILD) $(SPHINX_SOURCEDIR) $(shell mktemp -d) diff --git a/Projectfile b/Projectfile index c812fc1..1145c9d 100644 --- a/Projectfile +++ b/Projectfile @@ -2,6 +2,7 @@ from medikit import require +make = require('make') pytest = require('pytest') python = require('python') sphinx = require('sphinx') @@ -49,6 +50,7 @@ python.add_requirements( 'cookiecutter >=1.5,<1.6', 'pytest-sugar >=0.8,<0.9', 'pytest-timeout >=1,<2', + 'sphinx-sitemap >=0.2,<0.3', ], docker=[ 'bonobo-docker', @@ -59,4 +61,11 @@ python.add_requirements( ] ) +@listen(make.on_generate) +def on_make_generate(event): + event.makefile['SPHINX_AUTOBUILD'] = '$(PYTHON_DIRNAME)/sphinx-autobuild' + event.makefile.add_target('watch-$(SPHINX_SOURCEDIR)', ''' + $(SPHINX_AUTOBUILD) $(SPHINX_SOURCEDIR) $(shell mktemp -d) + ''', phony=True) + # vim: ft=python: diff --git a/config/conda.yml b/config/conda.yml index 09b92de..610064e 100644 --- a/config/conda.yml +++ b/config/conda.yml @@ -9,6 +9,7 @@ dependencies: - fs ==2.0.3 - psutil ==5.2.2 - requests ==2.13.0 + - sphinx-sitemap==0.2 - stevedore ==1.21.0 # for examples - pycountry ==17.9.23 diff --git a/docs/conf.py b/docs/conf.py index afbbe83..50b1c5c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,8 +20,11 @@ extensions = [ 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', 'sphinx.ext.graphviz', + 'sphinx_sitemap', ] +site_url = 'http://docs.bonobo-project.org/en/master/' + # Add any paths that contain templates here, relative to this directory. 
templates_path = ['_templates'] diff --git a/requirements-dev.txt b/requirements-dev.txt index 553fefc..4e491e3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,34 +1,38 @@ -e .[dev] alabaster==0.7.10 -arrow==0.10.0 +arrow==0.12.0 +attrs==17.3.0 babel==2.5.1 binaryornot==0.4.4 -certifi==2017.7.27.1 +certifi==2017.11.5 chardet==3.0.4 click==6.7 cookiecutter==1.5.1 -coverage==4.4.1 +coverage==4.4.2 docutils==0.14 future==0.16.0 idna==2.6 imagesize==0.7.1 jinja2-time==0.2.0 -jinja2==2.9.6 +jinja2==2.10 markupsafe==1.0 +pluggy==0.6.0 poyo==0.4.1 -py==1.4.34 +py==1.5.2 pygments==2.2.0 pytest-cov==2.5.1 pytest-sugar==0.8.0 -pytest-timeout==1.2.0 -pytest==3.2.3 +pytest-timeout==1.2.1 +pytest==3.3.1 python-dateutil==2.6.1 -pytz==2017.2 +pytz==2017.3 requests==2.18.4 six==1.11.0 snowballstemmer==1.2.1 +sphinx-sitemap==0.2 sphinx==1.6.5 sphinxcontrib-websupport==1.0.1 termcolor==1.1.0 urllib3==1.22 whichcraft==0.4.1 +yapf==0.20.0 diff --git a/requirements-docker.txt b/requirements-docker.txt index d6f1160..1748f0b 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -1,22 +1,20 @@ -e .[docker] appdirs==1.4.3 bonobo-docker==0.5.0 -certifi==2017.7.27.1 +certifi==2017.11.5 chardet==3.0.4 -click==6.7 colorama==0.3.9 docker-pycreds==0.2.1 docker==2.3.0 -fs==2.0.12 +fs==2.0.17 idna==2.6 packaging==16.8 pbr==3.1.1 -psutil==5.4.0 +psutil==5.4.2 pyparsing==2.2.0 -python-dotenv==0.7.1 -pytz==2017.2 +pytz==2017.3 requests==2.18.4 six==1.11.0 -stevedore==1.27.1 +stevedore==1.28.0 urllib3==1.22 websocket-client==0.44.0 diff --git a/requirements-jupyter.txt b/requirements-jupyter.txt index 4e1d024..1ed0eb1 100644 --- a/requirements-jupyter.txt +++ b/requirements-jupyter.txt @@ -1,38 +1,38 @@ -e .[jupyter] appnope==0.1.0 -bleach==2.1.1 +bleach==2.1.2 decorator==4.1.2 entrypoints==0.2.3 -html5lib==0.999999999 -ipykernel==4.6.1 +html5lib==1.0.1 +ipykernel==4.7.0 ipython-genutils==0.2.0 ipython==6.2.1 ipywidgets==6.0.1 jedi==0.11.0 -jinja2==2.9.6 
+jinja2==2.10 jsonschema==2.6.0 jupyter-client==5.1.0 jupyter-console==5.2.0 -jupyter-core==4.3.0 +jupyter-core==4.4.0 jupyter==1.0.0 markupsafe==1.0 -mistune==0.8 +mistune==0.8.3 nbconvert==5.3.1 nbformat==4.4.0 -notebook==5.2.0 +notebook==5.2.2 pandocfilters==1.4.2 parso==0.1.0 -pexpect==4.2.1 +pexpect==4.3.0 pickleshare==0.7.4 prompt-toolkit==1.0.15 ptyprocess==0.5.2 pygments==2.2.0 python-dateutil==2.6.1 -pyzmq==16.0.2 +pyzmq==16.0.3 qtconsole==4.3.1 simplegeneric==0.8.1 six==1.11.0 -terminado==0.6 +terminado==0.8.1 testpath==0.3.1 tornado==4.5.2 traitlets==4.3.2 diff --git a/requirements.txt b/requirements.txt index 13d5113..b882953 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,16 @@ -e . appdirs==1.4.3 -certifi==2017.7.27.1 +certifi==2017.11.5 chardet==3.0.4 colorama==0.3.9 -fs==2.0.12 +fs==2.0.17 idna==2.6 packaging==16.8 pbr==3.1.1 -psutil==5.4.0 +psutil==5.4.2 pyparsing==2.2.0 -pytz==2017.2 +pytz==2017.3 requests==2.18.4 six==1.11.0 -stevedore==1.27.1 +stevedore==1.28.0 urllib3==1.22 diff --git a/setup.py b/setup.py index 7b513dc..359513d 100644 --- a/setup.py +++ b/setup.py @@ -43,6 +43,14 @@ else: setup( author='Romain Dorgueil', author_email='romain@dorgueil.net', + data_files=[ + ( + 'share/jupyter/nbextensions/bonobo-jupyter', [ + 'bonobo/ext/jupyter/static/extension.js', 'bonobo/ext/jupyter/static/index.js', + 'bonobo/ext/jupyter/static/index.js.map' + ] + ) + ], description=('Bonobo, a simple, modern and atomic extract-transform-load toolkit for ' 'python 3.5+.'), license='Apache License, Version 2.0', @@ -60,7 +68,7 @@ setup( 'dev': [ 'cookiecutter (>= 1.5, < 1.6)', 'coverage (>= 4.4, < 5.0)', 'pytest (>= 3.1, < 4.0)', 'pytest-cov (>= 2.5, < 3.0)', 'pytest-sugar (>= 0.8, < 0.9)', 'pytest-timeout (>= 1, < 2)', - 'sphinx (>= 1.6, < 2.0)' + 'sphinx (>= 1.6, < 2.0)', 'sphinx-sitemap (>= 0.2, < 0.3)', 'yapf' ], 'docker': ['bonobo-docker'], 'jupyter': ['ipywidgets (>= 6.0.0, < 7)', 'jupyter (>= 1.0, < 1.1)'] From 
f4c2e763dc038382bd99187911e9aa89e6a9090e Mon Sep 17 00:00:00 2001 From: Zeyi Wang Date: Mon, 11 Dec 2017 20:50:13 +0800 Subject: [PATCH 04/11] Fix a small typo ill -> will --- docs/tutorial/tut03.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/tut03.rst b/docs/tutorial/tut03.rst index 4875bd8..61366c2 100644 --- a/docs/tutorial/tut03.rst +++ b/docs/tutorial/tut03.rst @@ -13,7 +13,7 @@ Class-based transformations and configurables Bonobo is a bit dumb. If something is callable, it considers it can be used as a transformation, and it's up to the user to provide callables that logically fits in a graph. -You can use plain python objects with a `__call__()` method, and it ill just work. +You can use plain python objects with a `__call__()` method, and it will just work. As a lot of transformations needs common machinery, there is a few tools to quickly build transformations, most of them requiring your class to subclass :class:`bonobo.config.Configurable`. From 53f8757b90a0d54a7dafe3dba5c726428571cb8f Mon Sep 17 00:00:00 2001 From: matthewgodding <34448893+matthewgodding@users.noreply.github.com> Date: Mon, 11 Dec 2017 14:10:04 +0000 Subject: [PATCH 05/11] Update tut01.rst small typo fixes --- docs/tutorial/tut01.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tutorial/tut01.rst b/docs/tutorial/tut01.rst index d6aa604..75695ac 100644 --- a/docs/tutorial/tut01.rst +++ b/docs/tutorial/tut01.rst @@ -172,8 +172,8 @@ strategy). Actual behavior of an execution will depend on the strategy chosen, b cases. ④ Before actually executing the `transformations`, the `ExecutorStrategy` instance will wrap each component in an -`execution context`, whose responsibility is to hold the state of the transformation. It enables to keep the -`transformations` stateless, while allowing to add an external state if required. We'll expand on this later. 
+`execution context`, whose responsibility is to hold the state of the transformation. It enables you to keep the +`transformations` stateless, while allowing you to add an external state if required. We'll expand on this later. Concepts and definitions :::::::::::::::::::::::: From e93ea8a80381a76c7d77e1c862e9425f2fdffd7e Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 8 Jan 2018 08:02:38 +0100 Subject: [PATCH 06/11] Update stable doc with warnings. --- docs/tutorial/0.5/_outdated_note.rst | 9 ++++ docs/tutorial/0.5/index.rst | 65 ++++++++++++++++++++++++++++ docs/tutorial/0.5/python.rst | 13 ++++++ docs/tutorial/{ => 0.5}/tut01.rst | 27 +++++------- docs/tutorial/{ => 0.5}/tut02.rst | 10 ++++- docs/tutorial/{ => 0.5}/tut03.rst | 10 +++-- docs/tutorial/{ => 0.5}/tut04.rst | 2 + docs/tutorial/2-jobs.rst | 2 + docs/tutorial/3-files.rst | 2 + docs/tutorial/4-services.rst | 1 + docs/tutorial/5-packaging.rst | 2 + docs/tutorial/_wip_note.rst | 12 +++++ docs/tutorial/django.rst | 5 +++ docs/tutorial/index.rst | 1 + docs/tutorial/notebooks.rst | 4 ++ docs/tutorial/sqlalchemy.rst | 3 ++ 16 files changed, 148 insertions(+), 20 deletions(-) create mode 100644 docs/tutorial/0.5/_outdated_note.rst create mode 100644 docs/tutorial/0.5/index.rst create mode 100644 docs/tutorial/0.5/python.rst rename docs/tutorial/{ => 0.5}/tut01.rst (88%) rename docs/tutorial/{ => 0.5}/tut02.rst (91%) rename docs/tutorial/{ => 0.5}/tut03.rst (97%) rename docs/tutorial/{ => 0.5}/tut04.rst (99%) create mode 100644 docs/tutorial/_wip_note.rst diff --git a/docs/tutorial/0.5/_outdated_note.rst b/docs/tutorial/0.5/_outdated_note.rst new file mode 100644 index 0000000..9aeae82 --- /dev/null +++ b/docs/tutorial/0.5/_outdated_note.rst @@ -0,0 +1,9 @@ +.. warning:: + + This tutorial was written for |bonobo| 0.5, while the current stable version is |bonobo| 0.6. + + Please be aware that some things changed. + + A summary of changes is available in the `migration guide from 0.5 to 0.6 `_. 
+ + diff --git a/docs/tutorial/0.5/index.rst b/docs/tutorial/0.5/index.rst new file mode 100644 index 0000000..8bac110 --- /dev/null +++ b/docs/tutorial/0.5/index.rst @@ -0,0 +1,65 @@ +First steps +=========== + +.. include:: _outdated_note.rst + +What is Bonobo? +::::::::::::::: + +Bonobo is an ETL (Extract-Transform-Load) framework for python 3.5. The goal is to define data-transformations, with +python code in charge of handling similar shaped independent lines of data. + +Bonobo *is not* a statistical or data-science tool. If you're looking for a data-analysis tool in python, use Pandas. + +Bonobo is a lean manufacturing assembly line for data that let you focus on the actual work instead of the plumbery +(execution contexts, parallelism, error handling, console output, logging, ...). + +Bonobo uses simple python and should be quick and easy to learn. + +Tutorial +:::::::: + +.. note:: + + Good documentation is not easy to write. We do our best to make it better and better. + + Although all content here should be accurate, you may feel a lack of completeness, for which we plead guilty and + apologize. + + If you're stuck, please come and ask on our `slack channel `_, we'll figure + something out. + + If you're not stuck but had trouble understanding something, please consider contributing to the docs (via GitHub + pull requests). + +.. toctree:: + :maxdepth: 2 + + tut01 + tut02 + tut03 + tut04 + + +What's next? +:::::::::::: + +Read a few examples +------------------- + +* :doc:`/reference/examples` + +Read about best development practices +------------------------------------- + +* :doc:`/guide/index` +* :doc:`/guide/purity` + +Read about integrating external tools with bonobo +------------------------------------------------- + +* :doc:`/extension/docker`: run transformation graphs in isolated containers. +* :doc:`/extension/jupyter`: run transformations within jupyter notebooks. 
+* :doc:`/extension/selenium`: crawl the web using a real browser and work with the gathered data. +* :doc:`/extension/sqlalchemy`: everything you need to interact with SQL databases. + diff --git a/docs/tutorial/0.5/python.rst b/docs/tutorial/0.5/python.rst new file mode 100644 index 0000000..a1b5a57 --- /dev/null +++ b/docs/tutorial/0.5/python.rst @@ -0,0 +1,13 @@ +Just enough Python for Bonobo +============================= + +.. include:: _outdated_note.rst + +.. todo:: + + This is a work in progress and it is not yet available. Please come back later or even better, help us write this + guide! + + This guide is intended to help programmers or enthusiasts to grasp the python basics necessary to use Bonobo. It + should definitely not be considered as a general python introduction, nor a deep dive into details. + diff --git a/docs/tutorial/tut01.rst b/docs/tutorial/0.5/tut01.rst similarity index 88% rename from docs/tutorial/tut01.rst rename to docs/tutorial/0.5/tut01.rst index 836ddad..e50620d 100644 --- a/docs/tutorial/tut01.rst +++ b/docs/tutorial/0.5/tut01.rst @@ -1,7 +1,10 @@ Let's get started! ================== -To get started with Bonobo, you need to install it in a working python 3.5+ environment: +.. include:: _outdated_note.rst + +To begin with Bonobo, you need to install it in a working python 3.5+ environment, and you'll also need cookiecutter +to bootstrap your project. .. code-block:: shell-session @@ -13,24 +16,21 @@ See :doc:`/install` for more options. Create an empty project ::::::::::::::::::::::: -Your ETL code will live in standard python files and packages. +Your ETL code will live in ETL projects, which are basically a bunch of files, including python code, that bonobo +can run. .. code-block:: shell-session - $ bonobo create tutorial.py + $ bonobo init tutorial -This will create a simple example job in a `tutorial.py` file. +This will create a `tutorial` directory (`content description here `_).
-Now, try to execute it: +To run this project, use: .. code-block:: shell-session - $ python tutorial.py + $ bonobo run tutorial -Congratulations, you just ran your first ETL job! - - -.. todo:: XXX **CHANGES NEEDED BELOW THIS POINTS BEFORE 0.6** XXX Write a first transformation :::::::::::::::::::::::::::: @@ -107,9 +107,6 @@ To do this, it needs to know what data-flow you want to achieve, and you'll use The `if __name__ == '__main__':` section is not required, unless you want to run it directly using the python interpreter. - The name of the `graph` variable is arbitrary, but this variable must be global and available unconditionally. - Do not put it in its own function or in the `if __name__ == '__main__':` section. - Execute the job ::::::::::::::: @@ -133,9 +130,9 @@ Rewrite it using builtins There is a much simpler way to describe an equivalent graph: .. literalinclude:: ../../bonobo/examples/tutorials/tut01e02.py -:language: python + :language: python - The `extract()` generator has been replaced by a list, as Bonobo will interpret non-callable iterables as a no-input +The `extract()` generator has been replaced by a list, as Bonobo will interpret non-callable iterables as a no-input generator. This example is also available in :mod:`bonobo.examples.tutorials.tut01e02`, and you can also run it as a module: diff --git a/docs/tutorial/tut02.rst b/docs/tutorial/0.5/tut02.rst similarity index 91% rename from docs/tutorial/tut02.rst rename to docs/tutorial/0.5/tut02.rst index 7f51558..3617005 100644 --- a/docs/tutorial/tut02.rst +++ b/docs/tutorial/0.5/tut02.rst @@ -1,6 +1,8 @@ Working with files ================== +.. include:: _outdated_note.rst + Bonobo would be pointless if the aim was just to uppercase small lists of strings. In fact, Bonobo should not be used if you don't expect any gain from parallelization/distribution of tasks. @@ -59,7 +61,13 @@ available in **Bonobo**'s repository: .. 
code-block:: shell-session - $ bonobo download examples/datasets/coffeeshops.txt + $ curl https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/datasets/coffeeshops.txt > `python3 -c 'import bonobo; print(bonobo.get_examples_path("datasets/coffeeshops.txt"))'` + +.. note:: + + The "example dataset download" step will be easier in the future. + + https://github.com/python-bonobo/bonobo/issues/134 .. literalinclude:: ../../bonobo/examples/tutorials/tut02e01_read.py :language: python diff --git a/docs/tutorial/tut03.rst b/docs/tutorial/0.5/tut03.rst similarity index 97% rename from docs/tutorial/tut03.rst rename to docs/tutorial/0.5/tut03.rst index 16fa764..8eb7887 100644 --- a/docs/tutorial/tut03.rst +++ b/docs/tutorial/0.5/tut03.rst @@ -1,6 +1,8 @@ Configurables and Services ========================== +.. include:: _outdated_note.rst + .. note:: This section lacks completeness, sorry for that (but you can still read it!). @@ -30,7 +32,7 @@ Configurables allows to use the following features: class PrefixIt(Configurable): prefix = Option(str, positional=True, default='>>>') - def __call__(self, row): + def call(self, row): return self.prefix + ' ' + row prefixer = PrefixIt('$') @@ -48,7 +50,7 @@ Configurables allows to use the following features: url = Option(default='https://jsonplaceholder.typicode.com/users') http = Service('http.client') - def __call__(self, http): + def call(self, http): resp = http.get(self.url) for row in resp.json(): @@ -68,7 +70,7 @@ Configurables allows to use the following features: class Applier(Configurable): apply = Method() - def __call__(self, row): + def call(self, row): return self.apply(row) @Applier @@ -114,7 +116,7 @@ Let's see how to use it, starting from the previous service example: url = Option(default='https://jsonplaceholder.typicode.com/users') http = Service('http.client') - def __call__(self, http): + def call(self, http): resp = http.get(self.url) for row in resp.json(): diff --git 
a/docs/tutorial/tut04.rst b/docs/tutorial/0.5/tut04.rst similarity index 99% rename from docs/tutorial/tut04.rst rename to docs/tutorial/0.5/tut04.rst index 2a1ef71..2ad6557 100644 --- a/docs/tutorial/tut04.rst +++ b/docs/tutorial/0.5/tut04.rst @@ -1,6 +1,8 @@ Working with databases ====================== +.. include:: _outdated_note.rst + Databases (and especially SQL databases here) are not the focus of Bonobo, thus support for it is not (and will never be) included in the main package. Instead, working with databases is done using third party, well maintained and specialized packages, like SQLAlchemy, or other database access libraries from the python cheese shop. diff --git a/docs/tutorial/2-jobs.rst b/docs/tutorial/2-jobs.rst index e7d4baf..4eff084 100644 --- a/docs/tutorial/2-jobs.rst +++ b/docs/tutorial/2-jobs.rst @@ -1,6 +1,8 @@ Part 2: Writing ETL Jobs ======================== +.. include:: _wip_note.rst + What's an ETL job ? ::::::::::::::::::: diff --git a/docs/tutorial/3-files.rst b/docs/tutorial/3-files.rst index 5430f69..a55df76 100644 --- a/docs/tutorial/3-files.rst +++ b/docs/tutorial/3-files.rst @@ -1,6 +1,8 @@ Part 3: Working with Files ========================== +.. include:: _wip_note.rst + * Filesystems * Reading files diff --git a/docs/tutorial/4-services.rst b/docs/tutorial/4-services.rst index 097d0f6..9d36315 100644 --- a/docs/tutorial/4-services.rst +++ b/docs/tutorial/4-services.rst @@ -1,6 +1,7 @@ Part 4: Services and Configurables ================================== +.. include:: _wip_note.rst In the last section, we used a few new tools. diff --git a/docs/tutorial/5-packaging.rst b/docs/tutorial/5-packaging.rst index d0e29d5..198f1b4 100644 --- a/docs/tutorial/5-packaging.rst +++ b/docs/tutorial/5-packaging.rst @@ -1,6 +1,8 @@ Part 5: Projects and Packaging ============================== +.. include:: _wip_note.rst + Until then, we worked with one file managing a job. 
Real life often involves more complicated setups, with relations and imports between different files. diff --git a/docs/tutorial/_wip_note.rst b/docs/tutorial/_wip_note.rst new file mode 100644 index 0000000..2c9ca9c --- /dev/null +++ b/docs/tutorial/_wip_note.rst @@ -0,0 +1,12 @@ +.. warning:: + + This section is being rewritten for |bonobo| 0.6, and it's now in a "work in progress" state. + + You can read :doc:`the tutorial for the previous version (0.5) <0.5/index>`. Please note that things changed a bit + since then and you'll have quirks here and there. + + You can also read the `migration guide from 0.5 to 0.6 `_ + that will give you a good overview of the changes. + + Hopefully, this document will be updated soon, and please accept our apologies about this doc status until then. + diff --git a/docs/tutorial/django.rst b/docs/tutorial/django.rst index 1be4f52..c1c1552 100644 --- a/docs/tutorial/django.rst +++ b/docs/tutorial/django.rst @@ -1,3 +1,8 @@ Working with Django =================== +.. warning:: + + This section does not exist yet, but it's in the plans to write it quite soon. Meanwhile, you can check the source + code and other links provided below. + diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst index 6f57dc1..c4ae9e3 100644 --- a/docs/tutorial/index.rst +++ b/docs/tutorial/index.rst @@ -53,3 +53,4 @@ out. If you're not stuck but had trouble understanding something, please consider contributing to the docs (using GitHub pull requests). +.. include:: _wip_note.rst diff --git a/docs/tutorial/notebooks.rst b/docs/tutorial/notebooks.rst index ed59121..cc5d21b 100644 --- a/docs/tutorial/notebooks.rst +++ b/docs/tutorial/notebooks.rst @@ -1,4 +1,8 @@ Working with Jupyter Notebooks ============================== +.. warning:: + + This section does not exist yet, but it's in the plans to write it quite soon. Meanwhile, you can check the source + code and other links provided below. 
diff --git a/docs/tutorial/sqlalchemy.rst b/docs/tutorial/sqlalchemy.rst index 359fbd5..be9e4bb 100644 --- a/docs/tutorial/sqlalchemy.rst +++ b/docs/tutorial/sqlalchemy.rst @@ -1,4 +1,7 @@ Working with SQL Databases ========================== +.. warning:: + This section does not exist yet, but it's in the plans to write it quite soon. Meanwhile, you can check the source + code and other links provided below. From 82aab703c21d67be224342b707f62e6ad774a2ce Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 8 Jan 2018 08:20:28 +0100 Subject: [PATCH 07/11] Update deps. --- docs/tutorial/django.rst | 15 +++++++++++++++ docs/tutorial/sqlalchemy.rst | 1 + readthedocs-conda.yml | 16 ++++++++++------ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/docs/tutorial/django.rst b/docs/tutorial/django.rst index c1c1552..ddb8d05 100644 --- a/docs/tutorial/django.rst +++ b/docs/tutorial/django.rst @@ -6,3 +6,18 @@ Working with Django This section does not exist yet, but it's in the plans to write it quite soon. Meanwhile, you can check the source code and other links provided below. +Source code +::::::::::: + +https://github.com/python-bonobo/bonobo/tree/master/bonobo/contrib/django + + +bonobo.contrib.django +::::::::::::::::::::: + +.. automodule:: bonobo.contrib.django + :members: + :undoc-members: + :show-inheritance: + + diff --git a/docs/tutorial/sqlalchemy.rst b/docs/tutorial/sqlalchemy.rst index be9e4bb..4368066 100644 --- a/docs/tutorial/sqlalchemy.rst +++ b/docs/tutorial/sqlalchemy.rst @@ -5,3 +5,4 @@ Working with SQL Databases This section does not exist yet, but it's in the plans to write it quite soon. Meanwhile, you can check the source code and other links provided below. 
+ diff --git a/readthedocs-conda.yml b/readthedocs-conda.yml index 81d72c7..fbe1b16 100644 --- a/readthedocs-conda.yml +++ b/readthedocs-conda.yml @@ -6,23 +6,27 @@ dependencies: - wheel=0.29.0 - pip: - appdirs==1.4.3 - - certifi==2017.7.27.1 + - certifi==2017.11.5 - chardet==3.0.4 - colorama==0.3.9 - - fs==2.0.12 + - fs==2.0.17 + - graphviz==0.8.2 - idna==2.6 - - jinja2==2.9.6 + - jinja2==2.10 - markupsafe==1.0 - - mondrian==0.4.0 + - mondrian==0.6.1 - packaging==16.8 - pbr==3.1.1 - - psutil==5.4.0 + - psutil==5.4.3 - pyparsing==2.2.0 + - python-slugify==1.2.4 - pytz==2017.3 - requests==2.18.4 - six==1.11.0 - - stevedore==1.27.1 + - stevedore==1.28.0 + - unidecode==1.0.22 - urllib3==1.22 - whistle==1.0.0 # for examples - pycountry ==17.9.23 + From 133831d1570b8274c6de743e7256141d160b6b5e Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 8 Jan 2018 08:24:01 +0100 Subject: [PATCH 08/11] Add doc deps. --- readthedocs-conda.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/readthedocs-conda.yml b/readthedocs-conda.yml index fbe1b16..6552777 100644 --- a/readthedocs-conda.yml +++ b/readthedocs-conda.yml @@ -27,6 +27,11 @@ dependencies: - unidecode==1.0.22 - urllib3==1.22 - whistle==1.0.0 + # for docs + - alabaster==0.7.10 + - sphinx-sitemap==0.2 + - sphinx==1.6.5 + - sphinxcontrib-websupport==1.0.1 # for examples - pycountry ==17.9.23 From 0a9a27ae08814da61e90cc56c871f00bc4cb5319 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 8 Jan 2018 08:31:07 +0100 Subject: [PATCH 09/11] Links in doc. --- docs/tutorial/django.rst | 5 +++-- docs/tutorial/notebooks.rst | 9 +++++++-- docs/tutorial/sqlalchemy.rst | 11 +++++++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/docs/tutorial/django.rst b/docs/tutorial/django.rst index ddb8d05..d5f7d32 100644 --- a/docs/tutorial/django.rst +++ b/docs/tutorial/django.rst @@ -3,8 +3,9 @@ Working with Django .. 
warning:: - This section does not exist yet, but it's in the plans to write it quite soon. Meanwhile, you can check the source - code and other links provided below. + This section does not exist yet, but it's in the plans to write it quite soon. + + Meanwhile, you can check the source code and other links provided below. Source code ::::::::::: diff --git a/docs/tutorial/notebooks.rst b/docs/tutorial/notebooks.rst index cc5d21b..ff141e1 100644 --- a/docs/tutorial/notebooks.rst +++ b/docs/tutorial/notebooks.rst @@ -3,6 +3,11 @@ Working with Jupyter Notebooks .. warning:: - This section does not exist yet, but it's in the plans to write it quite soon. Meanwhile, you can check the source - code and other links provided below. + This section does not exist yet, but it's in the plans to write it quite soon. + Meanwhile, you can check the source code and other links provided below. + +Source code +::::::::::: + +https://github.com/python-bonobo/bonobo/tree/master/bonobo/contrib/jupyter diff --git a/docs/tutorial/sqlalchemy.rst b/docs/tutorial/sqlalchemy.rst index 4368066..23f1078 100644 --- a/docs/tutorial/sqlalchemy.rst +++ b/docs/tutorial/sqlalchemy.rst @@ -3,6 +3,13 @@ Working with SQL Databases .. warning:: - This section does not exist yet, but it's in the plans to write it quite soon. Meanwhile, you can check the source - code and other links provided below. + This section does not exist yet, but it's in the plans to write it quite soon. + + Meanwhile, you can check the source code and other links provided below. + + +Source code +::::::::::: + +https://github.com/python-bonobo/bonobo-sqlalchemy From c1ffbe7b5f7df6d1a065403365548ca77993c9cc Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 10 Jan 2018 06:18:41 +0100 Subject: [PATCH 10/11] Working on 0.6 documentation. 
--- docs/guide/_toc.rst | 11 +++ docs/guide/debugging.rst | 0 docs/guide/graphs.rst | 41 ++++++++ docs/guide/index.rst | 11 +-- docs/tutorial/2-jobs.rst | 179 ++++++++++++++++++++++++---------- docs/tutorial/3-files.rst | 45 ++++++++- docs/tutorial/4-services.rst | 2 +- docs/tutorial/5-packaging.rst | 17 +++- docs/tutorial/_todo.rst | 3 + docs/tutorial/docker.rst | 16 +++ 10 files changed, 265 insertions(+), 60 deletions(-) create mode 100644 docs/guide/_toc.rst create mode 100644 docs/guide/debugging.rst create mode 100644 docs/tutorial/_todo.rst create mode 100644 docs/tutorial/docker.rst diff --git a/docs/guide/_toc.rst b/docs/guide/_toc.rst new file mode 100644 index 0000000..c98a1ee --- /dev/null +++ b/docs/guide/_toc.rst @@ -0,0 +1,11 @@ +.. toctree:: + :maxdepth: 2 + + introduction + transformations + graphs + services + environment + purity + debugging + plugins diff --git a/docs/guide/debugging.rst b/docs/guide/debugging.rst new file mode 100644 index 0000000..e69de29 diff --git a/docs/guide/graphs.rst b/docs/guide/graphs.rst index 3d2d4b1..019c96d 100644 --- a/docs/guide/graphs.rst +++ b/docs/guide/graphs.rst @@ -5,6 +5,47 @@ Graphs are the glue that ties transformations together. They are the only data-s must be acyclic, and can contain as many nodes as your system can handle. However, although in theory the number of nodes can be rather high, practical use cases usually do not exceed more than a few hundred nodes and only then in extreme cases. + + + +Each node of a graph will be executed in isolation from the other nodes, and the data is passed from one node to the +next using FIFO queues, managed by the framework. It's transparent to the end-user, though, and you'll only use +function arguments (for inputs) and return/yield values (for outputs). + +Each input row of a node will cause one call to this node's callable. 
Each output is cast internally as a tuple-like +data structure (or more precisely, a namedtuple-like data structure), and for one given node, each output row must +have the same structure. + +If you return/yield something which is not a tuple, bonobo will create a tuple of one element. + +Properties +---------- + +|bonobo| assists you with defining the data-flow of your data engineering process, and then streams data through your +callable graphs. + +* Each node call will process one row of data. +* Queues that flow the data between nodes are first-in, first-out (FIFO) standard python :class:`queue.Queue`. +* Each node will run in parallel. +* Default execution strategy uses threading, and each node will run in a separate thread. + +Fault tolerance +--------------- + +Node execution is fault tolerant. + +If an exception is raised from a node call, then this node call will be aborted but bonobo will continue the execution +with the next row (after outputting the stack trace and incrementing the "err" counter for the node context). + +This makes it possible to have ETL jobs that ignore faulty data and try their best to process the valid rows of a dataset. + +Some errors are fatal, though. + +If you pass a 2-element tuple to a node that takes 3 args, |bonobo| will raise an :class:`bonobo.errors.UnrecoverableTypeError`, and exit the +current graph execution as fast as it can (finishing the other node executions that are in progress first, but not +starting new ones if there are remaining input rows). + + Definitions ::::::::::: diff --git a/docs/guide/index.rst b/docs/guide/index.rst index 360ed61..e95cf47 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -3,13 +3,8 @@ Guides This section will guide you through your journey with Bonobo ETL. -.. toctree:: - :maxdepth: 2 - introduction - transformations - graphs - services - environment - purity +..
include:: _toc.rst + + diff --git a/docs/tutorial/2-jobs.rst b/docs/tutorial/2-jobs.rst index 4eff084..fbfe4b5 100644 --- a/docs/tutorial/2-jobs.rst +++ b/docs/tutorial/2-jobs.rst @@ -1,61 +1,138 @@ Part 2: Writing ETL Jobs ======================== -.. include:: _wip_note.rst +In |bonobo|, an ETL job is a graph with some logic to execute it, like the file we created in the previous section. -What's an ETL job ? -::::::::::::::::::: - -In |bonobo|, an ETL job is a formal definition of an executable graph. - -Each node of a graph will be executed in isolation from the other nodes, and the data is passed from one node to the -next using FIFO queues, managed by the framework. It's transparent to the end-user, though, and you'll only use -function arguments (for inputs) and return/yield values (for outputs). - -Each input row of a node will cause one call to this node's callable. Each output is cast internally as a tuple-like -data structure (or more precisely, a namedtuple-like data structure), and for one given node, each output row must -have the same structure. - -If you return/yield something which is not a tuple, bonobo will create a tuple of one element. - -Properties ----------- - -|bonobo| assists you with defining the data-flow of your data engineering process, and then streams data through your -callable graphs. - -* Each node call will process one row of data. -* Queues that flows the data between node are first-in, first-out (FIFO) standard python :class:`queue.Queue`. -* Each node will run in parallel -* Default execution strategy use threading, and each node will run in a separate thread. - -Fault tolerance ---------------- - -Node execution is fault tolerant. - -If an exception is raised from a node call, then this node call will be aborted but bonobo will continue the execution -with the next row (after outputing the stack trace and incrementing the "err" counter for the node context). 
- -It allows to have ETL jobs that ignore faulty data and try their best to process the valid rows of a dataset. - -Some errors are fatal, though. - -If you pass a 2 elements tuple to a node that takes 3 args, |bonobo| will raise an :class:`bonobo.errors.UnrecoverableTypeError`, and exit the -current graph execution as fast as it can (finishing the other node executions that are in progress first, but not -starting new ones if there are remaining input rows). +You can learn more about the :class:`bonobo.Graph` data-structure and its properties in the +:doc:`graphs guide `. -Let's write a sample data integration job -::::::::::::::::::::::::::::::::::::::::: +Scenario +:::::::: -Let's create a sample application. +Let's create a sample application, whose goal will be to integrate some data in various systems. -The goal of this application will be to extract all the fablabs in the world using an open-data API, normalize this -data and, for now, display it. We'll then build on this foundation in the next steps to write to files, databases, etc. +We'll use an open-data dataset, containing all the fablabs in the world. + +We will normalize this data using a few different rules, then write it somewhere. + +In this step, we'll focus on getting this data normalized and output to the console. In the next steps, we'll extend it +to other targets, like files, and databases. +Setup +::::: +We'll change the `tutorial.py` file created in the last step to handle this new scenario. + +First, let's remove all boilerplate code, so it looks like this: + +..
code-block:: python + + import bonobo + + + def get_graph(**options): + graph = bonobo.Graph() + return graph + + + def get_services(**options): + return {} + + + if __name__ == '__main__': + parser = bonobo.get_argument_parser() + with bonobo.parse_args(parser) as options: + bonobo.run(get_graph(**options), services=get_services(**options)) + + +Your job now contains the logic for executing an empty graph, and we'll complete this with our application logic. + +Reading the source data +::::::::::::::::::::::: + +Let's add a simple chain to our `get_graph(...)` function, so that it reads from the fablabs open-data api. + +The source dataset we'll use can be found on `this site `_. +It's licensed under `Public Domain`, which makes it just perfect for our example. + +.. note:: + + There is a :mod:`bonobo.contrib.opendatasoft` module that makes reading from OpenDataSoft APIs easier, including + pagination and limits, but for our tutorial, we'll avoid that and build it manually. + +Let's write our extractor: + +.. code-block:: python + + import requests + + FABLABS_API_URL = 'https://public-us.opendatasoft.com/api/records/1.0/search/?dataset=fablabs&rows=1000' + + def extract_fablabs(): + yield from requests.get(FABLABS_API_URL).json().get('records') + +This extractor will get called once, query the API url, parse it as JSON, and yield the items from the "records" list, +one by one. + +.. note:: + + You'll probably want to make it a bit more verbose in a real application, to handle all kinds of errors that can + happen here. What if the server is down? What if it returns a response which is not JSON? What if the data is not + in the expected format? + + For simplicity's sake, we'll ignore that here but those are the kind of questions you should have in mind when writing + pipelines. + +To test our pipeline, let's use a :class:`bonobo.Limit` and a :class:`bonobo.PrettyPrinter`, and change our +`get_graph(...)` function accordingly: + +..
code-block:: python + + import bonobo + + def get_graph(**options): + graph = bonobo.Graph() + graph.add_chain( + extract_fablabs, + bonobo.Limit(10), + bonobo.PrettyPrinter(), + ) + return graph + +Running this job should output a bit of data, along with some statistics. + +First, let's look at the statistics: + +.. code-block:: shell-session + + - extract_fablabs in=1 out=995 [done] + - Limit in=995 out=10 [done] + - PrettyPrinter in=10 out=10 [done] + +It is important to understand that we extracted everything (995 rows), before dropping 99% of the dataset. + +This is OK for debugging, but not efficient. + +.. note:: + + You should always try to limit the amount of data as early as possible, which often means not generating the data + you won't need in the first place. Here, we could have used the `rows=` query parameter in the API URL to not + request the data we would drop anyway. + +Normalize +::::::::: + +.. include:: _todo.rst + +Output +:::::: + +We used :class:`bonobo.PrettyPrinter` to output the data. + +It's a flexible transformation provided that helps you display the content of a stream, and you'll probably use it a +lot for various reasons. Moving forward @@ -63,6 +140,10 @@ Moving forward You now know: -* How to ... +* How to use a reader node. +* How to use the console output. +* How to limit the number of elements in a stream. +* How to pass data from one node to another. +* How to structure a graph using chains. -**Next: :doc:`3-files`** +It's now time to jump to :doc:`3-files`. diff --git a/docs/tutorial/3-files.rst b/docs/tutorial/3-files.rst index a55df76..d5ae047 100644 --- a/docs/tutorial/3-files.rst +++ b/docs/tutorial/3-files.rst @@ -3,6 +3,49 @@ Part 3: Working with Files .. include:: _wip_note.rst +Writing to the console is nice, but using files is probably more realistic. + +Let's see how to use a few builtin writers and both local and remote filesystems.
+ + +Filesystems +::::::::::: + +In |bonobo|, files are accessed within a **filesystem** service which must be something with the same interface as +`fs' FileSystem objects `_. As a default, you'll get an instance +of a local filesystem mapped to the current working directory as the `fs` service. You'll learn more about services in +the next step, but for now, let's just use it. + + +Writing using the service +::::::::::::::::::::::::: + +Although |bonobo| contains helpers to write to common file formats, let's start by writing it manually. + +.. code-block:: python + + from bonobo.config import use + from bonobo.constants import NOT_MODIFIED + + @use('fs') + def write_repr_to_file(*row, fs): + with fs.open('output.txt', 'a+') as f: + print(row, file=f) + return NOT_MODIFIED + +Then, update the `get_graph(...)` function, by adding `write_repr_to_file` just before your `PrettyPrinter()` node. + +Let's try to run that and think about what happens. + +Each time a row comes to this node, the output file is open in "append or create" mode, a line is written, and the file +is closed. + +This is **NOT** how you want to do things. Let's rewrite it so our `open(...)` call becomes execution-wide. + + + + + * Filesystems * Reading files @@ -21,4 +64,4 @@ You now know: * How to ... -**Next: :doc:`4-services`** +It's now time to jump to :doc:`4-services`. diff --git a/docs/tutorial/4-services.rst b/docs/tutorial/4-services.rst index 9d36315..6a25914 100644 --- a/docs/tutorial/4-services.rst +++ b/docs/tutorial/4-services.rst @@ -205,4 +205,4 @@ You now know: * How to ... -**Next: :doc:`5-packaging`** +It's now time to jump to :doc:`5-packaging`. 
diff --git a/docs/tutorial/5-packaging.rst b/docs/tutorial/5-packaging.rst index d0e29d5..198f1b4 100644 --- a/docs/tutorial/5-packaging.rst +++ b/docs/tutorial/5-packaging.rst @@ -15,7 +15,6 @@ kind of project structure, as the targert structure will be dicated by the hosti sub-package would perfectly fit a django or flask project, or even a regular package, but it's up to you to chose the structure of your project. -about using |bonobo| in a pyt is about set of jobs working together within a project. Let's see how to move from the current status to a package. @@ -28,3 +27,19 @@ You now know: * How to ... +That's the end of the tutorial, you should now be familiar with all the basics. + +A few appendixes to the tutorial can explain how to integrate with other systems (we'll use the "fablabs" application +created in this tutorial and extend it): + +* :doc:`notebooks` +* :doc:`sqlalchemy` +* :doc:`django` +* :doc:`docker` + +Then, you can either jump head-first into your code, or get a better grasp of all concepts by +:doc:`reading the full bonobo guide `. + +Happy data flows! + + diff --git a/docs/tutorial/_todo.rst b/docs/tutorial/_todo.rst new file mode 100644 index 0000000..7fd98f6 --- /dev/null +++ b/docs/tutorial/_todo.rst @@ -0,0 +1,3 @@ +.. warning:: + + This section is missing. Sorry, but stay tuned! It'll be added soon. \ No newline at end of file diff --git a/docs/tutorial/docker.rst b/docs/tutorial/docker.rst new file mode 100644 index 0000000..ea28c1a --- /dev/null +++ b/docs/tutorial/docker.rst @@ -0,0 +1,16 @@ +Working with Docker +=================== + +.. warning:: + + This section does not exist yet, but it's in the plans to write it quite soon. + + Meanwhile, you can check the source code and other links provided below.
+ +Source code +::::::::::: + +https://github.com/python-bonobo/bonobo-docker + + + From 8900c567d909595f0074aac4a2f644a5535e122b Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 10 Jan 2018 08:33:12 +0100 Subject: [PATCH 11/11] [core] Fixes bug in graph.add_chain output that would ignore the first node when its index is 0. --- bonobo/structs/graphs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bonobo/structs/graphs.py b/bonobo/structs/graphs.py index 39de1fe..58b78a6 100644 --- a/bonobo/structs/graphs.py +++ b/bonobo/structs/graphs.py @@ -64,7 +64,7 @@ class Graph: if _name in self.named: raise KeyError('Duplicate name {!r} in graph.'.format(_name)) self.named[_name] = _last - if not _first: + if _first is None: _first = _last self.outputs_of(_input, create=True).add(_last) _input = _last