diff --git a/Makefile b/Makefile index 43c4934..127d304 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Generated by Medikit 0.6.3 on 2018-07-22. +# Generated by Medikit 0.6.3 on 2018-07-28. # All changes will be overriden. # Edit Projectfile and run “make update” (or “medikit update”) to regenerate. diff --git a/appveyor.yml b/appveyor.yml index 3c79334..9cf3107 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -57,7 +57,7 @@ install: # Upgrade to the latest version of pip to avoid it displaying warnings # about it being out of date. - # - "pip install --disable-pip-version-check --user --upgrade pip" + - "python -m pip install --disable-pip-version-check --user --upgrade pip" # Install the build dependencies of the project. If some dependencies contain # compiled extensions and are not provided as pre-built wheel packages, diff --git a/bonobo/contrib/django/commands.py b/bonobo/contrib/django/commands.py index eda6141..ac5f387 100644 --- a/bonobo/contrib/django/commands.py +++ b/bonobo/contrib/django/commands.py @@ -41,18 +41,17 @@ class ETLCommand(BaseCommand): def get_services(self): return {} + def get_strategy(self): + return None + def info(self, *args, **kwargs): self.logger.info(*args, **kwargs) - def handle(self, *args, **options): - _stdout_backup, _stderr_backup = self.stdout, self.stderr - - self.stdout = OutputWrapper(ConsoleOutputPlugin._stdout, ending=CLEAR_EOL + '\n') - self.stderr = OutputWrapper(ConsoleOutputPlugin._stderr, ending=CLEAR_EOL + '\n') - self.stderr.style_func = lambda x: Fore.LIGHTRED_EX + Back.RED + '!' + Style.RESET_ALL + ' ' + x - + def run(self, *args, **options): + results = [] with bonobo.parse_args(options) as options: services = self.get_services() + strategy = self.get_strategy() graph_coll = self.get_graph(*args, **options) if not isinstance(graph_coll, GeneratorType): @@ -61,8 +60,20 @@ class ETLCommand(BaseCommand): for i, graph in enumerate(graph_coll): assert isinstance(graph, bonobo.Graph), 'Invalid graph provided.' print(term.lightwhite('{}. {}'.format(i + 1, graph.name))) - result = bonobo.run(graph, services=services) + result = bonobo.run(graph, services=services, strategy=strategy) + results.append(result) print(term.lightblack(' ... return value: ' + str(result))) print() + return results + + def handle(self, *args, **options): + _stdout_backup, _stderr_backup = self.stdout, self.stderr + + self.stdout = OutputWrapper(ConsoleOutputPlugin._stdout, ending=CLEAR_EOL + '\n') + self.stderr = OutputWrapper(ConsoleOutputPlugin._stderr, ending=CLEAR_EOL + '\n') + self.stderr.style_func = lambda x: Fore.LIGHTRED_EX + Back.RED + '!' + Style.RESET_ALL + ' ' + x + + self.run(*args, **kwargs) + self.stdout, self.stderr = _stdout_backup, _stderr_backup diff --git a/docs/extension/sqlalchemy.rst b/docs/extension/sqlalchemy.rst index b92ade6..d37ca68 100644 --- a/docs/extension/sqlalchemy.rst +++ b/docs/extension/sqlalchemy.rst @@ -36,6 +36,25 @@ The `sqlalchemy.engine` name is the default name used by the provided transforma example if you need more than one connection) and specify the service name using `engine='myengine'` while building your transformations. +Lets create some tables and add some data. (You may need to edit the SQL if your database server uses a different +version of SQL.) + +.. code-block:: sql + + CREATE TABLE test_in ( + id INTEGER PRIMARY KEY NOT NULL, + text TEXT + ); + + CREATE TABLE test_out ( + id INTEGER PRIMARY KEY NOT NULL, + text TEXT + ); + + INSERT INTO test_in (id, text) VALUES (1, 'Cat'); + INSERT INTO test_in (id, text) VALUES (2, 'Dog'); + + There are two transformation classes provided by this extension. One reader, one writer. @@ -50,12 +69,29 @@ Let's select some data: def get_graph(): graph = bonobo.Graph() graph.add_chain( - bonobo_sqlalchemy.Select('SELECT * FROM example', limit=100), + bonobo_sqlalchemy.Select('SELECT * FROM test_in', limit=100), bonobo.PrettyPrinter(), ) + return graph -And let's insert some data: +You should see: +.. code-block:: shell-session + + $ python tutorial.py + ┌ + │ id[0] = 1 + │ text[1] = 'Cat' + └ + ┌ + │ id[0] = 2 + │ text[1] = 'Dog' + └ + - Select in=1 out=2 [done] + - PrettyPrinter in=2 out=2 [done] + + +Now let's insert some data: .. code-block:: python @@ -66,12 +102,14 @@ And let's insert some data: def get_graph(**options): graph = bonobo.Graph() graph.add_chain( - ..., - bonobo_sqlalchemy.InsertOrUpdate('example') + bonobo_sqlalchemy.Select('SELECT * FROM test_in', limit=100), + bonobo_sqlalchemy.InsertOrUpdate('test_out') ) return graph +If you check the `test_out` table, it should now have the data. + Reference ::::::::: diff --git a/docs/tutorial/1-init.rst b/docs/tutorial/1-init.rst index 9fc92f5..f0f7fbf 100644 --- a/docs/tutorial/1-init.rst +++ b/docs/tutorial/1-init.rst @@ -40,7 +40,7 @@ This will create a simple job in a `tutorial.py` file. Let's run it: - transform in=2 out=2 [done] - load in=2 [done] -If you have a similar result, then congratulations! You just ran your first |bonobo| ETL job. +Congratulations! You just ran your first |bonobo| ETL job. Inspect your graph @@ -53,7 +53,12 @@ The basic building blocks of |bonobo| are **transformations** and **graphs**. **Graphs** are a set of transformations, with directional links between them to define the data-flow that will happen at runtime. -To inspect the graph of your first transformation (you must install graphviz first to do so), run: +To inspect the graph of your first transformation: + +.. note:: + + You must `install the graphviz software first `_. It is _not_ the python's graphviz + package, you must install it using your system's package manager (apt, brew, ...). .. code-block:: shell-session @@ -242,6 +247,37 @@ The console output contains two things. a call, but the execution will move to the next row. +However, if you run the tutorial.py it happens too fast and you can't see the status change. Let's add some delays to your code. + +At the top of tutorial.py add a new import and add some delays to the 3 stages: + +.. code-block:: python + + import time + + def extract(): + """Placeholder, change, rename, remove... """ + time.sleep(5) + yield 'hello' + time.sleep(5) + yield 'world' + + + def transform(*args): + """Placeholder, change, rename, remove... """ + time.sleep(5) + yield tuple( + map(str.title, args) + ) + + + def load(*args): + """Placeholder, change, rename, remove... """ + time.sleep(5) + print(*args) + +Now run tutorial.py again, and you can see the status change during the process. + Wrap up ::::::: diff --git a/docs/tutorial/3-files.rst b/docs/tutorial/3-files.rst index 7306726..93cbd96 100644 --- a/docs/tutorial/3-files.rst +++ b/docs/tutorial/3-files.rst @@ -41,15 +41,30 @@ Now, we need to write a `writer` transformation, and apply this context processo @use_context_processor(with_opened_file) def write_repr_to_file(f, *row): - f.write(repr(row)) + f.write(repr(row) + "\n") -The `f` parameter will contain the value yielded by the context processors, in order of appearance (you can chain -multiple context processors). +The `f` parameter will contain the value yielded by the context processors, in order of appearance. You can chain +multiple context processors. To find about how to implement this, check the |bonobo| guides in the documentation. Please note that the :func:`bonobo.config.use_context_processor` decorator will modify the function in place, but won't modify its behaviour. If you want to call it out of the |bonobo| job context, it's your responsibility to provide the right parameters (and here, the opened file). +To run this, change the last stage in the pipeline in get_graph to write_repr_to_file + +.. code-block:: python + + def get_graph(**options): + graph = bonobo.Graph() + graph.add_chain( + extract_fablabs, + bonobo.Limit(10), + write_repr_to_file, + ) + return graph + +Now run tutorial.py and check the output.txt file. + Using the filesystem :::::::::::::::::::: @@ -65,10 +80,10 @@ Let's rewrite our context processor to use it. with context.get_service('fs').open('output.txt', 'w+') as f: yield f -Interface does not change much, but this small change allows the end-user to change the filesystem implementation at -runtime, which is great to handle different environments (local development, staging servers, production, ...). +The interface does not change much, but this small change allows the end-user to change the filesystem implementation at +runtime, which is great for handling different environments (local development, staging servers, production, ...). -Note that |bonobo| only provide very few services with default implementation (actually, only `fs` and `http`), but +Note that |bonobo| only provides very few services with default implementation (actually, only `fs` and `http`), but you can define all the services you want, depending on your system. You'll learn more about this in the next tutorial chapter. @@ -122,16 +137,17 @@ function: Reading from files :::::::::::::::::: -Reading from files is done using the same logic as writing, except that you'll probably have only one call to a reader. +Reading from files is done using the same logic as writing, except that you'll probably have only one call to a reader. You can read the file we just wrote by using a :obj:`bonobo.CsvReader` instance: -Our example application does not include reading from files, but you can read the file we just wrote by using a -:obj:`bonobo.CsvReader` instance. +.. code-block:: python - -Atomic writes -::::::::::::: - -.. include:: _todo.rst + def get_graph(**options): + graph = bonobo.Graph() + graph.add_chain( + bonobo.CsvReader('output.csv'), + ... + ) + return graph Moving forward diff --git a/docs/tutorial/4-services.rst b/docs/tutorial/4-services.rst index 967fa51..51b8683 100644 --- a/docs/tutorial/4-services.rst +++ b/docs/tutorial/4-services.rst @@ -3,7 +3,7 @@ Part 4: Services All external dependencies (like filesystems, network clients, database connections, etc.) should be provided to transformations as a service. It allows great flexibility, including the ability to test your transformations isolated -from the external world, and being friendly to the infrastructure guys (and if you're one of them, it's also nice to +from the external world, and being friendly to the infrastructure people (and if you're one of them, it's also nice to treat yourself well). In the last section, we used the `fs` service to access filesystems, we'll go even further by switching our `requests` diff --git a/requirements-dev.txt b/requirements-dev.txt index be0cfb2..3b230d9 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -26,7 +26,7 @@ py==1.5.4 pygments==2.2.0 pyparsing==2.2.0 pytest-cov==2.5.1 -pytest-timeout==1.3.0 +pytest-timeout==1.3.1 pytest==3.6.3 python-dateutil==2.7.3 pytz==2018.5 diff --git a/requirements-docker.txt b/requirements-docker.txt index 4600a77..bce3bba 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -7,14 +7,14 @@ chardet==3.0.4 colorama==0.3.9 docker-pycreds==0.3.0 docker==2.7.0 -fs==2.0.25 +fs==2.0.26 graphviz==0.8.4 idna==2.7 jinja2==2.10 markupsafe==1.0 mondrian==0.7.0 packaging==17.1 -pbr==4.1.1 +pbr==4.2.0 psutil==5.4.6 pyparsing==2.2.0 python-slugify==1.2.5 @@ -23,6 +23,7 @@ requests==2.19.1 semantic-version==2.6.0 six==1.11.0 stevedore==1.29.0 +typing==3.6.4 unidecode==1.0.22 urllib3==1.23 websocket-client==0.48.0 diff --git a/requirements-sqlalchemy.txt b/requirements-sqlalchemy.txt index f80fd17..724a70d 100644 --- a/requirements-sqlalchemy.txt +++ b/requirements-sqlalchemy.txt @@ -5,14 +5,14 @@ bonobo-sqlalchemy==0.6.0 certifi==2018.4.16 chardet==3.0.4 colorama==0.3.9 -fs==2.0.25 +fs==2.0.26 graphviz==0.8.4 idna==2.7 jinja2==2.10 markupsafe==1.0 mondrian==0.7.0 packaging==17.1 -pbr==4.1.1 +pbr==4.2.0 psutil==5.4.6 pyparsing==2.2.0 python-slugify==1.2.5 @@ -21,6 +21,7 @@ requests==2.19.1 six==1.11.0 sqlalchemy==1.2.10 stevedore==1.29.0 +typing==3.6.4 unidecode==1.0.22 urllib3==1.23 whistle==1.0.1 diff --git a/requirements.txt b/requirements.txt index faed2d4..23288a3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,14 +3,14 @@ appdirs==1.4.3 certifi==2018.4.16 chardet==3.0.4 colorama==0.3.9 -fs==2.0.25 +fs==2.0.26 graphviz==0.8.4 idna==2.7 jinja2==2.10 markupsafe==1.0 mondrian==0.7.0 packaging==17.1 -pbr==4.1.1 +pbr==4.2.0 psutil==5.4.6 pyparsing==2.2.0 python-slugify==1.2.5 @@ -18,6 +18,7 @@ pytz==2018.5 requests==2.19.1 six==1.11.0 stevedore==1.29.0 +typing==3.6.4 unidecode==1.0.22 urllib3==1.23 whistle==1.0.1 diff --git a/setup.py b/setup.py index 9e0aa4b..e7a2eb6 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -# Generated by Medikit 0.6.3 on 2018-07-22. +# Generated by Medikit 0.6.3 on 2018-07-28. # All changes will be overriden. # Edit Projectfile and run “make update” (or “medikit update”) to regenerate.