From 3407994e00ddf74fef5f10bbaae1fd1f488a1b51 Mon Sep 17 00:00:00 2001 From: Ben Rudolph Date: Fri, 23 Mar 2018 17:37:11 -0700 Subject: [PATCH 01/14] extract run function --- bonobo/contrib/django/commands.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/bonobo/contrib/django/commands.py b/bonobo/contrib/django/commands.py index eda6141..b54edd8 100644 --- a/bonobo/contrib/django/commands.py +++ b/bonobo/contrib/django/commands.py @@ -44,13 +44,7 @@ class ETLCommand(BaseCommand): def info(self, *args, **kwargs): self.logger.info(*args, **kwargs) - def handle(self, *args, **options): - _stdout_backup, _stderr_backup = self.stdout, self.stderr - - self.stdout = OutputWrapper(ConsoleOutputPlugin._stdout, ending=CLEAR_EOL + '\n') - self.stderr = OutputWrapper(ConsoleOutputPlugin._stderr, ending=CLEAR_EOL + '\n') - self.stderr.style_func = lambda x: Fore.LIGHTRED_EX + Back.RED + '!' + Style.RESET_ALL + ' ' + x - + def run(self, *args, **options): with bonobo.parse_args(options) as options: services = self.get_services() graph_coll = self.get_graph(*args, **options) @@ -65,4 +59,13 @@ class ETLCommand(BaseCommand): print(term.lightblack(' ... return value: ' + str(result))) print() + def handle(self, *args, **options): + _stdout_backup, _stderr_backup = self.stdout, self.stderr + + self.stdout = OutputWrapper(ConsoleOutputPlugin._stdout, ending=CLEAR_EOL + '\n') + self.stderr = OutputWrapper(ConsoleOutputPlugin._stderr, ending=CLEAR_EOL + '\n') + self.stderr.style_func = lambda x: Fore.LIGHTRED_EX + Back.RED + '!' 
+ Style.RESET_ALL + ' ' + x + + self.run(*args, **options) + self.stdout, self.stderr = _stdout_backup, _stderr_backup From d5b3c2e3033142753a4fce841789beb70e006b56 Mon Sep 17 00:00:00 2001 From: Ben Rudolph Date: Fri, 23 Mar 2018 17:43:50 -0700 Subject: [PATCH 02/14] store results --- bonobo/contrib/django/commands.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bonobo/contrib/django/commands.py b/bonobo/contrib/django/commands.py index b54edd8..4edcd09 100644 --- a/bonobo/contrib/django/commands.py +++ b/bonobo/contrib/django/commands.py @@ -45,6 +45,7 @@ class ETLCommand(BaseCommand): self.logger.info(*args, **kwargs) def run(self, *args, **options): + results = [] with bonobo.parse_args(options) as options: services = self.get_services() graph_coll = self.get_graph(*args, **options) @@ -56,9 +57,12 @@ class ETLCommand(BaseCommand): assert isinstance(graph, bonobo.Graph), 'Invalid graph provided.' print(term.lightwhite('{}. {}'.format(i + 1, graph.name))) result = bonobo.run(graph, services=services) + results.append(result) print(term.lightblack(' ... 
return value: ' + str(result))) print() + return results + def handle(self, *args, **options): _stdout_backup, _stderr_backup = self.stdout, self.stderr From 5cab4effeac1c1fe7b7f8dce5c93ae9ba608f63b Mon Sep 17 00:00:00 2001 From: Ben Rudolph Date: Fri, 23 Mar 2018 17:45:47 -0700 Subject: [PATCH 03/14] allow for overriding strategy --- bonobo/contrib/django/commands.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bonobo/contrib/django/commands.py b/bonobo/contrib/django/commands.py index 4edcd09..ac5f387 100644 --- a/bonobo/contrib/django/commands.py +++ b/bonobo/contrib/django/commands.py @@ -41,6 +41,9 @@ class ETLCommand(BaseCommand): def get_services(self): return {} + def get_strategy(self): + return None + def info(self, *args, **kwargs): self.logger.info(*args, **kwargs) @@ -48,6 +51,7 @@ class ETLCommand(BaseCommand): results = [] with bonobo.parse_args(options) as options: services = self.get_services() + strategy = self.get_strategy() graph_coll = self.get_graph(*args, **options) if not isinstance(graph_coll, GeneratorType): @@ -56,7 +60,7 @@ class ETLCommand(BaseCommand): for i, graph in enumerate(graph_coll): assert isinstance(graph, bonobo.Graph), 'Invalid graph provided.' print(term.lightwhite('{}. {}'.format(i + 1, graph.name))) - result = bonobo.run(graph, services=services) + result = bonobo.run(graph, services=services, strategy=strategy) results.append(result) print(term.lightblack(' ... return value: ' + str(result))) print() From 9e0736b2468ebba4e7941b2a4f045a1685c9dd19 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Mon, 21 May 2018 11:56:53 +0200 Subject: [PATCH 04/14] Update requirements. --- Makefile | 2 +- requirements-docker.txt | 2 +- requirements-sqlalchemy.txt | 2 +- requirements.txt | 2 +- setup.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 73a5d55..42c8835 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Generated by Medikit 0.6.1 on 2018-05-16. 
+# Generated by Medikit 0.6.1 on 2018-05-21. # All changes will be overriden. # Edit Projectfile and run “make update” (or “medikit update”) to regenerate. diff --git a/requirements-docker.txt b/requirements-docker.txt index c8466c0..f25e570 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -7,7 +7,7 @@ chardet==3.0.4 colorama==0.3.9 docker-pycreds==0.2.3 docker==2.7.0 -fs==2.0.21 +fs==2.0.23 graphviz==0.8.3 idna==2.6 jinja2==2.10 diff --git a/requirements-sqlalchemy.txt b/requirements-sqlalchemy.txt index b25c586..3d8083d 100644 --- a/requirements-sqlalchemy.txt +++ b/requirements-sqlalchemy.txt @@ -5,7 +5,7 @@ bonobo-sqlalchemy==0.6.0 certifi==2018.4.16 chardet==3.0.4 colorama==0.3.9 -fs==2.0.21 +fs==2.0.23 graphviz==0.8.3 idna==2.6 jinja2==2.10 diff --git a/requirements.txt b/requirements.txt index 56e1d12..89a7968 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ appdirs==1.4.3 certifi==2018.4.16 chardet==3.0.4 colorama==0.3.9 -fs==2.0.21 +fs==2.0.23 graphviz==0.8.3 idna==2.6 jinja2==2.10 diff --git a/setup.py b/setup.py index 6d48947..b5334d2 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -# Generated by Medikit 0.6.1 on 2018-05-16. +# Generated by Medikit 0.6.1 on 2018-05-21. # All changes will be overriden. # Edit Projectfile and run “make update” (or “medikit update”) to regenerate. From a0734724e90d6e9cbbbb817e77dad0526e97b2ce Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 28 Jul 2018 10:02:48 +0100 Subject: [PATCH 05/14] Update 1-init.rst --- docs/tutorial/1-init.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/1-init.rst b/docs/tutorial/1-init.rst index 9fc92f5..8450ada 100644 --- a/docs/tutorial/1-init.rst +++ b/docs/tutorial/1-init.rst @@ -53,7 +53,7 @@ The basic building blocks of |bonobo| are **transformations** and **graphs**. **Graphs** are a set of transformations, with directional links between them to define the data-flow that will happen at runtime. 
-To inspect the graph of your first transformation (you must install graphviz first to do so), run: +To inspect the graph of your first transformation (`you must install graphviz first to do so <https://www.graphviz.org/download/>`_), run: .. code-block:: shell-session From 381d077313f8650ef180848d48688600a7796888 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 28 Jul 2018 10:07:54 +0100 Subject: [PATCH 06/14] Update 1-init.rst --- docs/tutorial/1-init.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/1-init.rst b/docs/tutorial/1-init.rst index 8450ada..93e02a7 100644 --- a/docs/tutorial/1-init.rst +++ b/docs/tutorial/1-init.rst @@ -40,7 +40,7 @@ This will create a simple job in a `tutorial.py` file. Let's run it: - transform in=2 out=2 [done] - load in=2 [done] -If you have a similar result, then congratulations! You just ran your first |bonobo| ETL job. +Congratulations! You just ran your first |bonobo| ETL job. Inspect your graph From 3e39421a25c2f2be33d97273fed044c6361bdea9 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 28 Jul 2018 10:18:08 +0100 Subject: [PATCH 07/14] Update 1-init.rst --- docs/tutorial/1-init.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/tutorial/1-init.rst b/docs/tutorial/1-init.rst index 93e02a7..6f565d1 100644 --- a/docs/tutorial/1-init.rst +++ b/docs/tutorial/1-init.rst @@ -53,7 +53,12 @@ The basic building blocks of |bonobo| are **transformations** and **graphs**. **Graphs** are a set of transformations, with directional links between them to define the data-flow that will happen at runtime. -To inspect the graph of your first transformation (`you must install graphviz first to do so <https://www.graphviz.org/download/>`_), run: +To inspect the graph of your first transformation: + +.. note:: + + You must `install the graphviz software first <https://www.graphviz.org/download/>`_. It is *not* the Python graphviz + package; you must install it using your system's package manager (apt, brew, ...). .. 
code-block:: shell-session From 200b1b630e891de7f522b83abbccf4eb85c75811 Mon Sep 17 00:00:00 2001 From: James Baster Date: Sat, 28 Jul 2018 10:21:53 +0100 Subject: [PATCH 08/14] Add to tutorial - add sleep to code so people can see the status changes --- docs/tutorial/1-init.rst | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/docs/tutorial/1-init.rst b/docs/tutorial/1-init.rst index 93e02a7..2d10893 100644 --- a/docs/tutorial/1-init.rst +++ b/docs/tutorial/1-init.rst @@ -242,6 +242,37 @@ The console output contains two things. a call, but the execution will move to the next row. +However, if you run the tutorial.py it happens too fast and you can't see the status change. Let's add some delays to your code. + +At the top of tutorial.py add a new import and add some delays to the 3 stages: + +.. code-block:: python + + import time + + def extract(): + """Placeholder, change, rename, remove... """ + time.sleep(5) + yield 'hello' + time.sleep(5) + yield 'world' + + + def transform(*args): + """Placeholder, change, rename, remove... """ + time.sleep(5) + yield tuple( + map(str.title, args) + ) + + + def load(*args): + """Placeholder, change, rename, remove... """ + time.sleep(5) + print(*args) + +Now run tutorial.py again, and you can see the status change during the process. + Wrap up ::::::: From c60dd4c7e73c4ac20be2fa5595f64b900130d909 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 28 Jul 2018 10:33:01 +0100 Subject: [PATCH 09/14] Update appveyor.yml --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index f044adb..9cf3107 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -57,7 +57,7 @@ install: # Upgrade to the latest version of pip to avoid it displaying warnings # about it being out of date. 
- - "pip install --disable-pip-version-check --user --upgrade pip" + - "python -m pip install --disable-pip-version-check --user --upgrade pip" # Install the build dependencies of the project. If some dependencies contain # compiled extensions and are not provided as pre-built wheel packages, From 54390107256f514486e37ab759af7b44fcc7bb46 Mon Sep 17 00:00:00 2001 From: James Baster Date: Sat, 28 Jul 2018 11:10:46 +0100 Subject: [PATCH 10/14] Add to tutorial - step 3 should tell reader to explicitly run the example Also add new line when writing so output is clearer --- docs/tutorial/3-files.rst | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/tutorial/3-files.rst b/docs/tutorial/3-files.rst index 7306726..f23faa7 100644 --- a/docs/tutorial/3-files.rst +++ b/docs/tutorial/3-files.rst @@ -41,7 +41,7 @@ Now, we need to write a `writer` transformation, and apply this context processo @use_context_processor(with_opened_file) def write_repr_to_file(f, *row): - f.write(repr(row)) + f.write(repr(row) + "\n") The `f` parameter will contain the value yielded by the context processors, in order of appearance (you can chain multiple context processors). @@ -50,6 +50,20 @@ Please note that the :func:`bonobo.config.use_context_processor` decorator will modify its behaviour. If you want to call it out of the |bonobo| job context, it's your responsibility to provide the right parameters (and here, the opened file). +To run this, change the last stage in the pipeline in get_graph to write_repr_to_file + +.. code-block:: python + + def get_graph(**options): + graph = bonobo.Graph() + graph.add_chain( + extract_fablabs, + bonobo.Limit(10), + write_repr_to_file, + ) + return graph + +Now run tutorial.py and check the output.txt file. 
Using the filesystem :::::::::::::::::::: From b49063d5511206a071419ffd96b7239b5fcfff43 Mon Sep 17 00:00:00 2001 From: James Baster Date: Sat, 28 Jul 2018 11:30:44 +0100 Subject: [PATCH 11/14] Tutorial - gender neutral --- docs/tutorial/4-services.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/4-services.rst b/docs/tutorial/4-services.rst index 967fa51..51b8683 100644 --- a/docs/tutorial/4-services.rst +++ b/docs/tutorial/4-services.rst @@ -3,7 +3,7 @@ Part 4: Services All external dependencies (like filesystems, network clients, database connections, etc.) should be provided to transformations as a service. It allows great flexibility, including the ability to test your transformations isolated -from the external world, and being friendly to the infrastructure guys (and if you're one of them, it's also nice to +from the external world, and being friendly to the infrastructure people (and if you're one of them, it's also nice to treat yourself well). In the last section, we used the `fs` service to access filesystems, we'll go even further by switching our `requests` From dceb898ca0e3005caa592744737d1270abed3a92 Mon Sep 17 00:00:00 2001 From: James Baster Date: Sat, 28 Jul 2018 13:44:51 +0100 Subject: [PATCH 12/14] SQL Alchemy Tutorial - include SQL for a quick start (Also a return statement was missing) --- docs/extension/sqlalchemy.rst | 46 ++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/docs/extension/sqlalchemy.rst b/docs/extension/sqlalchemy.rst index b92ade6..d37ca68 100644 --- a/docs/extension/sqlalchemy.rst +++ b/docs/extension/sqlalchemy.rst @@ -36,6 +36,25 @@ The `sqlalchemy.engine` name is the default name used by the provided transforma example if you need more than one connection) and specify the service name using `engine='myengine'` while building your transformations. +Let's create some tables and add some data. 
(You may need to edit the SQL if your database server uses a different +version of SQL.) + +.. code-block:: sql + + CREATE TABLE test_in ( + id INTEGER PRIMARY KEY NOT NULL, + text TEXT + ); + + CREATE TABLE test_out ( + id INTEGER PRIMARY KEY NOT NULL, + text TEXT + ); + + INSERT INTO test_in (id, text) VALUES (1, 'Cat'); + INSERT INTO test_in (id, text) VALUES (2, 'Dog'); + + There are two transformation classes provided by this extension. One reader, one writer. @@ -50,12 +69,29 @@ Let's select some data: def get_graph(): graph = bonobo.Graph() graph.add_chain( - bonobo_sqlalchemy.Select('SELECT * FROM example', limit=100), + bonobo_sqlalchemy.Select('SELECT * FROM test_in', limit=100), bonobo.PrettyPrinter(), ) + return graph -And let's insert some data: +You should see: +.. code-block:: shell-session + + $ python tutorial.py + ┌ + │ id[0] = 1 + │ text[1] = 'Cat' + └ + ┌ + │ id[0] = 2 + │ text[1] = 'Dog' + └ + - Select in=1 out=2 [done] + - PrettyPrinter in=2 out=2 [done] + + +Now let's insert some data: .. code-block:: python @@ -66,12 +102,14 @@ And let's insert some data: def get_graph(**options): graph = bonobo.Graph() graph.add_chain( - ..., - bonobo_sqlalchemy.InsertOrUpdate('example') + bonobo_sqlalchemy.Select('SELECT * FROM test_in', limit=100), + bonobo_sqlalchemy.InsertOrUpdate('test_out') ) return graph +If you check the `test_out` table, it should now have the data. 
+ Reference ::::::::: From 79b8d516b3b90ac4402f40ed1741832b463c8498 Mon Sep 17 00:00:00 2001 From: Obiamaka Agbaneje Date: Sat, 28 Jul 2018 12:07:30 +0100 Subject: [PATCH 13/14] Add example code for bonobo.CsvReader --- docs/tutorial/3-files.rst | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/docs/tutorial/3-files.rst b/docs/tutorial/3-files.rst index f23faa7..348293a 100644 --- a/docs/tutorial/3-files.rst +++ b/docs/tutorial/3-files.rst @@ -43,8 +43,8 @@ Now, we need to write a `writer` transformation, and apply this context processo def write_repr_to_file(f, *row): f.write(repr(row) + "\n") -The `f` parameter will contain the value yielded by the context processors, in order of appearance (you can chain -multiple context processors). +The `f` parameter will contain the value yielded by the context processors, in order of appearance. You can chain +multiple context processors. To find out how to implement this, check the |bonobo| guides in the documentation. Please note that the :func:`bonobo.config.use_context_processor` decorator will modify the function in place, but won't modify its behaviour. If you want to call it out of the |bonobo| job context, it's your responsibility to provide @@ -65,6 +65,7 @@ To run this, change the last stage in the pipeline in get_graph to write_repr_to Now run tutorial.py and check the output.txt file. + Using the filesystem :::::::::::::::::::: @@ -79,10 +80,10 @@ Let's rewrite our context processor to use it. with context.get_service('fs').open('output.txt', 'w+') as f: yield f -Interface does not change much, but this small change allows the end-user to change the filesystem implementation at -runtime, which is great to handle different environments (local development, staging servers, production, ...). 
+The interface does not change much, but this small change allows the end-user to change the filesystem implementation at +runtime, which is great for handling different environments (local development, staging servers, production, ...). -Note that |bonobo| only provide very few services with default implementation (actually, only `fs` and `http`), but +Note that |bonobo| only provides very few services with default implementation (actually, only `fs` and `http`), but you can define all the services you want, depending on your system. You'll learn more about this in the next tutorial chapter. @@ -136,10 +137,17 @@ function: Reading from files :::::::::::::::::: -Reading from files is done using the same logic as writing, except that you'll probably have only one call to a reader. +Reading from files is done using the same logic as writing, except that you'll probably have only one call to a reader. You can read the file we just wrote by using a :obj:`bonobo.CsvReader` instance: -Our example application does not include reading from files, but you can read the file we just wrote by using a -:obj:`bonobo.CsvReader` instance. +.. code-block:: python + + def get_graph(**options): + graph = bonobo.Graph() + graph.add_chain( + bonobo.CsvReader('output.csv'), + ... + ) + return graph Atomic writes From 6042bf680a6e5bdd2167be389df0a5fd66d2651f Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 28 Jul 2018 14:38:28 +0100 Subject: [PATCH 14/14] Update 3-files.rst --- docs/tutorial/3-files.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/docs/tutorial/3-files.rst b/docs/tutorial/3-files.rst index 348293a..93cbd96 100644 --- a/docs/tutorial/3-files.rst +++ b/docs/tutorial/3-files.rst @@ -150,12 +150,6 @@ Reading from files is done using the same logic as writing, except that you'll p return graph -Atomic writes -::::::::::::: - -.. include:: _todo.rst - - Moving forward ::::::::::::::