From 71386ea30c54119f63dbb700698408d7fec3c2ee Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Thu, 6 Jul 2017 12:46:19 +0200 Subject: [PATCH 1/6] [doc] sqla: move logger usage to service, fix service name. --- docs/tutorial/tut04.rst | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/docs/tutorial/tut04.rst b/docs/tutorial/tut04.rst index 69e1846..6cd7675 100644 --- a/docs/tutorial/tut04.rst +++ b/docs/tutorial/tut04.rst @@ -39,17 +39,21 @@ Open your `_services.py` file and replace the code: .. code-block:: python - import bonobo - import dotenv - + import bonobo, dotenv, logging, os from bonobo_sqlalchemy.util import create_postgresql_engine dotenv.load_dotenv(dotenv.find_dotenv()) + logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) def get_services(): return { - 'fs': bonobo.open_fs(), - 'db': create_postgresql_engine(name='tutorial') + 'fs': bonobo.open_examples_fs('datasets'), + 'fs.output': bonobo.open_fs(), + 'sqlalchemy.engine': create_postgresql_engine(**{ + 'name': 'tutorial', + 'user': 'tutorial', + 'pass': 'tutorial', + }) } The `create_postgresql_engine` is a tiny function building the DSN from reasonable defaults, that you can override @@ -125,15 +129,15 @@ Now, let's use a little trick and add this section to `pgdb.py`: .. 
code-block:: python - import logging, sys - - from bonobo.commands.run import get_default_services + import sys from sqlalchemy import Table, Column, String, Integer, MetaData def main(): + from bonobo.commands.run import get_default_services services = get_default_services(__file__) - - if len(sys.argv) == 2 and sys.argv[1] == 'reset': + if len(sys.argv) == 1: + return bonobo.run(graph, services=services) + elif len(sys.argv) == 2 and sys.argv[1] == 'reset': engine = services.get('sqlalchemy.engine') metadata = MetaData() @@ -145,11 +149,10 @@ Now, let's use a little trick and add this section to `pgdb.py`: Column('address', String(255)), ) - logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) metadata.drop_all(engine) metadata.create_all(engine) else: - return bonobo.run(graph, services=services) + raise NotImplementedError('I do not understand.') if __name__ == '__main__': main() From a1074341394f5f208611d1bb0585dfe3cb695633 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Thu, 6 Jul 2017 12:46:19 +0200 Subject: [PATCH 2/6] [doc] sqla: move logger usage to service, fix service name. --- docs/tutorial/tut04.rst | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/docs/tutorial/tut04.rst b/docs/tutorial/tut04.rst index 69e1846..6cd7675 100644 --- a/docs/tutorial/tut04.rst +++ b/docs/tutorial/tut04.rst @@ -39,17 +39,21 @@ Open your `_services.py` file and replace the code: .. 
code-block:: python - import bonobo - import dotenv - + import bonobo, dotenv, logging, os from bonobo_sqlalchemy.util import create_postgresql_engine dotenv.load_dotenv(dotenv.find_dotenv()) + logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) def get_services(): return { - 'fs': bonobo.open_fs(), - 'db': create_postgresql_engine(name='tutorial') + 'fs': bonobo.open_examples_fs('datasets'), + 'fs.output': bonobo.open_fs(), + 'sqlalchemy.engine': create_postgresql_engine(**{ + 'name': 'tutorial', + 'user': 'tutorial', + 'pass': 'tutorial', + }) } The `create_postgresql_engine` is a tiny function building the DSN from reasonable defaults, that you can override @@ -125,15 +129,15 @@ Now, let's use a little trick and add this section to `pgdb.py`: .. code-block:: python - import logging, sys - - from bonobo.commands.run import get_default_services + import sys from sqlalchemy import Table, Column, String, Integer, MetaData def main(): + from bonobo.commands.run import get_default_services services = get_default_services(__file__) - - if len(sys.argv) == 2 and sys.argv[1] == 'reset': + if len(sys.argv) == 1: + return bonobo.run(graph, services=services) + elif len(sys.argv) == 2 and sys.argv[1] == 'reset': engine = services.get('sqlalchemy.engine') metadata = MetaData() @@ -145,11 +149,10 @@ Now, let's use a little trick and add this section to `pgdb.py`: Column('address', String(255)), ) - logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) metadata.drop_all(engine) metadata.create_all(engine) else: - return bonobo.run(graph, services=services) + raise NotImplementedError('I do not understand.') if __name__ == '__main__': main() From 7f30df93c38ded267f3923799f7a50aaeebc3bca Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Thu, 6 Jul 2017 12:52:19 +0200 Subject: [PATCH 3/6] [doc] sqla tutorial: adds some titles. 
--- docs/tutorial/tut04.rst | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/docs/tutorial/tut04.rst b/docs/tutorial/tut04.rst index 6cd7675..2a1ef71 100644 --- a/docs/tutorial/tut04.rst +++ b/docs/tutorial/tut04.rst @@ -18,8 +18,8 @@ specialized packages, like SQLAlchemy, or other database access libraries from t First, read https://www.bonobo-project.org/with/sqlalchemy for instructions on how to install. You **do need** the bleeding edge version of `bonobo` and `bonobo-sqlalchemy` to make this work. -Additional requirements -::::::::::::::::::::::: +Requirements +:::::::::::: Once you installed `bonobo_sqlalchemy` (read https://www.bonobo-project.org/with/sqlalchemy to use bleeding edge version), install the following additional packages: @@ -62,6 +62,9 @@ file and add values for one or more of `POSTGRES_NAME`, `POSTGRES_USER`, 'POSTGR `POSTGRES_PORT`. Please note that kwargs always have precedence on environment, but that you should prefer using environment variables for anything that is not immutable from one platform to another. +Add database operation to the graph +::::::::::::::::::::::::::::::::::: + Let's create a `tutorial/pgdb.py` job: .. code-block:: python @@ -110,6 +113,9 @@ If we run this transformation (with `bonobo run tutorial/pgdb.py`), we should ge The database we requested do not exist. It is not the role of bonobo to do database administration, and thus there is no tool here to create neither the database, nor the tables we want to use. +Create database and table +::::::::::::::::::::::::: + There are however tools in `sqlalchemy` to manage tables, so we'll create the database by ourselves, and ask sqlalchemy to create the table: @@ -170,6 +176,9 @@ Now run: Database and table should now exist. +Format the data +::::::::::::::: + Let's prepare our data for database, and change the `.add_chain(..)` call to do it prior to `InsertOrUpdate(...)` .. 
code-block:: python @@ -193,6 +202,9 @@ Let's prepare our data for database, and change the `.add_chain(..)` call to do _input=split_one_to_map ) +Run! +:::: + You can now run the script (either with `bonobo run tutorial/pgdb.py` or directly with the python interpreter, as we added a "main" section) and the dataset should be inserted in your database. If you run it again, no new rows are created. From 53d6ac5887d70b74a726f10e808941ad1d151c45 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Tue, 11 Jul 2017 16:25:32 +0200 Subject: [PATCH 4/6] [nodes] Adds arg0_to_kwargs and kwargs_to_arg0 transformations. --- bonobo/nodes/basics.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/bonobo/nodes/basics.py b/bonobo/nodes/basics.py index 164eeb1..c0434ed 100644 --- a/bonobo/nodes/basics.py +++ b/bonobo/nodes/basics.py @@ -10,11 +10,13 @@ from bonobo.util.objects import ValueHolder from bonobo.util.term import CLEAR_EOL __all__ = [ - 'identity', 'Limit', - 'Tee', - 'count', 'PrettyPrinter', + 'Tee', + 'arg0_to_kwargs', + 'count', + 'identity', + 'kwargs_to_arg0', 'noop', ] @@ -86,3 +88,11 @@ class PrettyPrinter(Configurable): def noop(*args, **kwargs): # pylint: disable=unused-argument from bonobo.constants import NOT_MODIFIED return NOT_MODIFIED + + +def arg0_to_kwargs(row): + return Bag(**row) + + +def kwargs_to_arg0(**row): + return Bag(row) From f2a9a45fd134715c929dc69d5ec25f77152768a3 Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 15 Jul 2017 10:14:30 +0200 Subject: [PATCH 5/6] [nodes] Adds arg0_to_kwargs and kwargs_to_arg0 transformations. 
--- bonobo/_api.py | 6 ++++-- bonobo/nodes/basics.py | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/bonobo/_api.py b/bonobo/_api.py index ab890c6..6b2a72d 100644 --- a/bonobo/_api.py +++ b/bonobo/_api.py @@ -1,6 +1,6 @@ from bonobo.structs import Bag, Graph, Token from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \ - PrettyPrinter, PickleWriter, PickleReader, RateLimited, Tee, count, identity, noop + PrettyPrinter, PickleWriter, PickleReader, RateLimited, Tee, count, identity, noop, arg0_to_kwargs, kwargs_to_arg0 from bonobo.strategies import create_strategy from bonobo.util.objects import get_name @@ -101,13 +101,15 @@ register_api_group( JsonReader, JsonWriter, Limit, - PrettyPrinter, PickleReader, PickleWriter, + PrettyPrinter, RateLimited, Tee, + arg0_to_kwargs, count, identity, + kwargs_to_arg0, noop, ) diff --git a/bonobo/nodes/basics.py b/bonobo/nodes/basics.py index c0434ed..ea05c29 100644 --- a/bonobo/nodes/basics.py +++ b/bonobo/nodes/basics.py @@ -91,8 +91,22 @@ def noop(*args, **kwargs): # pylint: disable=unused-argument def arg0_to_kwargs(row): + """ + Transform items in a stream from "arg0" format (each call only has one positional argument, which is a dict-like + object) to "kwargs" format (each call only has keyword arguments that represent a row). + + :param row: + :return: bonobo.Bag + """ return Bag(**row) def kwargs_to_arg0(**row): + """ + Transform items in a stream from "kwargs" format (each call only has keyword arguments that represent a row) to + "arg0" format (each call only has one positional argument, which is a dict-like object).
+ + :param **row: + :return: bonobo.Bag + """ return Bag(row) From fbd0ee9862332f070282ef6c8f682a99ca8638dd Mon Sep 17 00:00:00 2001 From: Parthiv20 Date: Sat, 15 Jul 2017 10:34:30 +0200 Subject: [PATCH 6/6] Update tut02.rst --- docs/tutorial/tut02.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/tut02.rst b/docs/tutorial/tut02.rst index b1545f9..9ad5ae3 100644 --- a/docs/tutorial/tut02.rst +++ b/docs/tutorial/tut02.rst @@ -59,7 +59,7 @@ available in **Bonobo**'s repository: .. code-block:: shell-session - $ curl https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/datasets/coffeeshops.txt > `python -c 'import bonobo; print(bonobo.get_examples_path("datasets/coffeeshops.txt"))'` + $ curl https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/datasets/coffeeshops.txt > `python3 -c 'import bonobo; print(bonobo.get_examples_path("datasets/coffeeshops.txt"))'` .. note::