From 6abdf09ef7615ad9de9e14e68afc77998fb8ea3b Mon Sep 17 00:00:00 2001 From: cwandrews Date: Sun, 1 Oct 2017 19:56:49 -0400 Subject: [PATCH 1/9] Moved tests/util/get_passed_env.py into /bonobo/examples/env_vars/get_passed_env.py. --- bonobo/examples/env_vars/__init__.py | 0 bonobo/examples/env_vars/get_passed_env.py | 22 ++++++++++++++++++++++ tests/test_commands.py | 4 ++-- 3 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 bonobo/examples/env_vars/__init__.py create mode 100644 bonobo/examples/env_vars/get_passed_env.py diff --git a/bonobo/examples/env_vars/__init__.py b/bonobo/examples/env_vars/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bonobo/examples/env_vars/get_passed_env.py b/bonobo/examples/env_vars/get_passed_env.py new file mode 100644 index 0000000..d9c4ba6 --- /dev/null +++ b/bonobo/examples/env_vars/get_passed_env.py @@ -0,0 +1,22 @@ +import os + +from bonobo import Graph + + +def extract(): + env_test_user = os.getenv('ENV_TEST_USER') + env_test_number = os.getenv('ENV_TEST_NUMBER') + env_test_string = os.getenv('ENV_TEST_STRING') + return env_test_user, env_test_number, env_test_string + + +def load(s: str): + print(s) + + +graph = Graph(extract, load) + +if __name__ == '__main__': + from bonobo import run + + run(graph) diff --git a/tests/test_commands.py b/tests/test_commands.py index 730bc0b..3f26e44 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -103,7 +103,7 @@ def test_version(runner, capsys): def test_run_with_env(runner, capsys): runner( 'run', '--quiet', - str(pathlib.Path(os.path.dirname(__file__), 'util', 'get_passed_env.py')), '--env', 'ENV_TEST_NUMBER=123', + get_examples_path('env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" ) out, err = capsys.readouterr() @@ -116,7 +116,7 @@ def test_run_with_env(runner, capsys): @all_runners def test_run_module_with_env(runner, capsys): runner( 
- 'run', '--quiet', '-m', 'tests.util.get_passed_env', '--env', 'ENV_TEST_NUMBER=123', '--env', + 'run', '--quiet', '-m', 'bonobo.examples.env_vars.get_passed_env', '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" ) out, err = capsys.readouterr() From e38ce3cd1144687b30df54d4f3ac75a68d23745d Mon Sep 17 00:00:00 2001 From: cwandrews Date: Sun, 1 Oct 2017 20:03:11 -0400 Subject: [PATCH 2/9] Switched --env / -e optional argument from register to register_generic_run_arguements per suggestion. Also removed tests/util/get_passed_env.py --- bonobo/commands/run.py | 2 +- tests/util/get_passed_env.py | 22 ---------------------- 2 files changed, 1 insertion(+), 23 deletions(-) delete mode 100644 tests/util/get_passed_env.py diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index 2204a3b..a37282c 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -106,6 +106,7 @@ def register_generic_run_arguments(parser, required=True): source_group = parser.add_mutually_exclusive_group(required=required) source_group.add_argument('filename', nargs='?', type=str) source_group.add_argument('--module', '-m', type=str) + parser.add_argument('--env', '-e', action='append') return parser @@ -115,5 +116,4 @@ def register(parser): verbosity_group.add_argument('--quiet', '-q', action='store_true') verbosity_group.add_argument('--verbose', '-v', action='store_true') parser.add_argument('--install', '-I', action='store_true') - parser.add_argument('--env', '-e', action='append') return execute diff --git a/tests/util/get_passed_env.py b/tests/util/get_passed_env.py deleted file mode 100644 index d9c4ba6..0000000 --- a/tests/util/get_passed_env.py +++ /dev/null @@ -1,22 +0,0 @@ -import os - -from bonobo import Graph - - -def extract(): - env_test_user = os.getenv('ENV_TEST_USER') - env_test_number = os.getenv('ENV_TEST_NUMBER') - env_test_string = os.getenv('ENV_TEST_STRING') - return env_test_user, 
env_test_number, env_test_string - - -def load(s: str): - print(s) - - -graph = Graph(extract, load) - -if __name__ == '__main__': - from bonobo import run - - run(graph) From c1a5750b60b34539140bfc67996643f390dcfa50 Mon Sep 17 00:00:00 2001 From: cwandrews Date: Sun, 1 Oct 2017 20:16:24 -0400 Subject: [PATCH 3/9] Made a few spelling and grammar corrections/adjustments to the docs under guide. --- docs/guide/ext/jupyter.rst | 6 +++--- docs/guide/purity.rst | 2 +- docs/guide/services.rst | 6 +++--- docs/guide/transformations.rst | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/guide/ext/jupyter.rst b/docs/guide/ext/jupyter.rst index 6e96bf6..0d00c58 100644 --- a/docs/guide/ext/jupyter.rst +++ b/docs/guide/ext/jupyter.rst @@ -1,8 +1,8 @@ Bonobo with Jupyter =================== - -There is a builtin plugin that integrates (kind of minimalistically, for now) bonobo within jupyter notebooks, so -you can read the execution status of a graph within a nice (ok not so nice) html/javascript widget. +minimalistically +There is a builtin plugin that integrates (somewhat minimallistically, for now) bonobo within jupyter notebooks, so +you can read the execution status of a graph within a nice (ok, not so nice) html/javascript widget. See https://github.com/jupyter-widgets/widget-cookiecutter for the base template used. diff --git a/docs/guide/purity.rst b/docs/guide/purity.rst index bd20d4e..4b21735 100644 --- a/docs/guide/purity.rst +++ b/docs/guide/purity.rst @@ -128,7 +128,7 @@ Now let's see how to do it correctly: 'index': i } -I hear you think «Yeah, but if I create like millions of dicts ...». +I bet you think «Yeah, but if I create like millions of dicts ...». Let's say we chose the opposite way and copied the dict outside the transformation (in fact, `it's what we did in bonobo's ancestor `_). 
This means you will also create the diff --git a/docs/guide/services.rst b/docs/guide/services.rst index cf7ecc7..4e1a22c 100644 --- a/docs/guide/services.rst +++ b/docs/guide/services.rst @@ -12,8 +12,8 @@ If you're going a little further than that, you'll feel limited, for a few reaso * Hardcoded and tightly linked dependencies make your transformations hard to test, and hard to reuse. * Processing data on your laptop is great, but being able to do it on different target systems (or stages), in different - environments, is more realistic. You'll want to contigure a different database on a staging environment, - preprod environment or production system. Maybe you have silimar systems for different clients and want to select + environments, is more realistic. You'll want to configure a different database on a staging environment, + pre-production environment, or production system. Maybe you have similar systems for different clients and want to select the system at runtime. Etc. Service injection @@ -44,7 +44,7 @@ Let's define such a transformation: 'category': database.get_category_name_for_sku(row['sku']) } -This piece of code tells bonobo that your transformation expect a sercive called "primary_sql_database", that will be +This piece of code tells bonobo that your transformation expects a service called "primary_sql_database", that will be injected to your calls under the parameter name "database".
Function-based transformations diff --git a/docs/guide/transformations.rst b/docs/guide/transformations.rst index 8222357..e0fc347 100644 --- a/docs/guide/transformations.rst +++ b/docs/guide/transformations.rst @@ -22,7 +22,7 @@ underscores and lowercase names: def uppercase(s: str) -> str: return s.upper() -If you're naming something which is configurable, that will need to be instanciated or called to obtain something that +If you're naming something which is configurable, that will need to be instantiated or called to obtain something that can be used as a graph node, then use camelcase names: .. code-block:: python From 074d173ea72d450544210f83061b9ac50fbdc746 Mon Sep 17 00:00:00 2001 From: cwandrews Date: Sun, 1 Oct 2017 21:26:10 -0400 Subject: [PATCH 4/9] Added envrionmental_variables page to the guides section. --- docs/guide/environmental_variables.rst | 71 ++++++++++++++++++++++++++ docs/guide/index.rst | 1 + 2 files changed, 72 insertions(+) create mode 100644 docs/guide/environmental_variables.rst diff --git a/docs/guide/environmental_variables.rst b/docs/guide/environmental_variables.rst new file mode 100644 index 0000000..aa8bbaf --- /dev/null +++ b/docs/guide/environmental_variables.rst @@ -0,0 +1,71 @@ +Environmental Variables +======================= + +Best practice holds that variables should be passed to graphs via environmental variables. +Doing this is important for keeping sensitive data out of the code - such as an +API token or username and password used to access a database. Not only is this +approach more secure, it also makes graphs more flexible by allowing adjustments +for a variety of environments and contexts. Importantly, environmental variables +are also the means by-which arguments can be passed to graphs. 
+ + +Passing / Setting Environmental Variables +:::::::::::::::::::::::::::::::::::::::::::: + +The recommended way to set environmental variables for a given graph is simply to use +the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc). +``--env`` (or ``-e`` for short) should then be followed by the variable name and value using the +syntax `VAR_NAME=VAR_VALUE`. Multiple environmental variables can be passed by using +multiple ``--env`` / ``-e`` flags. + +Example: + +.. code-block:: bash + + # Using one environmental variable: + bonobo run csvsanitizer --env SECRET_TOKEN=secret123 + + # Using multiple environmental variables: + bonobo run csvsanitizer -e SRC_FILE=inventory.txt -e DST_FILE=inventory_processed.csv + +If you're naming something which is configurable, that is will need to be instantiated or called to obtain something that +can be used as a graph node, then use camelcase names: + + +Accessing Environmental Variables from within the Graph Context +::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +Environmental variables, whether global or only for the scope of the graph, +can be can be accessed using any of the normal means. It is important to note +that whether set globally for the system or just for the graph context, +environmental variables are accessed by bonobo in the same way. In the example +below the database user and password are accessed via the ``os`` module's ``getenv`` +function and used to get data from the database. + +.. code-block:: python + + import os + + from bonobo import Graph, run + + + def extract(): + database_user = os.getenv('DB_USER') + database_password = os.getenv('DB_PASS') + # ... + # (connect to database using database_user and database_password) + # (get data from database) + # ... 
+ + return database_data + + + def load(database_data: dict): + for k, v in database_data.items(): + print('{key} = {value}'.format(key=k, value=v)) + + + graph = Graph(extract, load) + + if __name__ == '__main__': + run(graph) diff --git a/docs/guide/index.rst b/docs/guide/index.rst index 18e5565..a05dbf0 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -12,6 +12,7 @@ There are a few things that you should know while writing transformations graphs purity transformations services + envrionmental_variables Third party integrations :::::::::::::::::::::::: From b83ba99beb2426a7a6e4a9160c34042b854517e1 Mon Sep 17 00:00:00 2001 From: CW Andrews Date: Mon, 2 Oct 2017 15:24:41 -0400 Subject: [PATCH 5/9] Update index.rst --- docs/guide/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guide/index.rst b/docs/guide/index.rst index a05dbf0..27b0a3e 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -12,7 +12,7 @@ There are a few things that you should know while writing transformations graphs purity transformations services - envrionmental_variables + environment_variables Third party integrations :::::::::::::::::::::::: From 5f83aef47d7af31b2b71a715d7edf28d57902467 Mon Sep 17 00:00:00 2001 From: CW Andrews Date: Mon, 2 Oct 2017 15:25:13 -0400 Subject: [PATCH 6/9] Update jupyter.rst --- docs/guide/ext/jupyter.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guide/ext/jupyter.rst b/docs/guide/ext/jupyter.rst index 0d00c58..8250853 100644 --- a/docs/guide/ext/jupyter.rst +++ b/docs/guide/ext/jupyter.rst @@ -1,6 +1,6 @@ Bonobo with Jupyter =================== -minimalistically + There is a builtin plugin that integrates (somewhat minimallistically, for now) bonobo within jupyter notebooks, so you can read the execution status of a graph within a nice (ok, not so nice) html/javascript widget.
From 56d8f3291066348309121787f72cd0a67193c63a Mon Sep 17 00:00:00 2001 From: CW Andrews Date: Mon, 2 Oct 2017 15:34:00 -0400 Subject: [PATCH 7/9] Update environmental_variables.rst --- docs/guide/environmental_variables.rst | 30 ++++++++++++++------------ 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/docs/guide/environmental_variables.rst b/docs/guide/environmental_variables.rst index aa8bbaf..b81197a 100644 --- a/docs/guide/environmental_variables.rst +++ b/docs/guide/environmental_variables.rst @@ -1,44 +1,46 @@ -Environmental Variables +Environment Variables ======================= -Best practice holds that variables should be passed to graphs via environmental variables. +Best practice holds that variables should be passed to graphs via environment variables. Doing this is important for keeping sensitive data out of the code - such as an API token or username and password used to access a database. Not only is this approach more secure, it also makes graphs more flexible by allowing adjustments -for a variety of environments and contexts. Importantly, environmental variables +for a variety of environments and contexts. Importantly, environment variables are also the means by-which arguments can be passed to graphs. -Passing / Setting Environmental Variables +Passing / Setting Environment Variables :::::::::::::::::::::::::::::::::::::::::::: -The recommended way to set environmental variables for a given graph is simply to use +The recommended way to set environment variables for a given graph is simply to use the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc). ``--env`` (or ``-e`` for short) should then be followed by the variable name and value using the -syntax `VAR_NAME=VAR_VALUE`. Multiple environmental variables can be passed by using +syntax `VAR_NAME=VAR_VALUE`. Multiple environment variables can be passed by using multiple ``--env`` / ``-e`` flags. Example: .. 
code-block:: bash - # Using one environmental variable: + # Using one environment variable via --env flag: bonobo run csvsanitizer --env SECRET_TOKEN=secret123 - # Using multiple environmental variables: + # Using multiple environment variables via -e (env) flag: bonobo run csvsanitizer -e SRC_FILE=inventory.txt -e DST_FILE=inventory_processed.csv + + # Using one environment variable in bash (*bash only): + SECRET_TOKEN=secret123 bonobo run csvsanitizer -If you're naming something which is configurable, that is will need to be instantiated or called to obtain something that -can be used as a graph node, then use camelcase names: + # Using multiple environment variables in bash (*bash only): + SRC_FILE=inventory.txt DST_FILE=inventory_processed.csv bonobo run csvsanitizer - -Accessing Environmental Variables from within the Graph Context +Accessing Environment Variables from within the Graph Context ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -Environmental variables, whether global or only for the scope of the graph, +Environment variables, whether global or only for the scope of the graph, can be can be accessed using any of the normal means. It is important to note that whether set globally for the system or just for the graph context, -environmental variables are accessed by bonobo in the same way. In the example +environment variables are accessed by bonobo in the same way. In the example below the database user and password are accessed via the ``os`` module's ``getenv`` function and used to get data from the database. From 504e8920f250689a65834bf728bb478bceaa2ff4 Mon Sep 17 00:00:00 2001 From: CW Andrews Date: Mon, 2 Oct 2017 16:06:04 -0400 Subject: [PATCH 8/9] Update and rename environmental_variables.rst to environment_variables.rst Made review requests per @hardym. 
--- ...ental_variables.rst => environment_variables.rst} | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) rename docs/guide/{environmental_variables.rst => environment_variables.rst} (74%) diff --git a/docs/guide/environmental_variables.rst b/docs/guide/environment_variables.rst similarity index 74% rename from docs/guide/environmental_variables.rst rename to docs/guide/environment_variables.rst index b81197a..44feb0d 100644 --- a/docs/guide/environmental_variables.rst +++ b/docs/guide/environment_variables.rst @@ -12,13 +12,13 @@ are also the means by-which arguments can be passed to graphs. Passing / Setting Environment Variables :::::::::::::::::::::::::::::::::::::::::::: -The recommended way to set environment variables for a given graph is simply to use -the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc). +Setting environment variables for your graphs to use can be done in a variety of ways and which one used can vary +based-upon context. Perhaps the most immediate and simple way to set/override a variable for a given graph is +simply to use the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc). ``--env`` (or ``-e`` for short) should then be followed by the variable name and value using the -syntax `VAR_NAME=VAR_VALUE`. Multiple environment variables can be passed by using -multiple ``--env`` / ``-e`` flags. +syntax `VAR_NAME=VAR_VALUE`. Multiple environment variables can be passed by using multiple ``--env`` / ``-e`` flags. Additionally, in bash you can also set environment variables by listing those you wish to set before the `bonobo run` command with space separating the key-value pairs (i.e. `FIZZ=buzz bonobo run ...` or `FIZZ=buzz FOO=bar bonobo run ...`). -Example: +The Examples below demonstrate setting one or multiple variables using both of these methods: .. 
code-block:: bash @@ -33,6 +33,8 @@ Example: # Using multiple environment variables in bash (*bash only): SRC_FILE=inventory.txt DST_FILE=inventory_processed.csv bonobo run csvsanitizer + +*Though not yet implemented, the bonobo roadmap includes implementing environment / .env files as well.* Accessing Environment Variables from within the Graph Context ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: From 142a6d60dfaf5f95198e0beb1f1fdd35812ad523 Mon Sep 17 00:00:00 2001 From: CW Andrews Date: Mon, 2 Oct 2017 16:11:41 -0400 Subject: [PATCH 9/9] Updated inline examples and fixed code markup. --- docs/guide/environment_variables.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/guide/environment_variables.rst b/docs/guide/environment_variables.rst index 44feb0d..003f0a1 100644 --- a/docs/guide/environment_variables.rst +++ b/docs/guide/environment_variables.rst @@ -16,7 +16,7 @@ Setting environment variables for your graphs to use can be done in a variety of based-upon context. Perhaps the most immediate and simple way to set/override a variable for a given graph is simply to use the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc). ``--env`` (or ``-e`` for short) should then be followed by the variable name and value using the -syntax `VAR_NAME=VAR_VALUE`. Multiple environment variables can be passed by using multiple ``--env`` / ``-e`` flags. Additionally, in bash you can also set environment variables by listing those you wish to set before the `bonobo run` command with space separating the key-value pairs (i.e. `FIZZ=buzz bonobo run ...` or `FIZZ=buzz FOO=bar bonobo run ...`). +syntax ``VAR_NAME=VAR_VALUE``. Multiple environment variables can be passed by using multiple ``--env`` / ``-e`` flags (e.g. ``bonobo run --env FIZZ=buzz ...`` and ``bonobo run --env FIZZ=buzz --env FOO=bar ...``).
Additionally, in bash you can also set environment variables by listing those you wish to set before the ``bonobo run`` command with spaces separating the key-value pairs (e.g. ``FIZZ=buzz bonobo run ...`` or ``FIZZ=buzz FOO=bar bonobo run ...``). The Examples below demonstrate setting one or multiple variables using both of these methods: @@ -39,7 +39,7 @@ The Examples below demonstrate setting one or multiple variables using both of t Accessing Environment Variables from within the Graph Context ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -Environment variables, whether global or only for the scope of the graph, +Environment variables, whether set globally or only for the scope of the graph, can be can be accessed using any of the normal means. It is important to note that whether set globally for the system or just for the graph context, environment variables are accessed by bonobo in the same way. In the example