diff --git a/bonobo/commands/run.py b/bonobo/commands/run.py index 2204a3b..a37282c 100644 --- a/bonobo/commands/run.py +++ b/bonobo/commands/run.py @@ -106,6 +106,7 @@ def register_generic_run_arguments(parser, required=True): source_group = parser.add_mutually_exclusive_group(required=required) source_group.add_argument('filename', nargs='?', type=str) source_group.add_argument('--module', '-m', type=str) + parser.add_argument('--env', '-e', action='append') return parser @@ -115,5 +116,4 @@ def register(parser): verbosity_group.add_argument('--quiet', '-q', action='store_true') verbosity_group.add_argument('--verbose', '-v', action='store_true') parser.add_argument('--install', '-I', action='store_true') - parser.add_argument('--env', '-e', action='append') return execute diff --git a/bonobo/examples/env_vars/__init__.py b/bonobo/examples/env_vars/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/util/get_passed_env.py b/bonobo/examples/env_vars/get_passed_env.py similarity index 100% rename from tests/util/get_passed_env.py rename to bonobo/examples/env_vars/get_passed_env.py diff --git a/docs/guide/environmental_variables.rst b/docs/guide/environmental_variables.rst new file mode 100644 index 0000000..aa8bbaf --- /dev/null +++ b/docs/guide/environmental_variables.rst @@ -0,0 +1,71 @@ +Environmental Variables +======================= + +Best practice holds that variables should be passed to graphs via environmental variables. +Doing this is important for keeping sensitive data out of the code - such as an +API token or username and password used to access a database. Not only is this +approach more secure, it also makes graphs more flexible by allowing adjustments +for a variety of environments and contexts. Importantly, environmental variables +are also the means by which arguments can be passed to graphs. 
+ + +Passing / Setting Environmental Variables +:::::::::::::::::::::::::::::::::::::::::::: + +The recommended way to set environmental variables for a given graph is simply to use +the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc.). +``--env`` (or ``-e`` for short) should then be followed by the variable name and value using the +syntax ``VAR_NAME=VAR_VALUE``. Multiple environmental variables can be passed by using +multiple ``--env`` / ``-e`` flags. + +Example: + +.. code-block:: bash + + # Using one environmental variable: + bonobo run csvsanitizer --env SECRET_TOKEN=secret123 + + # Using multiple environmental variables: + bonobo run csvsanitizer -e SRC_FILE=inventory.txt -e DST_FILE=inventory_processed.csv + +If you're naming something which is configurable, that will need to be instantiated or called to obtain something that +can be used as a graph node, then use camelcase names: + + +Accessing Environmental Variables from within the Graph Context +::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +Environmental variables, whether global or only for the scope of the graph, +can be accessed using any of the normal means. It is important to note +that whether set globally for the system or just for the graph context, +environmental variables are accessed by bonobo in the same way. In the example +below the database user and password are accessed via the ``os`` module's ``getenv`` +function and used to get data from the database. + +.. code-block:: python + + import os + + from bonobo import Graph, run + + + def extract(): + database_user = os.getenv('DB_USER') + database_password = os.getenv('DB_PASS') + # ... + # (connect to database using database_user and database_password) + # (get data from database) + # ... 
+ + return database_data + + + def load(database_data: dict): + for k, v in database_data.items(): + print('{key} = {value}'.format(key=k, value=v)) + + + graph = Graph(extract, load) + + if __name__ == '__main__': + run(graph) diff --git a/docs/guide/ext/jupyter.rst b/docs/guide/ext/jupyter.rst index 6e96bf6..0d00c58 100644 --- a/docs/guide/ext/jupyter.rst +++ b/docs/guide/ext/jupyter.rst @@ -1,8 +1,8 @@ Bonobo with Jupyter =================== - -There is a builtin plugin that integrates (kind of minimalistically, for now) bonobo within jupyter notebooks, so -you can read the execution status of a graph within a nice (ok not so nice) html/javascript widget. + +There is a builtin plugin that integrates (somewhat minimalistically, for now) bonobo within jupyter notebooks, so +you can read the execution status of a graph within a nice (ok, not so nice) html/javascript widget. See https://github.com/jupyter-widgets/widget-cookiecutter for the base template used. diff --git a/docs/guide/index.rst b/docs/guide/index.rst index 18e5565..a05dbf0 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -12,6 +12,7 @@ There are a few things that you should know while writing transformations graphs purity transformations services + environmental_variables Third party integrations :::::::::::::::::::::::: diff --git a/docs/guide/purity.rst b/docs/guide/purity.rst index bd20d4e..4b21735 100644 --- a/docs/guide/purity.rst +++ b/docs/guide/purity.rst @@ -128,7 +128,7 @@ Now let's see how to do it correctly: 'index': i } -I hear you think «Yeah, but if I create like millions of dicts ...». +I bet you think «Yeah, but if I create like millions of dicts ...». Let's say we chose the opposite way and copied the dict outside the transformation (in fact, `it's what we did in bonobo's ancestor `_). 
This means you will also create the diff --git a/docs/guide/services.rst b/docs/guide/services.rst index cf7ecc7..4e1a22c 100644 --- a/docs/guide/services.rst +++ b/docs/guide/services.rst @@ -12,8 +12,8 @@ If you're going a little further than that, you'll feel limited, for a few reaso * Hardcoded and tightly linked dependencies make your transformations hard to test, and hard to reuse. * Processing data on your laptop is great, but being able to do it on different target systems (or stages), in different - environments, is more realistic. You'll want to contigure a different database on a staging environment, - preprod environment or production system. Maybe you have silimar systems for different clients and want to select + environments, is more realistic. You'll want to configure a different database on a staging environment, + pre-production environment, or production system. Maybe you have similar systems for different clients and want to select the system at runtime. Etc. Service injection @@ -44,7 +44,7 @@ Let's define such a transformation: 'category': database.get_category_name_for_sku(row['sku']) } -This piece of code tells bonobo that your transformation expect a sercive called "primary_sql_database", that will be +This piece of code tells bonobo that your transformation expects a service called "primary_sql_database", that will be injected to your calls under the parameter name "database". 
Function-based transformations diff --git a/docs/guide/transformations.rst b/docs/guide/transformations.rst index 8222357..e0fc347 100644 --- a/docs/guide/transformations.rst +++ b/docs/guide/transformations.rst @@ -22,7 +22,7 @@ underscores and lowercase names: def uppercase(s: str) -> str: return s.upper() -If you're naming something which is configurable, that will need to be instanciated or called to obtain something that +If you're naming something which is configurable, that will need to be instantiated or called to obtain something that can be used as a graph node, then use camelcase names: .. code-block:: python diff --git a/tests/test_commands.py b/tests/test_commands.py index 730bc0b..3f26e44 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -103,7 +103,7 @@ def test_version(runner, capsys): def test_run_with_env(runner, capsys): runner( 'run', '--quiet', - str(pathlib.Path(os.path.dirname(__file__), 'util', 'get_passed_env.py')), '--env', 'ENV_TEST_NUMBER=123', + get_examples_path('env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" ) out, err = capsys.readouterr() @@ -116,7 +116,7 @@ def test_run_with_env(runner, capsys): @all_runners def test_run_module_with_env(runner, capsys): runner( - 'run', '--quiet', '-m', 'tests.util.get_passed_env', '--env', 'ENV_TEST_NUMBER=123', '--env', + 'run', '--quiet', '-m', 'bonobo.examples.env_vars.get_passed_env', '--env', 'ENV_TEST_NUMBER=123', '--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'" ) out, err = capsys.readouterr()