Merge remote-tracking branch 'cw-andrews/feature/pass_env_vars' into develop
This commit is contained in:
@ -106,6 +106,7 @@ def register_generic_run_arguments(parser, required=True):
|
|||||||
source_group = parser.add_mutually_exclusive_group(required=required)
|
source_group = parser.add_mutually_exclusive_group(required=required)
|
||||||
source_group.add_argument('filename', nargs='?', type=str)
|
source_group.add_argument('filename', nargs='?', type=str)
|
||||||
source_group.add_argument('--module', '-m', type=str)
|
source_group.add_argument('--module', '-m', type=str)
|
||||||
|
parser.add_argument('--env', '-e', action='append')
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
@ -115,5 +116,4 @@ def register(parser):
|
|||||||
verbosity_group.add_argument('--quiet', '-q', action='store_true')
|
verbosity_group.add_argument('--quiet', '-q', action='store_true')
|
||||||
verbosity_group.add_argument('--verbose', '-v', action='store_true')
|
verbosity_group.add_argument('--verbose', '-v', action='store_true')
|
||||||
parser.add_argument('--install', '-I', action='store_true')
|
parser.add_argument('--install', '-I', action='store_true')
|
||||||
parser.add_argument('--env', '-e', action='append')
|
|
||||||
return execute
|
return execute
|
||||||
|
|||||||
0
bonobo/examples/env_vars/__init__.py
Normal file
0
bonobo/examples/env_vars/__init__.py
Normal file
71
docs/guide/environmental_variables.rst
Normal file
71
docs/guide/environmental_variables.rst
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
Environmental Variables
|
||||||
|
=======================
|
||||||
|
|
||||||
|
Best practice holds that variables should be passed to graphs via environmental variables.
|
||||||
|
Doing this is important for keeping sensitive data out of the code - such as an
|
||||||
|
API token or username and password used to access a database. Not only is this
|
||||||
|
approach more secure, it also makes graphs more flexible by allowing adjustments
|
||||||
|
for a variety of environments and contexts. Importantly, environmental variables
|
||||||
|
are also the means by which arguments can be passed to graphs.
|
||||||
|
|
||||||
|
|
||||||
|
Passing / Setting Environmental Variables
|
||||||
|
::::::::::::::::::::::::::::::::::::::::::::
|
||||||
|
|
||||||
|
The recommended way to set environmental variables for a given graph is simply to use
|
||||||
|
the optional ``--env`` argument when running bonobo from the shell (bash, command prompt, etc).
|
||||||
|
``--env`` (or ``-e`` for short) should then be followed by the variable name and value using the
|
||||||
|
syntax ``VAR_NAME=VAR_VALUE``. Multiple environmental variables can be passed by using
|
||||||
|
multiple ``--env`` / ``-e`` flags.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
# Using one environmental variable:
|
||||||
|
bonobo run csvsanitizer --env SECRET_TOKEN=secret123
|
||||||
|
|
||||||
|
# Using multiple environmental variables:
|
||||||
|
bonobo run csvsanitizer -e SRC_FILE=inventory.txt -e DST_FILE=inventory_processed.csv
|
||||||
|
|
||||||
|
If you're naming something which is configurable, that will need to be instantiated or called to obtain something that
|
||||||
|
can be used as a graph node, then use camelcase names:
|
||||||
|
|
||||||
|
|
||||||
|
Accessing Environmental Variables from within the Graph Context
|
||||||
|
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
||||||
|
|
||||||
|
Environmental variables, whether global or only for the scope of the graph,
|
||||||
|
can be accessed using any of the normal means. It is important to note
|
||||||
|
that whether set globally for the system or just for the graph context,
|
||||||
|
environmental variables are accessed by bonobo in the same way. In the example
|
||||||
|
below the database user and password are accessed via the ``os`` module's ``getenv``
|
||||||
|
function and used to get data from the database.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from bonobo import Graph, run
|
||||||
|
|
||||||
|
|
||||||
|
def extract():
|
||||||
|
database_user = os.getenv('DB_USER')
|
||||||
|
database_password = os.getenv('DB_PASS')
|
||||||
|
# ...
|
||||||
|
# (connect to database using database_user and database_password)
|
||||||
|
# (get data from database)
|
||||||
|
# ...
|
||||||
|
|
||||||
|
return database_data
|
||||||
|
|
||||||
|
|
||||||
|
def load(database_data: dict):
|
||||||
|
for k, v in database_data.items():
|
||||||
|
print('{key} = {value}'.format(key=k, value=v))
|
||||||
|
|
||||||
|
|
||||||
|
graph = Graph(extract, load)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
run(graph)
|
||||||
@ -1,8 +1,8 @@
|
|||||||
Bonobo with Jupyter
|
Bonobo with Jupyter
|
||||||
===================
|
===================
|
||||||
|
minimalistically
|
||||||
There is a builtin plugin that integrates (kind of minimalistically, for now) bonobo within jupyter notebooks, so
|
There is a builtin plugin that integrates (somewhat minimalistically, for now) bonobo within jupyter notebooks, so
|
||||||
you can read the execution status of a graph within a nice (ok not so nice) html/javascript widget.
|
you can read the execution status of a graph within a nice (ok, not so nice) html/javascript widget.
|
||||||
|
|
||||||
See https://github.com/jupyter-widgets/widget-cookiecutter for the base template used.
|
See https://github.com/jupyter-widgets/widget-cookiecutter for the base template used.
|
||||||
|
|
||||||
|
|||||||
@ -12,6 +12,7 @@ There are a few things that you should know while writing transformations graphs
|
|||||||
purity
|
purity
|
||||||
transformations
|
transformations
|
||||||
services
|
services
|
||||||
|
environmental_variables
|
||||||
|
|
||||||
Third party integrations
|
Third party integrations
|
||||||
::::::::::::::::::::::::
|
::::::::::::::::::::::::
|
||||||
|
|||||||
@ -128,7 +128,7 @@ Now let's see how to do it correctly:
|
|||||||
'index': i
|
'index': i
|
||||||
}
|
}
|
||||||
|
|
||||||
I hear you think «Yeah, but if I create like millions of dicts ...».
|
I bet you think «Yeah, but if I create like millions of dicts ...».
|
||||||
|
|
||||||
Let's say we chose the opposite way and copied the dict outside the transformation (in fact, `it's what we did in bonobo's
|
Let's say we chose the opposite way and copied the dict outside the transformation (in fact, `it's what we did in bonobo's
|
||||||
ancestor <https://github.com/rdcli/rdc.etl/blob/dev/rdc/etl/io/__init__.py#L187>`_). This means you will also create the
|
ancestor <https://github.com/rdcli/rdc.etl/blob/dev/rdc/etl/io/__init__.py#L187>`_). This means you will also create the
|
||||||
|
|||||||
@ -12,8 +12,8 @@ If you're going a little further than that, you'll feel limited, for a few reaso
|
|||||||
|
|
||||||
* Hardcoded and tightly linked dependencies make your transformations hard to test, and hard to reuse.
|
* Hardcoded and tightly linked dependencies make your transformations hard to test, and hard to reuse.
|
||||||
* Processing data on your laptop is great, but being able to do it on different target systems (or stages), in different
|
* Processing data on your laptop is great, but being able to do it on different target systems (or stages), in different
|
||||||
environments, is more realistic. You'll want to contigure a different database on a staging environment,
|
environments, is more realistic. You'll want to configure a different database on a staging environment,
|
||||||
preprod environment or production system. Maybe you have silimar systems for different clients and want to select
|
pre-production environment, or production system. Maybe you have similar systems for different clients and want to select
|
||||||
the system at runtime. Etc.
|
the system at runtime. Etc.
|
||||||
|
|
||||||
Service injection
|
Service injection
|
||||||
@ -44,7 +44,7 @@ Let's define such a transformation:
|
|||||||
'category': database.get_category_name_for_sku(row['sku'])
|
'category': database.get_category_name_for_sku(row['sku'])
|
||||||
}
|
}
|
||||||
|
|
||||||
This piece of code tells bonobo that your transformation expect a sercive called "primary_sql_database", that will be
|
This piece of code tells bonobo that your transformation expects a service called "primary_sql_database", that will be
|
||||||
injected to your calls under the parameter name "database".
|
injected to your calls under the parameter name "database".
|
||||||
|
|
||||||
Function-based transformations
|
Function-based transformations
|
||||||
|
|||||||
@ -22,7 +22,7 @@ underscores and lowercase names:
|
|||||||
def uppercase(s: str) -> str:
|
def uppercase(s: str) -> str:
|
||||||
return s.upper()
|
return s.upper()
|
||||||
|
|
||||||
If you're naming something which is configurable, that will need to be instanciated or called to obtain something that
|
If you're naming something which is configurable, that will need to be instantiated or called to obtain something that
|
||||||
can be used as a graph node, then use camelcase names:
|
can be used as a graph node, then use camelcase names:
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|||||||
@ -103,7 +103,7 @@ def test_version(runner, capsys):
|
|||||||
def test_run_with_env(runner, capsys):
|
def test_run_with_env(runner, capsys):
|
||||||
runner(
|
runner(
|
||||||
'run', '--quiet',
|
'run', '--quiet',
|
||||||
str(pathlib.Path(os.path.dirname(__file__), 'util', 'get_passed_env.py')), '--env', 'ENV_TEST_NUMBER=123',
|
get_examples_path('env_vars/get_passed_env.py'), '--env', 'ENV_TEST_NUMBER=123',
|
||||||
'--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'"
|
'--env', 'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'"
|
||||||
)
|
)
|
||||||
out, err = capsys.readouterr()
|
out, err = capsys.readouterr()
|
||||||
@ -116,7 +116,7 @@ def test_run_with_env(runner, capsys):
|
|||||||
@all_runners
|
@all_runners
|
||||||
def test_run_module_with_env(runner, capsys):
|
def test_run_module_with_env(runner, capsys):
|
||||||
runner(
|
runner(
|
||||||
'run', '--quiet', '-m', 'tests.util.get_passed_env', '--env', 'ENV_TEST_NUMBER=123', '--env',
|
'run', '--quiet', '-m', 'bonobo.examples.env_vars.get_passed_env', '--env', 'ENV_TEST_NUMBER=123', '--env',
|
||||||
'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'"
|
'ENV_TEST_USER=cwandrews', '--env', "ENV_TEST_STRING='my_test_string'"
|
||||||
)
|
)
|
||||||
out, err = capsys.readouterr()
|
out, err = capsys.readouterr()
|
||||||
|
|||||||
Reference in New Issue
Block a user