@ -2,7 +2,7 @@ import logging
|
|||||||
|
|
||||||
from bonobo.structs import Bag, Graph, Token
|
from bonobo.structs import Bag, Graph, Token
|
||||||
from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \
|
from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \
|
||||||
PrettyPrinter, PickleWriter, PickleReader, RateLimited, Tee, count, identity, noop
|
PrettyPrinter, PickleWriter, PickleReader, RateLimited, Tee, count, identity, noop, arg0_to_kwargs, kwargs_to_arg0
|
||||||
from bonobo.strategies import create_strategy
|
from bonobo.strategies import create_strategy
|
||||||
from bonobo.util.objects import get_name
|
from bonobo.util.objects import get_name
|
||||||
|
|
||||||
@ -108,13 +108,15 @@ register_api_group(
|
|||||||
JsonReader,
|
JsonReader,
|
||||||
JsonWriter,
|
JsonWriter,
|
||||||
Limit,
|
Limit,
|
||||||
PrettyPrinter,
|
|
||||||
PickleReader,
|
PickleReader,
|
||||||
PickleWriter,
|
PickleWriter,
|
||||||
|
PrettyPrinter,
|
||||||
RateLimited,
|
RateLimited,
|
||||||
Tee,
|
Tee,
|
||||||
|
arg0_to_kwargs,
|
||||||
count,
|
count,
|
||||||
identity,
|
identity,
|
||||||
|
kwargs_to_arg0,
|
||||||
noop,
|
noop,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -10,11 +10,13 @@ from bonobo.util.objects import ValueHolder
|
|||||||
from bonobo.util.term import CLEAR_EOL
|
from bonobo.util.term import CLEAR_EOL
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'identity',
|
|
||||||
'Limit',
|
'Limit',
|
||||||
'Tee',
|
|
||||||
'count',
|
|
||||||
'PrettyPrinter',
|
'PrettyPrinter',
|
||||||
|
'Tee',
|
||||||
|
'arg0_to_kwargs',
|
||||||
|
'count',
|
||||||
|
'identity',
|
||||||
|
'kwargs_to_arg0',
|
||||||
'noop',
|
'noop',
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -86,3 +88,25 @@ class PrettyPrinter(Configurable):
|
|||||||
def noop(*args, **kwargs): # pylint: disable=unused-argument
|
def noop(*args, **kwargs): # pylint: disable=unused-argument
|
||||||
from bonobo.constants import NOT_MODIFIED
|
from bonobo.constants import NOT_MODIFIED
|
||||||
return NOT_MODIFIED
|
return NOT_MODIFIED
|
||||||
|
|
||||||
|
|
||||||
|
def arg0_to_kwargs(row):
|
||||||
|
"""
|
||||||
|
Transform items in a stream from "arg0" format (each call only has one positional argument, which is a dict-like
|
||||||
|
object) to "kwargs" format (each call only has keyword arguments that represent a row).
|
||||||
|
|
||||||
|
:param row:
|
||||||
|
:return: bonobo.Bag
|
||||||
|
"""
|
||||||
|
return Bag(**row)
|
||||||
|
|
||||||
|
|
||||||
|
def kwargs_to_arg0(**row):
|
||||||
|
"""
|
||||||
|
Transform items in a stream from "kwargs" format (each call only has keyword arguments that represent a row) to
|
||||||
|
"arg0" format (each call only has one positional argument, which is a dict-like object) .
|
||||||
|
|
||||||
|
:param **row:
|
||||||
|
:return: bonobo.Bag
|
||||||
|
"""
|
||||||
|
return Bag(row)
|
||||||
|
|||||||
@ -59,7 +59,7 @@ available in **Bonobo**'s repository:
|
|||||||
|
|
||||||
.. code-block:: shell-session
|
.. code-block:: shell-session
|
||||||
|
|
||||||
$ curl https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/datasets/coffeeshops.txt > `python -c 'import bonobo; print(bonobo.get_examples_path("datasets/coffeeshops.txt"))'`
|
$ curl https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/datasets/coffeeshops.txt > `python3 -c 'import bonobo; print(bonobo.get_examples_path("datasets/coffeeshops.txt"))'`
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
|
|||||||
@ -18,8 +18,8 @@ specialized packages, like SQLAlchemy, or other database access libraries from t
|
|||||||
First, read https://www.bonobo-project.org/with/sqlalchemy for instructions on how to install. You **do need** the
|
First, read https://www.bonobo-project.org/with/sqlalchemy for instructions on how to install. You **do need** the
|
||||||
bleeding edge version of `bonobo` and `bonobo-sqlalchemy` to make this work.
|
bleeding edge version of `bonobo` and `bonobo-sqlalchemy` to make this work.
|
||||||
|
|
||||||
Additional requirements
|
Requirements
|
||||||
:::::::::::::::::::::::
|
::::::::::::
|
||||||
|
|
||||||
Once you installed `bonobo_sqlalchemy` (read https://www.bonobo-project.org/with/sqlalchemy to use bleeding edge
|
Once you installed `bonobo_sqlalchemy` (read https://www.bonobo-project.org/with/sqlalchemy to use bleeding edge
|
||||||
version), install the following additional packages:
|
version), install the following additional packages:
|
||||||
@ -39,17 +39,21 @@ Open your `_services.py` file and replace the code:
|
|||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
import bonobo
|
import bonobo, dotenv, logging, os
|
||||||
import dotenv
|
|
||||||
|
|
||||||
from bonobo_sqlalchemy.util import create_postgresql_engine
|
from bonobo_sqlalchemy.util import create_postgresql_engine
|
||||||
|
|
||||||
dotenv.load_dotenv(dotenv.find_dotenv())
|
dotenv.load_dotenv(dotenv.find_dotenv())
|
||||||
|
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
|
||||||
|
|
||||||
def get_services():
|
def get_services():
|
||||||
return {
|
return {
|
||||||
'fs': bonobo.open_fs(),
|
'fs': bonobo.open_examples_fs('datasets'),
|
||||||
'db': create_postgresql_engine(name='tutorial')
|
'fs.output': bonobo.open_fs(),
|
||||||
|
'sqlalchemy.engine': create_postgresql_engine(**{
|
||||||
|
'name': 'tutorial',
|
||||||
|
'user': 'tutorial',
|
||||||
|
'pass': 'tutorial',
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
The `create_postgresql_engine` is a tiny function building the DSN from reasonable defaults, that you can override
|
The `create_postgresql_engine` is a tiny function building the DSN from reasonable defaults, that you can override
|
||||||
@ -58,6 +62,9 @@ file and add values for one or more of `POSTGRES_NAME`, `POSTGRES_USER`, 'POSTGR
|
|||||||
`POSTGRES_PORT`. Please note that kwargs always have precedence on environment, but that you should prefer using
|
`POSTGRES_PORT`. Please note that kwargs always have precedence on environment, but that you should prefer using
|
||||||
environment variables for anything that is not immutable from one platform to another.
|
environment variables for anything that is not immutable from one platform to another.
|
||||||
|
|
||||||
|
Add database operation to the graph
|
||||||
|
:::::::::::::::::::::::::::::::::::
|
||||||
|
|
||||||
Let's create a `tutorial/pgdb.py` job:
|
Let's create a `tutorial/pgdb.py` job:
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
@ -106,6 +113,9 @@ If we run this transformation (with `bonobo run tutorial/pgdb.py`), we should ge
|
|||||||
The database we requested do not exist. It is not the role of bonobo to do database administration, and thus there is
|
The database we requested do not exist. It is not the role of bonobo to do database administration, and thus there is
|
||||||
no tool here to create neither the database, nor the tables we want to use.
|
no tool here to create neither the database, nor the tables we want to use.
|
||||||
|
|
||||||
|
Create database and table
|
||||||
|
:::::::::::::::::::::::::
|
||||||
|
|
||||||
There are however tools in `sqlalchemy` to manage tables, so we'll create the database by ourselves, and ask sqlalchemy
|
There are however tools in `sqlalchemy` to manage tables, so we'll create the database by ourselves, and ask sqlalchemy
|
||||||
to create the table:
|
to create the table:
|
||||||
|
|
||||||
@ -125,15 +135,15 @@ Now, let's use a little trick and add this section to `pgdb.py`:
|
|||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
import logging, sys
|
import sys
|
||||||
|
|
||||||
from bonobo.commands.run import get_default_services
|
|
||||||
from sqlalchemy import Table, Column, String, Integer, MetaData
|
from sqlalchemy import Table, Column, String, Integer, MetaData
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
from bonobo.commands.run import get_default_services
|
||||||
services = get_default_services(__file__)
|
services = get_default_services(__file__)
|
||||||
|
if len(sys.argv) == 1:
|
||||||
if len(sys.argv) == 2 and sys.argv[1] == 'reset':
|
return bonobo.run(graph, services=services)
|
||||||
|
elif len(sys.argv) == 2 and sys.argv[1] == 'reset':
|
||||||
engine = services.get('sqlalchemy.engine')
|
engine = services.get('sqlalchemy.engine')
|
||||||
metadata = MetaData()
|
metadata = MetaData()
|
||||||
|
|
||||||
@ -145,11 +155,10 @@ Now, let's use a little trick and add this section to `pgdb.py`:
|
|||||||
Column('address', String(255)),
|
Column('address', String(255)),
|
||||||
)
|
)
|
||||||
|
|
||||||
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
|
|
||||||
metadata.drop_all(engine)
|
metadata.drop_all(engine)
|
||||||
metadata.create_all(engine)
|
metadata.create_all(engine)
|
||||||
else:
|
else:
|
||||||
return bonobo.run(graph, services=services)
|
raise NotImplementedError('I do not understand.')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
@ -167,6 +176,9 @@ Now run:
|
|||||||
|
|
||||||
Database and table should now exist.
|
Database and table should now exist.
|
||||||
|
|
||||||
|
Format the data
|
||||||
|
:::::::::::::::
|
||||||
|
|
||||||
Let's prepare our data for database, and change the `.add_chain(..)` call to do it prior to `InsertOrUpdate(...)`
|
Let's prepare our data for database, and change the `.add_chain(..)` call to do it prior to `InsertOrUpdate(...)`
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
@ -190,6 +202,9 @@ Let's prepare our data for database, and change the `.add_chain(..)` call to do
|
|||||||
_input=split_one_to_map
|
_input=split_one_to_map
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Run!
|
||||||
|
::::
|
||||||
|
|
||||||
You can now run the script (either with `bonobo run tutorial/pgdb.py` or directly with the python interpreter, as we
|
You can now run the script (either with `bonobo run tutorial/pgdb.py` or directly with the python interpreter, as we
|
||||||
added a "main" section) and the dataset should be inserted in your database. If you run it again, no new rows are
|
added a "main" section) and the dataset should be inserted in your database. If you run it again, no new rows are
|
||||||
created.
|
created.
|
||||||
|
|||||||
Reference in New Issue
Block a user