Files
bonobo/bonobo/examples/datasets/__main__.py
2018-07-16 10:00:28 +02:00

61 lines
2.1 KiB
Python

import os
import bonobo
from bonobo import examples
from bonobo.examples.datasets.coffeeshops import get_graph as get_coffeeshops_graph
from bonobo.examples.datasets.fablabs import get_graph as get_fablabs_graph
from bonobo.examples.datasets.services import get_services, get_datasets_dir, get_minor_version
graph_factories = {
'coffeeshops': get_coffeeshops_graph,
'fablabs': get_fablabs_graph,
}
if __name__ == '__main__':
parser = examples.get_argument_parser()
parser.add_argument(
'--target', '-t', choices=graph_factories.keys(), nargs='+'
)
parser.add_argument('--sync', action='store_true', default=False)
with bonobo.parse_args(parser) as options:
graph_options = examples.get_graph_options(options)
graph_names = list(
options['target']
if options['target'] else sorted(graph_factories.keys())
)
# Create a graph with all requested subgraphs
graph = bonobo.Graph()
for name in graph_names:
graph = graph_factories[name](graph, **graph_options)
bonobo.run(graph, services=get_services())
if options['sync']:
# TODO: when parallel option for node will be implemented, need to be rewriten to use a graph.
import boto3
s3 = boto3.client('s3')
local_dir = get_datasets_dir()
for root, dirs, files in os.walk(local_dir):
for filename in files:
local_path = os.path.join(root, filename)
relative_path = os.path.relpath(local_path, local_dir)
s3_path = os.path.join(
get_minor_version(), relative_path
)
try:
s3.head_object(
Bucket='bonobo-examples', Key=s3_path
)
except Exception:
s3.upload_file(
local_path,
'bonobo-examples',
s3_path,
ExtraArgs={'ACL': 'public-read'}
)