From 7a25774b0f8d06f5035a14855d683c318d82f0fc Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Sat, 2 Dec 2017 15:57:14 +0100 Subject: [PATCH] Removing datasets from the repository. --- bonobo/examples/datasets/__main__.py | 35 +++++++++++++++++- bonobo/examples/datasets/coffeeshops.py | 5 ++- bonobo/examples/datasets/fablabs.py | 5 ++- bonobo/examples/datasets/services.py | 15 ++++++-- .../examples/datasets/{ => static}/Makefile | 0 .../examples/datasets/{ => static}/passwd.txt | 0 .../examples/datasets/{ => static}/spam.tgz | Bin .../datasets/{ => static}/theaters.json | 0 8 files changed, 54 insertions(+), 6 deletions(-) rename bonobo/examples/datasets/{ => static}/Makefile (100%) rename bonobo/examples/datasets/{ => static}/passwd.txt (100%) rename bonobo/examples/datasets/{ => static}/spam.tgz (100%) rename bonobo/examples/datasets/{ => static}/theaters.json (100%) diff --git a/bonobo/examples/datasets/__main__.py b/bonobo/examples/datasets/__main__.py index a066e07..8dd4c49 100644 --- a/bonobo/examples/datasets/__main__.py +++ b/bonobo/examples/datasets/__main__.py @@ -1,8 +1,10 @@ +import os + import bonobo from bonobo import examples from bonobo.examples.datasets.coffeeshops import get_graph as get_coffeeshops_graph from bonobo.examples.datasets.fablabs import get_graph as get_fablabs_graph -from bonobo.examples.datasets.services import get_services +from bonobo.examples.datasets.services import get_services, get_datasets_dir graph_factories = { 'coffeeshops': get_coffeeshops_graph, @@ -14,6 +16,7 @@ if __name__ == '__main__': parser.add_argument( '--target', '-t', choices=graph_factories.keys(), nargs='+' ) + parser.add_argument('--sync', action='store_true', default=False) with bonobo.parse_args(parser) as options: graph_options = examples.get_graph_options(options) @@ -22,8 +25,38 @@ if __name__ == '__main__': if options['target'] else sorted(graph_factories.keys()) ) + # Create a graph with all requested subgraphs graph = bonobo.Graph() for name in 
graph_names: graph = graph_factories[name](graph, **graph_options) bonobo.run(graph, services=get_services()) + + if options['sync']: + # XXX/TODO: once the parallel option for nodes is implemented, this needs to be rewritten to use a graph. + import boto3 + + s3 = boto3.client('s3') + + local_dir = get_datasets_dir() + for root, dirs, files in os.walk(local_dir): + for filename in files: + local_path = os.path.join(root, filename) + relative_path = os.path.relpath(local_path, local_dir) + s3_path = os.path.join( + bonobo.__version__, relative_path + ) + + try: + s3.head_object( + Bucket='bonobo-examples', Key=s3_path + ) + except: + s3.upload_file( + local_path, + 'bonobo-examples', + s3_path, + ExtraArgs={ + 'ACL': 'public-read' + } + ) diff --git a/bonobo/examples/datasets/coffeeshops.py b/bonobo/examples/datasets/coffeeshops.py index 85883d7..93aa0d5 100644 --- a/bonobo/examples/datasets/coffeeshops.py +++ b/bonobo/examples/datasets/coffeeshops.py @@ -58,4 +58,7 @@ if __name__ == '__main__': parser = examples.get_argument_parser() with bonobo.parse_args(parser) as options: - bonobo.run(get_graph(**examples.get_graph_options(options)), services=get_services()) + bonobo.run( + get_graph(**examples.get_graph_options(options)), + services=get_services() + ) diff --git a/bonobo/examples/datasets/fablabs.py b/bonobo/examples/datasets/fablabs.py index 086bdfc..0a6e188 100644 --- a/bonobo/examples/datasets/fablabs.py +++ b/bonobo/examples/datasets/fablabs.py @@ -62,4 +62,7 @@ if __name__ == '__main__': parser = examples.get_argument_parser() with bonobo.parse_args(parser) as options: - bonobo.run(get_graph(**examples.get_graph_options(options)), services=get_services()) + bonobo.run( + get_graph(**examples.get_graph_options(options)), + services=get_services() + ) diff --git a/bonobo/examples/datasets/services.py b/bonobo/examples/datasets/services.py index eb9e8e8..9c8f2ac 100644 --- a/bonobo/examples/datasets/services.py +++ b/bonobo/examples/datasets/services.py @@ -1,7 
+1,16 @@ +import os + import bonobo +def get_datasets_dir(*dirs): + home_dir = os.path.expanduser('~') + target_dir = os.path.join( + home_dir, '.cache/bonobo', bonobo.__version__, *dirs + ) + os.makedirs(target_dir, exist_ok=True) + return target_dir + + def get_services(): - return { - 'fs': bonobo.open_fs(bonobo.get_examples_path('datasets')) - } \ No newline at end of file + return {'fs': bonobo.open_fs(get_datasets_dir('datasets'))} diff --git a/bonobo/examples/datasets/Makefile b/bonobo/examples/datasets/static/Makefile similarity index 100% rename from bonobo/examples/datasets/Makefile rename to bonobo/examples/datasets/static/Makefile diff --git a/bonobo/examples/datasets/passwd.txt b/bonobo/examples/datasets/static/passwd.txt similarity index 100% rename from bonobo/examples/datasets/passwd.txt rename to bonobo/examples/datasets/static/passwd.txt diff --git a/bonobo/examples/datasets/spam.tgz b/bonobo/examples/datasets/static/spam.tgz similarity index 100% rename from bonobo/examples/datasets/spam.tgz rename to bonobo/examples/datasets/static/spam.tgz diff --git a/bonobo/examples/datasets/theaters.json b/bonobo/examples/datasets/static/theaters.json similarity index 100% rename from bonobo/examples/datasets/theaters.json rename to bonobo/examples/datasets/static/theaters.json