feat: new alternate syntax and switch to black + isort (yeah, maybe not the best time, but that is done).

This commit is contained in:
Romain Dorgueil
2018-07-29 18:21:56 +01:00
parent 3094e43f9f
commit 89dda0dca6
123 changed files with 1672 additions and 1640 deletions

View File

@ -6,24 +6,16 @@ from bonobo.examples import get_datasets_dir, get_minor_version, get_services
from bonobo.examples.datasets.coffeeshops import get_graph as get_coffeeshops_graph
from bonobo.examples.datasets.fablabs import get_graph as get_fablabs_graph
graph_factories = {
'coffeeshops': get_coffeeshops_graph,
'fablabs': get_fablabs_graph,
}
graph_factories = {"coffeeshops": get_coffeeshops_graph, "fablabs": get_fablabs_graph}
if __name__ == '__main__':
if __name__ == "__main__":
parser = examples.get_argument_parser()
parser.add_argument(
'--target', '-t', choices=graph_factories.keys(), nargs='+'
)
parser.add_argument('--sync', action='store_true', default=False)
parser.add_argument("--target", "-t", choices=graph_factories.keys(), nargs="+")
parser.add_argument("--sync", action="store_true", default=False)
with bonobo.parse_args(parser) as options:
graph_options = examples.get_graph_options(options)
graph_names = list(
options['target']
if options['target'] else sorted(graph_factories.keys())
)
graph_names = list(options["target"] if options["target"] else sorted(graph_factories.keys()))
# Create a graph with all requested subgraphs
graph = bonobo.Graph()
@ -32,29 +24,20 @@ if __name__ == '__main__':
bonobo.run(graph, services=get_services())
if options['sync']:
if options["sync"]:
# TODO: once the parallel option for nodes is implemented, this needs to be rewritten to use a graph.
import boto3
s3 = boto3.client('s3')
s3 = boto3.client("s3")
local_dir = get_datasets_dir()
for root, dirs, files in os.walk(local_dir):
for filename in files:
local_path = os.path.join(root, filename)
relative_path = os.path.relpath(local_path, local_dir)
s3_path = os.path.join(
get_minor_version(), relative_path
)
s3_path = os.path.join(get_minor_version(), relative_path)
try:
s3.head_object(
Bucket='bonobo-examples', Key=s3_path
)
s3.head_object(Bucket="bonobo-examples", Key=s3_path)
except Exception:
s3.upload_file(
local_path,
'bonobo-examples',
s3_path,
ExtraArgs={'ACL': 'public-read'}
)
s3.upload_file(local_path, "bonobo-examples", s3_path, ExtraArgs={"ACL": "public-read"})

View File

@ -1,63 +1,39 @@
"""
"""
import sys
import bonobo
from bonobo import examples
from bonobo.contrib.opendatasoft import OpenDataSoftAPI as ODSReader
from bonobo.examples import get_services
from bonobo.structs.graphs import PartialGraph
def get_graph(graph=None, *, _limit=(), _print=()):
graph = graph or bonobo.Graph()
producer = graph.add_chain(
ODSReader(
dataset='liste-des-cafes-a-un-euro',
netloc='opendata.paris.fr'
),
*_limit,
bonobo.UnpackItems(0),
bonobo.Rename(
name='nom_du_cafe',
address='adresse',
zipcode='arrondissement'
),
bonobo.Format(city='Paris', country='France'),
bonobo.OrderFields(
[
'name', 'address', 'zipcode', 'city', 'country',
'geometry', 'geoloc'
]
),
*_print,
producer = (
graph.get_cursor()
>> ODSReader(dataset="liste-des-cafes-a-un-euro", netloc="opendata.paris.fr")
>> PartialGraph(*_limit)
>> bonobo.UnpackItems(0)
>> bonobo.Rename(name="nom_du_cafe", address="adresse", zipcode="arrondissement")
>> bonobo.Format(city="Paris", country="France")
>> bonobo.OrderFields(["name", "address", "zipcode", "city", "country", "geometry", "geoloc"])
>> PartialGraph(*_print)
)
# Comma separated values.
graph.add_chain(
bonobo.CsvWriter(
'coffeeshops.csv',
fields=['name', 'address', 'zipcode', 'city'],
delimiter=','
),
_input=producer.output,
graph.get_cursor(producer.output) >> bonobo.CsvWriter(
"coffeeshops.csv", fields=["name", "address", "zipcode", "city"], delimiter=","
)
# Standard JSON
graph.add_chain(
bonobo.JsonWriter(path='coffeeshops.json'),
_input=producer.output,
)
graph.get_cursor(producer.output) >> bonobo.JsonWriter(path="coffeeshops.json")
# Line-delimited JSON
graph.add_chain(
bonobo.LdjsonWriter(path='coffeeshops.ldjson'),
_input=producer.output,
)
graph.get_cursor(producer.output) >> bonobo.LdjsonWriter(path="coffeeshops.ldjson")
return graph
if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(examples.run(get_graph, get_services))

View File

@ -25,27 +25,21 @@ from bonobo.examples import get_services
try:
import pycountry
except ImportError as exc:
raise ImportError(
'You must install package "pycountry" to run this example.'
) from exc
raise ImportError('You must install package "pycountry" to run this example.') from exc
API_DATASET = 'fablabs@public-us'
API_DATASET = "fablabs@public-us"
ROWS = 100
def _getlink(x):
return x.get('url', None)
return x.get("url", None)
def normalize(row):
result = {
**row,
'links':
list(filter(None, map(_getlink, json.loads(row.get('links'))))),
'country':
pycountry.countries.get(
alpha_2=row.get('country_code', '').upper()
).name,
"links": list(filter(None, map(_getlink, json.loads(row.get("links"))))),
"country": pycountry.countries.get(alpha_2=row.get("country_code", "").upper()).name,
}
return result
@ -58,10 +52,10 @@ def get_graph(graph=None, *, _limit=(), _print=()):
normalize,
bonobo.UnpackItems(0),
*_print,
bonobo.JsonWriter(path='fablabs.json'),
bonobo.JsonWriter(path="fablabs.json"),
)
return graph
if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(examples.run(get_graph, get_services))