feat: new alternate syntax and switch to black + isort (yeah, maybe not the best time, but that is done).
This commit is contained in:
@ -6,24 +6,16 @@ from bonobo.examples import get_datasets_dir, get_minor_version, get_services
|
||||
from bonobo.examples.datasets.coffeeshops import get_graph as get_coffeeshops_graph
|
||||
from bonobo.examples.datasets.fablabs import get_graph as get_fablabs_graph
|
||||
|
||||
graph_factories = {
|
||||
'coffeeshops': get_coffeeshops_graph,
|
||||
'fablabs': get_fablabs_graph,
|
||||
}
|
||||
graph_factories = {"coffeeshops": get_coffeeshops_graph, "fablabs": get_fablabs_graph}
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
parser = examples.get_argument_parser()
|
||||
parser.add_argument(
|
||||
'--target', '-t', choices=graph_factories.keys(), nargs='+'
|
||||
)
|
||||
parser.add_argument('--sync', action='store_true', default=False)
|
||||
parser.add_argument("--target", "-t", choices=graph_factories.keys(), nargs="+")
|
||||
parser.add_argument("--sync", action="store_true", default=False)
|
||||
|
||||
with bonobo.parse_args(parser) as options:
|
||||
graph_options = examples.get_graph_options(options)
|
||||
graph_names = list(
|
||||
options['target']
|
||||
if options['target'] else sorted(graph_factories.keys())
|
||||
)
|
||||
graph_names = list(options["target"] if options["target"] else sorted(graph_factories.keys()))
|
||||
|
||||
# Create a graph with all requested subgraphs
|
||||
graph = bonobo.Graph()
|
||||
@ -32,29 +24,20 @@ if __name__ == '__main__':
|
||||
|
||||
bonobo.run(graph, services=get_services())
|
||||
|
||||
if options['sync']:
|
||||
if options["sync"]:
|
||||
# TODO: once the parallel option for nodes is implemented, this needs to be rewritten to use a graph.
|
||||
import boto3
|
||||
|
||||
s3 = boto3.client('s3')
|
||||
s3 = boto3.client("s3")
|
||||
|
||||
local_dir = get_datasets_dir()
|
||||
for root, dirs, files in os.walk(local_dir):
|
||||
for filename in files:
|
||||
local_path = os.path.join(root, filename)
|
||||
relative_path = os.path.relpath(local_path, local_dir)
|
||||
s3_path = os.path.join(
|
||||
get_minor_version(), relative_path
|
||||
)
|
||||
s3_path = os.path.join(get_minor_version(), relative_path)
|
||||
|
||||
try:
|
||||
s3.head_object(
|
||||
Bucket='bonobo-examples', Key=s3_path
|
||||
)
|
||||
s3.head_object(Bucket="bonobo-examples", Key=s3_path)
|
||||
except Exception:
|
||||
s3.upload_file(
|
||||
local_path,
|
||||
'bonobo-examples',
|
||||
s3_path,
|
||||
ExtraArgs={'ACL': 'public-read'}
|
||||
)
|
||||
s3.upload_file(local_path, "bonobo-examples", s3_path, ExtraArgs={"ACL": "public-read"})
|
||||
|
||||
@ -1,63 +1,39 @@
|
||||
"""
|
||||
|
||||
"""
|
||||
import sys
|
||||
|
||||
import bonobo
|
||||
from bonobo import examples
|
||||
from bonobo.contrib.opendatasoft import OpenDataSoftAPI as ODSReader
|
||||
from bonobo.examples import get_services
|
||||
from bonobo.structs.graphs import PartialGraph
|
||||
|
||||
|
||||
def get_graph(graph=None, *, _limit=(), _print=()):
|
||||
graph = graph or bonobo.Graph()
|
||||
|
||||
producer = graph.add_chain(
|
||||
ODSReader(
|
||||
dataset='liste-des-cafes-a-un-euro',
|
||||
netloc='opendata.paris.fr'
|
||||
),
|
||||
*_limit,
|
||||
bonobo.UnpackItems(0),
|
||||
bonobo.Rename(
|
||||
name='nom_du_cafe',
|
||||
address='adresse',
|
||||
zipcode='arrondissement'
|
||||
),
|
||||
bonobo.Format(city='Paris', country='France'),
|
||||
bonobo.OrderFields(
|
||||
[
|
||||
'name', 'address', 'zipcode', 'city', 'country',
|
||||
'geometry', 'geoloc'
|
||||
]
|
||||
),
|
||||
*_print,
|
||||
producer = (
|
||||
graph.get_cursor()
|
||||
>> ODSReader(dataset="liste-des-cafes-a-un-euro", netloc="opendata.paris.fr")
|
||||
>> PartialGraph(*_limit)
|
||||
>> bonobo.UnpackItems(0)
|
||||
>> bonobo.Rename(name="nom_du_cafe", address="adresse", zipcode="arrondissement")
|
||||
>> bonobo.Format(city="Paris", country="France")
|
||||
>> bonobo.OrderFields(["name", "address", "zipcode", "city", "country", "geometry", "geoloc"])
|
||||
>> PartialGraph(*_print)
|
||||
)
|
||||
|
||||
# Comma separated values.
|
||||
graph.add_chain(
|
||||
bonobo.CsvWriter(
|
||||
'coffeeshops.csv',
|
||||
fields=['name', 'address', 'zipcode', 'city'],
|
||||
delimiter=','
|
||||
),
|
||||
_input=producer.output,
|
||||
graph.get_cursor(producer.output) >> bonobo.CsvWriter(
|
||||
"coffeeshops.csv", fields=["name", "address", "zipcode", "city"], delimiter=","
|
||||
)
|
||||
|
||||
# Standard JSON
|
||||
graph.add_chain(
|
||||
bonobo.JsonWriter(path='coffeeshops.json'),
|
||||
_input=producer.output,
|
||||
)
|
||||
graph.get_cursor(producer.output) >> bonobo.JsonWriter(path="coffeeshops.json")
|
||||
|
||||
# Line-delimited JSON
|
||||
graph.add_chain(
|
||||
bonobo.LdjsonWriter(path='coffeeshops.ldjson'),
|
||||
_input=producer.output,
|
||||
)
|
||||
graph.get_cursor(producer.output) >> bonobo.LdjsonWriter(path="coffeeshops.ldjson")
|
||||
|
||||
return graph
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(examples.run(get_graph, get_services))
|
||||
|
||||
@ -25,27 +25,21 @@ from bonobo.examples import get_services
|
||||
try:
|
||||
import pycountry
|
||||
except ImportError as exc:
|
||||
raise ImportError(
|
||||
'You must install package "pycountry" to run this example.'
|
||||
) from exc
|
||||
raise ImportError('You must install package "pycountry" to run this example.') from exc
|
||||
|
||||
API_DATASET = 'fablabs@public-us'
|
||||
API_DATASET = "fablabs@public-us"
|
||||
ROWS = 100
|
||||
|
||||
|
||||
def _getlink(x):
|
||||
return x.get('url', None)
|
||||
return x.get("url", None)
|
||||
|
||||
|
||||
def normalize(row):
|
||||
result = {
|
||||
**row,
|
||||
'links':
|
||||
list(filter(None, map(_getlink, json.loads(row.get('links'))))),
|
||||
'country':
|
||||
pycountry.countries.get(
|
||||
alpha_2=row.get('country_code', '').upper()
|
||||
).name,
|
||||
"links": list(filter(None, map(_getlink, json.loads(row.get("links"))))),
|
||||
"country": pycountry.countries.get(alpha_2=row.get("country_code", "").upper()).name,
|
||||
}
|
||||
return result
|
||||
|
||||
@ -58,10 +52,10 @@ def get_graph(graph=None, *, _limit=(), _print=()):
|
||||
normalize,
|
||||
bonobo.UnpackItems(0),
|
||||
*_print,
|
||||
bonobo.JsonWriter(path='fablabs.json'),
|
||||
bonobo.JsonWriter(path="fablabs.json"),
|
||||
)
|
||||
return graph
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(examples.run(get_graph, get_services))
|
||||
|
||||
Reference in New Issue
Block a user