Attempt to refactor the context code a bit, add a new count transform that counts the calls, add a new bonobo.structs package with basic data structures, and remove blessings.

This commit is contained in:
Romain Dorgueil
2017-04-24 23:44:29 +02:00
parent cff32c1612
commit ef2c881075
37 changed files with 405 additions and 145 deletions

View File

@ -19,13 +19,14 @@ class OpenDataSoftAPI(Configurable):
scheme = Option(str, default='https')
netloc = Option(str, default='data.opendatasoft.com')
path = Option(path_str, default='/api/records/1.0/search/')
rows = Option(int, default=100)
rows = Option(int, default=500)
limit = Option(int, default=None)
timezone = Option(str, default='Europe/Paris')
kwargs = Option(dict, default=dict)
@ContextProcessor
def compute_path(self, context):
params = (('dataset', self.dataset), ('rows', self.rows), ('timezone', self.timezone)) + tuple(sorted(self.kwargs.items()))
params = (('dataset', self.dataset), ('timezone', self.timezone)) + tuple(sorted(self.kwargs.items()))
yield self.endpoint.format(scheme=self.scheme, netloc=self.netloc, path=self.path) + '?' + urlencode(params)
@ContextProcessor
@ -33,8 +34,8 @@ class OpenDataSoftAPI(Configurable):
yield ValueHolder(0)
def __call__(self, base_url, start, *args, **kwargs):
while True:
url = '{}&start={start}'.format(base_url, start=start.value)
while (not self.limit) or (self.limit > start):
url = '{}&start={start}&rows={rows}'.format(base_url, start=start.value, rows=self.rows if not self.limit else min(self.rows, self.limit-start))
resp = requests.get(url)
records = resp.json().get('records', [])
@ -42,7 +43,10 @@ class OpenDataSoftAPI(Configurable):
break
for row in records:
yield {**row.get('fields', {}), 'geometry': row.get('geometry', {})}
yield {
**row.get('fields', {}),
'geometry': row.get('geometry', {})
}
start.value += self.rows