Basic reimplementation of what was working in rdc.etl: Jupyter widget, JSON import, demo with the OpenDataSoft API, etc. Lots of things are still work in progress, stay tuned.

This commit is contained in:
Romain Dorgueil
2016-12-24 10:37:53 +01:00
parent f0315936d3
commit c30048f1b0
54 changed files with 4680 additions and 256 deletions

0
bonobo/ext/__init__.py Normal file
View File

View File

@ -0,0 +1,7 @@
from .helpers import console_run
from .plugin import ConsoleOutputPlugin
# Public names of the console extension. ``__all__`` must list *names* (strings), not the
# objects themselves: a star-import raises TypeError on non-string entries.
__all__ = [
    'ConsoleOutputPlugin',
    'console_run',
]

View File

@ -0,0 +1,9 @@
from bonobo import Graph, ThreadPoolExecutorStrategy
from .plugin import ConsoleOutputPlugin
def console_run(*chain, output=True, plugins=None):
    """
    Build a graph out of ``chain`` and execute it using a ``ThreadPoolExecutorStrategy``.

    :param chain: components of the chain, forwarded as-is to ``Graph.add_chain``.
    :param output: whether to add a ``ConsoleOutputPlugin`` to the plugins used for this run.
    :param plugins: optional iterable of additional plugins. It is copied, never mutated.
    :return: the value returned by the executor's ``execute`` method.
    """
    graph = Graph()
    executor = ThreadPoolExecutorStrategy()
    graph.add_chain(*chain)

    # Work on a copy: the caller's sequence stays untouched and any iterable (tuple, ...) is accepted.
    active_plugins = list(plugins or [])

    # The conditional used to apply to the whole "plugins + [console plugin]" expression, which
    # silently discarded the user supplied plugins whenever ``output`` was false.
    if output:
        active_plugins.append(ConsoleOutputPlugin())

    return executor.execute(graph, plugins=active_plugins)

View File

@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
#
# Copyright 2012-2014 Romain Dorgueil
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from functools import lru_cache
import blessings
import psutil
t = blessings.Terminal()
def memory_usage():
    """
    Return the memory used by the current process, in mebibytes.

    The value is the first field of psutil's process memory information (the resident set size,
    according to psutil's documentation), divided by 2 ** 20.

    No caching is applied: this function takes no argument, so memoizing it would freeze the very
    first reading for the whole life of the process.

    :return: float, memory usage in MiB.
    """
    process = psutil.Process(os.getpid())

    # psutil 2.0 renamed ``get_memory_info`` to ``memory_info`` and later releases dropped the old
    # name; prefer the new one and fall back to the legacy spelling on very old versions.
    read_memory_info = getattr(process, 'memory_info', None) or process.get_memory_info

    return read_memory_info()[0] / float(2 ** 20)
# @lru_cache(64)
# def execution_time(harness):
# return datetime.datetime.now() - harness._started_at
class ConsoleOutputPlugin:
    """
    Writes status information about the components of a running graph to the connected stdout, which
    can be a TTY (with or without support for colors/cursor movements) or not (pipe, file, ...).

    .. attribute:: prefix

        String prefix of output lines.

    """

    def __init__(self, prefix=''):
        self.prefix = prefix

    def _write(self, context, rewind):
        """Render ``context`` using the instance's prefix; profile/debug output is hard-wired off for now."""
        profile = False
        debug = False

        append = ()
        if profile:
            # ('Total time', ...) used to be planned here too, but execution time is not tracked yet.
            append = (('Memory', '{0:.2f} Mb'.format(memory_usage())), )

        self.write(context, prefix=self.prefix, append=append, debug=debug, profile=profile, rewind=rewind)

    def run(self, context):
        """Refresh the display in place, only if the terminal is a tty (nothing is written otherwise)."""
        if not t.is_a_tty:
            return
        self._write(context.parent, rewind=True)

    def finalize(self, context):
        """Write the status one last time, leaving the cursor below it."""
        self._write(context.parent, rewind=False)

    @staticmethod
    def write(context, prefix='', rewind=True, append=None, debug=False, profile=False):
        """
        Print one status line per component of ``context``, then the optional ``append`` pairs.

        :param context: sized iterable of components (having ``running``, ``name``, ``get_stats_as_string``).
        :param prefix: string written at the beginning of each component line.
        :param rewind: whether to move the cursor back up, so that the next call overwrites this output.
        :param append: optional iterable of (label, value) pairs, written on one extra line.
        :param debug: forwarded to ``component.get_stats_as_string``.
        :param profile: forwarded to ``component.get_stats_as_string``.
        """
        line_count = len(context)

        for position, component in enumerate(context, start=1):
            label = '({})'.format(position)
            if component.running:
                # Running components: dimmed index, highlighted "+" marker.
                text = ' '.join((
                    t.black(label),
                    t.bold(t.white('+')),
                    component.name,
                    component.get_stats_as_string(debug=debug, profile=profile),
                    '',
                ))
            else:
                # Anything else: whole line dimmed, "-" marker.
                text = t.black(' '.join((
                    label,
                    '-',
                    component.name,
                    component.get_stats_as_string(debug=debug, profile=profile),
                    '',
                )))
            print(prefix + text + t.clear_eol)

        if append:
            # todo handle multiline
            pairs = ('{0}: {1}'.format(t.bold(t.white(key)), value) for key, value in append)
            print(' `->', ' '.join(pairs), t.clear_eol)
            line_count += 1

        if rewind:
            print(t.clear_eol)
            print(t.move_up * (line_count + 2))

58
bonobo/ext/couchdb_.py Normal file
View File

@ -0,0 +1,58 @@
from bonobo import inject
try:
import couchdb
except ImportError as e:
import logging
logging.exception('You must install couchdb to use the bonobo couchdb extension. Easiest way is to install the '
'optional "couchdb" dependencies with «pip install bonobo[couchdb]», but you can also install a '
'specific version by yourself.')
import datetime
from bonobo import service
@service
def client(username, password):
    """Create a ``couchdb.Server`` (constructed without arguments) carrying the given credentials."""
    server = couchdb.Server()
    server.resource.credentials = (username, password)
    return server
@service
@inject(client)
def database(client, name):
    """Return the item called ``name`` of ``client`` (which ``@inject(client)`` is expected to provide)."""
    selected = client[name]
    return selected
def json_datetime(dt=None):
    """
    Format a datetime as an ISO 8601 UTC timestamp with second precision, like ``2016-12-24T09:37:53Z``.

    :param dt: datetime to format. Naive values are considered to be UTC already and are formatted as
        is; timezone-aware values are converted to UTC first. Defaults to the current UTC time.
    :return: string formatted as ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    if not dt:
        # The "Z" suffix designates UTC, hence the default must be the UTC time, not the local one.
        dt = datetime.datetime.now(datetime.timezone.utc)

    if dt.utcoffset() is not None:
        # Aware datetime: normalize to UTC and drop the tzinfo, otherwise isoformat() would add an
        # explicit offset ("+01:00") that conflicts with the "Z" appended below.
        dt = dt.astimezone(datetime.timezone.utc).replace(tzinfo=None)

    return dt.replace(microsecond=0).isoformat() + 'Z'
@inject(database)
def query(db, map, reduce, *args, **kwargs):
    """Placeholder, not implemented yet: accepts a database, map and reduce arguments and does nothing."""
    return None
# Demo code: two definitions of the ``client`` service, using different credentials.
cli1 = client.define('admin', 'admin')
cli2 = client.define('foo', 'bar')


@inject(client[cli1])
def print_db(db):
    """Print whatever gets injected for the first client definition."""
    print(db)


@inject(client[cli2])
def print_db2(db):
    """Print whatever gets injected for the second client definition."""
    print(db)


if __name__ == '__main__':
    # Alternate between both definitions, twice in a row.
    for _ in range(2):
        print_db()
        print_db2()

View File

@ -0,0 +1,18 @@
from .helpers import jupyter_run
from .plugin import JupyterOutputPlugin
def _jupyter_nbextension_paths():
    """
    Describe the notebook extension of this package.

    :return: list containing one dict, with the ``section``, ``src``, ``dest`` and ``require`` keys.
    """
    extension = dict(
        section='notebook',
        src='static',
        dest='bonobo-jupyter',
        require='bonobo-jupyter/extension',
    )
    return [extension]
# Public names of the jupyter extension. ``__all__`` must list *names* (strings), not the
# objects themselves: a star-import raises TypeError on non-string entries.
__all__ = [
    'JupyterOutputPlugin',
    '_jupyter_nbextension_paths',
    'jupyter_run',
]

View File

@ -0,0 +1,9 @@
from bonobo import Graph, ThreadPoolExecutorStrategy
from .plugin import JupyterOutputPlugin
def jupyter_run(*chain, plugins=None):
    """
    Build a graph out of ``chain`` and execute it using a ``ThreadPoolExecutorStrategy``, with a
    ``JupyterOutputPlugin`` added to the plugins.

    :param chain: components of the chain, forwarded as-is to ``Graph.add_chain``.
    :param plugins: optional iterable of additional plugins. It is copied, never mutated.
    :return: the value returned by the executor's ``execute`` method.
    """
    graph = Graph()
    executor = ThreadPoolExecutorStrategy()
    graph.add_chain(*chain)

    # list(...) accepts any iterable; concatenating a tuple with a list would raise a TypeError.
    active_plugins = list(plugins or [])
    active_plugins.append(JupyterOutputPlugin())

    return executor.execute(graph, plugins=active_plugins)

View File

@ -0,0 +1,19 @@
Bonobo integration in Jupyter
Package Install
---------------
**Prerequisites**
- [node](http://nodejs.org/)
```bash
npm install --save bonobo-jupyter
```
Watch mode (for development)
----------------------------
```bash
./node_modules/.bin/webpack --watch
```

1702
bonobo/ext/jupyter/js/dist/index.js vendored Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,29 @@
{
"name": "bonobo-jupyter",
"version": "0.0.1",
"description": "Jupyter integration for Bonobo",
"author": "",
"main": "src/index.js",
"repository": {
"type": "git",
"url": ""
},
"keywords": [
"jupyter",
"widgets",
"ipython",
"ipywidgets"
],
"scripts": {
"prepublish": "webpack",
"test": "echo \"Error: no test specified\" && exit 1"
},
"devDependencies": {
"json-loader": "^0.5.4",
"webpack": "^1.12.14"
},
"dependencies": {
"jupyter-js-widgets": "^2.0.9",
"underscore": "^1.8.3"
}
}

View File

@ -0,0 +1,42 @@
var widgets = require('jupyter-js-widgets');
var _ = require('underscore');
// Custom Model. Custom widgets models must at least provide default values
// for model attributes, including `_model_name`, `_view_name`, `_model_module`
// and `_view_module` when different from the base class.
//
// When serializing the entire widget state for embedding, only values that differ from
// the defaults will be specified.
var BonoboModel = widgets.DOMWidgetModel.extend({
    // Defaults of the base model, overridden with the names identifying this widget.
    defaults: _.extend({}, widgets.DOMWidgetModel.prototype.defaults, {
        _model_name: 'BonoboModel',
        _view_name: 'BonoboView',
        // Module names must be the name the bundle is registered under in requirejs
        // ("bonobo-jupyter", which is also what the python widget declares), not "bonobo".
        _model_module: 'bonobo-jupyter',
        _view_module: 'bonobo-jupyter',
        // List of strings to display, one per line.
        value: []
    })
});
// Custom View. Renders the widget model.
var BonoboView = widgets.DOMWidgetView.extend({
    // Renders the current value, then re-renders each time the model's `value` changes.
    render: function () {
        this.value_changed();
        this.model.on('change:value', this.value_changed, this);
    },

    // Displays the entries of `value`, one per line.
    //
    // Entries are HTML-escaped before being injected: they are arbitrary strings (object
    // representations such as `<Foo at 0x...>`, typically) which would otherwise be parsed
    // as markup, hence swallowed by the browser or, worse, executed.
    value_changed: function () {
        var lines = _.map(this.model.get('value'), _.escape);
        this.$el.html(lines.join('<br>'));
    }
});
// Public API of this module: the widget model and the view rendering it.
module.exports = {
    BonoboModel: BonoboModel,
    BonoboView: BonoboView
};

View File

@ -0,0 +1,9 @@
// Entry point for the unpkg bundle containing custom model definitions.
//
// It differs from the notebook bundle in that it does not need to define a
// dynamic baseURL for the static assets and may load some css that would
// already be loaded by the notebook otherwise.
// Export widget models and views, and the npm package version number.
// Re-export the widget model and view, tagged with the npm package version number.
var bonobo = require('./bonobo.js');
bonobo.version = require('../package.json').version;
module.exports = bonobo;

View File

@ -0,0 +1,20 @@
// This file contains the javascript that is run when the notebook is loaded.
// It contains some requirejs configuration and the `load_ipython_extension`
// which is required for any notebook extension.
// Configure requirejs
if (window.require) {
window.require.config({
map: {
"*" : {
"bonobo-jupyter": "nbextensions/bonobo-jupyter/index",
"jupyter-js-widgets": "nbextensions/jupyter-js-widgets/extension"
}
}
});
}
// Export the required load_ipython_extention
module.exports = {
load_ipython_extension: function() {}
};

View File

@ -0,0 +1,12 @@
// Entry point for the notebook bundle containing custom model definitions.
//
// Setup notebook base URL
//
// Some static assets may be required by the custom widget javascript. The base
// url for the notebook is not known at build time and is therefore computed
// dynamically.
// The path must point to the directory the notebook extension is installed to, which is
// "bonobo-jupyter" (the name used in the requirejs configuration), not "bonobo".
__webpack_public_path__ = document.querySelector('body').getAttribute('data-base-url') + 'nbextensions/bonobo-jupyter/';

// Export widget models and views, and the npm package version number.
module.exports = require('./bonobo.js');
module.exports['version'] = require('../package.json').version;

View File

@ -0,0 +1,74 @@
var version = require('./package.json').version;
// Custom webpack loaders are generally the same for all webpack bundles, hence
// stored in a separate local variable.
var loaders = [
{test: /\.json$/, loader: 'json-loader'},
];
module.exports = [
{
// Notebook extension
//
// This bundle only contains the part of the JavaScript that is run on
// load of the notebook. This section generally only performs
// some configuration for requirejs, and provides the legacy
// "load_ipython_extension" function which is required for any notebook
// extension.
//
entry: './src/extension.js',
output: {
filename: 'extension.js',
path: '../static',
libraryTarget: 'amd'
}
},
{
// Bundle for the notebook containing the custom widget views and models
//
// This bundle contains the implementation for the custom widget views and
// custom widget.
// It must be an amd module
//
entry: './src/index.js',
output: {
filename: 'index.js',
path: '../static',
libraryTarget: 'amd'
},
devtool: 'source-map',
module: {
loaders: loaders
},
externals: ['jupyter-js-widgets']
},
{
// Embeddable jupyter-widget-example bundle
//
// This bundle is generally almost identical to the notebook bundle
// containing the custom widget views and models.
//
// The only difference is in the configuration of the webpack public path
// for the static assets.
//
// It will be automatically distributed by unpkg to work with the static
// widget embedder.
//
// The target bundle is always `dist/index.js`, which is the path required
// by the custom widget embedder.
//
entry: './src/embed.js',
output: {
filename: 'index.js',
path: './dist/',
libraryTarget: 'amd',
publicPath: 'https://unpkg.com/jupyter-widget-example@' + version + '/dist/'
},
devtool: 'source-map',
module: {
loaders: loaders
},
externals: ['jupyter-js-widgets']
}
];

View File

@ -0,0 +1,14 @@
from IPython.core.display import display
from bonobo.ext.jupyter.widget import BonoboWidget
class JupyterOutputPlugin:
    """Plugin showing the representation of each component of the graph in a ``BonoboWidget``."""

    def initialize(self, context):
        """Create the widget, keep a reference to it and display it."""
        widget = BonoboWidget()
        self.widget = widget
        display(widget)

    def run(self, context):
        """Replace the widget value by the list of ``repr()`` of the parent context's components."""
        components = context.parent.components
        self.widget.value = list(map(repr, components))

    # The final state is rendered exactly like any intermediate one.
    finalize = run

View File

@ -0,0 +1,70 @@
define(function() { return /******/ (function(modules) { // webpackBootstrap
/******/ // The module cache
/******/ var installedModules = {};
/******/ // The require function
/******/ function __webpack_require__(moduleId) {
/******/ // Check if module is in cache
/******/ if(installedModules[moduleId])
/******/ return installedModules[moduleId].exports;
/******/ // Create a new module (and put it into the cache)
/******/ var module = installedModules[moduleId] = {
/******/ exports: {},
/******/ id: moduleId,
/******/ loaded: false
/******/ };
/******/ // Execute the module function
/******/ modules[moduleId].call(module.exports, module, module.exports, __webpack_require__);
/******/ // Flag the module as loaded
/******/ module.loaded = true;
/******/ // Return the exports of the module
/******/ return module.exports;
/******/ }
/******/ // expose the modules object (__webpack_modules__)
/******/ __webpack_require__.m = modules;
/******/ // expose the module cache
/******/ __webpack_require__.c = installedModules;
/******/ // __webpack_public_path__
/******/ __webpack_require__.p = "";
/******/ // Load entry module and return exports
/******/ return __webpack_require__(0);
/******/ })
/************************************************************************/
/******/ ([
/* 0 */
/***/ function(module, exports) {
// This file contains the javascript that is run when the notebook is loaded.
// It contains some requirejs configuration and the `load_ipython_extension`
// which is required for any notebook extension.
// Configure requirejs
if (window.require) {
window.require.config({
map: {
"*" : {
"bonobo-jupyter": "nbextensions/bonobo-jupyter/index",
"jupyter-js-widgets": "nbextensions/jupyter-js-widgets/extension"
}
}
});
}
// Export the required load_ipython_extention
module.exports = {
load_ipython_extension: function() {}
};
/***/ }
/******/ ])});;

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,11 @@
import ipywidgets as widgets
from traitlets import List, Unicode
@widgets.register('bonobo-widget.Bonobo')
class BonoboWidget(widgets.DOMWidget):
    """
    DOM widget holding a list (``value``), registered as ``bonobo-widget.Bonobo``.

    All traits are tagged ``sync=True``.
    """
    # Names of the view and model classes, presumably the javascript ones -- TODO confirm.
    _view_name = Unicode('BonoboView').tag(sync=True)
    _model_name = Unicode('BonoboModel').tag(sync=True)
    # Name of the module expected to provide the view and the model.
    _view_module = Unicode('bonobo-jupyter').tag(sync=True)
    _model_module = Unicode('bonobo-jupyter').tag(sync=True)
    # The list to display.
    value = List().tag(sync=True)

29
bonobo/ext/ods.py Normal file
View File

@ -0,0 +1,29 @@
from urllib.parse import urlencode
import requests # todo: make this a service so we can substitute it ?
def extract_ods(url, dataset, rows=100, **kwargs):
    """
    Create an extractor (a generator function) reading all the records of a dataset, page by page.

    :param url: URL of the search endpoint, without query string.
    :param dataset: identifier of the dataset, also used to name the returned function.
    :param rows: size of a page, sent as ``rows`` and used to increment ``start`` between requests.
    :param kwargs: additional query string parameters, sent sorted by name.
    :return: generator function without parameters, named ``extract_<dataset>`` (dashes replaced by
        underscores), yielding one dict per record: its ``fields`` plus a ``geometry`` key.
    :raises requests.HTTPError: while iterating, if the API answers with an error status.
    """
    params = (('dataset', dataset), ('rows', rows), ) + tuple(sorted(kwargs.items()))
    base_url = url + '?' + urlencode(params)

    def _extract_ods():
        start = 0
        while True:
            resp = requests.get('{}&start={start}'.format(base_url, start=start))

            # Fail loudly on HTTP errors. An error payload has no "records" key, so it used to be
            # indistinguishable from the regular end of the dataset.
            resp.raise_for_status()

            records = resp.json().get('records', [])
            if not records:
                break

            for row in records:
                yield {
                    **row.get('fields', {}),
                    'geometry': row.get('geometry', {}),
                }

            start += rows

    _extract_ods.__name__ = 'extract_' + dataset.replace('-', '_')

    return _extract_ods

0
bonobo/ext/pandas.py Normal file
View File

38
bonobo/ext/selenium.py Normal file
View File

@ -0,0 +1,38 @@
from selenium import webdriver
from bonobo import service
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/601.4.4 (KHTML, like Gecko) Version/9.0.3 Safari/601.4.4'
def create_profile(use_tor=False):
    """
    Create the Firefox profile used by the browsers of this module.

    :param use_tor: whether to send the traffic through the SOCKS proxy listening on 127.0.0.1:9050
        (where a local tor daemon is expected).
    :return: a ``webdriver.FirefoxProfile`` instance, with the user agent overridden by ``USER_AGENT``.
    """
    profile = webdriver.FirefoxProfile()
    profile.set_preference("toolkit.startup.max_resumed_crashes", "-1")

    if use_tor:
        # tor connection
        profile.set_preference('network.proxy.type', 1)
        profile.set_preference('network.proxy.socks', '127.0.0.1')
        profile.set_preference('network.proxy.socks_port', 9050)
        # Resolve host names through the proxy too. Otherwise DNS lookups bypass tor and leak the
        # visited hosts (and ".onion" names cannot be resolved at all).
        profile.set_preference('network.proxy.socks_remote_dns', True)

    # user agent
    profile.set_preference("general.useragent.override", USER_AGENT)

    return profile
def create_browser(profile):
    """Start a Firefox webdriver using ``profile``, with implicit wait and page load timeout both set to 10."""
    driver = webdriver.Firefox(profile)
    driver.implicitly_wait(10)
    driver.set_page_load_timeout(10)
    return driver
@service
def browser():
    """Service creating a Firefox browser whose profile does not use tor."""
    profile = create_profile(use_tor=False)
    return create_browser(profile)
@service
def torbrowser():
    """Service creating a Firefox browser whose profile uses tor."""
    profile = create_profile(use_tor=True)
    return create_browser(profile)