Merge pull request #84 from hartym/develop

Develop
This commit is contained in:
Romain Dorgueil
2017-05-27 08:11:17 -07:00
committed by GitHub
22 changed files with 253 additions and 166 deletions

View File

@ -1,7 +1,7 @@
# This file has been auto-generated.
# All changes will be lost, see Projectfile.
#
# Updated at 2017-05-22 19:54:27.969596
# Updated at 2017-05-27 17:05:44.723397
PACKAGE ?= bonobo
PYTHON ?= $(shell which python)
@ -20,8 +20,9 @@ SPHINX_SOURCEDIR ?= docs
SPHINX_BUILDDIR ?= $(SPHINX_SOURCEDIR)/_build
YAPF ?= $(PYTHON_DIRNAME)/yapf
YAPF_OPTIONS ?= -rip
VERSION ?= $(shell git describe 2>/dev/null || echo dev)
.PHONY: $(SPHINX_SOURCEDIR) clean format install install-dev lint test
.PHONY: $(SPHINX_SOURCEDIR) clean format install install-dev test
# Installs the local project dependencies.
install:
@ -39,9 +40,6 @@ install-dev:
clean:
rm -rf build dist *.egg-info
lint: install-dev
$(PYTHON_DIRNAME)/pylint --py3k $(PACKAGE) -f html > pylint.html
test: install-dev
$(PYTEST) $(PYTEST_OPTIONS) tests

View File

@ -1,73 +1,53 @@
# bonobo (see github.com/python-edgy/project)
name = 'bonobo'
description = 'Bonobo, a simple, modern and atomic extract-transform-load toolkit for python 3.5+.'
license = 'Apache License, Version 2.0'
from edgy.project import require
url = 'https://www.bonobo-project.org/'
download_url = 'https://github.com/python-bonobo/bonobo/tarball/{version}'
pytest = require('pytest')
python = require('python')
sphinx = require('sphinx')
yapf = require('yapf')
author = 'Romain Dorgueil'
author_email = 'romain@dorgueil.net'
python.setup(
name='bonobo',
description='Bonobo, a simple, modern and atomic extract-transform-load toolkit for python 3.5+.',
license='Apache License, Version 2.0',
url='https://www.bonobo-project.org/',
download_url='https://github.com/python-bonobo/bonobo/tarball/{version}',
author='Romain Dorgueil',
author_email='romain@dorgueil.net',
data_files=[
('share/jupyter/nbextensions/bonobo-jupyter', [
'bonobo/ext/jupyter/static/extension.js',
'bonobo/ext/jupyter/static/index.js',
'bonobo/ext/jupyter/static/index.js.map',
]),
],
enable_features = {
'make',
'sphinx',
'pytest',
'git',
'pylint',
'python',
'yapf',
}
entry_points={
'console_scripts': [
'bonobo = bonobo.commands:entrypoint',
],
'bonobo.commands': [
'init = bonobo.commands.init:register',
'run = bonobo.commands.run:register',
'version = bonobo.commands.version:register',
],
}
# strict dependencies in requirements.txt
install_requires = [
)
python.add_requirements(
'colorama >=0.3,<1.0',
'fs >=2.0,<3.0',
'packaging >=16,<17',
'psutil >=5.2,<6.0',
'requests >=2.0,<3.0',
'stevedore >=1.21,<2.0',
]
extras_require = {
'jupyter': [
'jupyter >=1.0,<1.1',
'ipywidgets >=6.0.0.beta5'
],
'dev': [
'coverage >=4,<5',
'pylint >=1,<2',
'pytest >=3,<4',
'pytest-cov >=2,<3',
dev=[
'pytest-timeout >=1,<2',
'sphinx',
'sphinx_rtd_theme',
'yapf',
],
}
data_files = [
('share/jupyter/nbextensions/bonobo-jupyter', [
'bonobo/ext/jupyter/static/extension.js',
'bonobo/ext/jupyter/static/index.js',
'bonobo/ext/jupyter/static/index.js.map',
]),
]
entry_points = {
'console_scripts': [
'bonobo = bonobo.commands:entrypoint',
],
'bonobo.commands': [
'init = bonobo.commands.init:register',
'run = bonobo.commands.run:register',
'version = bonobo.commands.version:register',
],
'edgy.project.features': [
'bonobo = bonobo.ext.edgy.project.feature:BonoboFeature'
jupyter=[
'jupyter >=1.0,<1.1',
'ipywidgets >=6.0.0.beta5',
]
}
@listen('edgy.project.feature.make.on_generate', priority=10)
def on_make_generate_docker_targets(event):
event.makefile['SPHINX_SOURCEDIR'] = 'docs'
)

View File

@ -1,6 +1,6 @@
from bonobo.structs import Bag, Graph, Token
from bonobo.nodes import CsvReader, CsvWriter, FileReader, FileWriter, Filter, JsonReader, JsonWriter, Limit, \
PrettyPrint, PickleWriter, PickleReader, Tee, count, identity, noop, pprint
PrettyPrinter, PickleWriter, PickleReader, Tee, count, identity, noop, pprint
from bonobo.strategies import create_strategy
from bonobo.util.objects import get_name
@ -97,9 +97,9 @@ register_api_group(
JsonReader,
JsonWriter,
Limit,
PrettyPrinter,
PickleReader,
PickleWriter,
PrettyPrint,
Tee,
count,
identity,

View File

@ -1,16 +1,19 @@
import os
def execute(name):
    """
    Bootstrap a new bonobo project called *name* from the cookiecutter-bonobo template.

    :param name: project name, handed to the template as the ``name`` context value.
    :return: whatever ``cookiecutter()`` returns.
    :raises ImportError: if the ``cookiecutter`` package cannot be imported.
    """
    try:
        # Imported lazily so that cookiecutter is only required when this command is used.
        from cookiecutter.main import cookiecutter
    except ImportError as exc:
        # The install hint has to name the package that is actually missing.
        raise ImportError(
            'You must install "cookiecutter" to use this command.\n\n $ pip install cookiecutter\n'
        ) from exc

    return cookiecutter(
        'https://github.com/python-bonobo/cookiecutter-bonobo.git', extra_context={'name': name}, no_input=True
    )
def register(parser):
parser.add_argument('name')
return execute

View File

@ -1,6 +1,8 @@
import os
import runpy
import pip
import bonobo
DEFAULT_SERVICES_FILENAME = '_services.py'
@ -29,7 +31,7 @@ def get_default_services(filename, services=None):
return services or {}
def execute(filename, module, quiet=False, verbose=False):
def execute(filename, module, install=False, quiet=False, verbose=False):
from bonobo import settings
if quiet:
@ -40,7 +42,12 @@ def execute(filename, module, quiet=False, verbose=False):
if filename:
if os.path.isdir(filename):
if install:
requirements = os.path.join(filename, 'requirements.txt')
pip.main(['install', '-qr', requirements])
filename = os.path.join(filename, DEFAULT_GRAPH_FILENAME)
elif install:
raise RuntimeError('Cannot --install on a file (only available for dirs containing requirements.txt).')
context = runpy.run_path(filename, run_name='__bonobo__')
elif module:
context = runpy.run_module(module, run_name='__bonobo__')
@ -68,11 +75,17 @@ def execute(filename, module, quiet=False, verbose=False):
)
def register(parser):
def register_generic_run_arguments(parser):
    """
    Declare on *parser* the arguments that select what to run.

    The positional ``filename`` and the ``--module``/``-m`` option live in a required, mutually exclusive group.
    The parser that was passed in is returned.
    """
    sources = parser.add_mutually_exclusive_group(required=True)
    specs = (
        (('filename', ), {'nargs': '?', 'type': str}),
        (('--module', '-m'), {'type': str}),
    )
    for flags, options in specs:
        sources.add_argument(*flags, **options)
    return parser
def register(parser):
parser = register_generic_run_arguments(parser)
verbosity_group = parser.add_mutually_exclusive_group()
verbosity_group.add_argument('--quiet', '-q', action='store_true')
verbosity_group.add_argument('--verbose', '-v', action='store_true')
parser.add_argument('--install', '-I', action='store_true')
return execute

View File

@ -1,9 +1,40 @@
import bonobo
from bonobo.util.pkgs import bonobo_packages
def execute():
print('{} v.{}'.format(bonobo.__name__, bonobo.__version__))
def format_version(mod, *, name=None, quiet=False):
    """
    Build the version line displayed for module *mod*.

    :param mod: object exposing ``__name__`` and ``__version__``.
    :param name: name to display and to look up in ``bonobo_packages``; defaults to ``mod.__name__``.
    :param quiet: falsy gives ``name v.version (in location)``, 1 gives ``name version``, 2 gives ``version``.
    :raises RuntimeError: if *quiet* is 3 or more.
    :raises KeyError: in the non-quiet form, if the name is not a key of ``bonobo_packages``.
    """
    display_name = name or mod.__name__
    version = mod.__version__

    if not quiet:
        # Only this form shows the location, so the distribution is looked up here and nowhere else:
        # the quieter forms keep working for a package that is missing from bonobo_packages.
        location = bonobo_packages[display_name].location
        return '{name} v.{version} (in {location})'.format(name=display_name, version=version, location=location)
    if quiet < 2:
        return '{name} {version}'.format(name=display_name, version=version)
    if quiet < 3:
        return '{version}'.format(version=version)

    raise RuntimeError('Hard to be so quiet...')
def execute(all=False, quiet=False):
    """
    Print the version of bonobo and, when *all* is set, of every other package listed in ``bonobo_packages``.

    A package that cannot be imported, or whose version line cannot be built, gets a one-line notice instead
    of stopping the listing.
    """
    print(format_version(bonobo, quiet=quiet))
    if not all:
        return

    for name in sorted(bonobo_packages):
        if name == 'bonobo':
            # Already printed on the first line.
            continue

        try:
            # Canonical names use dashes, module names use underscores.
            mod = __import__(name.replace('-', '_'))
        except ImportError as exc:
            print('{} is not importable ({}).'.format(name, exc))
            continue

        try:
            print(format_version(mod, name=name, quiet=quiet))
        except Exception as exc:
            print('{} ({})'.format(name, exc))
def register(parser):
parser.add_argument('--all', '-a', action='store_true')
parser.add_argument('--quiet', '-q', action='count')
return execute

View File

@ -6,7 +6,9 @@ import os
def cleanse_sms(row):
if row['category'] == 'spam':
row['sms_clean'] = '**MARKED AS SPAM** ' + row['sms'][0:50] + ('...' if len(row['sms']) > 50 else '')
row['sms_clean'] = '**MARKED AS SPAM** ' + row['sms'][0:50] + (
'...' if len(row['sms']) > 50 else ''
)
else:
row['sms_clean'] = row['sms']
@ -14,14 +16,13 @@ def cleanse_sms(row):
graph = bonobo.Graph(
bonobo.PickleReader('spam.pkl'), # spam.pkl is within the gzipped tarball
bonobo.PickleReader('spam.pkl'
), # spam.pkl is within the gzipped tarball
cleanse_sms,
print
)
if __name__ == '__main__':
'''
This example shows how a different file system service can be injected
into a transformation (as compressing pickled objects often makes sense
@ -51,8 +52,10 @@ if __name__ == '__main__':
'''
services = {
'fs': TarFS(
os.path.join(bonobo.get_examples_path(), 'datasets', 'spam.tgz')
'fs':
TarFS(
os.path.
join(bonobo.get_examples_path(), 'datasets', 'spam.tgz')
)
}
bonobo.run(graph, services=services)

View File

@ -0,0 +1,16 @@
import bonobo
import time
from bonobo.constants import NOT_MODIFIED
def pause(*args, **kwargs):
    """Sleep for a tenth of a second, ignoring all arguments, then return ``NOT_MODIFIED``."""
    delay = 0.1  # seconds
    time.sleep(delay)
    return NOT_MODIFIED
graph = bonobo.Graph(
lambda: tuple(range(20)),
pause,
print,
)

View File

@ -1,5 +1,6 @@
import functools
import io
import sys
from contextlib import redirect_stdout
from colorama import Style, Fore
@ -8,6 +9,21 @@ from bonobo.plugins import Plugin
from bonobo.util.term import CLEAR_EOL, MOVE_CURSOR_UP
class IOBuffer():
    """
    In-memory text sink whose accumulated content is collected with :meth:`switch`.

    ``write`` is an instance attribute bound to the current ``io.StringIO``; it is rebound each time the
    buffer is switched.
    """

    def __init__(self):
        self.current = io.StringIO()
        self.write = self.current.write

    def flush(self):
        """Do nothing; defined so that ``print(..., flush=True)`` and ``.flush()`` calls work on this object."""

    def switch(self):
        """Start a fresh buffer and return everything that was written to the previous one."""
        previous = self.current
        self.current = io.StringIO()
        self.write = self.current.write
        try:
            return previous.getvalue()
        finally:
            # The old buffer is closed once its content has been read.
            previous.close()
class ConsoleOutputPlugin(Plugin):
"""
Outputs status information to the connected stdout. Can be a TTY, with or without support for colors/cursor
@ -21,30 +37,32 @@ class ConsoleOutputPlugin(Plugin):
def initialize(self):
self.prefix = ''
self.counter = 0
self._append_cache = ''
self.isatty = sys.stdout.isatty()
def _write(self, graph_context, rewind):
if settings.PROFILE:
append = (
('Memory', '{0:.2f} Mb'.format(memory_usage())),
# ('Total time', '{0} s'.format(execution_time(harness))),
)
else:
append = ()
self.write(graph_context, prefix=self.prefix, append=append, rewind=rewind)
self._stdout = sys.stdout
self.stdout = IOBuffer()
self.redirect_stdout = redirect_stdout(self.stdout)
self.redirect_stdout.__enter__()
def run(self):
if sys.stdout.isatty():
if self.isatty:
self._write(self.context.parent, rewind=True)
else:
pass # not a tty
def finalize(self):
self._write(self.context.parent, rewind=False)
self.redirect_stdout.__exit__(None, None, None)
@staticmethod
def write(context, prefix='', rewind=True, append=None):
def write(self, context, prefix='', rewind=True, append=None):
t_cnt = len(context)
buffered = self.stdout.switch()
for line in buffered.split('\n')[:-1]:
print(line + CLEAR_EOL, file=sys.stderr)
for i in context.graph.topologically_sorted_indexes:
node = context[i]
name_suffix = '({})'.format(i) if settings.DEBUG else ''
@ -62,7 +80,7 @@ class ConsoleOutputPlugin(Plugin):
Style.RESET_ALL, ' ',
)
)
print(prefix + _line + '\033[0K')
print(prefix + _line + '\033[0K', file=sys.stderr)
if append:
# todo handle multiline
@ -72,17 +90,31 @@ class ConsoleOutputPlugin(Plugin):
' `-> ', ' '.join('{}{}{}: {}'.format(Style.BRIGHT, k, Style.RESET_ALL, v)
for k, v in append), CLEAR_EOL
)
)
),
file=sys.stderr
)
t_cnt += 1
if rewind:
print(CLEAR_EOL)
print(MOVE_CURSOR_UP(t_cnt + 2))
print(CLEAR_EOL, file=sys.stderr)
print(MOVE_CURSOR_UP(t_cnt + 2), file=sys.stderr)
def _write(self, graph_context, rewind):
if settings.PROFILE:
if self.counter % 10 and self._append_cache:
append = self._append_cache
else:
self._append_cache = append = (
('Memory', '{0:.2f} Mb'.format(memory_usage())),
# ('Total time', '{0} s'.format(execution_time(harness))),
)
else:
append = ()
self.write(graph_context, prefix=self.prefix, append=append, rewind=rewind)
self.counter += 1
def memory_usage():
    """
    Return the first field of psutil's ``memory_info()`` for the current process, divided by 2**20.

    The value is measured on every call. Caching it (as ``lru_cache`` did on this zero-argument function)
    would return the very first measurement forever.

    NOTE(review): psutil documents the first field as the resident set size in bytes, which would make this
    a figure in mebibytes -- confirm against the psutil version in use.
    """
    import os, psutil
    process = psutil.Process(os.getpid())
    return process.memory_info()[0] / float(2**20)

View File

@ -1,22 +0,0 @@
try:
import edgy.project
except ImportError as e:
import logging
logging.exception('You must install edgy.project to use this.')
import os
from edgy.project.events import subscribe
from edgy.project.feature import Feature, SUPPORT_PRIORITY
class BonoboFeature(Feature):
    """edgy.project feature that renders bonobo's example file(s) when a project starts."""

    requires = {'python'}

    @subscribe('edgy.project.on_start', priority=SUPPORT_PRIORITY)
    def on_start(self, event):
        """Render each example template into the package directory derived from ``event.setup['name']``."""
        package_path = event.setup['name'].replace('.', os.sep)

        # One-element tuple: a bare ('example_graph') is just a string and would be iterated per character.
        for file in ('example_graph', ):
            self.render_file(os.path.join(package_path, file + '.py'), os.path.join('tornado', file + '.py.j2'))

View File

@ -4,6 +4,7 @@ from pprint import pprint as _pprint
import itertools
from colorama import Fore, Style
from bonobo import settings
from bonobo.config import Configurable, Option
from bonobo.config.processors import ContextProcessor
from bonobo.structs.bags import Bag
@ -17,7 +18,7 @@ __all__ = [
'Tee',
'count',
'pprint',
'PrettyPrint',
'PrettyPrinter',
'noop',
]
@ -72,11 +73,22 @@ def _count_counter(self, context):
class PrettyPrinter(Configurable):
def call(self, *args, **kwargs):
formater = self._format_quiet if settings.QUIET else self._format_console
for i, (item, value) in enumerate(itertools.chain(enumerate(args), kwargs.items())):
print(' ' if i else '', item, '=', str(value).strip().replace('\n', '\n' + CLEAR_EOL), CLEAR_EOL)
print(formater(i, item, value))
def _format_quiet(self, i, item, value):
return ' '.join(((' ' if i else '-'), str(item), ':', str(value).strip()))
def _format_console(self, i, item, value):
return ' '.join(
((' ' if i else ''), str(item), '=', str(value).strip().replace('\n', '\n' + CLEAR_EOL), CLEAR_EOL)
)
pprint = Tee(_pprint)
pprint = PrettyPrinter()
pprint.__name__ = 'pprint'
def PrettyPrint(title_keys=('title', 'name', 'id'), print_values=True, sort=True):

View File

@ -21,4 +21,4 @@ QUIET = to_bool(os.environ.get('BONOBO_QUIET', 'f'))
def check():
if DEBUG and QUIET:
raise RuntimeError('I cannot be verbose and quiet at the same time.')
raise RuntimeError('I cannot be verbose and quiet at the same time.')

8
bonobo/util/pkgs.py Normal file
View File

@ -0,0 +1,8 @@
import pkg_resources
from packaging.utils import canonicalize_name
# Distributions from the pkg_resources working set whose canonical name starts with "bonobo",
# keyed by that canonical name.
bonobo_packages = {}
for p in pkg_resources.working_set:
    name = canonicalize_name(p.project_name)
    if not name.startswith('bonobo'):
        continue
    bonobo_packages[name] = p

View File

@ -13,6 +13,26 @@ contributions have less value, all contributions are very important.
* You can enhance tests.
* etc.
tl;dr
:::::
1. Fork the GitHub repository
.. code-block:: shell-session
$ git clone https://github.com/python-bonobo/bonobo.git # change this to use your fork.
$ cd bonobo
$ git remote add upstream https://github.com/python-bonobo/bonobo.git
$ git fetch upstream
$ git checkout upstream/develop -b feature/my_awesome_feature
$ # code, code, code, test, doc, code, test ...
$ git commit -m '[topic] .... blaaaah ....'
$ git push origin feature/my_awesome_feature
2. Open a pull request
3. Rinse, repeat
Code-related contributions (including tests and examples)
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::

View File

@ -1,27 +1,24 @@
-e .[dev]
alabaster==0.7.10
astroid==1.5.2
babel==2.4.0
coverage==4.3.4
certifi==2017.4.17
chardet==3.0.3
coverage==4.4.1
docutils==0.13.1
idna==2.5
imagesize==0.7.1
isort==4.2.5
jinja2==2.9.6
lazy-object-proxy==1.2.2
markupsafe==1.0
mccabe==0.6.1
py==1.4.33
pygments==2.2.0
pylint==1.7.1
pytest-cov==2.4.0
pytest-cov==2.5.1
pytest-timeout==1.2.0
pytest==3.0.7
pytest==3.1.0
pytz==2017.2
requests==2.13.0
requests==2.16.2
six==1.10.0
snowballstemmer==1.2.1
sphinx-rtd-theme==0.2.4
sphinx==1.5.5
wrapt==1.10.10
yapf==0.16.1
sphinx==1.6.1
sphinxcontrib-websupport==1.0.1
typing==3.6.1
urllib3==1.21.1

View File

@ -1,5 +1,4 @@
-e .[jupyter]
appnope==0.1.0
bleach==2.0.0
decorator==4.0.11
@ -18,7 +17,7 @@ jupyter-core==4.3.0
jupyter==1.0.0
markupsafe==1.0
mistune==0.7.4
nbconvert==5.1.1
nbconvert==5.2.1
nbformat==4.3.0
notebook==5.0.0
pandocfilters==1.4.1
@ -33,7 +32,7 @@ qtconsole==4.3.0
simplegeneric==0.8.1
six==1.10.0
terminado==0.6
testpath==0.3
testpath==0.3.1
tornado==4.5.1
traitlets==4.3.2
wcwidth==0.1.7

View File

@ -1,12 +1,17 @@
-e .
appdirs==1.4.3
certifi==2017.4.17
chardet==3.0.3
colorama==0.3.9
enum34==1.1.6
fs==2.0.3
pbr==3.0.0
idna==2.5
packaging==16.8
pbr==3.0.1
psutil==5.2.2
pyparsing==2.2.0
pytz==2017.2
requests==2.13.0
requests==2.16.2
six==1.10.0
stevedore==1.21.0
urllib3==1.21.1

View File

@ -41,41 +41,34 @@ else:
version = version_ns.get('__version__', 'dev')
setup(
name='bonobo',
author='Romain Dorgueil',
author_email='romain@dorgueil.net',
description=('Bonobo, a simple, modern and atomic extract-transform-load toolkit for '
'python 3.5+.'),
license='Apache License, Version 2.0',
install_requires=[
'colorama >=0.3,<1.0', 'fs >=2.0,<3.0', 'psutil >=5.2,<6.0', 'requests >=2.0,<3.0', 'stevedore >=1.21,<2.0'
],
name='bonobo',
version=version,
long_description=long_description,
classifiers=classifiers,
packages=find_packages(exclude=['ez_setup', 'example', 'test']),
include_package_data=True,
data_files=[
(
'share/jupyter/nbextensions/bonobo-jupyter', [
'bonobo/ext/jupyter/static/extension.js', 'bonobo/ext/jupyter/static/index.js',
'bonobo/ext/jupyter/static/index.js.map'
]
)
install_requires=[
'colorama (>= 0.3, < 1.0)', 'fs (>= 2.0, < 3.0)', 'packaging (>= 16, < 17)', 'psutil (>= 5.2, < 6.0)',
'requests (>= 2.0, < 3.0)', 'stevedore (>= 1.21, < 2.0)'
],
extras_require={
'dev': [
'coverage >=4,<5', 'pylint >=1,<2', 'pytest >=3,<4', 'pytest-cov >=2,<3', 'pytest-timeout >=1,<2', 'sphinx',
'sphinx_rtd_theme', 'yapf'
'coverage (>= 4.4, < 5.0)', 'pytest (>= 3.1, < 4.0)', 'pytest-cov (>= 2.5, < 3.0)',
'pytest-timeout (>= 1, < 2)', 'sphinx (>= 1.6, < 2.0)'
],
'jupyter': ['jupyter >=1.0,<1.1', 'ipywidgets >=6.0.0.beta5']
'jupyter': ['ipywidgets (>= 6.0.0.beta5)', 'jupyter (>= 1.0, < 1.1)']
},
entry_points={
'bonobo.commands': [
'init = bonobo.commands.init:register', 'run = bonobo.commands.run:register',
'version = bonobo.commands.version:register'
],
'console_scripts': ['bonobo = bonobo.commands:entrypoint'],
'edgy.project.features': ['bonobo = '
'bonobo.ext.edgy.project.feature:BonoboFeature']
'console_scripts': ['bonobo = bonobo.commands:entrypoint']
},
url='https://www.bonobo-project.org/',
download_url='https://github.com/python-bonobo/bonobo/tarball/{version}'.format(version=version),

View File

@ -28,9 +28,8 @@ def test_write_pickled_dict_to_file(tmpdir):
def test_read_pickled_list_from_file(tmpdir):
fs, filename = open_fs(tmpdir), 'input.pkl'
fs.open(filename, 'wb').write(pickle.dumps([
['a', 'b', 'c'], ['a foo', 'b foo', 'c foo'], ['a bar', 'b bar', 'c bar']
]))
fs.open(filename,
'wb').write(pickle.dumps([['a', 'b', 'c'], ['a foo', 'b foo', 'c foo'], ['a bar', 'b bar', 'c bar']]))
reader = PickleReader(path=filename)

View File

@ -76,4 +76,4 @@ def test_version(runner, capsys):
out, err = capsys.readouterr()
out = out.strip()
assert out.startswith('bonobo ')
assert out.endswith(__version__)
assert __version__ in out