attempt to improve landscape scores. (#6)

This commit is contained in:
Romain Dorgueil
2016-12-26 13:01:49 +01:00
committed by GitHub
parent b658c1f536
commit 8b42ff0bc3
16 changed files with 132 additions and 82 deletions

View File

@ -8,3 +8,7 @@ python-targets:
ignore-paths:
- docs
- examples
pep257:
disable:
- D205
- D210

View File

@ -186,7 +186,7 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {yyyy} {name of copyright owner}
Copyright 2012-2017 Romain Dorgueil
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@ -1,4 +1,27 @@
""" Bonobo data-processing toolkit.
Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of data
transformations using a simple directed graph of python callables.
Read more at http://docs.bonobo-project.org/
Copyright 2012-2014 Romain Dorgueil
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import sys
from .core import *
from .io import *
from .util import *
@ -8,8 +31,21 @@ PY35 = (sys.version_info >= (3, 5))
assert PY35, 'Python 3.5+ is required to use Bonobo.'
# Version infos
try:
with open('../version.txt') as f:
__version__ = f.read().strip()
except Exception as e:
__version__ = 'dev'
with open(os.path.realpath(os.path.join(os.path.dirname(__file__), '../version.txt'))) as f:
__version__ = f.read().strip()
# Names exported by ``from bonobo import *``; kept in alphabetical order (uppercase first).
__all__ = [
'Bag',
'Graph',
'NaiveStrategy',
'NotModified',
'ProcessPoolExecutorStrategy',
'ThreadPoolExecutorStrategy',
'head',
'inject',
'log',
'noop',
'service',
'tee',
'to_json',
]

View File

@ -1,3 +1,5 @@
""" Core required libraries. """
from .bags import Bag
from .graphs import Graph
from .services import inject, service

View File

@ -21,7 +21,7 @@ class ExecutionContext:
for i, component_context in enumerate(self):
try:
component_context.outputs = [self[j].input for j in self.graph.outputs_of(i)]
except KeyError as e:
except KeyError:
continue
component_context.input.on_begin = partial(component_context.send, Begin, _control=True)
component_context.input.on_end = partial(component_context.send, End, _control=True)
@ -55,23 +55,23 @@ class PluginExecutionContext:
def run(self):
try:
get_initializer(self.plugin)(self)
except Exception as e:
print('error in initializer', type(e), e)
except Exception as exc:
print('error in initializer', type(exc), exc)
while self.alive:
# todo with wrap_errors ....
try:
self.plugin.run(self)
except Exception as e:
print('error', type(e), e)
except Exception as exc:
print('error', type(exc), exc)
sleep(0.25)
try:
get_finalizer(self.plugin)(self)
except Exception as e:
print('error in finalizer', type(e), e)
except Exception as exc:
print('error in finalizer', type(exc), exc)
def shutdown(self):
self.alive = False
@ -193,35 +193,23 @@ class ComponentExecutionContext(WithStatistics):
while True:
try:
output = next(outputs)
except StopIteration as e:
except StopIteration:
break
self.send(_resolve(input_bag, output))
def run(self):
assert self.state is New, ('A {} can only be run once, and thus is expected to be in {} state at the '
'beginning of a run().').format(type(self).__name__, New)
def initialize(self):
assert self.state is New, ('A {} can only be run once, and thus is expected to be in {} state at '
'initialization time.').format(type(self).__name__, New)
self.state = Running
try:
get_initializer(self.component)(self)
except Exception as e:
self.handle_error(e, traceback.format_exc())
while True:
try:
self.step()
except KeyboardInterrupt as e:
raise
except InactiveReadableError as e:
sleep(1)
# Terminated, exit loop.
break # BREAK !!!
except Empty as e:
continue
except Exception as e:
self.handle_error(e, traceback.format_exc())
assert self.state is Running, ('A {} must be in {} state when finalization starts.').format(
def finalize(self):
assert self.state is Running, ('A {} must be in {} state at finalization time.').format(
type(self).__name__, Running)
self.state = Terminated
@ -230,6 +218,25 @@ class ComponentExecutionContext(WithStatistics):
except Exception as e:
self.handle_error(e, traceback.format_exc())
def run(self):
    """Drive the component through its whole lifecycle.

    Calls ``initialize()``, then calls ``step()`` in a loop, and finally calls ``finalize()``.

    Inside the loop:

    * ``KeyboardInterrupt`` is re-raised untouched, so ``finalize()`` is not reached in that case.
    * ``InactiveReadableError`` ends the loop after a one second pause.
    * ``Empty`` is ignored and the loop simply tries another ``step()``.
    * Any other exception is reported through ``handle_error()`` together with the formatted traceback, and
      the loop keeps going.
    """
    self.initialize()

    while True:
        try:
            self.step()
        except KeyboardInterrupt:
            raise
        except InactiveReadableError:
            # NOTE(review): presumably raised once the input can no longer yield anything -- confirm.
            sleep(1)
            # Terminated, exit loop.
            break  # BREAK !!!
        except Empty:
            # Nothing to process right now, try again.
            continue
        except Exception as exc:
            self.handle_error(exc, traceback.format_exc())

    self.finalize()
def handle_error(self, exc, tb):
self.stats['err'] += 1
print('\U0001F4A3 {} in {}'.format(type(exc).__name__, self.component))

View File

@ -20,8 +20,8 @@ class Graph:
self.components.append(c)
return i
def add_chain(self, *components, input=Begin):
def add_chain(self, *components, _input=Begin):
for component in components:
next = self.add_component(component)
self.outputs_of(input, create=True).add(next)
input = next
_next = self.add_component(component)
self.outputs_of(_input, create=True).add(_next)
_input = _next

View File

@ -3,9 +3,7 @@ from concurrent.futures import Executor
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import ThreadPoolExecutor
from bonobo.core.bags import Bag
from bonobo.core.strategies.base import Strategy
from bonobo.util.tokens import Begin, End
class ExecutorStrategy(Strategy):

View File

@ -0,0 +1 @@
""" Extensions, not required. """

View File

@ -2,6 +2,6 @@ from .helpers import console_run
from .plugin import ConsoleOutputPlugin
__all__ = [
ConsoleOutputPlugin,
console_run,
'ConsoleOutputPlugin',
'console_run',
]

View File

@ -7,7 +7,6 @@ def _jupyter_nbextension_paths():
__all__ = [
JupyterOutputPlugin,
_jupyter_nbextension_paths,
jupyter_run,
'JupyterOutputPlugin',
'jupyter_run',
]

View File

@ -1,5 +1,6 @@
from IPython.core.display import display
from bonobo.core.plugins import Plugin
from bonobo.ext.jupyter.widget import BonoboWidget

View File

@ -6,26 +6,26 @@ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/601.4.
def create_profile(use_tor=False):
profile = webdriver.FirefoxProfile()
profile.set_preference("toolkit.startup.max_resumed_crashes", "-1")
_profile = webdriver.FirefoxProfile()
_profile.set_preference("toolkit.startup.max_resumed_crashes", "-1")
if use_tor:
# tor connection
profile.set_preference('network.proxy.type', 1)
profile.set_preference('network.proxy.socks', '127.0.0.1')
profile.set_preference('network.proxy.socks_port', 9050)
_profile.set_preference('network.proxy.type', 1)
_profile.set_preference('network.proxy.socks', '127.0.0.1')
_profile.set_preference('network.proxy.socks_port', 9050)
# user agent
profile.set_preference("general.useragent.override", USER_AGENT)
_profile.set_preference("general.useragent.override", USER_AGENT)
return profile
return _profile
def create_browser(profile):
browser = webdriver.Firefox(profile)
browser.implicitly_wait(10)
browser.set_page_load_timeout(10)
return browser
_browser = webdriver.Firefox(profile)
_browser.implicitly_wait(10)
_browser.set_page_load_timeout(10)
return _browser
@service

View File

@ -1 +1,5 @@
""" Readers and writers for common file formats. """
from .json import *
__all__ = ['to_json', ]

View File

@ -1,3 +1,5 @@
""" Various simple utilities. """
import functools
import pprint

View File

@ -86,11 +86,9 @@ html_sidebars = {
]
}
html_theme_path = ['_themes']
html_additional_pages = {'index': 'index.html'}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".

View File

@ -5,40 +5,38 @@ from setuptools import setup, find_packages
def tolines(c):
    """Split *c* on newlines and return the stripped, non-empty lines as a list."""
    stripped = (line.strip() for line in c.split('\n'))
    return [line for line in stripped if line]
def read(filename, flt=None):
    """Return the content of *filename* with surrounding whitespace stripped.

    :param filename: path of the text file to read.
    :param flt: optional callable applied to the stripped content; its return value is returned instead.
        Anything that is not callable (including the default ``None``) is ignored.
    :return: the stripped content, or ``flt(content)`` when *flt* is callable.
    """
    # Decode explicitly as UTF-8: the default encoding of open() depends on the locale, which makes reading a
    # non-ASCII file fail on systems configured with a C/POSIX locale.
    with open(filename, encoding='utf-8') as f:
        content = f.read().strip()
    return flt(content) if callable(flt) else content
try:
    version = read('version.txt')
except Exception:
    # Best-effort: fall back to a placeholder when version.txt cannot be read. ``Exception`` is caught rather
    # than using a bare ``except`` so that KeyboardInterrupt and SystemExit still propagate.
    version = 'dev'
setup(
name = 'bonobo',
description = 'Bonobo',
license = 'Apache License, Version 2.0',
install_requires = ['blessings >=1.6,<1.7', 'psutil >=5.0,<5.1'],
version = version,
long_description = read('README.rst'),
classifiers = read('classifiers.txt', tolines),
packages = find_packages(exclude=['ez_setup', 'example', 'test']),
include_package_data = True,
data_files = [('share/jupyter/nbextensions/bonobo-jupyter',
['bonobo/ext/jupyter/static/extension.js',
'bonobo/ext/jupyter/static/index.js',
'bonobo/ext/jupyter/static/index.js.map'])],
extras_require = {'dev': ['coverage >=4.2,<4.3',
'mock >=2.0,<2.1',
'nose >=1.3,<1.4',
'pylint >=1.6,<1.7',
'pytest >=3,<4',
'pytest-cov >=2.4,<2.5',
'sphinx',
'sphinx_rtd_theme',
'yapf'],
'jupyter': ['jupyter >=1.0,<1.1', 'ipywidgets >=6.0.0.beta5']},
url = 'https://github.com/python-bonobo/bonobo',
download_url = 'https://github.com/python-bonobo/bonobo/tarball/{version}'.format(version=version),
)
name='bonobo',
description='Bonobo',
license='Apache License, Version 2.0',
install_requires=['blessings >=1.6,<1.7', 'psutil >=5.0,<5.1'],
version=version,
long_description=read('README.rst'),
classifiers=read('classifiers.txt', tolines),
packages=find_packages(exclude=['ez_setup', 'example', 'test']),
include_package_data=True,
data_files=[('share/jupyter/nbextensions/bonobo-jupyter', [
'bonobo/ext/jupyter/static/extension.js', 'bonobo/ext/jupyter/static/index.js',
'bonobo/ext/jupyter/static/index.js.map'
])],
extras_require={
'dev': [
'coverage >=4.2,<4.3', 'mock >=2.0,<2.1', 'nose >=1.3,<1.4', 'pylint >=1.6,<1.7', 'pytest >=3,<4',
'pytest-cov >=2.4,<2.5', 'sphinx', 'sphinx_rtd_theme', 'yapf'
],
'jupyter': ['jupyter >=1.0,<1.1', 'ipywidgets >=6.0.0.beta5']
},
url='https://github.com/python-bonobo/bonobo',
download_url='https://github.com/python-bonobo/bonobo/tarball/{version}'.format(version=version), )