wip, aio: asyncio strategy (defunct, not fully implemented) and related refactorings.

Author: Romain Dorgueil
Date:   2018-07-29 15:24:35 +01:00
Parent: 980a76399b
Commit: 8ea7ce0b1a
16 changed files with 206 additions and 95 deletions
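
For orientation, a minimal sketch of how the new 'aio_threadpool' strategy registered in this commit would be selected. This is illustrative only: the tiny graph is not part of the commit, the environment-variable mechanics are an assumption (that the ALPHA setting reads the environment lazily), and per the commit message the strategy was never fully implemented.

# Hypothetical usage of the experimental strategy added in this commit.
import os

os.environ['ALPHA'] = 'True'  # assumption: settings.ALPHA picks this up on first .get()

import bonobo


def extract():
    yield 'hello'
    yield 'world'


graph = bonobo.Graph(extract, print)  # illustrative graph, not from the commit

if __name__ == '__main__':
    # 'aio_threadpool' is the name registered in STRATEGIES below; defunct, so expect rough edges.
    bonobo.run(graph, strategy='aio_threadpool')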

View File

@@ -1,4 +1,4 @@
-# Generated by Medikit 0.6.3 on 2018-07-28.
+# Generated by Medikit 0.6.3 on 2018-07-29.
 # All changes will be overriden.
 # Edit Projectfile and run “make update” (or “medikit update”) to regenerate.

View File

@@ -43,6 +43,7 @@ python.setup(
 )
 python.add_requirements(
+    'cached-property ~=1.4',
     'fs ~=2.0',
     'graphviz >=0.8,<0.9',
     'jinja2 ~=2.9',

View File

@@ -1,4 +1,5 @@
 import bonobo
+from bonobo.execution.strategies import STRATEGIES, DEFAULT_STRATEGY
 def get_argument_parser(parser=None):
@@ -19,6 +20,14 @@ def get_argument_parser(parser=None):
         help='If set, pretty prints before writing to output file.'
     )
+    parser.add_argument(
+        '--strategy',
+        '-s',
+        type=str,
+        choices=STRATEGIES.keys(),
+        default=DEFAULT_STRATEGY,
+    )
     return parser
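
With the new option in place, the bundled examples can pick any registered strategy from the command line. A minimal sketch of the intended wiring, reusing the same get_argument_parser / parse_args helpers the examples in this commit use; the trivial graph is an illustration, not part of the commit:

import bonobo
from bonobo import examples


def get_graph():
    return bonobo.Graph(lambda: 'hello', print)  # illustrative graph


if __name__ == '__main__':
    parser = examples.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        # options['strategy'] defaults to DEFAULT_STRATEGY ('threadpool'),
        # e.g. run the example with: --strategy naive
        bonobo.run(get_graph(), strategy=options['strategy'])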

View File

@@ -1,3 +1,6 @@
+"""
+"""
 import bonobo
 from bonobo import examples
 from bonobo.contrib.opendatasoft import OpenDataSoftAPI as ODSReader

View File

@@ -15,11 +15,13 @@ and a flat txt file.
 """
 import json
+import sys
 import bonobo
 from bonobo import examples
 from bonobo.contrib.opendatasoft import OpenDataSoftAPI
 from bonobo.examples.datasets.services import get_services
+from bonobo.util.statistics import Timer
 try:
     import pycountry
@@ -66,7 +68,20 @@ if __name__ == '__main__':
     parser = examples.get_argument_parser()
     with bonobo.parse_args(parser) as options:
-        bonobo.run(
-            get_graph(**examples.get_graph_options(options)),
-            services=get_services()
-        )
+        with Timer() as timer:
+            print(
+                'Options:', ' '.join(
+                    '{}={}'.format(k, v)
+                    for k, v in sorted(options.items())
+                )
+            )
+            retval = bonobo.run(
+                get_graph(**examples.get_graph_options(options)),
+                services=get_services(),
+                strategy=options['strategy'],
+            )
+        print('Execution time:', timer)
+        print('Return value:', retval)
+        print('XStatus:', retval.xstatus)
+        if retval.xstatus:
+            sys.exit(retval.xstatus)

View File

@@ -8,18 +8,20 @@ from bonobo.constants import BEGIN, END, EMPTY
 from bonobo.errors import InactiveReadableError
 from bonobo.execution import events
 from bonobo.execution.contexts.base import BaseContext
-from bonobo.execution.contexts.node import NodeExecutionContext
+from bonobo.execution.contexts.node import NodeExecutionContext, AsyncNodeExecutionContext
 from bonobo.execution.contexts.plugin import PluginExecutionContext
 from whistle import EventDispatcher
 logger = logging.getLogger(__name__)
-class GraphExecutionContext(BaseContext):
+class BaseGraphExecutionContext(BaseContext):
     """
-    Stores the actual state of a graph execution, and manages its lifecycle.
+    Stores the actual state of a graph execution, and manages its lifecycle. This is an abstract base class for all
+    graph execution contexts, and a few methods should actually be implemented for the child classes to be useable.
     """
     NodeExecutionContextType = NodeExecutionContext
     PluginExecutionContextType = PluginExecutionContext
@@ -28,23 +30,31 @@ class GraphExecutionContext(BaseContext):
     @property
     def started(self):
         if not len(self.nodes):
-            return super(GraphExecutionContext, self).started
+            return super(BaseGraphExecutionContext, self).started
         return any(node.started for node in self.nodes)
     @property
     def stopped(self):
         if not len(self.nodes):
-            return super(GraphExecutionContext, self).stopped
+            return super(BaseGraphExecutionContext, self).stopped
         return all(node.started and node.stopped for node in self.nodes)
     @property
     def alive(self):
         if not len(self.nodes):
-            return super(GraphExecutionContext, self).alive
+            return super(BaseGraphExecutionContext, self).alive
         return any(node.alive for node in self.nodes)
+    @property
+    def xstatus(self):
+        """
+        UNIX-like exit status, only coherent if the context has stopped.
+        """
+        return max(node.xstatus for node in self.nodes) if len(self.nodes) else 0
     def __init__(self, graph, *, plugins=None, services=None, dispatcher=None):
-        super(GraphExecutionContext, self).__init__(graph)
+        super(BaseGraphExecutionContext, self).__init__(graph)
         self.dispatcher = dispatcher or EventDispatcher()
         self.graph = graph
         self.nodes = [self.create_node_execution_context_for(node) for node in self.graph]
@@ -58,8 +68,8 @@ class GraphExecutionContext(BaseContext):
             outputs = self.graph.outputs_of(i)
             if len(outputs):
                 node_context.outputs = [self[j].input for j in outputs]
-            node_context.input.on_begin = partial(node_context._send, BEGIN, _control=True)
-            node_context.input.on_end = partial(node_context._send, END, _control=True)
+            node_context.input.on_begin = partial(node_context._put, BEGIN, _control=True)
+            node_context.input.on_end = partial(node_context._put, END, _control=True)
             node_context.input.on_finalize = partial(node_context.stop)
     def __getitem__(self, item):
@@ -79,28 +89,32 @@ class GraphExecutionContext(BaseContext):
             plugin = plugin()
         return self.PluginExecutionContextType(plugin, parent=self)
-    def write(self, *messages):
-        """Push a list of messages in the inputs of this graph's inputs, matching the output of special node "BEGIN" in
-        our graph."""
-        for i in self.graph.outputs_of(BEGIN):
-            for message in messages:
-                self[i].write(message)
     def dispatch(self, name):
         self.dispatcher.dispatch(name, events.ExecutionEvent(self))
+    def register_plugins(self):
+        for plugin_context in self.plugins:
+            plugin_context.register()
+    def unregister_plugins(self):
+        for plugin_context in self.plugins:
+            plugin_context.unregister()
+class GraphExecutionContext(BaseGraphExecutionContext):
     def start(self, starter=None):
         super(GraphExecutionContext, self).start()
         self.register_plugins()
         self.dispatch(events.START)
         self.tick(pause=False)
         for node in self.nodes:
             if starter is None:
                 node.start()
             else:
                 starter(node)
         self.dispatch(events.STARTED)
     def tick(self, pause=True):
@@ -108,22 +122,6 @@ class GraphExecutionContext(BaseContext):
         if pause:
             sleep(self.TICK_PERIOD)
-    def loop(self):
-        nodes = set(node for node in self.nodes if node.should_loop)
-        while self.should_loop and len(nodes):
-            self.tick(pause=False)
-            for node in list(nodes):
-                try:
-                    node.step()
-                except Empty:
-                    continue
-                except InactiveReadableError:
-                    nodes.discard(node)
-    def run_until_complete(self):
-        self.write(BEGIN, EMPTY, END)
-        self.loop()
     def stop(self, stopper=None):
         super(GraphExecutionContext, self).stop()
@@ -145,18 +143,37 @@ class GraphExecutionContext(BaseContext):
             node_context.kill()
         self.tick()
-    def register_plugins(self):
-        for plugin_context in self.plugins:
-            plugin_context.register()
-    def unregister_plugins(self):
-        for plugin_context in self.plugins:
-            plugin_context.unregister()
-    @property
-    def xstatus(self):
-        """
-        UNIX-like exit status, only coherent if the context has stopped.
-        """
-        return max(node.xstatus for node in self.nodes) if len(self.nodes) else 0
+    def write(self, *messages):
+        """Push a list of messages in the inputs of this graph's inputs, matching the output of special node "BEGIN" in
+        our graph."""
+        for i in self.graph.outputs_of(BEGIN):
+            for message in messages:
+                self[i].write(message)
+    def loop(self):
+        nodes = set(node for node in self.nodes if node.should_loop)
+        while self.should_loop and len(nodes):
+            self.tick(pause=False)
+            for node in list(nodes):
+                try:
+                    node.step()
+                except Empty:
+                    continue
+                except InactiveReadableError:
+                    nodes.discard(node)
+    def run_until_complete(self):
+        self.write(BEGIN, EMPTY, END)
+        self.loop()
+class AsyncGraphExecutionContext(GraphExecutionContext):
+    NodeExecutionContextType = AsyncNodeExecutionContext
+    def __init__(self, *args, loop, **kwargs):
+        self._event_loop = loop
+        super().__init__(*args, **kwargs)
+    def create_node_execution_context_for(self, node):
+        return self.NodeExecutionContextType(node, parent=self, loop=self._event_loop)
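
The new xstatus property reduces per-node exit statuses with max(), so any failing node drives the graph's overall exit code; this is the value the example above hands to sys.exit. A plain-Python illustration of that aggregation rule, using hypothetical status values:

# Not bonobo API, just the max() semantics of BaseGraphExecutionContext.xstatus.
node_statuses = [0, 0, 70]  # hypothetical per-node exit statuses
graph_xstatus = max(node_statuses) if node_statuses else 0
assert graph_xstatus == 70  # one failing node is enough to fail the whole graph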

View File

@@ -10,7 +10,7 @@ from bonobo.config.processors import ContextCurrifier
 from bonobo.constants import BEGIN, END, TICK_PERIOD
 from bonobo.errors import InactiveReadableError, UnrecoverableError, UnrecoverableTypeError
 from bonobo.execution.contexts.base import BaseContext
-from bonobo.structs.inputs import Input
+from bonobo.structs.inputs import Input, AioInput
 from bonobo.structs.tokens import Token, Flag
 from bonobo.util import get_name, isconfigurabletype, ensure_tuple, deprecated
 from bonobo.util.bags import BagType
@@ -33,6 +33,8 @@ class NodeExecutionContext(BaseContext, WithStatistics):
     """
+    QueueType = Input
     def __init__(self, wrapped, *, parent=None, services=None, _input=None, _outputs=None):
         """
         Node execution context has the responsibility fo storing the state of a transformation during its execution.
@@ -57,7 +59,7 @@ class NodeExecutionContext(BaseContext, WithStatistics):
         self.services = None
         # Input / Output: how the wrapped node will communicate
-        self.input = _input or Input()
+        self.input = _input or self.QueueType()
         self.outputs = _outputs or []
         # Types
@@ -174,10 +176,10 @@ class NodeExecutionContext(BaseContext, WithStatistics):
                     break
                 else:
                     # Push data (in case of an iterator)
-                    self._send(self._cast(input_bag, result))
+                    self._put(self._cast(input_bag, result))
         elif results:
             # Push data (returned value)
-            self._send(self._cast(input_bag, results))
+            self._put(self._cast(input_bag, results))
         else:
             # case with no result, an execution went through anyway, use for stats.
             # self._exec_count += 1
@@ -197,7 +199,7 @@ class NodeExecutionContext(BaseContext, WithStatistics):
         super().stop()
     def send(self, *_output, _input=None):
-        return self._send(self._cast(_input, _output))
+        return self._put(self._cast(_input, _output))
     ### Input type and fields
     @property
@@ -324,7 +326,7 @@ class NodeExecutionContext(BaseContext, WithStatistics):
     def _cast(self, _input, _output):
         """
-        Transforms a pair of input/output into the real slim output.
+        Transforms a pair of input/output into the real slim shoutput.
         :param _input: Bag
         :param _output: mixed
@@ -355,7 +357,7 @@ class NodeExecutionContext(BaseContext, WithStatistics):
         return ensure_tuple(_output, cls=self._output_type)
-    def _send(self, value, _control=False):
+    def _put(self, value, _control=False):
         """
         Sends a message to all of this context's outputs.
@@ -377,6 +379,52 @@ class NodeExecutionContext(BaseContext, WithStatistics):
         return UnboundArguments((), {})
+class AsyncNodeExecutionContext(NodeExecutionContext):
+    QueueType = AioInput
+    def __init__(self, *args, loop, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._event_loop = loop
+    async def _get(self):
+        """
+        Read from the input queue.
+        If Queue raises (like Timeout or Empty), stat won't be changed.
+        """
+        input_bag = await self.input.get()
+        # Store or check input type
+        if self._input_type is None:
+            self._input_type = type(input_bag)
+        elif type(input_bag) != self._input_type:
+            try:
+                if self._input_type == tuple:
+                    input_bag = self._input_type(input_bag)
+                else:
+                    input_bag = self._input_type(*input_bag)
+            except Exception as exc:
+                raise UnrecoverableTypeError(
+                    'Input type changed to incompatible type between calls to {!r}.\nGot {!r} which is not of type {!r}.'.
+                    format(self.wrapped, input_bag, self._input_type)
+                ) from exc
+        # Store or check input length, which is a soft fallback in case we're just using tuples
+        if self._input_length is None:
+            self._input_length = len(input_bag)
+        elif len(input_bag) != self._input_length:
+            raise UnrecoverableTypeError(
+                'Input length changed between calls to {!r}.\nExpected {} but got {}: {!r}.'.format(
+                    self.wrapped, self._input_length, len(input_bag), input_bag
+                )
+            )
+        self.increment('in')  # XXX should that go before type check ?
+        return input_bag
 def isflag(param):
     return isinstance(param, Flag)
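
AsyncNodeExecutionContext only swaps the queue type and adds an awaitable _get; nothing in this commit visibly awaits it, which is presumably why the commit message calls the strategy defunct. A small sketch of the asyncio queue the async context would read from (the surrounding wiring is an assumption, not from the commit; AioInput itself is added further down, in bonobo.structs.inputs):

import asyncio

from bonobo.structs.inputs import AioInput  # plain asyncio.Queue subclass added in this commit


async def demo():
    queue = AioInput()
    await queue.put(('hello',))  # a producer would push bags like this
    print(await queue.get())     # this is what AsyncNodeExecutionContext._get awaits internally


asyncio.run(demo())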

View File

@@ -6,7 +6,8 @@ In the future, the two strategies that would really benefit bonobo are subprocess
 at home if you want to give it a shot.
 """
-from bonobo.execution.strategies.executor import ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy
+from bonobo.execution.strategies.executor import ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy, \
+    AsyncThreadPoolExecutorStrategy
 from bonobo.execution.strategies.naive import NaiveStrategy
 __all__ = [
@@ -17,6 +18,7 @@ STRATEGIES = {
     'naive': NaiveStrategy,
     'processpool': ProcessPoolExecutorStrategy,
     'threadpool': ThreadPoolExecutorStrategy,
+    'aio_threadpool': AsyncThreadPoolExecutorStrategy,
 }
 DEFAULT_STRATEGY = 'threadpool'
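
The registry maps the names accepted by the examples' --strategy flag (and by bonobo.run's strategy argument) to strategy classes; the new entry is importable but gated behind the ALPHA setting checked in the next file. For instance:

from bonobo.execution.strategies import STRATEGIES, DEFAULT_STRATEGY

print(sorted(STRATEGIES))  # ['aio_threadpool', 'naive', 'processpool', 'threadpool']
print(DEFAULT_STRATEGY)    # 'threadpool'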

View File

@@ -1,10 +1,16 @@
+import asyncio
 import functools
 import logging
 import sys
 from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor
+from cached_property import cached_property
+from bonobo import settings
 from bonobo.constants import BEGIN, END
+from bonobo.execution.contexts.graph import AsyncGraphExecutionContext
 from bonobo.execution.strategies.base import Strategy
+from bonobo.util import get_name
 logger = logging.getLogger(__name__)
@@ -73,6 +79,35 @@ class ThreadPoolExecutorStrategy(ExecutorStrategy):
         return self.executor_factory(max_workers=len(graph))
+class AsyncThreadPoolExecutorStrategy(ThreadPoolExecutorStrategy):
+    GraphExecutionContextType = AsyncGraphExecutionContext
+    def __init__(self, GraphExecutionContextType=None):
+        if not settings.ALPHA.get():
+            raise NotImplementedError(
+                '{} is experimental, you need to explicitely activate it using ALPHA=True in system env.'.format(
+                    get_name(self)
+                )
+            )
+        super().__init__(GraphExecutionContextType)
+    @cached_property
+    def loop(self):
+        return asyncio.get_event_loop()
+    def create_graph_execution_context(self, *args, **kwargs):
+        return super(AsyncThreadPoolExecutorStrategy, self).create_graph_execution_context(
+            *args, **kwargs, loop=self.loop
+        )
+    def get_starter(self, executor, futures):
+        return functools.partial(
+            self.loop.run_in_executor,
+            executor,
+            super(AsyncThreadPoolExecutorStrategy, self).get_starter(executor, futures),
+        )
 class ProcessPoolExecutorStrategy(ExecutorStrategy):
     executor_factory = ProcessPoolExecutor
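
The constructor guard means the class cannot even be instantiated unless the ALPHA setting is truthy. A quick sketch of the failure path with default settings, assuming ALPHA is not set in the environment:

from bonobo.execution.strategies.executor import AsyncThreadPoolExecutorStrategy

try:
    AsyncThreadPoolExecutorStrategy()
except NotImplementedError as exc:
    print(exc)  # "... is experimental, you need to explicitely activate it using ALPHA=True in system env."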

View File

@@ -1,4 +1,3 @@
-from bonobo.constants import BEGIN, END
 from bonobo.execution.strategies.base import Strategy
@@ -6,20 +5,6 @@ class NaiveStrategy(Strategy):
     # TODO: how to run plugins in "naive" mode ?
     def execute(self, graph, **kwargs):
-        context = self.create_graph_execution_context(graph, **kwargs)
-        context.write(BEGIN, (), END)
-        # start
-        context.start()
-        # loop
-        nodes = list(context.nodes)
-        while len(nodes):
-            for node in nodes:
-                node.loop()
-            nodes = list(node for node in nodes if node.alive)
-        # stop
-        context.stop()
+        with self.create_graph_execution_context(graph, **kwargs) as context:
+            context.run_until_complete()
         return context
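
The naive strategy now delegates the whole begin/loop/stop dance to the context's run_until_complete(), which writes BEGIN/EMPTY/END and steps nodes until they go inactive. A sketch of driving a context the same way by hand, assuming BaseContext provides the start/stop context-manager protocol this rewrite relies on; the graph is illustrative only:

import bonobo
from bonobo.execution.contexts.graph import GraphExecutionContext

graph = bonobo.Graph(lambda: 'hello', print)  # illustrative graph, not from the commit

with GraphExecutionContext(graph) as context:
    context.run_until_complete()  # write(BEGIN, EMPTY, END), then loop() until nodes are done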

View File

@@ -84,6 +84,9 @@ DEBUG = Setting('DEBUG', formatter=to_bool, default=False)
 # Profile mode.
 PROFILE = Setting('PROFILE', formatter=to_bool, default=False)
+# Alpha mode.
+ALPHA = Setting('ALPHA', formatter=to_bool, default=False)
 # Quiet mode.
 QUIET = Setting('QUIET', formatter=to_bool, default=False)

View File

@@ -16,6 +16,7 @@
 from abc import ABCMeta, abstractmethod
 from queue import Queue
+from asyncio.queues import Queue as AioQueue
 from bonobo.constants import BEGIN, END
 from bonobo.errors import AbstractError, InactiveReadableError, InactiveWritableError
@@ -115,3 +116,7 @@ class Input(Queue, Readable, Writable):
     @property
     def alive(self):
         return self._runlevel > 0
+class AioInput(AioQueue):
+    pass

View File

@@ -1,18 +1,3 @@
-# -*- coding: utf-8 -*-
-#
-# copyright 2012-2014 romain dorgueil
-#
-# licensed under the apache license, version 2.0 (the "license");
-# you may not use this file except in compliance with the license.
-# you may obtain a copy of the license at
-#
-# http://www.apache.org/licenses/license-2.0
-#
-# unless required by applicable law or agreed to in writing, software
-# distributed under the license is distributed on an "as is" basis,
-# without warranties or conditions of any kind, either express or implied.
-# see the license for the specific language governing permissions and
-# limitations under the license.
 import time
@@ -39,6 +24,7 @@ class Timer:
     def __enter__(self):
         self.__start = time.time()
+        return self
     def __exit__(self, type=None, value=None, traceback=None):
         # Error handling here
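
The one-line "return self" is what makes "with Timer() as timer:" (as used in the example further up) bind the timer instance instead of None; without it, printing the elapsed time after the block would just print None. A minimal usage sketch, assuming Timer's string form reports the measured duration:

import time

from bonobo.util.statistics import Timer

with Timer() as timer:
    time.sleep(0.1)  # stand-in for real work

print('Elapsed:', timer)  # assumes Timer.__str__ formats the elapsed time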

View File

@@ -20,14 +20,14 @@ jinja2==2.10
 markupsafe==1.0
 more-itertools==4.2.0
 packaging==17.1
-pluggy==0.6.0
+pluggy==0.7.1
 poyo==0.4.1
 py==1.5.4
 pygments==2.2.0
 pyparsing==2.2.0
 pytest-cov==2.5.1
 pytest-timeout==1.3.1
-pytest==3.6.3
+pytest==3.6.4
 python-dateutil==2.7.3
 pytz==2018.5
 requests==2.19.1

View File

@@ -1,5 +1,6 @@
 -e .
 appdirs==1.4.3
+cached-property==1.4.3
 certifi==2018.4.16
 chardet==3.0.4
 colorama==0.3.9

View File

@@ -1,4 +1,4 @@
-# Generated by Medikit 0.6.3 on 2018-07-28.
+# Generated by Medikit 0.6.3 on 2018-07-29.
 # All changes will be overriden.
 # Edit Projectfile and run “make update” (or “medikit update”) to regenerate.
@@ -61,8 +61,9 @@ setup(
     packages=find_packages(exclude=['ez_setup', 'example', 'test']),
     include_package_data=True,
     install_requires=[
-        'fs (~= 2.0)', 'graphviz (>= 0.8, < 0.9)', 'jinja2 (~= 2.9)', 'mondrian (~= 0.7)', 'packaging (~= 17.0)',
-        'psutil (~= 5.4)', 'python-slugify (~= 1.2.0)', 'requests (~= 2.0)', 'stevedore (~= 1.27)', 'whistle (~= 1.0)'
+        'cached-property (~= 1.4)', 'fs (~= 2.0)', 'graphviz (>= 0.8, < 0.9)', 'jinja2 (~= 2.9)', 'mondrian (~= 0.7)',
+        'packaging (~= 17.0)', 'psutil (~= 5.4)', 'python-slugify (~= 1.2.0)', 'requests (~= 2.0)',
+        'stevedore (~= 1.27)', 'whistle (~= 1.0)'
     ],
     extras_require={
         'dev': [