From 2b8216ec0d497d2c4c630a6d5ca5bad4e5380eca Mon Sep 17 00:00:00 2001 From: Romain Dorgueil Date: Wed, 28 Dec 2016 10:54:50 +0100 Subject: [PATCH] implementing FileReader/FileWriter (#2) --- bonobo/__init__.py | 2 +- bonobo/compat/pandas.py | 4 +- bonobo/ext/console/plugin.py | 2 +- bonobo/ext/jupyter/plugin.py | 10 +++ bonobo/ext/selenium.py | 14 +++- bonobo/io/__init__.py | 8 ++- bonobo/io/file.py | 123 +++++++++++++++++++++++++++++------ bonobo/io/json.py | 4 +- docs/reference/modules.rst | 3 - examples/opendata_fablabs.py | 4 +- tests/io/test_file.py | 67 +++++++++++++++++++ tests/io/test_json.py | 6 +- 12 files changed, 207 insertions(+), 40 deletions(-) delete mode 100644 docs/reference/modules.rst create mode 100644 tests/io/test_file.py diff --git a/bonobo/__init__.py b/bonobo/__init__.py index b66e2d8..03ca92a 100644 --- a/bonobo/__init__.py +++ b/bonobo/__init__.py @@ -38,7 +38,7 @@ __all__ = [ 'Bag', 'FileWriter', 'Graph', - 'JsonFileWriter', + 'JsonWriter', 'NOT_MODIFIED', 'NaiveStrategy', 'ProcessPoolExecutorStrategy', diff --git a/bonobo/compat/pandas.py b/bonobo/compat/pandas.py index aab0dbd..153ba36 100644 --- a/bonobo/compat/pandas.py +++ b/bonobo/compat/pandas.py @@ -1,7 +1,7 @@ -from bonobo import FileWriter, JsonFileWriter +from bonobo import FileWriter, JsonWriter to_file = FileWriter -to_json = JsonFileWriter +to_json = JsonWriter __all__ = [ 'to_json', diff --git a/bonobo/ext/console/plugin.py b/bonobo/ext/console/plugin.py index 4c5e8fb..e23beb7 100644 --- a/bonobo/ext/console/plugin.py +++ b/bonobo/ext/console/plugin.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2012-2014 Romain Dorgueil +# Copyright 2012-2017 Romain Dorgueil # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/bonobo/ext/jupyter/plugin.py b/bonobo/ext/jupyter/plugin.py index 6e583cc..c9acdae 100644 --- a/bonobo/ext/jupyter/plugin.py +++ b/bonobo/ext/jupyter/plugin.py @@ -3,6 +3,16 @@ from IPython.core.display import display from bonobo.core.plugins import Plugin from bonobo.ext.jupyter.widget import BonoboWidget +try: + import selenium +except ImportError as e: + import logging + + logging.exception( + 'You must install selenium to use the bonobo selenium extension. Easiest way is to install the ' + 'optional "selenium" dependencies with «pip install bonobo[selenium]», but you can also install a ' + 'specific version by yourself.') + class JupyterOutputPlugin(Plugin): def initialize(self, context): diff --git a/bonobo/ext/selenium.py b/bonobo/ext/selenium.py index 8360227..8603767 100644 --- a/bonobo/ext/selenium.py +++ b/bonobo/ext/selenium.py @@ -1,4 +1,12 @@ -from selenium import webdriver +try: + import selenium +except ImportError as e: + import logging + + logging.exception( + 'You must install selenium to use the bonobo selenium extension. Easiest way is to install the ' + 'optional "selenium" dependencies with «pip install bonobo[selenium]», but you can also install a ' + 'specific version by yourself.') from bonobo import service @@ -6,7 +14,7 @@ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/601.4. def create_profile(use_tor=False): - _profile = webdriver.FirefoxProfile() + _profile = selenium.webdriver.FirefoxProfile() _profile.set_preference("toolkit.startup.max_resumed_crashes", "-1") if use_tor: @@ -22,7 +30,7 @@ def create_profile(use_tor=False): def create_browser(profile): - _browser = webdriver.Firefox(profile) + _browser = selenium.webdriver.Firefox(profile) _browser.implicitly_wait(10) _browser.set_page_load_timeout(10) return _browser diff --git a/bonobo/io/__init__.py b/bonobo/io/__init__.py index ca10d08..22229d9 100644 --- a/bonobo/io/__init__.py +++ b/bonobo/io/__init__.py @@ -1,9 +1,11 @@ """ Readers and writers for common file formats. """ -from .file import FileWriter -from .json import JsonFileWriter +from .file import FileHandler, FileReader, FileWriter +from .json import JsonWriter __all__ = [ + 'FileHandler', + 'FileReader', 'FileWriter', - 'JsonFileWriter', + 'JsonWriter', ] diff --git a/bonobo/io/file.py b/bonobo/io/file.py index b30e515..fba076a 100644 --- a/bonobo/io/file.py +++ b/bonobo/io/file.py @@ -1,35 +1,118 @@ +from functools import partial + from bonobo.util.lifecycle import with_context -__all__ = ['FileWriter', ] +__all__ = [ + 'FileHandler', + 'FileReader', + 'FileWriter', +] @with_context -class FileWriter: - # XXX TODO implement @with_context like this ? Pros and cons ? - class Meta: - contextual = True +class FileHandler: + """ + Abstract component factory for file-related components. + + """ + + mode = None def __init__(self, path_or_buf, eol='\n'): self.path_or_buf = path_or_buf self.eol = eol + def open(self): + return open(self.path_or_buf, self.mode) + + def close(self, fp): + """ + :param file fp: + """ + fp.close() + def initialize(self, ctx): - """ todo add lock file ? optional maybe ? """ - assert not hasattr(ctx, 'fp'), 'One at a time, baby.' - ctx.fp = open(self.path_or_buf, 'w+') - ctx.first = True + """ + Initialize a + :param ctx: + :return: + """ + assert not hasattr(ctx, 'file'), 'A file pointer is already in the context... I do not know what to say...' + ctx.file = self.open() - def write(self, fp, line, prefix=''): - fp.write(prefix + line) + def __call__(self, ctx, *args): + """ + :param ComponentExecutionContext ctx: + :param mixed row: + """ + result = self.handle(ctx, *args) + try: + yield from result + except TypeError: + return - def __call__(self, ctx, row): - if ctx.first: - prefix, ctx.first = '', False - else: - prefix = self.eol - - self.write(ctx.fp, row, prefix=prefix) + def handle(self, ctx, *args): + raise NotImplementedError('Abstract.') def finalize(self, ctx): - ctx.fp.close() - del ctx.fp, ctx.first + self.close(ctx.file) + del ctx.file + + +class FileReader(FileHandler): + """ + Component factory for file-like readers. + + On its own, it can be used to read a file and yield one row per line, trimming the "eol" character at the end if + present. Extending it is usually the right way to create more specific file readers (like json, csv, etc.) + + """ + + mode = 'r' + + def handle(self, ctx, *args): + """ + Write a row on the next line of file pointed by `ctx.file`. + Prefix is used for newlines. + + :param ctx: + :param row: + """ + assert not len(args) + for line in ctx.file: + yield line.rstrip(self.eol) + + +class FileWriter(FileHandler): + """ + Component factory for file or file-like writers. + + On its own, it can be used to write in a file one line per row that comes into this component. Extending it is + usually the right way to create more specific file writers (like json, csv, etc.) + + """ + + mode = 'w+' + + def initialize(self, ctx): + super().initialize(ctx) + ctx.line = 0 + + def handle(self, ctx, row): + """ + Write a row on the next line of file pointed by fp. Prefix is used for newlines. + + :param file fp: + :param str row: + :param str prefix: + """ + + self.write(ctx.file, (self.eol if ctx.line else '') + row) + ctx.line += 1 + + def write(self, fp, line): + return fp.write(line) + + def finalize(self, ctx): + del ctx.line + super().finalize(ctx) diff --git a/bonobo/io/json.py b/bonobo/io/json.py index 98712a7..156d385 100644 --- a/bonobo/io/json.py +++ b/bonobo/io/json.py @@ -3,11 +3,11 @@ import json from .file import FileWriter from bonobo.util.lifecycle import with_context -__all__ = ['JsonFileWriter', ] +__all__ = ['JsonWriter', ] @with_context -class JsonFileWriter(FileWriter): +class JsonWriter(FileWriter): def __init__(self, path_or_buf): super().__init__(path_or_buf, eol=',\n') diff --git a/docs/reference/modules.rst b/docs/reference/modules.rst deleted file mode 100644 index e29cd78..0000000 --- a/docs/reference/modules.rst +++ /dev/null @@ -1,3 +0,0 @@ -bonobo -====== - diff --git a/examples/opendata_fablabs.py b/examples/opendata_fablabs.py index 72adc9c..b8a87d8 100644 --- a/examples/opendata_fablabs.py +++ b/examples/opendata_fablabs.py @@ -2,7 +2,7 @@ import json from blessings import Terminal -from bonobo import console_run, tee, JsonFileWriter +from bonobo import console_run, tee, JsonWriter from bonobo.ext.opendatasoft import from_opendatasoft_api try: @@ -58,5 +58,5 @@ if __name__ == '__main__': normalize, filter_france, tee(display), - JsonFileWriter('fablabs.json'), + JsonWriter('fablabs.json'), output=True, ) diff --git a/tests/io/test_file.py b/tests/io/test_file.py new file mode 100644 index 0000000..34e7119 --- /dev/null +++ b/tests/io/test_file.py @@ -0,0 +1,67 @@ +import pytest +from mock import MagicMock + +from bonobo import FileWriter, Bag, FileReader +from bonobo.core.contexts import ComponentExecutionContext +from bonobo.util.tokens import BEGIN, END + + +class CapturingComponentExecutionContext(ComponentExecutionContext): + send = MagicMock() + + +@pytest.mark.parametrize( + 'lines,output', + [ + (('ACME', ), 'ACME'), # one line... + (('Foo', 'Bar', 'Baz'), 'Foo\nBar\nBaz'), # more than one line... + ]) +def test_file_writer_in_context(tmpdir, lines, output): + file = tmpdir.join('output.txt') + + writer = FileWriter(str(file)) + context = ComponentExecutionContext(writer, None) + + context.initialize() + context.recv(BEGIN, *map(Bag, lines), END) + for i in range(len(lines)): + context.step() + context.finalize() + + assert file.read() == output + + with pytest.raises(AttributeError): + getattr(context, 'file') + + +def test_file_writer_out_of_context(tmpdir): + file = tmpdir.join('output.txt') + writer = FileWriter(str(file)) + fp = writer.open() + fp.write('Yosh!') + writer.close(fp) + + assert file.read() == 'Yosh!' + + +def test_file_reader_in_context(tmpdir): + file = tmpdir.join('input.txt') + file.write('Hello\nWorld\n') + + reader = FileReader(str(file)) + context = CapturingComponentExecutionContext(reader, None) + + context.initialize() + context.recv(BEGIN, Bag(), END) + context.step() + context.finalize() + + assert len(context.send.mock_calls) == 2 + + args0, kwargs0 = context.send.call_args_list[0] + assert len(args0) == 1 and not len(kwargs0) + args1, kwargs1 = context.send.call_args_list[1] + assert len(args1) == 1 and not len(kwargs1) + + assert args0[0].args[0] == 'Hello' + assert args1[0].args[0] == 'World' diff --git a/tests/io/test_json.py b/tests/io/test_json.py index 15cc91d..547f435 100644 --- a/tests/io/test_json.py +++ b/tests/io/test_json.py @@ -1,13 +1,13 @@ import pytest -from bonobo import Bag, JsonFileWriter +from bonobo import Bag, JsonWriter from bonobo.core.contexts import ComponentExecutionContext from bonobo.util.tokens import BEGIN, END def test_write_json_to_file(tmpdir): file = tmpdir.join('output.json') - json_writer = JsonFileWriter(str(file)) + json_writer = JsonWriter(str(file)) context = ComponentExecutionContext(json_writer, None) context.initialize() @@ -28,7 +28,7 @@ def test_write_json_to_file(tmpdir): def test_write_json_without_initializer_should_not_work(tmpdir): file = tmpdir.join('output.json') - json_writer = JsonFileWriter(str(file)) + json_writer = JsonWriter(str(file)) context = ComponentExecutionContext(json_writer, None) with pytest.raises(AttributeError):