implementing FileReader/FileWriter (#2)

This commit is contained in:
Romain Dorgueil
2016-12-28 10:54:50 +01:00
parent b409391666
commit 2b8216ec0d
12 changed files with 207 additions and 40 deletions

View File

@ -38,7 +38,7 @@ __all__ = [
'Bag',
'FileWriter',
'Graph',
'JsonFileWriter',
'JsonWriter',
'NOT_MODIFIED',
'NaiveStrategy',
'ProcessPoolExecutorStrategy',

View File

@ -1,7 +1,7 @@
from bonobo import FileWriter, JsonFileWriter
from bonobo import FileWriter, JsonWriter
to_file = FileWriter
to_json = JsonFileWriter
to_json = JsonWriter
__all__ = [
'to_json',

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright 2012-2014 Romain Dorgueil
# Copyright 2012-2017 Romain Dorgueil
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

View File

@ -3,6 +3,16 @@ from IPython.core.display import display
from bonobo.core.plugins import Plugin
from bonobo.ext.jupyter.widget import BonoboWidget
# Optional-dependency guard: a failed import is logged (not re-raised), so module import continues.
# NOTE(review): this guards `selenium`, yet nothing visible in this Jupyter plugin hunk uses selenium --
# it looks copy-pasted from the selenium extension; confirm which package should be checked here.
try:
import selenium
except ImportError as e:
import logging
logging.exception(
'You must install selenium to use the bonobo selenium extension. Easiest way is to install the '
'optional "selenium" dependencies with «pip install bonobo[selenium]», but you can also install a '
'specific version by yourself.')
class JupyterOutputPlugin(Plugin):
def initialize(self, context):

View File

@ -1,4 +1,12 @@
from selenium import webdriver
try:
    # Import the submodule explicitly: a bare ``import selenium`` binds the package but does not load
    # ``selenium.webdriver``, which this module accesses as ``selenium.webdriver.<name>``.
    import selenium.webdriver
except ImportError:
    # Log instead of raising so that importing this module does not fail outright when the optional
    # dependency is missing.
    import logging
    logging.exception(
        'You must install selenium to use the bonobo selenium extension. Easiest way is to install the '
        'optional "selenium" dependencies with «pip install bonobo[selenium]», but you can also install a '
        'specific version by yourself.')
from bonobo import service
@ -6,7 +14,7 @@ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/601.4.
def create_profile(use_tor=False):
_profile = webdriver.FirefoxProfile()
_profile = selenium.webdriver.FirefoxProfile()
_profile.set_preference("toolkit.startup.max_resumed_crashes", "-1")
if use_tor:
@ -22,7 +30,7 @@ def create_profile(use_tor=False):
def create_browser(profile):
_browser = webdriver.Firefox(profile)
_browser = selenium.webdriver.Firefox(profile)
_browser.implicitly_wait(10)
_browser.set_page_load_timeout(10)
return _browser

View File

@ -1,9 +1,11 @@
""" Readers and writers for common file formats. """
from .file import FileWriter
from .json import JsonFileWriter
from .file import FileHandler, FileReader, FileWriter
from .json import JsonWriter
__all__ = [
'FileHandler',
'FileReader',
'FileWriter',
'JsonFileWriter',
'JsonWriter',
]

View File

@ -1,35 +1,118 @@
from functools import partial
from bonobo.util.lifecycle import with_context
__all__ = ['FileWriter', ]
__all__ = [
'FileHandler',
'FileReader',
'FileWriter',
]
@with_context
class FileWriter:
# XXX TODO implement @with_context like this ? Pros and cons ?
class Meta:
contextual = True
class FileHandler:
"""
Abstract component factory for file-related components.
"""
mode = None
def __init__(self, path_or_buf, eol='\n'):
self.path_or_buf = path_or_buf
self.eol = eol
def open(self):
return open(self.path_or_buf, self.mode)
def close(self, fp):
"""
:param file fp:
"""
fp.close()
def initialize(self, ctx):
""" todo add lock file ? optional maybe ? """
assert not hasattr(ctx, 'fp'), 'One at a time, baby.'
ctx.fp = open(self.path_or_buf, 'w+')
ctx.first = True
"""
Initialize a
:param ctx:
:return:
"""
assert not hasattr(ctx, 'file'), 'A file pointer is already in the context... I do not know what to say...'
ctx.file = self.open()
def write(self, fp, line, prefix=''):
fp.write(prefix + line)
def __call__(self, ctx, *args):
"""
:param ComponentExecutionContext ctx:
:param mixed row:
"""
result = self.handle(ctx, *args)
try:
yield from result
except TypeError:
return
def __call__(self, ctx, row):
if ctx.first:
prefix, ctx.first = '', False
else:
prefix = self.eol
self.write(ctx.fp, row, prefix=prefix)
def handle(self, ctx, *args):
raise NotImplementedError('Abstract.')
def finalize(self, ctx):
ctx.fp.close()
del ctx.fp, ctx.first
self.close(ctx.file)
del ctx.file
class FileReader(FileHandler):
    """
    Component factory for file-like readers.

    On its own, it can be used to read a file and yield one row per line, trimming the "eol" separator at the end if
    present. Extending it is usually the right way to create more specific file readers (like json, csv, etc.)
    """

    mode = 'r'

    def handle(self, ctx, *args):
        """
        Read the file pointed by `ctx.file` and yield one row per line.

        Exactly one trailing `eol` is removed from each line when present; lines that do not end with `eol` are
        yielded unchanged.

        :param ctx: context whose `file` attribute is iterated line by line.
        :param args: must be empty, a reader does not take any input row.
        """
        # Kept as an assertion (AssertionError) on purpose: the generator body runs inside the caller's
        # ``yield from``, and a TypeError raised from here would be indistinguishable from "nothing to yield".
        assert not args, 'FileReader does not expect any input.'

        eol = self.eol
        for line in ctx.file:
            # ``str.rstrip(eol)`` would treat a multi-character eol as a *set* of characters and strip any
            # trailing run of them; remove the separator itself, once.
            if eol and line.endswith(eol):
                line = line[:-len(eol)]
            yield line
class FileWriter(FileHandler):
    """
    Component factory for file or file-like writers.

    Used as is, it writes each incoming row on its own line of the target file. More specialised writers (json,
    csv, ...) are expected to subclass it.
    """

    mode = 'w+'

    def initialize(self, ctx):
        """
        Run the parent initialization, then start the per-context row counter at zero.

        :param ctx: context that receives the `line` counter.
        """
        super().initialize(ctx)
        ctx.line = 0

    def handle(self, ctx, row):
        """
        Write `row` to `ctx.file`, preceded by `eol` for every row but the first one.

        :param ctx: context holding the `file` object and the `line` counter.
        :param str row: text to write.
        """
        separator = self.eol if ctx.line else ''
        self.write(ctx.file, separator + row)
        ctx.line += 1

    def write(self, fp, line):
        """
        Write `line` to `fp` and return whatever `fp.write` returns.

        :param file fp:
        :param str line:
        """
        return fp.write(line)

    def finalize(self, ctx):
        """
        Drop the row counter from the context, then let the parent finalize.

        :param ctx: context to clean up.
        """
        del ctx.line
        super().finalize(ctx)

View File

@ -3,11 +3,11 @@ import json
from .file import FileWriter
from bonobo.util.lifecycle import with_context
__all__ = ['JsonFileWriter', ]
__all__ = ['JsonWriter', ]
@with_context
class JsonFileWriter(FileWriter):
class JsonWriter(FileWriter):
def __init__(self, path_or_buf):
super().__init__(path_or_buf, eol=',\n')

View File

@ -1,3 +0,0 @@
bonobo
======

View File

@ -2,7 +2,7 @@ import json
from blessings import Terminal
from bonobo import console_run, tee, JsonFileWriter
from bonobo import console_run, tee, JsonWriter
from bonobo.ext.opendatasoft import from_opendatasoft_api
try:
@ -58,5 +58,5 @@ if __name__ == '__main__':
normalize,
filter_france,
tee(display),
JsonFileWriter('fablabs.json'),
JsonWriter('fablabs.json'),
output=True, )

67
tests/io/test_file.py Normal file
View File

@ -0,0 +1,67 @@
import pytest
from mock import MagicMock
from bonobo import FileWriter, Bag, FileReader
from bonobo.core.contexts import ComponentExecutionContext
from bonobo.util.tokens import BEGIN, END
class CapturingComponentExecutionContext(ComponentExecutionContext):
    """Execution context whose ``send`` is replaced by a mock, so tests can inspect what was sent."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # One mock per instance: a class-level MagicMock would be shared by every context created in the
        # test session, so recorded calls would leak from one test into the next.
        self.send = MagicMock()
@pytest.mark.parametrize(
    'lines,output',
    [
        (('ACME', ), 'ACME'),  # one line...
        (('Foo', 'Bar', 'Baz'), 'Foo\nBar\nBaz'),  # more than one line...
    ])
def test_file_writer_in_context(tmpdir, lines, output):
    """Feed rows to a FileWriter through an execution context and check the resulting file content."""
    target = tmpdir.join('output.txt')
    context = ComponentExecutionContext(FileWriter(str(target)), None)

    context.initialize()
    bags = [Bag(line) for line in lines]
    context.recv(BEGIN, *bags, END)
    # One step per queued row.
    for _ in lines:
        context.step()
    context.finalize()

    assert target.read() == output

    # After finalization the context must not expose the file anymore.
    with pytest.raises(AttributeError):
        context.file
def test_file_writer_out_of_context(tmpdir):
    """Use the writer's open()/close() pair directly, without any execution context."""
    target = tmpdir.join('output.txt')
    writer = FileWriter(str(target))

    handle = writer.open()
    handle.write('Yosh!')
    writer.close(handle)

    assert target.read() == 'Yosh!'
def test_file_reader_in_context(tmpdir):
    """Run a FileReader through a capturing context and check that one row per line is sent."""
    source = tmpdir.join('input.txt')
    source.write('Hello\nWorld\n')

    context = CapturingComponentExecutionContext(FileReader(str(source)), None)
    context.initialize()
    context.recv(BEGIN, Bag(), END)
    context.step()
    context.finalize()

    assert len(context.send.mock_calls) == 2

    first_call = context.send.call_args_list[0]
    second_call = context.send.call_args_list[1]

    # Each send() call carries exactly one positional argument and no keyword argument.
    for call_args, call_kwargs in (first_call, second_call):
        assert len(call_args) == 1 and not len(call_kwargs)

    first_args, _ = first_call
    second_args, _ = second_call
    assert first_args[0].args[0] == 'Hello'
    assert second_args[0].args[0] == 'World'

View File

@ -1,13 +1,13 @@
import pytest
from bonobo import Bag, JsonFileWriter
from bonobo import Bag, JsonWriter
from bonobo.core.contexts import ComponentExecutionContext
from bonobo.util.tokens import BEGIN, END
def test_write_json_to_file(tmpdir):
file = tmpdir.join('output.json')
json_writer = JsonFileWriter(str(file))
json_writer = JsonWriter(str(file))
context = ComponentExecutionContext(json_writer, None)
context.initialize()
@ -28,7 +28,7 @@ def test_write_json_to_file(tmpdir):
def test_write_json_without_initializer_should_not_work(tmpdir):
file = tmpdir.join('output.json')
json_writer = JsonFileWriter(str(file))
json_writer = JsonWriter(str(file))
context = ComponentExecutionContext(json_writer, None)
with pytest.raises(AttributeError):