implementing FileReader/FileWriter (#2)

This commit is contained in:
Romain Dorgueil
2016-12-28 10:54:50 +01:00
parent b409391666
commit 2b8216ec0d
12 changed files with 207 additions and 40 deletions

View File

@ -38,7 +38,7 @@ __all__ = [
'Bag', 'Bag',
'FileWriter', 'FileWriter',
'Graph', 'Graph',
'JsonFileWriter', 'JsonWriter',
'NOT_MODIFIED', 'NOT_MODIFIED',
'NaiveStrategy', 'NaiveStrategy',
'ProcessPoolExecutorStrategy', 'ProcessPoolExecutorStrategy',

View File

@ -1,7 +1,7 @@
from bonobo import FileWriter, JsonFileWriter from bonobo import FileWriter, JsonWriter
to_file = FileWriter to_file = FileWriter
to_json = JsonFileWriter to_json = JsonWriter
__all__ = [ __all__ = [
'to_json', 'to_json',

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright 2012-2014 Romain Dorgueil # Copyright 2012-2017 Romain Dorgueil
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.

View File

@ -3,6 +3,16 @@ from IPython.core.display import display
from bonobo.core.plugins import Plugin from bonobo.core.plugins import Plugin
from bonobo.ext.jupyter.widget import BonoboWidget from bonobo.ext.jupyter.widget import BonoboWidget
# Optional dependency guard: a missing selenium package is reported through the logging module instead of
# letting the ImportError propagate out of this module.
# NOTE(review): the surrounding hunk imports IPython and bonobo.ext.jupyter, yet the guard and its message are
# about selenium -- confirm this is the dependency that was meant to be checked here.
try:
    import selenium
except ImportError:
    import logging

    logging.exception(
        'You must install selenium to use the bonobo selenium extension. Easiest way is to install the '
        'optional "selenium" dependencies with «pip install bonobo[selenium]», but you can also install a '
        'specific version by yourself.'
    )
class JupyterOutputPlugin(Plugin): class JupyterOutputPlugin(Plugin):
def initialize(self, context): def initialize(self, context):

View File

@ -1,4 +1,12 @@
from selenium import webdriver try:
import selenium
except ImportError as e:
import logging
logging.exception(
'You must install selenium to use the bonobo selenium extension. Easiest way is to install the '
'optional "selenium" dependencies with «pip install bonobo[selenium]», but you can also install a '
'specific version by yourself.')
from bonobo import service from bonobo import service
@ -6,7 +14,7 @@ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/601.4.
def create_profile(use_tor=False): def create_profile(use_tor=False):
_profile = webdriver.FirefoxProfile() _profile = selenium.webdriver.FirefoxProfile()
_profile.set_preference("toolkit.startup.max_resumed_crashes", "-1") _profile.set_preference("toolkit.startup.max_resumed_crashes", "-1")
if use_tor: if use_tor:
@ -22,7 +30,7 @@ def create_profile(use_tor=False):
def create_browser(profile): def create_browser(profile):
_browser = webdriver.Firefox(profile) _browser = selenium.webdriver.Firefox(profile)
_browser.implicitly_wait(10) _browser.implicitly_wait(10)
_browser.set_page_load_timeout(10) _browser.set_page_load_timeout(10)
return _browser return _browser

View File

@ -1,9 +1,11 @@
""" Readers and writers for common file formats. """ """ Readers and writers for common file formats. """
from .file import FileWriter from .file import FileHandler, FileReader, FileWriter
from .json import JsonFileWriter from .json import JsonWriter
__all__ = [ __all__ = [
'FileHandler',
'FileReader',
'FileWriter', 'FileWriter',
'JsonFileWriter', 'JsonWriter',
] ]

View File

@ -1,35 +1,118 @@
from functools import partial
from bonobo.util.lifecycle import with_context from bonobo.util.lifecycle import with_context
__all__ = ['FileWriter', ] __all__ = [
'FileHandler',
'FileReader',
'FileWriter',
]
@with_context @with_context
class FileWriter: class FileHandler:
# XXX TODO implement @with_context like this ? Pros and cons ? """
class Meta: Abstract component factory for file-related components.
contextual = True
"""
mode = None
def __init__(self, path_or_buf, eol='\n'): def __init__(self, path_or_buf, eol='\n'):
self.path_or_buf = path_or_buf self.path_or_buf = path_or_buf
self.eol = eol self.eol = eol
def open(self):
return open(self.path_or_buf, self.mode)
def close(self, fp):
"""
:param file fp:
"""
fp.close()
def initialize(self, ctx): def initialize(self, ctx):
""" todo add lock file ? optional maybe ? """ """
assert not hasattr(ctx, 'fp'), 'One at a time, baby.' Initialize a
ctx.fp = open(self.path_or_buf, 'w+') :param ctx:
ctx.first = True :return:
"""
assert not hasattr(ctx, 'file'), 'A file pointer is already in the context... I do not know what to say...'
ctx.file = self.open()
def write(self, fp, line, prefix=''): def __call__(self, ctx, *args):
fp.write(prefix + line) """
:param ComponentExecutionContext ctx:
:param mixed row:
"""
result = self.handle(ctx, *args)
try:
yield from result
except TypeError:
return
def __call__(self, ctx, row): def handle(self, ctx, *args):
if ctx.first: raise NotImplementedError('Abstract.')
prefix, ctx.first = '', False
else:
prefix = self.eol
self.write(ctx.fp, row, prefix=prefix)
def finalize(self, ctx): def finalize(self, ctx):
ctx.fp.close() self.close(ctx.file)
del ctx.fp, ctx.first del ctx.file
class FileReader(FileHandler):
    """
    Component factory for file-like readers.

    On its own, it can be used to read a file and yield one row per line, trimming the "eol" sequence at the end if
    present. Extending it is usually the right way to create more specific file readers (like json, csv, etc.)

    """

    mode = 'r'

    def handle(self, ctx, *args):
        """
        Iterate over `ctx.file` and yield each line without its trailing `eol` sequence.

        :param ctx: context whose `file` attribute is iterated line by line.
        :param args: must be empty, the reader does not take any input row.
        """
        assert not len(args)

        eol = self.eol
        for line in ctx.file:
            # Remove the terminator as one whole suffix, at most once. str.rstrip() interprets its argument as a
            # set of characters, so with a multi-character eol such as ',\n' it would also eat legitimate
            # trailing data (the line 'a,,\n' would come out as 'a' instead of 'a,').
            if eol and line.endswith(eol):
                line = line[:-len(eol)]
            yield line
class FileWriter(FileHandler):
    """
    Component factory for file or file-like writers.

    Used as is, it writes every incoming row on its own line of the target file. More specific writers (json, csv,
    etc.) are usually built by extending it.

    """

    mode = 'w+'

    def initialize(self, ctx):
        """
        Run the parent initialization, then start the written-lines counter (`ctx.line`) at zero.

        :param ctx:
        """
        super().initialize(ctx)
        ctx.line = 0

    def handle(self, ctx, row):
        """
        Write `row` to `ctx.file`, preceded by `self.eol` unless no line was written yet in this context.

        :param ctx: context carrying the `file` and `line` attributes.
        :param str row:
        """
        target = ctx.file
        separator = self.eol if ctx.line else ''
        self.write(target, separator + row)
        ctx.line += 1

    def write(self, fp, line):
        """
        Write `line` to `fp` and hand back whatever `fp.write` returned.

        :param file fp:
        :param str line:
        """
        return fp.write(line)

    def finalize(self, ctx):
        """
        Remove the line counter from the context, then run the parent finalization.

        :param ctx:
        """
        del ctx.line
        super().finalize(ctx)

View File

@ -3,11 +3,11 @@ import json
from .file import FileWriter from .file import FileWriter
from bonobo.util.lifecycle import with_context from bonobo.util.lifecycle import with_context
__all__ = ['JsonFileWriter', ] __all__ = ['JsonWriter', ]
@with_context @with_context
class JsonFileWriter(FileWriter): class JsonWriter(FileWriter):
def __init__(self, path_or_buf): def __init__(self, path_or_buf):
super().__init__(path_or_buf, eol=',\n') super().__init__(path_or_buf, eol=',\n')

View File

@ -1,3 +0,0 @@
bonobo
======

View File

@ -2,7 +2,7 @@ import json
from blessings import Terminal from blessings import Terminal
from bonobo import console_run, tee, JsonFileWriter from bonobo import console_run, tee, JsonWriter
from bonobo.ext.opendatasoft import from_opendatasoft_api from bonobo.ext.opendatasoft import from_opendatasoft_api
try: try:
@ -58,5 +58,5 @@ if __name__ == '__main__':
normalize, normalize,
filter_france, filter_france,
tee(display), tee(display),
JsonFileWriter('fablabs.json'), JsonWriter('fablabs.json'),
output=True, ) output=True, )

67
tests/io/test_file.py Normal file
View File

@ -0,0 +1,67 @@
import pytest
from mock import MagicMock
from bonobo import FileWriter, Bag, FileReader
from bonobo.core.contexts import ComponentExecutionContext
from bonobo.util.tokens import BEGIN, END
class CapturingComponentExecutionContext(ComponentExecutionContext):
    """
    Execution context whose `send` attribute is a mock, so a test can inspect the calls made to it.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # One mock per instance: a class-level MagicMock is a single object shared by every context created from
        # this class, so calls recorded during one test would be counted by the assertions of another.
        self.send = MagicMock()
@pytest.mark.parametrize(
    'lines,output',
    [
        (('ACME', ), 'ACME'),  # a single line
        (('Foo', 'Bar', 'Baz'), 'Foo\nBar\nBaz'),  # several lines
    ]
)
def test_file_writer_in_context(tmpdir, lines, output):
    """
    Drive a FileWriter through an execution context and compare the resulting file content with `output`.

    Also expects the context to have no `file` attribute left once it has been finalized.
    """
    target = tmpdir.join('output.txt')
    ctx = ComponentExecutionContext(FileWriter(str(target)), None)

    ctx.initialize()
    ctx.recv(BEGIN, *(Bag(line) for line in lines), END)
    # One step per queued row.
    for _ in lines:
        ctx.step()
    ctx.finalize()

    assert target.read() == output

    with pytest.raises(AttributeError):
        getattr(ctx, 'file')
def test_file_writer_out_of_context(tmpdir):
    """
    Use the writer's `open` / `close` pair directly, without any execution context, and check the file content.
    """
    target = tmpdir.join('output.txt')
    file_writer = FileWriter(str(target))

    handle = file_writer.open()
    handle.write('Yosh!')
    file_writer.close(handle)

    assert target.read() == 'Yosh!'
def test_file_reader_in_context(tmpdir):
    """
    Drive a FileReader through a capturing context and check that one value is sent per line of the input file,
    each without its trailing newline.
    """
    source = tmpdir.join('input.txt')
    source.write('Hello\nWorld\n')

    ctx = CapturingComponentExecutionContext(FileReader(str(source)), None)

    ctx.initialize()
    ctx.recv(BEGIN, Bag(), END)
    ctx.step()
    ctx.finalize()

    assert len(ctx.send.mock_calls) == 2

    # Each recorded call must carry exactly one positional argument and no keyword argument.
    sent = []
    for recorded in ctx.send.call_args_list[:2]:
        args, kwargs = recorded
        assert len(args) == 1 and not len(kwargs)
        sent.append(args[0])

    # The first positional value of each sent argument is the line that was read.
    assert sent[0].args[0] == 'Hello'
    assert sent[1].args[0] == 'World'

View File

@ -1,13 +1,13 @@
import pytest import pytest
from bonobo import Bag, JsonFileWriter from bonobo import Bag, JsonWriter
from bonobo.core.contexts import ComponentExecutionContext from bonobo.core.contexts import ComponentExecutionContext
from bonobo.util.tokens import BEGIN, END from bonobo.util.tokens import BEGIN, END
def test_write_json_to_file(tmpdir): def test_write_json_to_file(tmpdir):
file = tmpdir.join('output.json') file = tmpdir.join('output.json')
json_writer = JsonFileWriter(str(file)) json_writer = JsonWriter(str(file))
context = ComponentExecutionContext(json_writer, None) context = ComponentExecutionContext(json_writer, None)
context.initialize() context.initialize()
@ -28,7 +28,7 @@ def test_write_json_to_file(tmpdir):
def test_write_json_without_initializer_should_not_work(tmpdir): def test_write_json_without_initializer_should_not_work(tmpdir):
file = tmpdir.join('output.json') file = tmpdir.join('output.json')
json_writer = JsonFileWriter(str(file)) json_writer = JsonWriter(str(file))
context = ComponentExecutionContext(json_writer, None) context = ComponentExecutionContext(json_writer, None)
with pytest.raises(AttributeError): with pytest.raises(AttributeError):