implementing FileReader/FileWriter (#2)
This commit is contained in:
@ -38,7 +38,7 @@ __all__ = [
|
|||||||
'Bag',
|
'Bag',
|
||||||
'FileWriter',
|
'FileWriter',
|
||||||
'Graph',
|
'Graph',
|
||||||
'JsonFileWriter',
|
'JsonWriter',
|
||||||
'NOT_MODIFIED',
|
'NOT_MODIFIED',
|
||||||
'NaiveStrategy',
|
'NaiveStrategy',
|
||||||
'ProcessPoolExecutorStrategy',
|
'ProcessPoolExecutorStrategy',
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
from bonobo import FileWriter, JsonFileWriter
|
from bonobo import FileWriter, JsonWriter
|
||||||
|
|
||||||
to_file = FileWriter
|
to_file = FileWriter
|
||||||
to_json = JsonFileWriter
|
to_json = JsonWriter
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'to_json',
|
'to_json',
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# Copyright 2012-2014 Romain Dorgueil
|
# Copyright 2012-2017 Romain Dorgueil
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
|
|||||||
@ -3,6 +3,16 @@ from IPython.core.display import display
|
|||||||
from bonobo.core.plugins import Plugin
|
from bonobo.core.plugins import Plugin
|
||||||
from bonobo.ext.jupyter.widget import BonoboWidget
|
from bonobo.ext.jupyter.widget import BonoboWidget
|
||||||
|
|
||||||
|
try:
|
||||||
|
import selenium
|
||||||
|
except ImportError as e:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logging.exception(
|
||||||
|
'You must install selenium to use the bonobo selenium extension. Easiest way is to install the '
|
||||||
|
'optional "selenium" dependencies with «pip install bonobo[selenium]», but you can also install a '
|
||||||
|
'specific version by yourself.')
|
||||||
|
|
||||||
|
|
||||||
class JupyterOutputPlugin(Plugin):
|
class JupyterOutputPlugin(Plugin):
|
||||||
def initialize(self, context):
|
def initialize(self, context):
|
||||||
|
|||||||
@ -1,4 +1,12 @@
|
|||||||
from selenium import webdriver
|
try:
    # ``import selenium`` alone does not load the ``selenium.webdriver`` subpackage. Import it explicitly so that
    # the ``selenium.webdriver.*`` lookups done further down in this module resolve; the name ``selenium`` is still
    # bound by this statement.
    import selenium.webdriver
except ImportError:
    import logging

    # Best-effort: log the installation hint (with traceback) and let the module finish importing.
    logging.exception(
        'You must install selenium to use the bonobo selenium extension. Easiest way is to install the '
        'optional "selenium" dependencies with «pip install bonobo[selenium]», but you can also install a '
        'specific version by yourself.')
|
||||||
|
|
||||||
from bonobo import service
|
from bonobo import service
|
||||||
|
|
||||||
@ -6,7 +14,7 @@ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/601.4.
|
|||||||
|
|
||||||
|
|
||||||
def create_profile(use_tor=False):
|
def create_profile(use_tor=False):
|
||||||
_profile = webdriver.FirefoxProfile()
|
_profile = selenium.webdriver.FirefoxProfile()
|
||||||
_profile.set_preference("toolkit.startup.max_resumed_crashes", "-1")
|
_profile.set_preference("toolkit.startup.max_resumed_crashes", "-1")
|
||||||
|
|
||||||
if use_tor:
|
if use_tor:
|
||||||
@ -22,7 +30,7 @@ def create_profile(use_tor=False):
|
|||||||
|
|
||||||
|
|
||||||
def create_browser(profile):
|
def create_browser(profile):
|
||||||
_browser = webdriver.Firefox(profile)
|
_browser = selenium.webdriver.Firefox(profile)
|
||||||
_browser.implicitly_wait(10)
|
_browser.implicitly_wait(10)
|
||||||
_browser.set_page_load_timeout(10)
|
_browser.set_page_load_timeout(10)
|
||||||
return _browser
|
return _browser
|
||||||
|
|||||||
@ -1,9 +1,11 @@
|
|||||||
""" Readers and writers for common file formats. """
|
""" Readers and writers for common file formats. """
|
||||||
|
|
||||||
from .file import FileWriter
|
from .file import FileHandler, FileReader, FileWriter
|
||||||
from .json import JsonFileWriter
|
from .json import JsonWriter
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
|
'FileHandler',
|
||||||
|
'FileReader',
|
||||||
'FileWriter',
|
'FileWriter',
|
||||||
'JsonFileWriter',
|
'JsonWriter',
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1,35 +1,118 @@
|
|||||||
|
from functools import partial
|
||||||
|
|
||||||
from bonobo.util.lifecycle import with_context
|
from bonobo.util.lifecycle import with_context
|
||||||
|
|
||||||
__all__ = ['FileWriter', ]
|
__all__ = [
|
||||||
|
'FileHandler',
|
||||||
|
'FileReader',
|
||||||
|
'FileWriter',
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
@with_context
|
@with_context
|
||||||
class FileWriter:
|
class FileHandler:
|
||||||
# XXX TODO implement @with_context like this ? Pros and cons ?
|
"""
|
||||||
class Meta:
|
Abstract component factory for file-related components.
|
||||||
contextual = True
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
mode = None
|
||||||
|
|
||||||
def __init__(self, path_or_buf, eol='\n'):
|
def __init__(self, path_or_buf, eol='\n'):
|
||||||
self.path_or_buf = path_or_buf
|
self.path_or_buf = path_or_buf
|
||||||
self.eol = eol
|
self.eol = eol
|
||||||
|
|
||||||
|
def open(self):
|
||||||
|
return open(self.path_or_buf, self.mode)
|
||||||
|
|
||||||
|
def close(self, fp):
|
||||||
|
"""
|
||||||
|
:param file fp:
|
||||||
|
"""
|
||||||
|
fp.close()
|
||||||
|
|
||||||
def initialize(self, ctx):
|
def initialize(self, ctx):
|
||||||
""" todo add lock file ? optional maybe ? """
|
"""
|
||||||
assert not hasattr(ctx, 'fp'), 'One at a time, baby.'
|
Initialize a
|
||||||
ctx.fp = open(self.path_or_buf, 'w+')
|
:param ctx:
|
||||||
ctx.first = True
|
:return:
|
||||||
|
"""
|
||||||
|
assert not hasattr(ctx, 'file'), 'A file pointer is already in the context... I do not know what to say...'
|
||||||
|
ctx.file = self.open()
|
||||||
|
|
||||||
def write(self, fp, line, prefix=''):
|
def __call__(self, ctx, *args):
|
||||||
fp.write(prefix + line)
|
"""
|
||||||
|
:param ComponentExecutionContext ctx:
|
||||||
|
:param mixed row:
|
||||||
|
"""
|
||||||
|
result = self.handle(ctx, *args)
|
||||||
|
try:
|
||||||
|
yield from result
|
||||||
|
except TypeError:
|
||||||
|
return
|
||||||
|
|
||||||
def __call__(self, ctx, row):
|
def handle(self, ctx, *args):
|
||||||
if ctx.first:
|
raise NotImplementedError('Abstract.')
|
||||||
prefix, ctx.first = '', False
|
|
||||||
else:
|
|
||||||
prefix = self.eol
|
|
||||||
|
|
||||||
self.write(ctx.fp, row, prefix=prefix)
|
|
||||||
|
|
||||||
def finalize(self, ctx):
|
def finalize(self, ctx):
|
||||||
ctx.fp.close()
|
self.close(ctx.file)
|
||||||
del ctx.fp, ctx.first
|
del ctx.file
|
||||||
|
|
||||||
|
|
||||||
|
class FileReader(FileHandler):
    """
    Component factory for file-like readers.

    On its own, it can be used to read a file and yield one row per line, trimming the trailing "eol" characters if
    present. Extending it is usually the right way to create more specific file readers (like json, csv, etc.)

    """

    # Mode passed to ``open()`` by ``FileHandler.open``: read-only text.
    mode = 'r'

    def handle(self, ctx, *args):
        """
        Read the file pointed by `ctx.file` and yield its content, one row per line.

        A reader is a source: it does not consume input rows, so `args` must be empty.

        :param ctx: execution context; `ctx.file` is iterated line by line.
        :param args: must be empty.
        :raises AssertionError: if any positional input is received.
        :return: generator yielding each line with its trailing `self.eol` characters removed.
        """
        assert not args

        for line in ctx.file:
            # NOTE: ``str.rstrip`` treats ``self.eol`` as a *set of characters*, so every trailing character that
            # appears in ``eol`` is removed, not only one exact terminator.
            yield line.rstrip(self.eol)
|
||||||
|
|
||||||
|
|
||||||
|
class FileWriter(FileHandler):
    """
    Component factory for file or file-like writers.

    On its own, it can be used to write in a file one line per row that comes into this component. Extending it is
    usually the right way to create more specific file writers (like json, csv, etc.)

    """

    # Mode passed to ``open()`` by ``FileHandler.open``: ``'w+'`` truncates the file and opens it for updating.
    mode = 'w+'

    def initialize(self, ctx):
        """
        Open the target file through the parent implementation, then reset the written-rows counter.

        :param ctx: execution context; gets a `line` attribute initialised to 0.
        """
        super().initialize(ctx)
        ctx.line = 0

    def handle(self, ctx, row):
        """
        Write a row on the next line of the file pointed by `ctx.file`.

        `self.eol` is written *before* every row except the first one, so the output never ends with a dangling
        separator.

        :param ctx: execution context holding the open `file` and the `line` counter.
        :param str row: text to write.
        :return: None
        """
        separator = self.eol if ctx.line else ''
        self.write(ctx.file, separator + row)
        ctx.line += 1

    def write(self, fp, line):
        """
        Low-level write, kept separate so that subclasses can change how a line is emitted.

        :param file fp: writable file object.
        :param str line: text to write, separator included.
        :return: whatever `fp.write` returns.
        """
        return fp.write(line)

    def finalize(self, ctx):
        """
        Drop the rows counter from the context, then let the parent close and remove the file handle.

        :param ctx: execution context previously prepared by `initialize`.
        """
        del ctx.line
        super().finalize(ctx)
|
||||||
|
|||||||
@ -3,11 +3,11 @@ import json
|
|||||||
from .file import FileWriter
|
from .file import FileWriter
|
||||||
from bonobo.util.lifecycle import with_context
|
from bonobo.util.lifecycle import with_context
|
||||||
|
|
||||||
__all__ = ['JsonFileWriter', ]
|
__all__ = ['JsonWriter', ]
|
||||||
|
|
||||||
|
|
||||||
@with_context
|
@with_context
|
||||||
class JsonFileWriter(FileWriter):
|
class JsonWriter(FileWriter):
|
||||||
def __init__(self, path_or_buf):
|
def __init__(self, path_or_buf):
|
||||||
super().__init__(path_or_buf, eol=',\n')
|
super().__init__(path_or_buf, eol=',\n')
|
||||||
|
|
||||||
|
|||||||
@ -1,3 +0,0 @@
|
|||||||
bonobo
|
|
||||||
======
|
|
||||||
|
|
||||||
@ -2,7 +2,7 @@ import json
|
|||||||
|
|
||||||
from blessings import Terminal
|
from blessings import Terminal
|
||||||
|
|
||||||
from bonobo import console_run, tee, JsonFileWriter
|
from bonobo import console_run, tee, JsonWriter
|
||||||
from bonobo.ext.opendatasoft import from_opendatasoft_api
|
from bonobo.ext.opendatasoft import from_opendatasoft_api
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -58,5 +58,5 @@ if __name__ == '__main__':
|
|||||||
normalize,
|
normalize,
|
||||||
filter_france,
|
filter_france,
|
||||||
tee(display),
|
tee(display),
|
||||||
JsonFileWriter('fablabs.json'),
|
JsonWriter('fablabs.json'),
|
||||||
output=True, )
|
output=True, )
|
||||||
|
|||||||
67
tests/io/test_file.py
Normal file
67
tests/io/test_file.py
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
import pytest
|
||||||
|
from mock import MagicMock
|
||||||
|
|
||||||
|
from bonobo import FileWriter, Bag, FileReader
|
||||||
|
from bonobo.core.contexts import ComponentExecutionContext
|
||||||
|
from bonobo.util.tokens import BEGIN, END
|
||||||
|
|
||||||
|
|
||||||
|
class CapturingComponentExecutionContext(ComponentExecutionContext):
    """Execution context whose `send` is replaced by a mock, so tests can inspect what a component emitted."""

    # NOTE: defined at class level, so the same mock (and its recorded calls) is shared by every instance.
    send = MagicMock()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    'lines,output',
    [
        (('ACME', ), 'ACME'),  # a single row
        (('Foo', 'Bar', 'Baz'), 'Foo\nBar\nBaz'),  # several rows
    ],
)
def test_file_writer_in_context(tmpdir, lines, output):
    """A FileWriter driven by an execution context writes one row per line, without a trailing newline."""
    target = tmpdir.join('output.txt')
    context = ComponentExecutionContext(FileWriter(str(target)), None)

    context.initialize()
    context.recv(BEGIN, *map(Bag, lines), END)
    # One step per queued row.
    for _ in lines:
        context.step()
    context.finalize()

    assert target.read() == output

    # Once finalized, the context is expected to no longer expose the file handle.
    with pytest.raises(AttributeError):
        getattr(context, 'file')
|
||||||
|
|
||||||
|
|
||||||
|
def test_file_writer_out_of_context(tmpdir):
    """FileWriter.open() / FileWriter.close() are usable directly, without any execution context."""
    target = tmpdir.join('output.txt')
    writer = FileWriter(str(target))

    handle = writer.open()
    handle.write('Yosh!')
    writer.close(handle)

    assert target.read() == 'Yosh!'
|
||||||
|
|
||||||
|
|
||||||
|
def test_file_reader_in_context(tmpdir):
    """A FileReader driven by an execution context sends one bag per line of the input file."""
    source = tmpdir.join('input.txt')
    source.write('Hello\nWorld\n')

    context = CapturingComponentExecutionContext(FileReader(str(source)), None)

    context.initialize()
    context.recv(BEGIN, Bag(), END)
    context.step()
    context.finalize()

    assert len(context.send.mock_calls) == 2

    # Each send() call must carry exactly one positional argument whose first arg is the stripped line.
    recorded = context.send.call_args_list
    for (sent_args, sent_kwargs), expected in zip(recorded, ('Hello', 'World')):
        assert len(sent_args) == 1 and not len(sent_kwargs)
        assert sent_args[0].args[0] == expected
|
||||||
@ -1,13 +1,13 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from bonobo import Bag, JsonFileWriter
|
from bonobo import Bag, JsonWriter
|
||||||
from bonobo.core.contexts import ComponentExecutionContext
|
from bonobo.core.contexts import ComponentExecutionContext
|
||||||
from bonobo.util.tokens import BEGIN, END
|
from bonobo.util.tokens import BEGIN, END
|
||||||
|
|
||||||
|
|
||||||
def test_write_json_to_file(tmpdir):
|
def test_write_json_to_file(tmpdir):
|
||||||
file = tmpdir.join('output.json')
|
file = tmpdir.join('output.json')
|
||||||
json_writer = JsonFileWriter(str(file))
|
json_writer = JsonWriter(str(file))
|
||||||
context = ComponentExecutionContext(json_writer, None)
|
context = ComponentExecutionContext(json_writer, None)
|
||||||
|
|
||||||
context.initialize()
|
context.initialize()
|
||||||
@ -28,7 +28,7 @@ def test_write_json_to_file(tmpdir):
|
|||||||
|
|
||||||
def test_write_json_without_initializer_should_not_work(tmpdir):
|
def test_write_json_without_initializer_should_not_work(tmpdir):
|
||||||
file = tmpdir.join('output.json')
|
file = tmpdir.join('output.json')
|
||||||
json_writer = JsonFileWriter(str(file))
|
json_writer = JsonWriter(str(file))
|
||||||
|
|
||||||
context = ComponentExecutionContext(json_writer, None)
|
context = ComponentExecutionContext(json_writer, None)
|
||||||
with pytest.raises(AttributeError):
|
with pytest.raises(AttributeError):
|
||||||
|
|||||||
Reference in New Issue
Block a user