doc update and better versioning method
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@ -9,6 +9,7 @@
|
|||||||
*.py[cod]
|
*.py[cod]
|
||||||
*.so
|
*.so
|
||||||
*.spec
|
*.spec
|
||||||
|
.*.sw?
|
||||||
.Python
|
.Python
|
||||||
.cache
|
.cache
|
||||||
.coverage
|
.coverage
|
||||||
|
|||||||
2
Makefile
2
Makefile
@ -1,7 +1,7 @@
|
|||||||
# This file has been auto-generated.
|
# This file has been auto-generated.
|
||||||
# All changes will be lost, see Projectfile.
|
# All changes will be lost, see Projectfile.
|
||||||
#
|
#
|
||||||
# Updated at 2016-12-28 15:50:31.026587
|
# Updated at 2016-12-29 17:04:15.403353
|
||||||
|
|
||||||
PYTHON ?= $(shell which python)
|
PYTHON ?= $(shell which python)
|
||||||
PYTHON_BASENAME ?= $(shell basename $(PYTHON))
|
PYTHON_BASENAME ?= $(shell basename $(PYTHON))
|
||||||
|
|||||||
@ -19,21 +19,15 @@
|
|||||||
See the License for the specific language governing permissions and
|
See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
"""
|
"""
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
assert (sys.version_info >= (3, 5)), 'Python 3.5+ is required to use Bonobo.'
|
||||||
|
|
||||||
|
from ._version import __version__
|
||||||
from .core import *
|
from .core import *
|
||||||
from .io import CsvReader, CsvWriter, FileReader, FileWriter, JsonReader, JsonWriter
|
from .io import CsvReader, CsvWriter, FileReader, FileWriter, JsonReader, JsonWriter
|
||||||
from .util import *
|
from .util import *
|
||||||
|
|
||||||
PY35 = (sys.version_info >= (3, 5))
|
|
||||||
|
|
||||||
assert PY35, 'Python 3.5+ is required to use Bonobo.'
|
|
||||||
|
|
||||||
# Version infos
|
|
||||||
with open(os.path.realpath(os.path.join(os.path.dirname(__file__), '../version.txt'))) as f:
|
|
||||||
__version__ = f.read().strip()
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'Bag',
|
'Bag',
|
||||||
'CsvReader',
|
'CsvReader',
|
||||||
@ -47,6 +41,7 @@ __all__ = [
|
|||||||
'NaiveStrategy',
|
'NaiveStrategy',
|
||||||
'ProcessPoolExecutorStrategy',
|
'ProcessPoolExecutorStrategy',
|
||||||
'ThreadPoolExecutorStrategy',
|
'ThreadPoolExecutorStrategy',
|
||||||
|
'__version__',
|
||||||
'console_run',
|
'console_run',
|
||||||
'head',
|
'head',
|
||||||
'inject',
|
'inject',
|
||||||
|
|||||||
1
bonobo/_version.py
Normal file
1
bonobo/_version.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
__version__ = '0.1.3'
|
||||||
3
docs/_static/custom.css
vendored
Normal file
3
docs/_static/custom.css
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
svg {
|
||||||
|
border: 2px solid green
|
||||||
|
}
|
||||||
2
docs/_static/graphs.css
vendored
Normal file
2
docs/_static/graphs.css
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
.node {
|
||||||
|
}
|
||||||
33
docs/guide/crawlers.rst
Normal file
33
docs/guide/crawlers.rst
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
Web crawlers with Bonobo
|
||||||
|
========================
|
||||||
|
|
||||||
|
.. todo:: Bonobo-Selenium is at a very alpha stage, and things will change. This section is here to give a brief
|
||||||
|
overview but is neither complete nor definitive.
|
||||||
|
|
||||||
|
Writing web crawlers with Bonobo and Selenium is easy.
|
||||||
|
|
||||||
|
First, install **bonobo-selenium**:
|
||||||
|
|
||||||
|
.. code-block:: shell-session
|
||||||
|
|
||||||
|
$ pip install bonobo-selenium
|
||||||
|
|
||||||
|
The idea is to have one callable crawl one thing and delegate drill downs to callables further away in the chain.
|
||||||
|
|
||||||
|
An example chain could be:
|
||||||
|
|
||||||
|
.. graphviz::
|
||||||
|
|
||||||
|
digraph {
|
||||||
|
rankdir = LR;
|
||||||
|
login -> paginate -> list -> details -> "ExcelWriter(...)";
|
||||||
|
}
|
||||||
|
|
||||||
|
Where each step would do the following:
|
||||||
|
|
||||||
|
* `login()` is in charge of opening an authenticated session in the browser.
|
||||||
|
* `paginate()` opens each page of a fictitious list and passes it to the next component.
|
||||||
|
* `list()` takes every list item and yields it.
|
||||||
|
* `details()` extracts the data you're interested in.
|
||||||
|
* ... and the writer saves it somewhere.
|
||||||
|
|
||||||
@ -1,4 +1,8 @@
|
|||||||
Guides
|
Guides
|
||||||
======
|
======
|
||||||
|
|
||||||
.. todo:: write the fucking doc!
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
purity
|
||||||
|
crawlers
|
||||||
|
|||||||
128
docs/guide/purity.rst
Normal file
128
docs/guide/purity.rst
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
Pure components and space complexity
|
||||||
|
====================================
|
||||||
|
|
||||||
|
The nature of components, and how the data flows from one to another, make them not so easy to write correctly.
|
||||||
|
Hopefully, with a few hints, you will be able to understand why and how they should be written.
|
||||||
|
|
||||||
|
The major problem we have is that one message can go through more than one component, and at the same time. If you
|
||||||
|
want to be safe, you tend to :func:`copy.copy()` everything between two calls to two different components, but that
|
||||||
|
will mean that a lot of useless memory space would be taken for copies that are never modified.
|
||||||
|
|
||||||
|
Instead of that, we chose the opposite: copies are never made, and you should not modify in place the inputs of your
|
||||||
|
component before yielding them, and that mostly means that you want to recreate dicts and lists before yielding (or
|
||||||
|
returning) them. Numeric values, strings and tuples being immutable in Python, modifying a variable of one of those
|
||||||
|
types will already return a different instance.
|
||||||
|
|
||||||
|
Numbers
|
||||||
|
=======
|
||||||
|
|
||||||
|
You can't be wrong with numbers. All of the following are correct.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
def do_your_number_thing(n: int) -> int:
|
||||||
|
return n
|
||||||
|
|
||||||
|
def do_your_number_thing(n: int) -> int:
|
||||||
|
yield n
|
||||||
|
|
||||||
|
def do_your_number_thing(n: int) -> int:
|
||||||
|
return n + 1
|
||||||
|
|
||||||
|
def do_your_number_thing(n: int) -> int:
|
||||||
|
yield n + 1
|
||||||
|
|
||||||
|
def do_your_number_thing(n: int) -> int:
|
||||||
|
# correct, but bad style
|
||||||
|
n += 1
|
||||||
|
return n
|
||||||
|
|
||||||
|
def do_your_number_thing(n: int) -> int:
|
||||||
|
# correct, but bad style
|
||||||
|
n += 1
|
||||||
|
yield n
|
||||||
|
|
||||||
|
The same is true with other numeric types, so don't be shy. Operate like crazy, my friend.
|
||||||
|
|
||||||
|
Tuples
|
||||||
|
======
|
||||||
|
|
||||||
|
Tuples are immutable, so you risk nothing.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
def do_your_tuple_thing(t: tuple) -> tuple:
|
||||||
|
return ('foo', ) + t
|
||||||
|
|
||||||
|
def do_your_tuple_thing(t: tuple) -> tuple:
|
||||||
|
return t + ('bar', )
|
||||||
|
|
||||||
|
def do_your_tuple_thing(t: tuple) -> tuple:
|
||||||
|
# correct, but bad style
|
||||||
|
t += ('baaaz', )
|
||||||
|
return t
|
||||||
|
|
||||||
|
Strings
|
||||||
|
=======
|
||||||
|
|
||||||
|
You know the drill, strings are immutable, blablabla ... Examples left as an exercise for the reader.
|
||||||
|
|
||||||
|
Dicts
|
||||||
|
=====
|
||||||
|
|
||||||
|
So, now it gets interesting. Dicts are mutable. It means that you can mess things up badly here if you're not cautious.
|
||||||
|
|
||||||
|
For example, doing the following may cause unexpected problems:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
def mutate_my_dict_like_crazy(d: dict) -> dict:
|
||||||
|
# Bad! Don't do that!
|
||||||
|
d.update({
|
||||||
|
'foo': compute_something()
|
||||||
|
})
|
||||||
|
# Still bad! Don't mutate the dict!
|
||||||
|
d['bar'] = compute_anotherthing()
|
||||||
|
return d
|
||||||
|
|
||||||
|
The problem is easy to understand: as **Bonobo** won't make copies of your dict, the same dict will be passed along the
|
||||||
|
transformation graph, and mutations will be seen by downstream components, but also by upstream ones. Let's see
|
||||||
|
a more obvious example of something you should not do:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
def mutate_my_dict_and_yield() -> dict:
|
||||||
|
d = {}
|
||||||
|
for i in range(100):
|
||||||
|
# Bad! Don't do that!
|
||||||
|
d['index'] = i
|
||||||
|
yield d
|
||||||
|
|
||||||
|
Here, the same dict is yielded in each iteration, and its state when the next component in the chain is called is indeterminate.
|
||||||
|
|
||||||
|
Now let's see how to do it correctly:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
def new_dicts_like_crazy(d: dict) -> dict:
|
||||||
|
# Creating a new dict is correct.
|
||||||
|
return {
|
||||||
|
**d,
|
||||||
|
'foo': compute_something(),
|
||||||
|
'bar': compute_anotherthing(),
|
||||||
|
}
|
||||||
|
|
||||||
|
def new_dict_and_yield() -> dict:
|
||||||
|
d = {}
|
||||||
|
for i in range(100):
|
||||||
|
# Different dict each time.
|
||||||
|
yield {
|
||||||
|
'index': i
|
||||||
|
}
|
||||||
|
|
||||||
|
I hear you thinking «Yeah, but if I create like millions of dicts ...». The answer is simple. Using dicts like this will
|
||||||
|
create a lot, but also free a lot because as soon as all the future components that take this dict as input are done,
|
||||||
|
the dict will be garbage collected. Youplaboum!
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,22 +0,0 @@
|
|||||||
bonobo.ext.console package
|
|
||||||
==========================
|
|
||||||
|
|
||||||
Submodules
|
|
||||||
----------
|
|
||||||
|
|
||||||
bonobo.ext.console.plugin module
|
|
||||||
--------------------------------
|
|
||||||
|
|
||||||
.. automodule:: bonobo.ext.console.plugin
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
|
|
||||||
Module contents
|
|
||||||
---------------
|
|
||||||
|
|
||||||
.. automodule:: bonobo.ext.console
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
@ -1,30 +0,0 @@
|
|||||||
bonobo.ext.jupyter package
|
|
||||||
==========================
|
|
||||||
|
|
||||||
Submodules
|
|
||||||
----------
|
|
||||||
|
|
||||||
bonobo.ext.jupyter.plugin module
|
|
||||||
--------------------------------
|
|
||||||
|
|
||||||
.. automodule:: bonobo.ext.jupyter.plugin
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
bonobo.ext.jupyter.widget module
|
|
||||||
--------------------------------
|
|
||||||
|
|
||||||
.. automodule:: bonobo.ext.jupyter.widget
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
|
|
||||||
Module contents
|
|
||||||
---------------
|
|
||||||
|
|
||||||
.. automodule:: bonobo.ext.jupyter
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
@ -1,46 +0,0 @@
|
|||||||
bonobo.ext package
|
|
||||||
==================
|
|
||||||
|
|
||||||
Subpackages
|
|
||||||
-----------
|
|
||||||
|
|
||||||
.. toctree::
|
|
||||||
|
|
||||||
bonobo.ext.console
|
|
||||||
bonobo.ext.jupyter
|
|
||||||
|
|
||||||
Submodules
|
|
||||||
----------
|
|
||||||
|
|
||||||
bonobo.ext.couchdb_ module
|
|
||||||
--------------------------
|
|
||||||
|
|
||||||
.. automodule:: bonobo.ext.couchdb_
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
bonobo.ext.opendatasoft module
|
|
||||||
------------------------------
|
|
||||||
|
|
||||||
.. automodule:: bonobo.ext.opendatasoft
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
bonobo.ext.selenium module
|
|
||||||
--------------------------
|
|
||||||
|
|
||||||
.. automodule:: bonobo.ext.selenium
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
|
|
||||||
Module contents
|
|
||||||
---------------
|
|
||||||
|
|
||||||
.. automodule:: bonobo.ext
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
@ -58,7 +58,10 @@ Let's chain the three components together and run the transformation:
|
|||||||
|
|
||||||
digraph {
|
digraph {
|
||||||
rankdir = LR;
|
rankdir = LR;
|
||||||
"generate_data" -> "uppercase" -> "output";
|
stylesheet = "../_static/graphs.css";
|
||||||
|
|
||||||
|
BEGIN [shape="point"];
|
||||||
|
BEGIN -> "generate_data" -> "uppercase" -> "output";
|
||||||
}
|
}
|
||||||
|
|
||||||
We use the :func:`bonobo.run` helper that hides the underlying object composition necessary to actually run the
|
We use the :func:`bonobo.run` helper that hides the underlying object composition necessary to actually run the
|
||||||
|
|||||||
18
setup.py
18
setup.py
@ -1,8 +1,11 @@
|
|||||||
# This file is autogenerated by edgy.project code generator.
|
# This file is autogenerated by edgy.project code generator.
|
||||||
# All changes will be overwritten.
|
# All changes will be overwritten.
|
||||||
|
|
||||||
|
import os
|
||||||
from setuptools import setup, find_packages
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
root_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
tolines = lambda c: list(filter(None, map(lambda s: s.strip(), c.split('\n'))))
|
tolines = lambda c: list(filter(None, map(lambda s: s.strip(), c.split('\n'))))
|
||||||
|
|
||||||
|
|
||||||
@ -12,10 +15,19 @@ def read(filename, flt=None):
|
|||||||
return flt(content) if callable(flt) else content
|
return flt(content) if callable(flt) else content
|
||||||
|
|
||||||
|
|
||||||
|
# Py3 compatibility hacks, borrowed from IPython.
|
||||||
try:
|
try:
|
||||||
version = read('version.txt')
|
execfile
|
||||||
except: # pylint: disable=bare-except
|
except NameError:
|
||||||
version = 'dev'
|
|
||||||
|
def execfile(fname, globs, locs=None):
|
||||||
|
locs = locs or globs
|
||||||
|
exec(compile(open(fname).read(), fname, "exec"), globs, locs)
|
||||||
|
|
||||||
|
|
||||||
|
version_ns = {}
|
||||||
|
execfile(os.path.join(root_dir, 'bonobo/_version.py'), version_ns)
|
||||||
|
version = version_ns.get('__version__', 'dev')
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='bonobo',
|
name='bonobo',
|
||||||
|
|||||||
@ -1 +0,0 @@
|
|||||||
0.1.2
|
|
||||||
Reference in New Issue
Block a user