diff --git a/.gitignore b/.gitignore index c731916..da70ff2 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ *.py[cod] *.so *.spec +.*.sw? .Python .cache .coverage diff --git a/Makefile b/Makefile index c580632..e84f7a5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # This file has been auto-generated. # All changes will be lost, see Projectfile. # -# Updated at 2016-12-28 15:50:31.026587 +# Updated at 2016-12-29 17:04:15.403353 PYTHON ?= $(shell which python) PYTHON_BASENAME ?= $(shell basename $(PYTHON)) diff --git a/bonobo/__init__.py b/bonobo/__init__.py index d70e30a..685e08d 100644 --- a/bonobo/__init__.py +++ b/bonobo/__init__.py @@ -19,21 +19,15 @@ See the License for the specific language governing permissions and limitations under the License. """ -import os import sys +assert (sys.version_info >= (3, 5)), 'Python 3.5+ is required to use Bonobo.' + +from ._version import __version__ from .core import * from .io import CsvReader, CsvWriter, FileReader, FileWriter, JsonReader, JsonWriter from .util import * -PY35 = (sys.version_info >= (3, 5)) - -assert PY35, 'Python 3.5+ is required to use Bonobo.' 
- -# Version infos -with open(os.path.realpath(os.path.join(os.path.dirname(__file__), '../version.txt'))) as f: - __version__ = f.read().strip() - __all__ = [ 'Bag', 'CsvReader', @@ -47,6 +41,7 @@ __all__ = [ 'NaiveStrategy', 'ProcessPoolExecutorStrategy', 'ThreadPoolExecutorStrategy', + '__version__', 'console_run', 'head', 'inject', diff --git a/bonobo/_version.py b/bonobo/_version.py new file mode 100644 index 0000000..8ce9b36 --- /dev/null +++ b/bonobo/_version.py @@ -0,0 +1 @@ +__version__ = '0.1.3' diff --git a/docs/_static/custom.css b/docs/_static/custom.css new file mode 100644 index 0000000..f658da9 --- /dev/null +++ b/docs/_static/custom.css @@ -0,0 +1,3 @@ +svg { + border: 2px solid green +} \ No newline at end of file diff --git a/docs/_static/graphs.css b/docs/_static/graphs.css new file mode 100644 index 0000000..873f3d6 --- /dev/null +++ b/docs/_static/graphs.css @@ -0,0 +1,2 @@ +.node { +} \ No newline at end of file diff --git a/docs/guide/crawlers.rst b/docs/guide/crawlers.rst new file mode 100644 index 0000000..7a9a181 --- /dev/null +++ b/docs/guide/crawlers.rst @@ -0,0 +1,33 @@ +Web crawlers with Bonobo +======================== + +.. todo:: Bonobo-Selenium is at a very alpha stage, and things will change. This section is here to give a brief + overview but is neither complete nor definitive. + +Writing web crawlers with Bonobo and Selenium is easy. + +First, install **bonobo-selenium**: + +.. code-block:: shell-session + + $ pip install bonobo-selenium + +The idea is to have one callable crawl one thing and delegate drill downs to callables further away in the chain. + +An example chain could be: + +.. graphviz:: + + digraph { + rankdir = LR; + login -> paginate -> list -> details -> "ExcelWriter(...)"; + } + +Where each step would do the following: + +* `login()` is in charge of opening an authenticated session in the browser. +* `paginate()` opens each page of a fictional list and passes it to the next step. +* `list()` takes every list item and yields it.
+* `details()` extracts the data you're interested in. +* ... and the writer saves it somewhere. + diff --git a/docs/guide/index.rst b/docs/guide/index.rst index ebd0d17..99ae56f 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -1,4 +1,8 @@ Guides ====== -.. todo:: write the fucking doc! +.. toctree:: + :maxdepth: 2 + + purity + crawlers diff --git a/docs/guide/purity.rst b/docs/guide/purity.rst new file mode 100644 index 0000000..1995284 --- /dev/null +++ b/docs/guide/purity.rst @@ -0,0 +1,128 @@ +Pure components and space complexity +==================================== + +The nature of components, and how data flows from one to another, make them not so easy to write correctly. +Hopefully, with a few hints, you will be able to understand why and how they should be written. + +The major problem we have is that one message can go through more than one component, and at the same time. If you +want to be safe, you tend to :func:`copy.copy()` everything between two calls to two different components, but that +would mean that a lot of memory space is wasted on copies that are never modified. + +Instead of that, we chose the opposite: copies are never made, and you should not modify in place the inputs of your +component before yielding them, and that mostly means that you want to recreate dicts and lists before yielding (or +returning) them. Numeric values, strings and tuples being immutable in Python, modifying a variable of one of those +types will already return a different instance. + +Numbers +======= + +You can't go wrong with numbers. All of the following are correct. + +..
code-block:: python + + def do_your_number_thing(n: int) -> int: + return n + + def do_your_number_thing(n: int) -> int: + yield n + + def do_your_number_thing(n: int) -> int: + return n + 1 + + def do_your_number_thing(n: int) -> int: + yield n + 1 + + def do_your_number_thing(n: int) -> int: + # correct, but bad style + n += 1 + return n + + def do_your_number_thing(n: int) -> int: + # correct, but bad style + n += 1 + yield n + +The same is true with other numeric types, so don't be shy. Operate like crazy, my friend. + +Tuples +====== + +Tuples are immutable, so you risk nothing. + +.. code-block:: python + + def do_your_tuple_thing(t: tuple) -> tuple: + return ('foo', ) + t + + def do_your_tuple_thing(t: tuple) -> tuple: + return t + ('bar', ) + + def do_your_tuple_thing(t: tuple) -> tuple: + # correct, but bad style + t += ('baaaz', ) + return t + +Strings +======= + +You know the drill, strings are immutable, blablabla ... Examples left as an exercise for the reader. + +Dicts +===== + +So, now it gets interesting. Dicts are mutable. It means that you can mess things up badly here if you're not cautious. + +For example, doing the following may cause unexpected problems: + +.. code-block:: python + + def mutate_my_dict_like_crazy(d: dict) -> dict: + # Bad! Don't do that! + d.update({ + 'foo': compute_something() + }) + # Still bad! Don't mutate the dict! + d['bar'] = compute_anotherthing() + return d + +The problem is easy to understand: as **Bonobo** won't make copies of your dict, the same dict will be passed along the +transformation graph, and mutations will be seen by components downstream of the output, but also upstream. Let's see +a more obvious example of something you should not do: + +.. code-block:: python + + def mutate_my_dict_and_yield() -> dict: + d = {} + for i in range(100): + # Bad! Don't do that! + d['index'] = i + yield d + +Here, the same dict is yielded in each iteration, and its state when the next component in the chain is called is undetermined.
+ +Now let's see how to do it correctly: + +.. code-block:: python + + def new_dicts_like_crazy(d: dict) -> dict: + # Creating a new dict is correct. + return { + **d, + 'foo': compute_something(), + 'bar': compute_anotherthing(), + } + + def new_dict_and_yield() -> dict: + d = {} + for i in range(100): + # Different dict each time. + yield { + 'index': i + } + +I hear you think «Yeah, but if I create like millions of dicts ...». The answer is simple. Using dicts like this will +create a lot, but also free a lot because as soon as all the future components that take this dict as input are done, +the dict will be garbage collected. Youplaboum! + + + diff --git a/docs/reference/bonobo.ext.console.rst b/docs/reference/bonobo.ext.console.rst deleted file mode 100644 index 4385466..0000000 --- a/docs/reference/bonobo.ext.console.rst +++ /dev/null @@ -1,22 +0,0 @@ -bonobo.ext.console package -========================== - -Submodules ----------- - -bonobo.ext.console.plugin module --------------------------------- - -.. automodule:: bonobo.ext.console.plugin - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: bonobo.ext.console - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/reference/bonobo.ext.jupyter.rst b/docs/reference/bonobo.ext.jupyter.rst deleted file mode 100644 index c695678..0000000 --- a/docs/reference/bonobo.ext.jupyter.rst +++ /dev/null @@ -1,30 +0,0 @@ -bonobo.ext.jupyter package -========================== - -Submodules ----------- - -bonobo.ext.jupyter.plugin module --------------------------------- - -.. automodule:: bonobo.ext.jupyter.plugin - :members: - :undoc-members: - :show-inheritance: - -bonobo.ext.jupyter.widget module --------------------------------- - -.. automodule:: bonobo.ext.jupyter.widget - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: bonobo.ext.jupyter - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/reference/bonobo.ext.rst b/docs/reference/bonobo.ext.rst deleted file mode 100644 index 63327ef..0000000 --- a/docs/reference/bonobo.ext.rst +++ /dev/null @@ -1,46 +0,0 @@ -bonobo.ext package -================== - -Subpackages ------------ - -.. toctree:: - - bonobo.ext.console - bonobo.ext.jupyter - -Submodules ----------- - -bonobo.ext.couchdb_ module --------------------------- - -.. automodule:: bonobo.ext.couchdb_ - :members: - :undoc-members: - :show-inheritance: - -bonobo.ext.opendatasoft module ------------------------------- - -.. automodule:: bonobo.ext.opendatasoft - :members: - :undoc-members: - :show-inheritance: - -bonobo.ext.selenium module --------------------------- - -.. automodule:: bonobo.ext.selenium - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: bonobo.ext - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/tutorial/basics.rst b/docs/tutorial/basics.rst index ad507b7..3b1b989 100644 --- a/docs/tutorial/basics.rst +++ b/docs/tutorial/basics.rst @@ -58,7 +58,10 @@ Let's chain the three components together and run the transformation: digraph { rankdir = LR; - "generate_data" -> "uppercase" -> "output"; + stylesheet = "../_static/graphs.css"; + + BEGIN [shape="point"]; + BEGIN -> "generate_data" -> "uppercase" -> "output"; } We use the :func:`bonobo.run` helper that hides the underlying object composition necessary to actually run the diff --git a/setup.py b/setup.py index 624fd3f..a19e5bd 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,11 @@ # This file is autogenerated by edgy.project code generator. # All changes will be overwritten. 
+import os from setuptools import setup, find_packages +root_dir = os.path.dirname(os.path.abspath(__file__)) + tolines = lambda c: list(filter(None, map(lambda s: s.strip(), c.split('\n')))) @@ -12,10 +15,19 @@ def read(filename, flt=None): return flt(content) if callable(flt) else content +# Py3 compatibility hacks, borrowed from IPython. try: - version = read('version.txt') -except: # pylint: disable=bare-except - version = 'dev' + execfile +except NameError: + + def execfile(fname, globs, locs=None): + locs = locs or globs + exec(compile(open(fname).read(), fname, "exec"), globs, locs) + + +version_ns = {} +execfile(os.path.join(root_dir, 'bonobo/_version.py'), version_ns) +version = version_ns.get('__version__', 'dev') setup( name='bonobo', diff --git a/version.txt b/version.txt deleted file mode 100644 index d917d3e..0000000 --- a/version.txt +++ /dev/null @@ -1 +0,0 @@ -0.1.2