compiling notebooks into a report
For a master's project in computational neuroscience, we adopted a rather novel workflow covering all the steps, from learning the basics to writing the final thesis. Though we were flexible in our method during the 6 months of this work, a simple workflow emerged, which I describe here.
The project involved the modelling of a rather complex system: networks of spiking neurons, and the study of the states that emerge when they interact in populations. Of particular interest was the possibility of obtaining what is called a "balanced state", which is relevant for modelling the brain. Finally, we modelled a realistic representation of the coding of orientation in what is called a "ring model".
- As such, we had a hierarchy of problems to solve, from the single neuron to the full network. The only solution was thus to take small steps, and we were careful to create new notebooks on a daily basis to report these advances (their names being prefixed with the ISO 8601 date, for instance `2016-03-02_FeedForward_comparing_ExpVSAlpha`).
- To avoid the usual rush at the moment of handing over the thesis, another constraint that we adopted was that, for every notebook, the first cell would describe what was done, possibly with a figure. One huge advantage is that the student did not have to learn LaTeX, but only Markdown.
- In the end we had a bunch of notebooks that could just be "compiled" to produce a nice looking PDF file.
Let's focus on how to do that. For those in a hurry, let's just say that it involves:
- globbing a set of relevant notebooks,
- stripping out what is not important and keeping the important stuff,
- concatenating all of these into one notebook,
- converting that to a PDF using a LaTeX template.
This is summarized in this script:
%%writefile thesis.py
name = 'thesis'
import nbconvert
import nbformat
# Collect the notebooks that make up the report.
nb_list = []
import glob
# glob.glob() returns its matches in arbitrary order: sort them so that the
# order of the chapters is reproducible and follows the numeric prefixes.
for fname in sorted(glob.glob('*.ipynb')):
    # Only notebooks whose name starts with a chapter number (1 to 4) are kept.
    if fname.startswith(('1', '2', '3', '4')):
        print ("'{}', ".format(fname) )
        nb_list.append(fname)
def strip(nb):
    """
    Keep only the "serious block" of a notebook.

    The returned notebook contains the cells:

    - starting with the first markdown cell that begins a section (that is,
      whose source starts with a ``#``),
    - stopping just before the next markdown cell that begins a section.

    If no cell begins a section, all cells are kept. Code cells are never
    treated as section boundaries, so a code cell starting with a Python
    comment does not cut the block short.

    Parameters
    ----------
    nb : notebook node
        Object exposing a ``cells`` list and a ``copy()`` method. Each cell is
        a mapping with a ``'source'`` entry (a string, or a list of lines) and
        a ``'cell_type'`` entry.

    Returns
    -------
    The object returned by ``nb.copy()``, with ``cells`` replaced by the
    selected slice. The ``cells`` list of ``nb`` itself is not modified.
    """
    start, stop = -1, len(nb.cells)
    for i_cell, cell in enumerate(nb.cells):
        source = cell['source']
        if isinstance(source, list):
            # sources stored as a list of lines are handled like a single string
            source = ''.join(source)
        is_heading = (cell.get('cell_type') == 'markdown'
                      and source.startswith('#'))
        if not is_heading:
            continue
        if start == -1:
            start = i_cell
        else:
            # second heading found: the block ends just before it
            stop = i_cell
            break
    if start == -1:
        # no heading at all: keep the notebook from its first cell
        start = 0
    nb_out = nb.copy()
    nb_out.cells = nb.cells[start:stop]
    return nb_out
def merge_notebooks(outfile, filenames):
    """
    Concatenate the stripped cells of several notebooks into one notebook file.

    Each notebook is read as UTF-8, reduced with ``strip`` and its cells are
    appended, in order, to those of the first notebook.

    Parameters
    ----------
    outfile : str
        Path of the merged notebook to write (UTF-8 encoded).
    filenames : iterable of str
        Notebooks to merge, in the order they should appear. Everything
        other than the cells is taken from the first one.

    Raises
    ------
    ValueError
        If ``filenames`` is empty; ``outfile`` is not touched in that case.
    """
    merged = None
    for fname in filenames:
        with open(fname, 'r', encoding='utf-8') as f:
            nb = nbformat.read(f, nbformat.NO_CONVERT)
        nb = strip(nb)
        if merged is None:
            merged = nb
        else:
            merged.cells.extend(nb.cells)
    if merged is None:
        # Fail before opening ``outfile`` so that an existing file is not truncated.
        raise ValueError('no notebook to merge: `filenames` is empty')
    with open(outfile, 'w', encoding='utf-8') as f:
        f.write(nbformat.writes(merged, nbformat.NO_CONVERT))
# Build the master notebook, then convert it to PDF with the custom template.
merge_notebooks(name + '.ipynb', nb_list)
# The merged notebook was written as UTF-8: read it back with the same
# encoding instead of relying on the platform default.
with open(name + '.ipynb', 'r', encoding='utf-8') as f:
    nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)
latex_exporter = nbconvert.PDFExporter()
latex_exporter.template_file = name # assumes it has the same name as the output
latex_exporter.verbose = True
(body, resources) = latex_exporter.from_notebook_node(nb)
# ``body`` holds the PDF as raw bytes: write it in binary mode so that no
# text encoding or newline translation can alter the file.
with open(name + '.pdf', 'wb') as f:
    f.write(body)
%run thesis.py
Voilà!
## details of the method
Below I describe the method in more detail.
## templating
While essential to typeset documents, $\LaTeX$ takes a while to learn, so we decided to have everything written in Markdown, as it is native to IPython notebooks and covers most needs, from structuring a document to writing equations. In IPython's nbconvert scheme, the conversion machinery is driven by a template that we simply adapted to our needs (mainly by tweaking the report.tplx file and using information found around the web). Some cosmetic improvements remain possible, such as passing parameters like the author's name programmatically, but this is practically what came out:
%%writefile thesis.tplx
((*- extends 'report.tplx' -*))
% Default to the notebook output style
((* set cell_style = 'style_ipython.tplx' *))
%((* set cell_style = 'style_bw_python.tplx' *))
((* block docclass *))
\documentclass[french, 12pt]{report}
((* endblock docclass *))
((* block packages *))
((( super() )))
\usepackage[french]{babel}%
%\usepackage{graphics}%
\usepackage{setspace}%
\newcommand{\BookTitle}{
{\bf Aix-Marseille Université}\\
{\bf Mémoire de Recherche} \\
présenté en vue de l'obtention du \\
{\bf MASTER de NEUROSCIENCES} \\
(Spécialité: NIC)
}
\newcommand{\Title}{OB-V1 : un modèle de détection de l'orientation dans l'aire visuelle primaire}%
\newcommand{\Author}{Fernand David Arbib}%
\newcommand{\AuthorB}{Laurent U.~Perrinet}%
\newcommand{\Team}{\'Equipe Inference in Visual Behaviour (InViBe)}%
\newcommand{\Institute}{Institut de Neurosciences de la Timone}%
\newcommand{\InstituteUMR}{UMR 7289, CNRS / Aix-Marseille Université}%
\newcommand{\Address}{27, Bd. Jean Moulin, 13385 Marseille Cedex 5, France}
\newcommand{\Website}{https://URL/LaurentPerrinet}
\newcommand{\Email}{Laurent.Perrinet@univ-amu.fr}
((* endblock packages *))
((* block h1 -*))\chapter((* endblock h1 -*))
((* block h2 -*))\section((* endblock h2 -*))
((* block h3 -*))\subsection((* endblock h3 -*))
((* block h4 -*))\subsubsection((* endblock h4 -*))
((* block h5 -*))\paragraph((* endblock h5 -*))
((* block abstract *))
%\tableofcontents
~\par
\newpage
((* endblock abstract *))
((* block margins *))
\parindent=0pt
\parskip=6pt
((* endblock margins *))
((* block predoc *))
((* block title *))
\title{\Title}
((* endblock title *))
((* block author *))
\author{\Author}
((* endblock author *))
((* block maketitle *))
%\maketitle
\begin{titlepage}
\begin{center}
%\vskip 2cm
\begin{spacing}{1.2}
{\Large \BookTitle }%
\end{spacing}
\vskip 1cm
\begin{spacing}{1.5}
{\Huge \Title }
\end{spacing}
%\vskip 1cm
%\emph{\Large \SubTitle }%
\begin{center}
\includegraphics{/tmp/troislogos.png}
\end{center}
%\vskip 1cm
{\renewcommand{\arraystretch}{1.5} %<- modify value to suit your needs
\begin{tabular}[t]{|c|c|}
\hline
Par: & {\large \Author} \\\hline
Responsable de Stage: & {\large \AuthorB} \\%\hline
&\url{\Website}\\%\hline
&\url{\Email}\\\hline
Laboratoire: & \Team \\%\hline
& \Institute \\
& \InstituteUMR \\%\hline
&\Address \\\hline
\end{tabular}
}
\vskip .5cm
\vfill
{\large Juin 2016}
%\pageskip
\end{center}
\end{titlepage}
\tableofcontents
%\doublespacing
((* endblock maketitle *))
((* endblock predoc *))
((* block commands *))
    % Prevent overflowing lines due to hard-to-break entities
    \sloppy
    % Setup hyperref package (each key is set only once)
    \hypersetup{
      breaklinks=true,  % so long urls are correctly broken across lines
      pdftitle={\Title},
      pdfauthor={\Author},
      colorlinks=true,  % colorize the links
      urlcolor=blue,    % color of hyperlinks
      linkcolor=blue,   % color of internal links
      citecolor=blue,   % color of citation links
      bookmarksopen=false,
      pdftoolbar=false,
      pdfmenubar=false,
      % hidelinks
      }
    % Slightly bigger margins than the latex defaults
    \geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in}
((* endblock commands *))
((* block bibliography *))
\bibliographystyle{plain}
\bibliography{/tmp/thesis}
((* endblock bibliography *))
For some reason, the LaTeX file is compiled in a temporary folder and loses track of the current working directory. One solution is to copy the files to an absolute path that will be cleaned up at the next reboot:
!cp ../figs/troislogos.png /tmp/troislogos.png
!cp ../figs/ring_model.png /tmp/ring_model.png
!cp ../figs/future_model.png /tmp/future_model.png
Optionally, it is possible to selectively disable some cells by introducing the following in the template:
## including references
Moreover, it is possible to cite references and have them included using BibTeX. In Markdown, a citation has to be formatted like
<cite data-cite="Brunel2000">(Brunel, 2000)</cite>
to get something like (Brunel, 2000). This of course involves creating a bibliography file:
%%writefile /tmp/thesis.bib
@article{Brunel_2007,
Author = {Brunel, Nicolas and van Rossum, Mark C. W.},
Doi = {10.1007/s00422-007-0190-0},
Issn = {1432-0770},
Journal = {Biol Cybern},
Month = {Oct},
Number = {5-6},
Pages = {337--339},
Publisher = {Springer Science + Business Media},
Title = {Lapicque's 1907 paper: from frogs to integrate-and-fire},
Url = {http://dx.doi.org/10.1007/s00422-007-0190-0},
Volume = {97},
Year = {2007},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/s00422-007-0190-0}}
@article{Burkitt_2006,
Author = {Burkitt, A. N.},
Doi = {10.1007/s00422-006-0068-6},
Issn = {1432-0770},
Journal = {Biol Cybern},
Month = {Apr},
Number = {1},
Pages = {1--19},
Publisher = {Springer Science + Business Media},
Title = {A Review of the Integrate-and-fire Neuron Model: I. Homogeneous Synaptic Input},
Url = {http://dx.doi.org/10.1007/s00422-006-0068-6},
Volume = {95},
Year = {2006},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/s00422-006-0068-6}}
@article{Burkitt_2006a,
Author = {Burkitt, A. N.},
Date-Modified = {2016-06-02 09:06:54 +0000},
Doi = {10.1007/s00422-006-0082-8},
Issn = {1432-0770},
Journal = {Biol Cybern},
Month = {Jul},
Number = {2},
Pages = {97--112},
Publisher = {Springer Science + Business Media},
Title = {A review of the integrate-and-fire neuron model: II. Inhomogeneous synaptic input and network properties},
Url = {http://dx.doi.org/10.1007/s00422-006-0082-8},
Volume = {95},
Year = {2006},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/s00422-006-0082-8}}
@article{Goris_2015,
Author = {Goris, Robbe L.T. and Simoncelli, Eero P. and Movshon, J. Anthony},
Doi = {10.1016/j.neuron.2015.10.009},
Issn = {0896-6273},
Journal = {Neuron},
Month = {Nov},
Number = {4},
Pages = {819--831},
Publisher = {Elsevier BV},
Title = {Origin and Function of Tuning Diversity in Macaque Visual Cortex},
Url = {http://dx.doi.org/10.1016/j.neuron.2015.10.009},
Volume = {88},
Year = {2015},
Bdsk-Url-1 = {http://dx.doi.org/10.1016/j.neuron.2015.10.009}}
@article{hansel1995synchrony,
Author = {Hansel, David and Mato, Germ{\'a}n and Meunier, Claude},
Date-Added = {2016-06-02 09:07:52 +0000},
Date-Modified = {2016-06-02 09:07:52 +0000},
Journal = {Neural computation},
Number = {2},
Pages = {307--337},
Publisher = {MIT Press},
Title = {Synchrony in excitatory neural networks},
Volume = {7},
Year = {1995}}
@article{Hunter07,
Abstract = {Matplotlib is a {2D} graphics package for Python for application development, interactive scripting, and publication-quality image generation across user interfaces and operating systems.},
Address = {Los Alamitos, CA, USA},
Author = {Hunter, John D.},
Booktitle = {Computing in Science \& Engineering},
Citeulike-Article-Id = {2878517},
Citeulike-Linkout-0 = {http://dx.doi.org/10.1109/MCSE.2007.55},
Citeulike-Linkout-1 = {http://doi.ieeecomputersociety.org/10.1109/MCSE.2007.55},
Citeulike-Linkout-2 = {http://dx.doi.org/10.1109/mcse.2007.55},
Citeulike-Linkout-3 = {http://ieeexplore.ieee.org/xpls/abs\_all.jsp?arnumber=4160265},
Date-Added = {2016-06-02 09:06:18 +0000},
Date-Modified = {2016-06-02 09:06:18 +0000},
Day = {01},
Doi = {10.1109/MCSE.2007.55},
Issn = {1521-9615},
Journal = {Computing in Science and Engineering},
Keywords = {assofield, bicv-motion, bicv-sparse, kaplan13, khoei13jpp, perrinet12pred, python, reproducible-science, thesis},
Month = may,
Number = {3},
Pages = {90--95},
Priority = {0},
Publisher = {IEEE Computer Society},
Title = {Matplotlib: A {2D} Graphics Environment},
Url = {http://dx.doi.org/10.1109/MCSE.2007.55},
Volume = {9},
Year = {2007},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/MCSE.2007.55}}
@article{Leon12,
Abstract = {Choosing an appropriate set of stimuli is essential to characterize the response of a sensory system to a particular functional dimension, such as the eye movement following the motion of a visual scene. Here, we describe a framework to generate random texture movies with controlled information content, i.e., Motion Clouds. These stimuli are defined using a generative model that is based on controlled experimental parametrization. We show that Motion Clouds correspond to dense mixing of localized moving gratings with random positions. Their global envelope is similar to natural-like stimulation with an approximate full-field translation corresponding to a retinal slip. We describe the construction of these stimuli mathematically and propose an open-source Python-based implementation. Examples of the use of this framework are shown. We also propose extensions to other modalities such as color vision, touch, and audition.},
Author = {Sanz-Leon, Paula and Vanzetta, I. and Masson, G. S. and Perrinet, L. U.},
Citeulike-Article-Id = {10461699},
Citeulike-Linkout-0 = {http://dx.doi.org/10.1152/jn.00737.2011},
Citeulike-Linkout-1 = {http://jn.physiology.org/content/early/2012/03/10/jn.00737.2011.abstract},
Citeulike-Linkout-2 = {http://jn.physiology.org/content/early/2012/03/10/jn.00737.2011.full.pdf},
Citeulike-Linkout-3 = {http://view.ncbi.nlm.nih.gov/pubmed/22423003},
Citeulike-Linkout-4 = {http://www.hubmed.org/display.cgi?uids=22423003},
Date-Added = {2016-06-02 09:07:31 +0000},
Date-Modified = {2016-06-02 09:07:31 +0000},
Day = {14},
Doi = {10.1152/jn.00737.2011},
Issn = {1522-1598},
Journal = {Journal of Neurophysiology},
Keywords = {bicv-sparse, freemove, kaplan13, motion-clouds, sanz12jnp, vacher14},
Month = mar,
Number = {11},
Pages = {3217--3226},
Pmid = {22423003},
Priority = {0},
Publisher = {American Physiological Society},
Title = {Motion clouds: model-based stimulus synthesis of natural-like random textures for the study of motion perception},
Url = {http://dx.doi.org/10.1152/jn.00737.2011},
Volume = {107},
Year = {2012},
Bdsk-Url-1 = {http://dx.doi.org/10.1152/jn.00737.2011}}
@article{Oliphant07,
Abstract = {By itself, Python is an excellent "steering" language for scientific codes written in other languages. However, with additional basic tools, Python transforms into a high-level language suited for scientific and engineering code that's often fast enough to be immediately useful but also flexible enough to be sped up with additional extensions.},
Address = {Los Alamitos, CA, USA},
Author = {Oliphant, T. E.},
Citeulike-Article-Id = {5662279},
Citeulike-Linkout-0 = {http://dx.doi.org/10.1109/MCSE.2007.58},
Citeulike-Linkout-1 = {http://doi.ieeecomputersociety.org/10.1109/MCSE.2007.58},
Citeulike-Linkout-2 = {http://dx.doi.org/10.1109/mcse.2007.58},
Citeulike-Linkout-3 = {http://ieeexplore.ieee.org/xpls/abs\_all.jsp?arnumber=4160250},
Date-Added = {2016-06-02 09:06:18 +0000},
Date-Modified = {2016-06-02 09:06:18 +0000},
Day = {01},
Doi = {10.1109/MCSE.2007.58},
Institution = {Brigham Young Univ., Provo},
Issn = {1521-9615},
Journal = {Computing in Science and Engineering},
Keywords = {assofield, bicv-motion, bicv-sparse, kaplan13, khoei13jpp, perrinet12pred, python, reproducible-science, thesis},
Month = may,
Number = {3},
Pages = {10--20},
Priority = {0},
Publisher = {IEEE Computer Society},
Title = {Python for Scientific Computing},
Url = {http://dx.doi.org/10.1109/MCSE.2007.58},
Volume = {9},
Year = {2007},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/MCSE.2007.58}}
## merging all notebooks
From all the dated notebooks, we selected the ones to be included in the report and ordered them according to a hierarchical naming scheme that makes it easy to glob them:
# Select the notebooks of the report, either from an explicit list (manual
# mode) or by globbing the current directory.
if False: # manual mode
    nb_list =['1-Introduction.ipynb',
     '2.1.1_NeuralNetworks_SpikingNeuronModel.ipynb',
     '2.1.2_NeuralNetworks_Brian_OneNeuron.ipynb',
     '2.1.3_NeuralNetworks_Brian.ipynb',
     '2.1.4_NeuralNetworks_Nest.ipynb',
     '2.1.5_NeuralNetworks_pyNN_CODAvsCUBA.ipynb',
     '2.1.6_FeedForward_Exploration_I-Fcurve.ipynb',
     '2.2.10_RRNN_Exploration_Curve_Sparseness.ipynb',
     '2.2.11_RRNN_Exploration_Curve_G.ipynb',
     '2.2.12_RRNN_Rasterplot_G.ipynb',
     '2.2.1_RRNN_Presentation.ipynb',
     '2.2.2_RRNN_Exploration_control cell parameters.ipynb',
     '2.2.3_RRNN_Exploration_ModelExpVSAlpha.ipynb',
     '2.2.4_RRNN_Rasterplot_InputVariation.ipynb',
     '2.2.5_RRNN_Rasterplot_InputWeight.ipynb',
     '2.2.6_RRNN_Exploration_I-Fcurve.ipynb',
     '2.2.7_RRNN_Rasterplot_CheckingInvariants.ipynb',
     '2.2.8_RRNN_Rasterplot_GlobalWeight_Variation.ipynb',
     '2.2.9_RRNN_Exploration_Curve_Weights.ipynb',
     '2.3.1_RRNN_BalancedStates_MultiOptimisation_Intro.ipynb',
     '2.3.2_RRNN_BalancedStates_MultiOptimisation_DifferentWeight.ipynb',
     '2.3.3_RRNN_BalancedStates_MultiOptimisation_DifferentG.ipynb',
     '3.1.1_Ring Intro.ipynb',
     '3.2.1_Ring Tuned input.ipynb',
     '3.2.2_Ring Tuned input Homogeneous weight.ipynb',
     '3.2.3_Ring Tuned_Bandwidths.ipynb',
     ]
else:
    nb_list = []
    import glob
    # glob.glob() returns its matches in arbitrary order: sort them so that
    # the outline is reproducible (the manual list above is in that same
    # lexicographic order).
    for fname in sorted(glob.glob('*.ipynb')):
        # Only notebooks whose name starts with a chapter number (1 to 4) are kept.
        if fname.startswith(('1', '2', '3', '4')):
            print ("'{}', ".format(fname) )
            nb_list.append(fname)
This also gives the outline of the thesis.
## stripping the first block
When working on a notebook, you want to be free to experiment with new things or to test bits of code. By convention, we chose to define a "serious block", the rest being stripped from the final report. That is, we chose to keep only the cells:
- starting with the first cell that begins with a section (that is, with a `#`),
- stopping with the next cell that begins with a section (that is, with a `#`).
We can experiment with reading one notebook, for instance:
# Notebook files are read as UTF-8 here, as `merge_notebooks` does, instead
# of relying on the platform default encoding.
with open('1.0-Introduction.ipynb', 'r', encoding='utf-8') as f:
    nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)
# Full slice: keeps every cell; narrow the bounds to experiment with a subset.
nb.cells = nb.cells[0:len(nb.cells)]
print ('Number of cells: ', len(nb.cells))
And define different blocks:
# Read the introduction notebook and count its "blocks": the counter is
# incremented for each non-empty cell whose source begins with '#'.
with open('1.0-Introduction.ipynb', 'r') as f:
nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)
# Show the third cell as an example of what a cell contains.
print (nb.cells[2])
blocks = 0
for cell in nb.cells:
if len(cell['source'])>0:
if cell['source'][0] == '#':
blocks += 1
# NOTE(review): indentation was lost in this listing; the prints below
# presumably run once per cell inside the loop -- confirm against the
# original notebook.
print('-----')
print(blocks)
print('-----')
print(cell['source'])
We can wrap this up in one function:
with open('1.0-Introduction.ipynb', 'r') as f:
nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)
def strip(nb):
    """
    Keep only the "serious block" of a notebook.

    The returned notebook contains the cells:

    - starting with the first markdown cell that begins a section (that is,
      whose source starts with a ``#``),
    - stopping just before the next markdown cell that begins a section.

    If no cell begins a section, all cells are kept. Code cells are never
    treated as section boundaries, so a code cell starting with a Python
    comment does not cut the block short.

    Parameters
    ----------
    nb : notebook node
        Object exposing a ``cells`` list and a ``copy()`` method. Each cell is
        a mapping with a ``'source'`` entry (a string, or a list of lines) and
        a ``'cell_type'`` entry.

    Returns
    -------
    The object returned by ``nb.copy()``, with ``cells`` replaced by the
    selected slice. The ``cells`` list of ``nb`` itself is not modified.
    """
    start, stop = -1, len(nb.cells)
    for i_cell, cell in enumerate(nb.cells):
        source = cell['source']
        if isinstance(source, list):
            # sources stored as a list of lines are handled like a single string
            source = ''.join(source)
        is_heading = (cell.get('cell_type') == 'markdown'
                      and source.startswith('#'))
        if not is_heading:
            continue
        if start == -1:
            start = i_cell
        else:
            # second heading found: the block ends just before it
            stop = i_cell
            break
    if start == -1:
        # no heading at all: keep the notebook from its first cell
        start = 0
    nb_out = nb.copy()
    nb_out.cells = nb.cells[start:stop]
    return nb_out
nb_out = strip(nb)
print(nb_out)
## merging notebooks
We can now merge these blocks together into one master notebook:
def merge_notebooks(outfile, filenames):
    """
    Concatenate the stripped cells of several notebooks into one notebook file.

    Each notebook is read as UTF-8, reduced with ``strip`` and its cells are
    appended, in order, to those of the first notebook.

    Parameters
    ----------
    outfile : str
        Path of the merged notebook to write (UTF-8 encoded).
    filenames : iterable of str
        Notebooks to merge, in the order they should appear. Everything
        other than the cells is taken from the first one.

    Raises
    ------
    ValueError
        If ``filenames`` is empty; ``outfile`` is not touched in that case.
    """
    merged = None
    for fname in filenames:
        with open(fname, 'r', encoding='utf-8') as f:
            nb = nbformat.read(f, nbformat.NO_CONVERT)
        nb = strip(nb)
        if merged is None:
            merged = nb
        else:
            merged.cells.extend(nb.cells)
    if merged is None:
        # Fail before opening ``outfile`` so that an existing file is not truncated.
        raise ValueError('no notebook to merge: `filenames` is empty')
    with open(outfile, 'w', encoding='utf-8') as f:
        f.write(nbformat.writes(merged, nbformat.NO_CONVERT))
merge_notebooks(name + '.ipynb', nb_list)
## converting to LaTeX and PDF
Finally, we convert this notebook using nbconvert and the template that we defined above.
# The merged notebook was written as UTF-8: read it back with the same
# encoding instead of relying on the platform default.
with open(name + '.ipynb', 'r', encoding='utf-8') as f:
    nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)
import nbconvert
from traitlets.config import Config
# 1. Import the exporter
from nbconvert import PDFExporter
# 2. Instantiate the exporter and select the custom `thesis` template.
latex_exporter = PDFExporter()
latex_exporter.template_file = 'thesis'
latex_exporter.verbose = True
# 3. Process the notebook we loaded earlier
(body, resources) = latex_exporter.from_notebook_node(nb)
# 4. Write to file. ``body`` holds the PDF as raw bytes: use binary mode so
#    that no text encoding or newline translation can alter the file.
with open(name + '.pdf', 'wb') as f:
    f.write(body)
We can now enjoy reading the thesis file:
!open thesis.pdf