3 years ago · b0e5d171b9
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,5 @@ crates/scratch/
 
				 *.fls
			
 
				 *.xdv
			
 
				 *.blg
			
 
				+*.bcf
			
 
				+*.xml
			
--- a/doc/Paper/.vscode/settings.json
+++ b/doc/Paper/.vscode/settings.json
--- a/doc/Paper/Paper.tex
+++ b/doc/Paper/Paper.tex
@@ -2,6 +2,8 @@
 
				 \usepackage{amsfonts,amssymb,amsmath}
			
 
				 \usepackage[scale=0.75]{geometry}
			
 
				 \usepackage{hyperref}
			
 
				+\usepackage{biblatex}
			
 
				+\bibliography{../citations.bib}
			
 
				 
			
 
				 \title{Blocktree \\
			
 
				 \large A platform for distributed computing.}
			
@@ -23,13 +25,14 @@ the details of backing up user data and implementing access controls to facilita
 
				 safe sharing. However, because these are closed systems, users are forced to trust that
			
 
				 the operators are benevolent, and they lack any real way of ensuring that the access
			
 
				 controls they prescribe will actually be enforced. There have been several systems proposed
			
 
				-as alternatives to the conventional model, but these systems suffer from several shortcomings.
			
 
				-They either assume the need for cloud storage providers (Blockstack) or implement all operations
			
 
				-using a global blockchain, limiting performance (Filecoin). Blocktree takes a different approach.
			
 
				+as alternatives to the conventional model, but these systems suffer from their own shortcomings.
			
 
				+They either assume the need for cloud storage providers (Blockstack \cite{blockstack}) or implement
			
 
				+all operations using a global blockchain, limiting performance (Filecoin \cite{filecoin}).
			
 
				+Blocktree takes a different approach.
			
 
				 
			
 
				 The idea behind blocktree is to organize a user's computers into a cooperative unit, called a
			
 
				 blocktree. The user is said to own the blocktree, and they wield sovereign authority over it.
			
 
				-The artifact granting them this authority is the root private key for the blocktree. Measures for protecting
			
 
				+The artifact granting them this authority is the root private key of the blocktree. Measures for protecting
			
 
				 this key and delegating its authority are important design considerations of the system.
			
 
				 The owners of blocktrees are encouraged to collaborate with each other to replicate data by
			
 
				 means of a cryptocurrency known as blockcoin. The blockchain implementing this cryptocurrency
			
@@ -82,7 +85,11 @@ recognize this as the ``trees grow up" model of formal logic, and it was chosen
 
				 appear visually similar to real world trees when rendered in a virtual environment.
			
 
				 
			
 
				 Authenticity guarantees are provided using a digital signature scheme. In order to change the
			
 
				-contents of a block a data structure called a write capability, or writecap, is needed. A
			
 
				+contents of a block a data structure called a write capability, or writecap%
			
 
				+\footnote{The names readcap and writecap were taken from the Tahoe Least-Authority Filesystem
			
 
				+\cite{tahoe}. The access control mechanism described in the Tahoe system heavily influenced the
			
 
				+design of Blocktree.},
			
 
				+is needed. A
			
 
				 writecap is approximately an x509 certificate chain. A writecap contains the following data:
			
 
				 \begin{itemize}
			
 
				 \item The path the writecap can be used under.
			
@@ -122,11 +129,12 @@ responsible for, then the child node is responsible for the subdirectory.
 
				 In this way data storage can be delegated, allowing the system to scale. When more than one
			
 
				 node is attached to the same directory they form a cluster.
			
 
				 Each node in the cluster contains a copy of the data that the cluster is responsible for. They
			
 
				-maintain consistency of this data by running the Raft consensus protocol.
			
 
				+maintain consistency of this data by running the Raft \cite{raft} consensus protocol.
			
 
				 
			
 
				 When a new blocktree is created a node generates a key pair to serve as the root keys.
			
 
				 It is imperative for the security of the system that the root private key is protected, and it
			
 
				-is highly recommended that it be stored in a Trusted Platform Module (TPM) and that the TPM
			
 
				+is highly recommended that it be stored in a Trusted Platform Module (TPM) \cite{tpm} and that
			
 
				+the TPM
			
 
				 be configured to disallow unauthenticated use of this key. The node then generates its own key pair
			
 
				 and uses the root private key to issue itself a writecap for the root of the tree. Once it has
			
 
				 this writecap, it creates the root block and generates a block key for it. A readcap is added to
			
@@ -169,13 +177,13 @@ a child for redundancy. User code is also able to initiate the sending of messag
 
				 addressed using blocktree paths. When a node receives a message that is not addressed to it,
			
 
				 but is addressed to its blocktree, it forwards it to the closest node to the recipient that it
			
 
				 is connected to. In order to enable efficient low-latency message transfers, nodes maintain open
			
 
				-connections to the other nodes in their cluster, and the cluster leader maintains a connection to
			
 
				+TCP connections to the other nodes in their cluster, and the cluster leader maintains a connection to
			
 
				 its parent. Diffie-Hellman key exchange is used to exchange a key for use in an AEAD cipher, and
			
 
				 once this cipher context is established, the two nodes mutually authenticate each other using their
			
 
				 respective key pairs. When a node comes online, it uses the global blocktree (described later)
			
 
				 to find the other nodes in its cluster. If it is not part of a cluster, or this information is
			
 
				 not stored in the global blocktree, then it instead looks up the
			
 
				-IP address of a root node and connects to it. The root node may then direct the node to connect to
			
 
				+IP address of a root node and connects to it. The root node may direct the node to connect to
			
 
				 one of root's children, and this process repeats until the new node is connected to its parent.
			
 
				 
			
 
				 A concept that has proven to be very useful in the world of filesystems is the symbolic link.
			
@@ -226,7 +234,8 @@ In order to allow nodes to access blocks in other blocktrees, a global ledger of
 
				 This ledger is implemented using a proof of work (PoW) blockchain and a corresponding 
			
 
				 cryptocurrency known as blockcoin. Nodes mine chain blocks (not to be confused with the tree 
			
 
				 blocks we've been discussing up till now) in the same way they do in other PoW blockchain
			
 
				-systems such as BitCoin. The node which manages to mine the next chain block receives a reward,
			
 
				+systems such as Bitcoin \cite{bitcoin}. The node which manages to mine the next chain block receives
			
 
				+a reward,
			
 
				 which is the sum of the fees for each event in the chain and a variable amount of newly minted
			
 
				 blockcoin. The amount of new blockcoin created by a chain block is directly proportional to the
			
 
				 number of data storage events contained in the chain block. Thus the total amount of blockcoin
			
@@ -235,10 +244,10 @@ the fact that blockcoin exists to provide an accounting mechanism for data.
 
				 
			
 
				 When a node writes data to a tree block, and it wishes this block to be globally accessible or
			
 
				 replicated for redundancy,
			
 
				-it produces what are called fragments. Fragments are the output symbols from an Erasure Coding
			
 
				-algorithm (such as the RaptorQ code). These algorithms are a class of fountain codes which have
			
 
				-the property that only $m$ out of $n$ (where $m < n$) symbols are needed to reconstruct the
			
 
				-original data. Such a code ensures that even if some of the fragments are lost, as long as $m$
			
 
				+it produces what are called fragments. Fragments are the output symbols of the RaptorQ code
			
 
				+\cite{raptorq}. This algorithm is a fountain code, a class of erasure codes
			
 
				+which have the property that only $m$ out of $n$ (where $m < n$) symbols are needed to reconstruct
			
 
				+the original data. Such a code ensures that even if some of the fragments are lost, as long as $m$
			
 
				 remain, the original data can be recovered.
			
 
				 
			
 
				 Once these fragments have been computed an event is created for each one and published to the
			
@@ -303,17 +312,16 @@ blockcoin balance.
 
				 To compute the entries in these tree blocks, the nodes in the network iterate over all the chain blocks, updating
			
 
				 their local copy of each tree block appropriately. The experienced reader will recognize that this is an event 
			
 
				 sourced architecture. Currently only these two tree blocks are known to be needed, but if new events are
			
 
				-added to the system it's easy to see that this system can be used for creating other data structures enabling
			
 
				-queries that we have yet to envision.
			
 
				+added to the system it can be easily extended to enable queries that have yet to be envisioned.
			
 
				 
			
 
				 \section{Programming Environment}
			
 
				 Enabling an excellent developer experience is one of the primary goals of this system (the others being security
			
 
				-and scalability). Nodes execute user code that has been compiled into WebAssembly modules. Such code
			
 
				+and scalability). Nodes execute user code that has been compiled into WebAssembly modules \cite{wasm}. Such code
			
 
				 running on a blocktree node is referred to as an ``app''. An app
			
 
				 executes in a sandbox that isolates it from other code, as well as the security critical operations of the node
			
 
				 itself. The sandbox provides the code with an extension of the WebAssembly System Interface (WASI), with extra 
			
 
				 system calls to interact with the particulars of the blocktree system.
			
 
				-The extra system calls fall into three categories:
			
 
				+The extra system calls fall into these categories:
			
 
				 \begin{itemize}
			
 
				 \item Distributed Locking
			
 
				 \item Messaging
			
@@ -323,7 +331,7 @@ The extra system calls fall into three categories:
 
				 The standard WASI filesystem APIs are used
			
 
				 to interact with the contents of blocktrees. For instance a file descriptor for a block can be obtained
			
 
				 by calling path\_open. Writes and reads of blocks are performed using the privileges of the node on which
			
 
				-the app is running. 
			
 
				+the app is running, but the node may limit the app's access depending on its permissions.
			
 
				 
			
 
				 When an app is installed it is given a directory under which it can store data that is shared between all nodes
			
 
				 in the blocktree. The path of this block is formed by prefixing the path the app was published at
			
@@ -368,14 +376,21 @@ which can be safely run in the node itself, or even in a SmartNIC. This would re
 
				 the callbacks only use an approved set of APIs, but could enable much higher performance.
			
 
				 
			
 
				 % Supervision Trees
			
 
				-Apps can also arrange themselves into supervision trees, in the same way that Erlang
			
 
				-processes are arranged. In this scheme, when a child app crashes, or the node its
			
 
				-running on dies (which is detected by other nodes), then the app receives a message. In the
			
 
				+Apps can arrange themselves into supervision trees, in the same way that Erlang
			
 
				+processes are arranged \cite{armstrong}. In this scheme, when a child app crashes, or the node it's
			
 
				+running on dies (which is detected by other nodes), then the app receives a message.
			
 
				+In the
			
 
				 simplest case this can be used to implement a logging system, where crashes and node
			
 
				 deaths are recorded. More interestingly, this can be used to integrate with a control
			
 
				 plane. For instance, if a blocktree were running in AWS, when a message is received indicating
			
 
				 that a node has died, a new EC2 instance could be started to replace it. The reliability of Erlang
			
 
				 and other systems employing the Actor Model have shown the robustness of this approach.
			
 
				+Apps can form this relationship after they've started running, provided that both of the apps
			
 
				+have permission to send messages to each other. Apps, with the appropriate permissions, can also
			
 
				+spawn other apps on descendent nodes. This can be used to implement map-reduce workloads, where
			
 
				+an app spawns mapping jobs on descendent nodes containing the data of interest, and it processes
			
 
				+their messages to compute the final reduction. Due to the tiny size of most programs, this is a much
			
 
				+more efficient approach than moving the data to the nodes performing the computation.
			
 
				 
			
 
				 \section{A Brave New Web}
			
 
				 In order to explore how blocktree can be used, the design of several hypothetical systems
			
@@ -528,4 +543,6 @@ One thing is certain however, it is a moral imperative that we provide users wit
 
				 to online services which harvest their data and weaponize it against them. Only then will the web
			
 
				 become the place it was meant to be.
			
 
				 
			
 
				+\printbibliography
			
 
				+
			
 
				 \end{document}
			
--- a/doc/citations.bib
+++ b/doc/citations.bib
@@ -0,0 +1,115 @@
 
				+@article{filecoin,
			
 
				+    author = "{Protocol Labs}",
			
 
				+    title = "Filecoin: A Decentralized Storage Network",
			
 
				+    url = "https://filecoin.io/filecoin.pdf"
			
 
				+}
			
 
				+
			
 
				+@inproceedings{blockstack,
			
 
				+    author = {Muneeb Ali and Jude Nelson and Ryan Shea and Michael J. Freedman},
			
 
				+    title = {Blockstack: A Global Naming and Storage System Secured by Blockchains},
			
 
				+    booktitle = {2016 USENIX Annual Technical Conference (USENIX ATC 16)},
			
 
				+    year = {2016},
			
 
				+    isbn = {978-1-931971-30-0},
			
 
				+    address = {Denver, CO},
			
 
				+    pages = {181--194},
			
 
				+    url = {https://www.usenix.org/conference/atc16/technical-sessions/presentation/ali},
			
 
				+    publisher = {USENIX Association},
			
 
				+    month = jun,
			
 
				+}
			
 
				+
			
 
				+@misc{tahoe,
			
 
				+    author = {Zooko Wilcox-O'Hearn and Brian Warner},
			
 
				+    title = {Tahoe – The Least-Authority Filesystem},
			
 
				+    howpublished = {Cryptology ePrint Archive, Paper 2012/524},
			
 
				+    year = {2012},
			
 
				+    note = {\url{https://eprint.iacr.org/2012/524}},
			
 
				+    url = {https://eprint.iacr.org/2012/524}
			
 
				+}
			
 
				+
			
 
				+@misc{tpm,
			
 
				+    title = {TPM 2.0 Library},
			
 
				+    url = {https://trustedcomputinggroup.org/resource/tpm-library-specification/}
			
 
				+}
			
 
				+
			
 
				+@inproceedings {raft,
			
 
				+    author = {Diego Ongaro and John Ousterhout},
			
 
				+    title = {In Search of an Understandable Consensus Algorithm},
			
 
				+    booktitle = {2014 USENIX Annual Technical Conference (USENIX ATC 14)},
			
 
				+    year = {2014},
			
 
				+    isbn = {978-1-931971-10-2},
			
 
				+    address = {Philadelphia, PA},
			
 
				+    pages = {305--319},
			
 
				+    url = {https://www.usenix.org/conference/atc14/technical-sessions/presentation/ongaro},
			
 
				+    publisher = {USENIX Association},
			
 
				+    month = jun,
			
 
				+}
			
 
				+
			
 
				+@article{bitcoin,
			
 
				+    author = {Nakamoto, Satoshi},
			
 
				+    year = {2009},
			
 
				+    month = {03},
			
 
				+    pages = {},
			
 
				+    title = {Bitcoin: A Peer-to-Peer Electronic Cash System},
			
 
				+    journal = {Cryptography Mailing list at https://metzdowd.com},
			
 
				+    url = {https://bitcoin.org/bitcoin.pdf}
			
 
				+}
			
 
				+
			
 
				+@misc{raptorq,
			
 
				+    series =	{Request for Comments},
			
 
				+    number =	6330,
			
 
				+    howpublished =	{RFC 6330},
			
 
				+    publisher =	{RFC Editor},
			
 
				+    doi =		{10.17487/RFC6330},
			
 
				+    url =		{https://www.rfc-editor.org/info/rfc6330},
			
 
				+        author =	{Lorenz Minder and Amin Shokrollahi and Mark Watson and Mike Luby and Thomas Stockhammer},
			
 
				+    title =		{{RaptorQ Forward Error Correction Scheme for Object Delivery}},
			
 
				+    pagetotal =	69,
			
 
				+    year =		2011,
			
 
				+    month =		aug,
			
 
				+    abstract =	{This document describes a Fully-Specified Forward Error Correction (FEC) scheme, corresponding to FEC Encoding ID 6, for the RaptorQ FEC code and its application to reliable delivery of data objects. RaptorQ codes are a new family of codes that provide superior flexibility, support for larger source block sizes, and better coding efficiency than Raptor codes in RFC 5053. RaptorQ is also a fountain code, i.e., as many encoding symbols as needed can be generated on the fly by the encoder from the source symbols of a source block of data. The decoder is able to recover the source block from almost any set of encoding symbols of sufficient cardinality -- in most cases, a set of cardinality equal to the number of source symbols is sufficient; in rare cases, a set of cardinality slightly more than the number of source symbols is required. The RaptorQ code described here is a systematic code, meaning that all the source symbols are among the encoding symbols that can be generated. {[}STANDARDS-TRACK{]}},
			
 
				+}
			
 
				+
			
 
				+@inproceedings{10.1145/3062341.3062363,
			
 
				+author = {Haas, Andreas and Rossberg, Andreas and Schuff, Derek L. and Titzer, Ben L. and Holman, Michael and Gohman, Dan and Wagner, Luke and Zakai, Alon and Bastien, JF},
			
 
				+title = {Bringing the Web up to Speed with WebAssembly},
			
 
				+year = {2017},
			
 
				+isbn = {9781450349888},
			
 
				+publisher = {Association for Computing Machinery},
			
 
				+address = {New York, NY, USA},
			
 
				+url = {https://doi.org/10.1145/3062341.3062363},
			
 
				+doi = {10.1145/3062341.3062363},
			
 
				+abstract = { The maturation of the Web platform has given rise to sophisticated and demanding Web applications such as interactive 3D visualization, audio and video software, and games. With that, efficiency and security of code on the Web has become more important than ever. Yet JavaScript as the only built-in language of the Web is not well-equipped to meet these requirements, especially as a compilation target.  Engineers from the four major browser vendors have risen to the challenge and collaboratively designed a portable low-level bytecode called WebAssembly. It offers compact representation, efficient validation and compilation, and safe low to no-overhead execution. Rather than committing to a specific programming model, WebAssembly is an abstraction over modern hardware, making it language-, hardware-, and platform-independent, with use cases beyond just the Web. WebAssembly has been designed with a formal semantics from the start. We describe the motivation, design and formal semantics of WebAssembly and provide some preliminary experience with implementations. },
			
 
				+booktitle = {Proceedings of the 38th ACM SIGPLAN Conference on Programming Language Design and Implementation},
			
 
				+pages = {185--200},
			
 
				+numpages = {16},
			
 
				+keywords = {just-in-time compilers, type systems, assembly languages, programming languages, virtual machines},
			
 
				+location = {Barcelona, Spain},
			
 
				+series = {PLDI 2017}
			
 
				+}
			
 
				+
			
 
				+@article{wasm,
			
 
				+    author = {Haas, Andreas and Rossberg, Andreas and Schuff, Derek L. and Titzer, Ben L. and Holman, Michael and Gohman, Dan and Wagner, Luke and Zakai, Alon and Bastien, JF},
			
 
				+    title = {Bringing the Web up to Speed with WebAssembly},
			
 
				+    year = {2017},
			
 
				+    issue_date = {June 2017},
			
 
				+    publisher = {Association for Computing Machinery},
			
 
				+    address = {New York, NY, USA},
			
 
				+    volume = {52},
			
 
				+    number = {6},
			
 
				+    issn = {0362-1340},
			
 
				+    url = {https://doi.org/10.1145/3140587.3062363},
			
 
				+    doi = {10.1145/3140587.3062363},
			
 
				+    abstract = { The maturation of the Web platform has given rise to sophisticated and demanding Web applications such as interactive 3D visualization, audio and video software, and games. With that, efficiency and security of code on the Web has become more important than ever. Yet JavaScript as the only built-in language of the Web is not well-equipped to meet these requirements, especially as a compilation target.  Engineers from the four major browser vendors have risen to the challenge and collaboratively designed a portable low-level bytecode called WebAssembly. It offers compact representation, efficient validation and compilation, and safe low to no-overhead execution. Rather than committing to a specific programming model, WebAssembly is an abstraction over modern hardware, making it language-, hardware-, and platform-independent, with use cases beyond just the Web. WebAssembly has been designed with a formal semantics from the start. We describe the motivation, design and formal semantics of WebAssembly and provide some preliminary experience with implementations. },
			
 
				+    journal = {SIGPLAN Not.},
			
 
				+    month = jun,
			
 
				+    pages = {185--200},
			
 
				+    numpages = {16},
			
 
				+    keywords = {assembly languages, type systems, virtual machines, just-in-time compilers, programming languages}
			
 
				+}
			
 
				+
			
 
				+@phdthesis{armstrong,
			
 
				+    author    = {Joe Armstrong},
			
 
				+    title     = {Making reliable distributed systems in the presence of software errors},
			
 
				+    school    = {Royal Institute of Technology, Stockholm, Sweden},
			
 
				+    year      = {2003}
			
 
				+}