pub use private::{Blocktree, ModeAuthorizer}; mod private { use btserde::{read_from, write_to}; use fuse_backend_rs::{ abi::fuse_abi::{stat64, statvfs64, CreateIn}, api::filesystem::{ Context, DirEntry as FuseDirEntry, Entry, FileSystem, FsOptions, OpenOptions, }, }; use log::{debug, error, warn}; use serde::{Deserialize, Serialize}; use std::{ collections::hash_map::{self, HashMap}, ffi::CStr, fs::File, io::{self, SeekFrom, Write}, path::{Path, PathBuf}, sync::{ atomic::{AtomicU64, Ordering}, RwLock, }, time::Duration, }; use crate::{ crypto::Creds, Block, BlockMeta, BlockOpenOptions, BlockPath, BoxInIoErr, DirEntry, Directory, Epoch, Error, Result, ToStringInIoErr, }; type Inode = u64; type Handle = u64; #[repr(u64)] pub enum SpecInodes { RootDir = 1, Sb = 2, FirstFree = 11, } impl From for Inode { fn from(special: SpecInodes) -> Self { special as Inode } } #[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)] #[repr(u32)] // This type needs to match `libc::mode_t`. /// The type of a file (regular, directory, etc). enum FileType { /// Directory. Dir = libc::S_IFDIR, /// Regular file. Reg = libc::S_IFREG, } impl FileType { /// Returns the underlying mode bits for this file type. fn value(self) -> libc::mode_t { self as libc::mode_t } /// Attempts to convert the given mode bits into a `FileType` enum value. fn from_value(value: libc::mode_t) -> Result { if (value & libc::S_IFDIR) != 0 { return Ok(FileType::Dir); } if (value & libc::S_IFREG) != 0 { return Ok(FileType::Reg); } Err(Error::custom(format!("unknown file type: 0o{value:0o}"))) } } impl From for libc::mode_t { fn from(file_type: FileType) -> Self { file_type.value() } } impl TryFrom for FileType { type Error = crate::Error; fn try_from(value: libc::mode_t) -> Result { Self::from_value(value) } } trait SeekFromExt { /// Converts a C-style `(whence, offset)` pair into a [SeekFrom] enum value. /// See the POSIX man page of `lseek` for more details. 
fn whence_offset(whence: u32, offset: u64) -> io::Result { let whence = whence as i32; match whence { libc::SEEK_SET => Ok(SeekFrom::Start(offset)), libc::SEEK_CUR => Ok(SeekFrom::Current(offset as i64)), libc::SEEK_END => Ok(SeekFrom::End(offset as i64)), _ => Err(io::Error::new( io::ErrorKind::InvalidInput, "`whence` was not one of `libc::{SEEK_SET, SEEK_CUR, SEEK_END}`", )), } } } impl SeekFromExt for SeekFrom {} /// This type provides context for an authorization decision as to whether a given process will /// be allowed to access a block. pub struct AuthzContext<'a> { /// The user ID of the process being authorized. pub uid: u32, /// The group ID of the process being authorized. pub gid: u32, /// The process ID of the process being authorized. pub pid: libc::pid_t, /// A reference to the metadata of a block, the access to which is being authorized. pub meta: &'a BlockMeta, } impl<'a> AuthzContext<'a> { pub fn new(ctx: &Context, meta: &'a BlockMeta) -> AuthzContext<'a> { AuthzContext { uid: ctx.uid, gid: ctx.gid, pid: ctx.pid, meta, } } } /// A trait for types which can render authorization decisions. pub trait Authorizer { /// Returns [Ok] if read authorization is granted, and [Err] otherwise. fn can_read<'a>(&self, ctx: &AuthzContext<'a>) -> io::Result<()>; /// Returns [Ok] if write authorization is granted, and [Err] otherwise. fn can_write<'a>(&self, ctx: &AuthzContext<'a>) -> io::Result<()>; /// Returns [Ok] if execute authorization is granted, and [Err] otherwise. fn can_exec<'a>(&self, ctx: &AuthzContext<'a>) -> io::Result<()>; } /// A particularly simple authorizer that just looks at the mode bits in the block metadata /// to make authorization decisions. 
pub struct ModeAuthorizer {} impl ModeAuthorizer { fn authorize(mode: u32, mask: u32, denied_msg: &str) -> io::Result<()> { if (mode & mask) != 0 { Ok(()) } else { Err(io::Error::new(io::ErrorKind::PermissionDenied, denied_msg)) } } } impl Authorizer for ModeAuthorizer { fn can_read<'a>(&self, ctx: &AuthzContext<'a>) -> io::Result<()> { let secrets = ctx.meta.body.secrets()?; let mask = (libc::S_IRUSR * (secrets.uid == ctx.uid) as u32) | (libc::S_IRGRP * (secrets.gid == ctx.gid) as u32) | libc::S_IROTH; Self::authorize(secrets.mode, mask, "read access denied") } fn can_write<'a>(&self, ctx: &AuthzContext<'a>) -> io::Result<()> { let secrets = ctx.meta.body.secrets()?; let mask = (libc::S_IWUSR * (secrets.uid == ctx.uid) as u32) | (libc::S_IWGRP * (secrets.gid == ctx.gid) as u32) | libc::S_IWOTH; Self::authorize(secrets.mode, mask, "write access denied") } fn can_exec<'a>(&self, ctx: &AuthzContext<'a>) -> io::Result<()> { let secrets = ctx.meta.body.secrets()?; let mask = (libc::S_IXUSR * (secrets.uid == ctx.uid) as u32) | (libc::S_IXGRP * (secrets.gid == ctx.gid) as u32) | libc::S_IXOTH; Self::authorize(secrets.mode, mask, "exec access denied") } } enum HandleValue { File { block: RwLock>, }, Directory { dir: Directory, block: RwLock>, }, } impl HandleValue { fn new(block: Box) -> HandleValue { HandleValue::File { block: RwLock::new(block), } } fn convert_to_dir(self) -> io::Result { let lock = self.take_block(); let dir = { let mut guard = lock.write().err_to_string()?; guard.seek(SeekFrom::Start(0))?; read_from(&mut *guard)? }; Ok(HandleValue::Directory { dir, block: lock }) } fn take_block(self) -> RwLock> { match self { Self::File { block, .. } => block, Self::Directory { block, .. } => block, } } fn block(&self) -> &RwLock> { match self { Self::File { block, .. } => block, Self::Directory { block, .. } => block, } } fn block_mut(&mut self) -> io::Result<&mut Box> { let lock = match self { Self::File { block, .. } => block, Self::Directory { block, .. 
} => block, }; lock.get_mut().err_to_string() } fn access_block) -> io::Result>( &self, cb: F, ) -> io::Result { let guard = self.block().read().err_to_string()?; cb(&guard) } fn access_block_mut) -> io::Result>( &self, cb: F, ) -> io::Result { let mut guard = self.block().write().err_to_string()?; cb(&mut guard) } fn directory(&self) -> io::Result<&Directory> { match self { Self::Directory { dir, .. } => Ok(dir), _ => Err(io::Error::new( io::ErrorKind::Other, "handle is not for a directory", )), } } } struct InodeTableValue { handle_values: HashMap, next_handle: Handle, unclaimed_handles: Vec, lookup_count: u64, delete: bool, } impl InodeTableValue { /// If more than this number of unclaimed blocks are open, then blocks are closed until /// only this number remain open. const UNCLAIMED_HANDLE_LIMIT: usize = 3; fn new(block: Box) -> InodeTableValue { const FIRST_HANDLE: Handle = 1; let mut handles = HashMap::with_capacity(1); handles.insert(FIRST_HANDLE, HandleValue::new(block)); Self { handle_values: handles, next_handle: FIRST_HANDLE + 1, lookup_count: 1, unclaimed_handles: vec![FIRST_HANDLE], delete: false, } } fn invalid_handle_err(handle: Handle) -> io::Error { io::Error::new(io::ErrorKind::Other, format!("invalid handle {handle}")) } fn value(&self, handle: Handle) -> io::Result<&HandleValue> { self.handle_values .get(&handle) .ok_or_else(|| Self::invalid_handle_err(handle)) } fn value_mut(&mut self, handle: Handle) -> io::Result<&mut HandleValue> { self.handle_values .get_mut(&handle) .ok_or_else(|| Self::invalid_handle_err(handle)) } fn block_mut(&mut self, handle: Handle) -> io::Result<&mut Box> { self.value_mut(handle)?.block_mut() } fn convert_to_dir(&mut self, handle: Handle) -> io::Result<()> { let value = self .handle_values .remove(&handle) .ok_or_else(|| Self::invalid_handle_err(handle))?; let value = value.convert_to_dir()?; self.handle_values.insert(handle, value); Ok(()) } fn access_block_mut) -> io::Result>( &self, handle: Handle, cb: F, ) -> 
io::Result { self.value(handle)?.access_block_mut(cb) } fn try_borrow_block) -> io::Result>( &mut self, cb: F, ) -> io::Result { let handle = self .unclaimed_handles .last() .ok_or_else(|| Error::custom("no handles available"))?; let value = self.value_mut(*handle)?; let block = value.block_mut()?; cb(block) } fn insert(&mut self, block: Box) { let handle = self.next_handle; self.next_handle += 1; self.handle_values.insert(handle, HandleValue::new(block)); self.unclaimed_handles.push(handle); } fn insert_then_get_mut(&mut self, block: Box) -> &mut Box { self.insert(block); let handle = self.unclaimed_handles.last().unwrap(); self.block_mut(*handle).unwrap() } fn take_handle(&mut self) -> io::Result { self.unclaimed_handles .pop() .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "no unclaimed handles")) } fn give_handle(&mut self, handle: Handle) { self.unclaimed_handles.push(handle); while self.unclaimed_handles.len() > Self::UNCLAIMED_HANDLE_LIMIT { let handle = self.unclaimed_handles.pop().unwrap(); self.handle_values.remove(&handle); } } /// Increments `lookup_count` by 1 and returns its current value. fn incr_lookup_count(&mut self) -> u64 { self.lookup_count += 1; self.lookup_count } /// Decrements `lookup_count` by `count` and returns its current value. fn decr_lookup_count(&mut self, count: u64) -> u64 { self.lookup_count -= count; self.lookup_count } } type InodeTable = HashMap>; type InodeTableEntry<'a> = hash_map::Entry<'a, Inode, RwLock>; /// Structure for metadata about a blocktree. #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] struct Superblock { /// The generation number of the cluster this part of the blocktree is stored on. generation: u64, /// The next free inode available to the cluster. next_inode: u64, } /// Structure for managing the part of a blocktree which is stored in the local filesystem. pub struct Blocktree { /// The path to the directory in the local filesystem where this blocktree is located. 
path: PathBuf,
        /// A map from inode numbers to their reference counts.
        inodes: RwLock<InodeTable>,
        /// The next inode that will be assigned to a new block.
        next_inode: AtomicU64,
        /// The generation number of this filesystem. This is the same for every other server in
        /// the same cluster.
        generation: u64,
        /// The credentials this blocktree instance will use for all cryptographic operations.
        creds: C,
        /// The authorizer which decides whether a process may access a block.
        authorizer: A,
    }

    // NOTE(review): the bounds on `C` are inferred from how `creds` is used in this impl
    // (`writecap`, `clone`, and being handed to `BlockOpenOptions`) — confirm against the
    // declaration of `Creds`.
    impl<C: Creds + Clone + 'static, A> Blocktree<C, A> {
        /// Creates a new empty blocktree at the given path.
        pub fn new_empty(
            btdir: PathBuf,
            generation: u64,
            creds: C,
            authorizer: A,
        ) -> Result<Blocktree<C, A>> {
            let root_block_path = creds
                .writecap()
                .ok_or(Error::MissingWritecap)?
                .root_block_path();

            // Initialize the superblock.
            let mut sb_block = Self::open_block(
                &btdir,
                SpecInodes::Sb.into(),
                creds.clone(),
                root_block_path.to_owned(),
            )?;
            let sb = Superblock {
                generation,
                next_inode: SpecInodes::FirstFree.into(),
            };
            write_to(&sb, &mut sb_block)?;
            sb_block.mut_meta_body().access_secrets(|secrets| {
                secrets.inode = SpecInodes::Sb.into();
                secrets.mode = Self::default_file_mode();
                secrets.uid = Self::uid();
                secrets.gid = Self::gid();
                secrets.nlink = 1;
                Ok(())
            })?;
            sb_block.flush()?;

            // Initialize the root directory.
            let mut root_block = Self::open_block(
                &btdir,
                SpecInodes::RootDir.into(),
                creds.clone(),
                root_block_path,
            )?;
            write_to(&Directory::new(), &mut root_block)?;
            root_block.mut_meta_body().access_secrets(|secrets| {
                secrets.inode = SpecInodes::RootDir.into();
                secrets.mode = Self::default_dir_mode();
                secrets.uid = Self::uid();
                secrets.gid = Self::gid();
                secrets.nlink = 1;
                Ok(())
            })?;
            root_block.flush()?;

            Self::new(btdir, sb, sb_block, root_block, creds, authorizer)
        }

        /// Opens an existing blocktree stored at the given path.
        pub fn new_existing(btdir: PathBuf, creds: C, authorizer: A) -> Result<Blocktree<C, A>> {
            let root_block_path = creds
                .writecap()
                .ok_or(Error::MissingWritecap)?
                .root_block_path();
            let mut sb_block = Self::open_block(
                &btdir,
                SpecInodes::Sb.into(),
                creds.clone(),
                root_block_path.to_owned(),
            )?;
            let sb = read_from(&mut sb_block)?;
            let root_block = Self::open_block(
                &btdir,
                SpecInodes::RootDir.into(),
                creds.clone(),
                root_block_path,
            )?;
            Self::new(btdir, sb, sb_block, root_block, creds, authorizer)
        }

        /// Assembles a blocktree from an already opened superblock and root directory block.
        fn new(
            btdir: PathBuf,
            sb: Superblock,
            sb_block: Box<dyn Block>,
            root_block: Box<dyn Block>,
            creds: C,
            authorizer: A,
        ) -> Result<Blocktree<C, A>> {
            // Two entries are inserted below: the superblock and the root directory.
            let mut inodes = HashMap::with_capacity(2);
            inodes.insert(
                SpecInodes::Sb.into(),
                RwLock::new(InodeTableValue::new(sb_block)),
            );
            inodes.insert(
                SpecInodes::RootDir.into(),
                RwLock::new(InodeTableValue::new(root_block)),
            );
            Ok(Blocktree {
                path: btdir,
                inodes: RwLock::new(inodes),
                next_inode: AtomicU64::new(sb.next_inode),
                generation: sb.generation,
                creds,
                authorizer,
            })
        }

        /// Returns the path to the file storing the given inode's data.
        fn block_path<P: AsRef<Path>>(parent: P, inode: Inode) -> PathBuf {
            // Inodes are grouped into subdirectories named after `inode / 0xFF`.
            let group = inode / 0xFF;
            let mut path = PathBuf::new();
            path.push(parent);
            path.push(format!("{group:02x}"));
            path.push(format!("{inode:x}.blk"));
            path
        }

        /// Opens the block file for `inode` under `btdir`, creating the file and the group
        /// directory which contains it if they do not exist.
        fn open_block<P: AsRef<Path>>(
            btdir: P,
            inode: Inode,
            creds: C,
            block_path: BlockPath,
        ) -> Result<Box<dyn Block>> {
            let path = Self::block_path(&btdir, inode);
            // `block_path` always appends two components, so the parent always exists.
            let dir = path.ancestors().nth(1).unwrap();
            if let Err(err) = std::fs::create_dir(dir) {
                match err.kind() {
                    io::ErrorKind::AlreadyExists => (),
                    _ => return Err(err.into()),
                }
            }
            let file = std::fs::OpenOptions::new()
                .read(true)
                .write(true)
                .create(true)
                .open(path)?;
            Self::open_block_file(file, creds, block_path)
        }

        /// Opens a block over `file` with encryption enabled and compression disabled.
        fn open_block_file(
            file: File,
            creds: C,
            block_path: BlockPath,
        ) -> Result<Box<dyn Block>> {
            BlockOpenOptions::new()
                .with_creds(creds)
                .with_compress(false)
                .with_encrypt(true)
                .with_inner(file)
                .with_block_path(block_path)
                .open()
        }

        fn default_dir_mode() -> u32 {
            FileType::Dir.value() | 0o755
        }

        fn default_file_mode() -> u32 {
            FileType::Reg.value() | 0o644
        }

        fn uid() -> u32 {
            // SAFETY: `getuid` takes no arguments and is documented to always succeed.
            unsafe { libc::getuid() }
        }

        fn gid() -> u32 {
            // SAFETY: `getgid` takes no arguments and is documented to always succeed.
            unsafe { libc::getgid() }
        }

        /// Returns the [Err] variant containing the [io::Error] corresponding to [libc::ENOSYS].
        fn not_supported<T>() -> io::Result<T> {
            let err = io::Error::from_raw_os_error(libc::ENOSYS);
            debug!("{err}");
            Err(err)
        }

        /// Calls `cb` with the inode table entry for `inode` while holding the write lock on
        /// the table.
        fn access_entry<T, F: FnOnce(InodeTableEntry) -> io::Result<T>>(
            &self,
            inode: Inode,
            cb: F,
        ) -> io::Result<T> {
            let mut inodes = self.inodes.write().err_to_string()?;
            let entry = inodes.entry(inode);
            cb(entry)
        }

        /// Calls `cb` with a shared reference to the table value for `inode`, failing with
        /// [Error::NotOpen] if the inode is not in the table.
        fn access_value<T, F: FnOnce(&InodeTableValue) -> io::Result<T>>(
            &self,
            inode: Inode,
            cb: F,
        ) -> io::Result<T> {
            let inodes = self.inodes.read().err_to_string()?;
            let guard = inodes
                .get(&inode)
                .ok_or_else(|| Error::NotOpen(inode))?
                .read()
                .err_to_string()?;
            cb(&guard)
        }

        /// Calls `cb` with a mutable reference to the table value for `inode`, failing with
        /// [Error::NotOpen] if the inode is not in the table.
        fn access_value_mut<T, F: FnOnce(&mut InodeTableValue) -> io::Result<T>>(
            &self,
            inode: Inode,
            cb: F,
        ) -> io::Result<T> {
            let inodes = self.inodes.read().err_to_string()?;
            let mut guard = inodes
                .get(&inode)
                .ok_or_else(|| Error::NotOpen(inode))?
                .write()
                .err_to_string()?;
            cb(&mut guard)
        }

        /// Calls `cb` with a mutable reference to the block behind `handle`.
        fn access_block_mut<T, F: FnOnce(&mut Box<dyn Block>) -> io::Result<T>>(
            &self,
            inode: Inode,
            handle: Handle,
            cb: F,
        ) -> io::Result<T> {
            // Only a read lock is needed on the table value because each block sits behind
            // its own lock.
            self.access_value(inode, |value| value.access_block_mut(handle, cb))
        }

        /// Calls `cb` with the metadata of one of the blocks which are open for `inode`.
        fn access_meta<T, F: FnOnce(&BlockMeta) -> io::Result<T>>(
            &self,
            inode: Inode,
            cb: F,
        ) -> io::Result<T> {
            self.access_value(inode, |value| {
                let handle_value = value
                    .handle_values
                    .values()
                    .next()
                    .ok_or_else(|| Error::NotOpen(inode))?;
                // Because we're using any of the meta data structs we need to ensure that any
                // modification of meta data is performed on all open blocks.
                handle_value.access_block(|block| cb(block.meta()))
            })
        }

        /// Calls `cb` with an unclaimed block for `inode`, opening another block when every
        /// open one has been claimed.
        fn borrow_block<T, F: FnOnce(&mut Box<dyn Block>) -> io::Result<T>>(
            &self,
            inode: Inode,
            cb: F,
        ) -> io::Result<T> {
            self.access_value_mut(inode, |value| {
                let block = match value.unclaimed_handles.last() {
                    Some(handle) => value.handle_values.get_mut(handle).unwrap().block_mut()?,
                    None => {
                        let block_path = value
                            .handle_values
                            .values()
                            .next()
                            .ok_or_else(|| Error::NotOpen(inode))?
.access_block(|block| Ok(block.meta_body().path.clone()))?;
                        let block =
                            Self::open_block(&self.path, inode, self.creds.clone(), block_path)?;
                        value.insert_then_get_mut(block)
                    }
                };
                cb(block)
            })
        }

        /// Claims a handle for `inode` and calls `cb` with it and its block. The handle is
        /// returned to the unclaimed list if `cb` fails.
        fn take_handle_if_ok<T, F: FnOnce(Handle, &mut Box<dyn Block>) -> io::Result<T>>(
            &self,
            inode: Inode,
            cb: F,
        ) -> io::Result<T> {
            self.access_value_mut(inode, |value| {
                let handle = value
                    .take_handle()
                    .map_err(|_| Error::NoHandlesAvailable(inode))?;
                let block = value.block_mut(handle)?;
                let result = cb(handle, block);
                if result.is_err() {
                    value.give_handle(handle);
                }
                result
            })
        }

        /// Rewinds the block behind `handle` and returns the handle to the unclaimed list.
        fn give_handle(&self, inode: Inode, handle: Handle) -> io::Result<()> {
            self.access_value_mut(inode, |value| {
                let block = value.block_mut(handle)?;
                // Be kind, rewind.
                block.seek(SeekFrom::Start(0))?;
                value.give_handle(handle);
                Ok(())
            })
        }

        /// Calls `cb` with the table value for `inode`, creating the value if the inode is not
        /// in the table and making sure that at least one unclaimed handle is available.
        fn open_value<T, F: FnOnce(&mut InodeTableValue) -> io::Result<T>>(
            &self,
            inode: Inode,
            block_path: BlockPath,
            cb: F,
        ) -> io::Result<T> {
            self.access_entry(inode, |entry| match entry {
                InodeTableEntry::Vacant(entry) => {
                    let block =
                        Self::open_block(&self.path, inode, self.creds.clone(), block_path)?;
                    let mut value = InodeTableValue::new(block);
                    let result = cb(&mut value);
                    entry.insert(RwLock::new(value));
                    result
                }
                InodeTableEntry::Occupied(mut entry) => {
                    let value = entry.get_mut().get_mut().err_to_string()?;
                    if value.unclaimed_handles.is_empty() {
                        let block =
                            Self::open_block(&self.path, inode, self.creds.clone(), block_path)?;
                        value.insert(block);
                    }
                    cb(value)
                }
            })
        }

        /// Like [Self::open_value], but also claims a handle and passes it to `cb`.
        fn open_then_take_handle<T, F: FnOnce(Handle, &mut InodeTableValue) -> io::Result<T>>(
            &self,
            inode: Inode,
            block_path: BlockPath,
            cb: F,
        ) -> io::Result<T> {
            self.open_value(inode, block_path, |value| {
                // `open_value` guarantees that an unclaimed handle is available.
                let handle = value.take_handle().unwrap();
                cb(handle, value)
            })
        }

        /// Decrements the lookup count of `inode` by `count`. When the count reaches zero the
        /// inode is removed from the table, and its block file is removed if the inode was
        /// marked for deletion.
        fn inode_forget(&self, inode: Inode, count: u64) -> io::Result<()> {
            let mut inodes = self.inodes.write().err_to_string()?;
            let lookup_count = {
                let inode_lock = match inodes.get_mut(&inode) {
                    Some(inode_lock) => inode_lock,
                    None => {
                        warn!("an attempt was made to forget non-existent inode {inode}");
                        return Ok(());
                    }
                };
                let mut value = inode_lock.write().err_to_string()?;
                value.decr_lookup_count(count)
            };
            if 0 == lookup_count {
                let delete = inodes
                    .remove(&inode)
                    .unwrap()
                    .into_inner()
                    .err_to_string()?
                    .delete;
                if delete {
                    let path = Self::block_path(&self.path, inode);
                    std::fs::remove_file(path)?;
                }
            }
            Ok(())
        }

        /// Returns the next available inode and updates the superblock in one atomic operation.
        /// TODO: Obviously this strategy won't work when there are multiple servers in this
        /// generation.
        fn next_inode(&self) -> io::Result<Inode> {
            self.borrow_block(SpecInodes::Sb.into(), |block| {
                // We don't need strict ordering because the lock the inode table is already
                // serializing access.
                let inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
                let sb = Superblock {
                    generation: self.generation,
                    next_inode: inode + 1,
                };
                block.seek(SeekFrom::Start(0))?;
                write_to(&sb, block)?;
                Ok(inode)
            })
        }

        fn attr_timeout(&self) -> Duration {
            Duration::from_secs(5)
        }

        fn entry_timeout(&self) -> Duration {
            Duration::from_secs(5)
        }

        /// Returns an [io::ErrorKind::Unsupported] error which names the given flag.
        fn unsupported_flag<T>(flag: &str) -> io::Result<T> {
            Err(io::Error::new(
                io::ErrorKind::Unsupported,
                format!("unsupported flag: {flag}"),
            ))
        }
    }

    // SAFETY: NOTE(review): presumably relies on all mutable state being behind `RwLock`s or
    // atomics — confirm that the boxed blocks are sound to share between threads.
    unsafe impl<C: Sync, A: Sync> Sync for Blocktree<C, A> {}

    impl<C: Creds + Clone + 'static, A: Authorizer> FileSystem for Blocktree<C, A> {
        type Inode = Inode;
        type Handle = u64;

        fn init(&self, _capable: FsOptions) -> io::Result<FsOptions> {
            debug!("Blocktree::init called");
            Ok(FsOptions::empty())
        }

        fn destroy(&self) {
            debug!("Blocktree::destroy called");
        }

        /// Looks up `name` in the directory `parent`, makes sure the inode it refers to is in
        /// the inode table, and increments that inode's lookup count.
        fn lookup(&self, ctx: &Context, parent: Self::Inode, name: &CStr) -> io::Result<Entry> {
            debug!("Blocktree::lookup called on parent {parent}");
            let name = name.to_str().box_err()?;
            let (dir, mut block_path) = self.borrow_block(parent, |block| {
                self.authorizer
                    .can_exec(&AuthzContext::new(ctx, block.meta()))?;
                block.seek(SeekFrom::Start(0))?;
                let dir: Directory = read_from(block)?;
                let path = block.meta_body().path.to_owned();
                Ok((dir, path))
            })?;
            let entry = dir
                .entries
                .get(name)
                .ok_or_else(|| io::Error::from_raw_os_error(libc::ENOENT))?;
            let inode = match entry {
                DirEntry::File(entry) => entry.inode,
                DirEntry::Directory(entry) => entry.inode,
                DirEntry::Server(_) => {
                    return Err(io::Error::new(
                        io::ErrorKind::Unsupported,
                        "can't lookup server entry",
                    ))
                }
            };
            // The child's block path is the parent's path extended by the child's name, which
            // is how `create` and `unlink` derive it. Passing the parent's own path would
            // open the child's block under the wrong path.
            block_path.push_component(name.to_owned());
            let stat = self.open_value(inode, block_path, |value| {
                let stat =
                    value.try_borrow_block(|block| Ok(block.meta_body().secrets()?.stat()))?;
                value.incr_lookup_count();
                Ok(stat)
            })?;
            Ok(Entry {
                inode,
                generation: self.generation,
                attr: stat,
                attr_flags: 0,
                attr_timeout: self.attr_timeout(),
                entry_timeout: self.entry_timeout(),
            })
        }

        /// Claims a handle for `inode` after checking that the caller is authorized for the
        /// access mode requested in `flags`.
        fn open(
            &self,
            ctx: &Context,
            inode: Self::Inode,
            flags: u32,
            // This is the second field of the `fuse_open_in` struct, which is currently unused
            // by the kernel. (https://man7.org/linux/man-pages/man4/fuse.4.html)
            _fuse_flags: u32,
        ) -> io::Result<(Option<Self::Handle>, OpenOptions)> {
            debug!("Blocktree::open called on inode {inode}");
            let flags: i32 = flags.try_into().box_err()?;
            if flags & libc::O_APPEND != 0 {
                return Self::unsupported_flag("O_APPEND");
            }
            if flags & libc::O_CLOEXEC != 0 {
                return Self::unsupported_flag("O_CLOEXEC");
            }
            if flags & libc::O_DIRECTORY != 0 {
                return Self::unsupported_flag("O_DIRECTORY");
            }
            // The access mode is a field within `flags`, not a set of independent bits, and
            // `O_RDONLY` is zero. Comparing the whole of `flags` with `O_RDONLY` would skip
            // the read check whenever any other flag was set.
            let access_mode = flags & libc::O_ACCMODE;
            let handle = self.take_handle_if_ok(inode, |handle, block| {
                let ctx = AuthzContext::new(ctx, block.meta());
                if access_mode == libc::O_RDONLY || access_mode == libc::O_RDWR {
                    self.authorizer.can_read(&ctx)?;
                }
                if access_mode == libc::O_WRONLY || access_mode == libc::O_RDWR {
                    self.authorizer.can_write(&ctx)?;
                }
                Ok(handle)
            })?;
            Ok((Some(handle), OpenOptions::empty()))
        }

        /// Optionally flushes the block behind `handle`, then returns the handle to the
        /// unclaimed list.
        fn release(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _flags: u32,
            handle: Self::Handle,
            flush: bool,
            _flock_release: bool,
            _lock_owner: Option<u64>,
        ) -> io::Result<()> {
            debug!("Blocktree::release called on inode {inode}");
            if flush {
                self.access_block_mut(inode, handle, |block| block.flush())?;
            };
            self.give_handle(inode, handle)
        }

        fn opendir(
            &self,
            ctx: &Context,
            inode: Self::Inode,
            _flags: u32,
        ) -> io::Result<(Option<Self::Handle>, OpenOptions)> {
            debug!("Blocktree::opendir called on inode {inode}");
            let
handle = self.access_value_mut(inode, |value| {
                value.try_borrow_block(|block| {
                    let ctx = AuthzContext::new(ctx, block.meta());
                    self.authorizer.can_exec(&ctx)?;
                    Ok(())
                })?;
                let handle = value.take_handle()?;
                value.convert_to_dir(handle)?;
                Ok(handle)
            })?;
            Ok((Some(handle), OpenOptions::empty()))
        }

        fn releasedir(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _flags: u32,
            handle: Self::Handle,
        ) -> io::Result<()> {
            debug!("Blocktree::releasedir called for inode {inode}");
            self.access_value_mut(inode, |value| {
                value.give_handle(handle);
                Ok(())
            })
        }

        /// Creates a file called `name` in the directory `parent` and returns its entry along
        /// with a handle to it.
        fn create(
            &self,
            ctx: &Context,
            parent: Self::Inode,
            name: &CStr,
            args: CreateIn,
        ) -> std::io::Result<(Entry, Option<Self::Handle>, OpenOptions)> {
            debug!("Blocktree::create called on parent {parent}");
            let name = name
                .to_str()
                .map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err))?
                .to_owned();

            // Reserve a free inode.
            let inode = self.next_inode()?;

            // Add a directory entry to the parent for the new inode.
            let mut block_path = self.borrow_block(parent, |block| {
                let ctx = AuthzContext::new(ctx, block.meta());
                self.authorizer.can_write(&ctx)?;

                block.seek(SeekFrom::Start(0))?;
                let mut dir: Directory = read_from(block).box_err()?;
                dir.add_file(name.clone(), inode)?;
                block.seek(SeekFrom::Start(0))?;
                write_to(&dir, block).box_err()?;
                block.flush()?;
                Ok(block.meta_body().path.clone())
            })?;
            block_path.push_component(name);

            let (handle, attr) =
                self.open_then_take_handle(inode, block_path, |handle, value| {
                    let block = value.block_mut(handle)?;
                    Ok(block.mut_meta_body().access_secrets(|secrets| {
                        secrets.inode = inode;
                        secrets.mode = args.mode;
                        secrets.uid = ctx.uid;
                        secrets.gid = ctx.gid;
                        let now = Epoch::now();
                        secrets.atime = now;
                        secrets.ctime = now;
                        secrets.mtime = now;
                        secrets.nlink = 1;
                        Ok((handle, secrets.attr()))
                    })?)
                })?;

            let entry = Entry {
                inode,
                generation: self.generation,
                attr: attr.into(),
                attr_flags: 0,
                attr_timeout: self.attr_timeout(),
                entry_timeout: self.entry_timeout(),
            };
            Ok((entry, Some(handle), OpenOptions::empty()))
        }

        /// Copies at most `size` bytes from `r` into the block behind `handle` and returns the
        /// number of bytes written.
        ///
        /// If an error occurs after some bytes have been written, the error is logged and the
        /// number of bytes written so far is returned.
        ///
        /// NOTE(review): `_offset` is ignored, so data goes to the block's current position —
        /// confirm that callers position the handle with `lseek` first.
        fn write(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            handle: Self::Handle,
            r: &mut dyn fuse_backend_rs::api::filesystem::ZeroCopyReader,
            size: u32,
            _offset: u64,
            _lock_owner: Option<u64>,
            _delayed_write: bool,
            // `flags` and `fuse_flags` are the arguments that were passed to `open` when this
            // handle was returned.
            flags: u32,
            _fuse_flags: u32,
        ) -> io::Result<usize> {
            debug!("Blocktree::write called called on inode {inode}");
            // Only the access mode field decides whether the handle is read-only; other flag
            // bits may be set alongside it.
            if (flags as libc::c_int) & libc::O_ACCMODE == libc::O_RDONLY {
                return Err(io::Error::new(
                    io::ErrorKind::PermissionDenied,
                    "file is readonly",
                ));
            }
            let mut size: usize = size.try_into().box_err()?;
            self.access_block_mut(inode, handle, |block| {
                let mut buf = [0u8; crate::SECTOR_SZ_DEFAULT];
                let mut written = 0;
                while size > 0 {
                    // Never ask for more than the remaining request, otherwise the
                    // subtraction below could underflow.
                    let limit = size.min(buf.len());
                    let read = match r.read(&mut buf[..limit]) {
                        Ok(size) => size,
                        Err(err) => {
                            if written > 0 {
                                error!("error while reading: {err}");
                                return Ok(written);
                            } else {
                                return Err(err);
                            }
                        }
                    };
                    if 0 == read {
                        break;
                    }
                    let filled = &buf[..read];
                    size -= read;
                    if let Err(err) = block.write_all(filled) {
                        if written > 0 {
                            error!("error while writing: {err}");
                            return Ok(written);
                        } else {
                            return Err(err);
                        }
                    }
                    written += filled.len();
                }
                Ok(written)
            })
        }

        fn flush(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            handle: Self::Handle,
            _lock_owner: u64,
        ) -> io::Result<()> {
            debug!("Blocktree::flush called for inode {inode}");
            self.access_block_mut(inode, handle, |block| block.flush())
        }

        /// Copies at most `size` bytes from the block behind `handle` into `w` and returns the
        /// number of bytes delivered.
        ///
        /// If an error occurs after some bytes have been delivered, the error is logged and
        /// the number of bytes delivered so far is returned.
        ///
        /// NOTE(review): `_offset` is ignored, so data comes from the block's current
        /// position — confirm that callers position the handle with `lseek` first.
        fn read(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            handle: Self::Handle,
            w: &mut dyn fuse_backend_rs::api::filesystem::ZeroCopyWriter,
            size: u32,
            _offset: u64,
            _lock_owner: Option<u64>,
            flags: u32,
        ) -> io::Result<usize> {
            debug!("Blocktree::read called on inode {inode}");
            if (flags as libc::c_int) & libc::O_ACCMODE == libc::O_WRONLY {
                return Err(io::Error::new(
                    io::ErrorKind::PermissionDenied,
                    "file is write only",
                ));
            }
            let mut size: usize = size.try_into().box_err()?;
            self.access_block_mut(inode, handle, |block| {
                let mut buf = [0u8; crate::SECTOR_SZ_DEFAULT];
                let mut read = 0;
                while size > 0 {
                    // Never read more than the remaining request, otherwise the subtraction
                    // below could underflow and the caller would receive more than it asked
                    // for.
                    let limit = size.min(buf.len());
                    let just_read = match block.read(&mut buf[..limit]) {
                        Ok(just_read) => just_read,
                        Err(err) => {
                            if read > 0 {
                                error!("error while reading from block: {err}");
                                return Ok(read);
                            } else {
                                return Err(err);
                            }
                        }
                    };
                    if 0 == just_read {
                        break;
                    }
                    let filled = &buf[..just_read];
                    if let Err(err) = w.write_all(filled) {
                        if read > 0 {
                            error!("error while writing: {err}");
                            return Ok(read);
                        } else {
                            return Err(err);
                        }
                    }
                    // Bytes are counted only once they have been delivered, so that a failure
                    // on the very first chunk is reported as an error and not as success.
                    read += just_read;
                    size -= just_read;
                }
                Ok(read)
            })
        }

        fn readdir(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            handle: Self::Handle,
            size: u32,
            offset: u64,
            add_entry: &mut dyn FnMut(
                fuse_backend_rs::api::filesystem::DirEntry,
            ) -> io::Result<usize>,
        ) -> io::Result<()> {
            debug!("Blocktree::readdir called on inode {inode}");
            let mut size: usize = size.try_into().box_err()?;
            self.access_value(inode, |value| {
                let dir = value
                    .value(handle)
                    .map_err(|_| Error::InvalidHandle { handle, inode })?
.directory()?;
                let mut index: u64 = 0;
                for (name, entry) in dir.entries() {
                    // Offsets are one-based positions in the iteration order, so entries up
                    // to and including `offset` have already been returned.
                    index += 1;
                    if index <= offset {
                        continue;
                    }
                    let inode = match entry.inode() {
                        Some(inode) => inode,
                        None => continue,
                    };
                    let dir_entry = FuseDirEntry {
                        ino: inode,
                        offset: index,
                        type_: entry.kind() as u32,
                        name: name.as_bytes(),
                    };
                    size = size.saturating_sub(add_entry(dir_entry)?);
                    if size == 0 {
                        break;
                    }
                }
                Ok(())
            })
        }

        fn getattr(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _handle: Option<Self::Handle>,
        ) -> io::Result<(stat64, Duration)> {
            debug!("Blocktree::getattr called for inode {inode}");
            let mut stat = self.access_meta(inode, |meta| Ok(meta.body.secrets()?.stat()))?;
            stat.st_ino = inode;
            Ok((stat, self.attr_timeout()))
        }

        fn forget(&self, _ctx: &Context, inode: Self::Inode, count: u64) {
            debug!("Blocktree::forget called for inode {inode}");
            if let Err(err) = self.inode_forget(inode, count) {
                error!("Blocktree::forget failed for inode {inode}: {err}");
            }
        }

        /// Forgets each `(inode, count)` pair in `requests`, as [Self::forget] does for a
        /// single inode.
        ///
        /// This method has no way to report an error, so failures are logged. It must not
        /// panic, because that would bring down the server whenever the kernel batched its
        /// forget requests.
        fn batch_forget(&self, _ctx: &Context, requests: Vec<(Self::Inode, u64)>) {
            debug!("Blocktree::batch_forget called");
            for (inode, count) in requests {
                if let Err(err) = self.inode_forget(inode, count) {
                    error!("Blocktree::batch_forget failed for inode {inode}: {err}");
                }
            }
        }

        fn lseek(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            handle: Self::Handle,
            offset: u64,
            whence: u32,
        ) -> io::Result<u64> {
            debug!("Blocktree::lseek called for inode {inode}");
            let seek_from = SeekFrom::whence_offset(whence, offset)?;
            self.access_block_mut(inode, handle, |block| block.seek(seek_from))
        }

        /// Removes the entry called `name` from the directory `parent` and decrements the link
        /// count of the inode it referred to. The inode is marked for deletion when its link
        /// count reaches zero.
        fn unlink(&self, ctx: &Context, parent: Self::Inode, name: &CStr) -> io::Result<()> {
            debug!("Blocktree::unlink called on parent {parent}");
            let name = name.to_str().box_err()?;
            let (block_path, inode) = self.borrow_block(parent, |block| {
                let ctx = AuthzContext::new(ctx, block.meta());
                self.authorizer.can_write(&ctx)?;

                block.seek(SeekFrom::Start(0))?;
                let mut dir: Directory = read_from(block)?;
                let inode = match dir.entries.remove(name) {
                    None => return Err(io::Error::from_raw_os_error(libc::ENOENT)),
                    Some(entry) => entry.inode().ok_or_else(|| {
                        io::Error::new(
                            io::ErrorKind::InvalidInput,
                            format!("name {name} does not refer to a file or directory"),
                        )
                    })?,
                };
                block.seek(SeekFrom::Start(0))?;
                write_to(&dir, block)?;
                // Flush the directory as `create` does, so that the removal is written
                // through rather than left in the block's buffer.
                block.flush()?;

                let mut block_path = block.meta_body().path.clone();
                block_path.push_component(name.to_owned());
                Ok((block_path, inode))
            })?;
            self.open_value(inode, block_path, |value| {
                // We mark the block for deletion if `nlink` drops to zero.
                value.delete = value.try_borrow_block(|block| {
                    let nlink = block.mut_meta_body().access_secrets(|secrets| {
                        // Saturate so that a link count which is already zero can't
                        // underflow.
                        secrets.nlink = secrets.nlink.saturating_sub(1);
                        Ok(secrets.nlink)
                    })?;
                    block.flush_meta()?;
                    Ok(0 == nlink)
                })?;
                Ok(())
            })
        }

        //////////////////////////////////
        // METHODS WHICH ARE NOT SUPPORTED
        //////////////////////////////////

        fn getxattr(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _name: &CStr,
            _size: u32,
        ) -> io::Result<fuse_backend_rs::api::filesystem::GetxattrReply> {
            debug!("Blocktree::getxattr called for inode {inode}");
            Self::not_supported()
        }

        fn ioctl(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _handle: Self::Handle,
            _flags: u32,
            _cmd: u32,
            _data: fuse_backend_rs::api::filesystem::IoctlData,
            _out_size: u32,
        ) -> io::Result<fuse_backend_rs::api::filesystem::IoctlData> {
            debug!("Blocktree::ioctl called for inode {inode}");
            Self::not_supported()
        }

        fn access(&self, _ctx: &Context, inode: Self::Inode, _mask: u32) -> io::Result<()> {
            debug!("Blocktree::access called for inode {inode}");
            Self::not_supported()
        }

        fn bmap(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _block: u64,
            _blocksize: u32,
        ) -> io::Result<u64> {
            debug!("Blocktree::bmap called for inode {inode}");
            Self::not_supported()
        }

        fn fallocate(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _handle: Self::Handle,
            _mode: u32,
            _offset: u64,
            _length: u64,
        ) -> io::Result<()> {
            debug!("Blocktree::fallocate called for inode {inode}");
            Self::not_supported()
        }

        fn fsync(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _datasync: bool,
            _handle: Self::Handle,
        ) -> io::Result<()> {
            debug!("Blocktree::fsync called for inode {inode}");
            Self::not_supported()
        }

        fn fsyncdir(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _datasync: bool,
            _handle: Self::Handle,
        ) -> io::Result<()> {
debug!("Blocktree::fsyncdir called for inode {inode}");
            Self::not_supported()
        }

        // NOTE(review): the generic return and parameter types of the methods below were
        // reconstructed from the `FileSystem` trait of `fuse_backend_rs` — confirm against
        // the version of the crate this project depends on.

        fn getlk(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _handle: Self::Handle,
            _owner: u64,
            _lock: fuse_backend_rs::api::filesystem::FileLock,
            _flags: u32,
        ) -> io::Result<fuse_backend_rs::api::filesystem::FileLock> {
            debug!("Blocktree::getlk called for inode {inode}");
            Self::not_supported()
        }

        fn link(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _newparent: Self::Inode,
            _newname: &CStr,
        ) -> io::Result<Entry> {
            debug!("Blocktree::link called for inode {inode}");
            Self::not_supported()
        }

        fn listxattr(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _size: u32,
        ) -> io::Result<fuse_backend_rs::api::filesystem::ListxattrReply> {
            debug!("Blocktree::listxattr called for inode {inode}");
            Self::not_supported()
        }

        fn mkdir(
            &self,
            _ctx: &Context,
            _parent: Self::Inode,
            _name: &CStr,
            _mode: u32,
            _umask: u32,
        ) -> io::Result<Entry> {
            debug!("Blocktree::mkdir called");
            Self::not_supported()
        }

        fn mknod(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _name: &CStr,
            _mode: u32,
            _rdev: u32,
            _umask: u32,
        ) -> io::Result<Entry> {
            debug!("Blocktree::mknod called for inode {inode}");
            Self::not_supported()
        }

        fn notify_reply(&self) -> io::Result<()> {
            debug!("Blocktree::notify_reply called");
            Self::not_supported()
        }

        fn poll(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _handle: Self::Handle,
            _khandle: Self::Handle,
            _flags: u32,
            _events: u32,
        ) -> io::Result<u32> {
            debug!("Blocktree::poll called for inode {inode}");
            Self::not_supported()
        }

        fn readdirplus(
            &self,
            _ctx: &Context,
            _inode: Self::Inode,
            _handle: Self::Handle,
            _size: u32,
            _offset: u64,
            _add_entry: &mut dyn FnMut(
                fuse_backend_rs::api::filesystem::DirEntry,
                Entry,
            ) -> io::Result<usize>,
        ) -> io::Result<()> {
            debug!("Blocktree::readdirplus called");
            Self::not_supported()
        }

        fn readlink(&self, _ctx: &Context, inode: Self::Inode) -> io::Result<Vec<u8>> {
            debug!("Blocktree::readlink called for inode {inode}");
            Self::not_supported()
        }

        fn removexattr(&self, _ctx: &Context, inode: Self::Inode, _name: &CStr) -> io::Result<()> {
            debug!("Blocktree::removexattr called for inode {inode}");
            Self::not_supported()
        }

        fn rename(
            &self,
            _ctx: &Context,
            _olddir: Self::Inode,
            _oldname: &CStr,
            _newdir: Self::Inode,
            _newname: &CStr,
            _flags: u32,
        ) -> io::Result<()> {
            debug!("Blocktree::rename called");
            Self::not_supported()
        }

        fn rmdir(&self, _ctx: &Context, parent: Self::Inode, _name: &CStr) -> io::Result<()> {
            debug!("Blocktree::rmdir called on parent {parent}");
            Self::not_supported()
        }

        fn setattr(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _attr: stat64,
            _handle: Option<Self::Handle>,
            _valid: fuse_backend_rs::api::filesystem::SetattrValid,
        ) -> io::Result<(stat64, Duration)> {
            debug!("Blocktree::setattr called for inode {inode}");
            Self::not_supported()
        }

        fn setlk(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _handle: Self::Handle,
            _owner: u64,
            _lock: fuse_backend_rs::api::filesystem::FileLock,
            _flags: u32,
        ) -> io::Result<()> {
            debug!("Blocktree::setlk called for inode {inode}");
            Self::not_supported()
        }

        fn setlkw(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _handle: Self::Handle,
            _owner: u64,
            _lock: fuse_backend_rs::api::filesystem::FileLock,
            _flags: u32,
        ) -> io::Result<()> {
            debug!("Blocktree::setlkw called for inode {inode}");
            Self::not_supported()
        }

        fn setxattr(
            &self,
            _ctx: &Context,
            inode: Self::Inode,
            _name: &CStr,
            _value: &[u8],
            _flags: u32,
        ) -> io::Result<()> {
            debug!("Blocktree::setxattr called for inode {inode}");
            Self::not_supported()
        }

        fn statfs(&self, _ctx: &Context, inode: Self::Inode) -> io::Result<statvfs64> {
            debug!("Blocktree::statfs called for inode {inode}");
            Self::not_supported()
        }

        fn symlink(
            &self,
            _ctx: &Context,
            _linkname: &CStr,
            _parent: Self::Inode,
            _name: &CStr,
        ) -> io::Result<Entry> {
            debug!("Blocktree::symlink called");
            Self::not_supported()
        }
    }
}

#[cfg(test)]
mod tests {
    use fuse_backend_rs::{
        abi::fuse_abi::CreateIn,
        api::filesystem::{Context, FileSystem, FsOptions},
    };
    use std::{ffi::CString, io};
    use tempdir::TempDir;
    use test_helpers::*;

    use crate::{crypto::ConcreteCreds, test_helpers, BlockMeta, Decompose};

    use super::{private::SpecInodes, *};

    /// Tests for the
[ModeAuthorizer] struct. mod mode_authorizer_tests { use super::{ super::private::{Authorizer, AuthzContext}, *, }; struct TestCase { ctx_uid: u32, ctx_gid: u32, meta: BlockMeta, } impl TestCase { const BLOCK_UID: u32 = 1000; const BLOCK_GID: u32 = 1000; const CTX_PID: libc::pid_t = 100; fn new(ctx_uid: u32, ctx_gid: u32, mode: u32) -> TestCase { let mut meta = BlockMeta::new(&*test_helpers::NODE_CREDS) .expect("failed to create block metadata"); meta.body .access_secrets(|secrets| { secrets.uid = Self::BLOCK_UID; secrets.gid = Self::BLOCK_GID; secrets.mode = mode; Ok(()) }) .expect("failed to update secrets"); TestCase { ctx_uid, ctx_gid, meta, } } fn context(&self) -> AuthzContext<'_> { AuthzContext { uid: self.ctx_uid, gid: self.ctx_gid, pid: Self::CTX_PID, meta: &self.meta, } } } #[test] fn cant_read_when_no_bits_set() { let case = TestCase::new(TestCase::BLOCK_UID, TestCase::BLOCK_GID, 0); let result = ModeAuthorizer {}.can_read(&case.context()); assert!(result.is_err()) } #[test] fn cant_write_when_no_bits_set() { let case = TestCase::new(TestCase::BLOCK_UID, TestCase::BLOCK_GID, 0); let result = ModeAuthorizer {}.can_write(&case.context()); assert!(result.is_err()) } #[test] fn cant_exec_when_no_bits_set() { let case = TestCase::new(TestCase::BLOCK_UID, TestCase::BLOCK_GID, 0); let result = ModeAuthorizer {}.can_exec(&case.context()); assert!(result.is_err()) } #[test] fn user_can_read_when_bit_set() { let case = TestCase::new(TestCase::BLOCK_UID, TestCase::BLOCK_GID, libc::S_IRUSR); let result = ModeAuthorizer {}.can_read(&case.context()); assert!(result.is_ok()) } #[test] fn user_can_write_when_bit_set() { let case = TestCase::new(TestCase::BLOCK_UID, TestCase::BLOCK_GID, libc::S_IWUSR); let result = ModeAuthorizer {}.can_write(&case.context()); assert!(result.is_ok()) } #[test] fn user_can_exec_when_bit_set() { let case = TestCase::new(TestCase::BLOCK_UID, TestCase::BLOCK_GID, libc::S_IXUSR); let result = ModeAuthorizer {}.can_exec(&case.context()); 
assert!(result.is_ok()) } #[test] fn group_can_read_when_bit_set() { let case = TestCase::new(TestCase::BLOCK_UID, TestCase::BLOCK_GID, libc::S_IRGRP); let result = ModeAuthorizer {}.can_read(&case.context()); assert!(result.is_ok()) } #[test] fn group_can_write_when_bit_set() { let case = TestCase::new(TestCase::BLOCK_UID, TestCase::BLOCK_GID, libc::S_IWGRP); let result = ModeAuthorizer {}.can_write(&case.context()); assert!(result.is_ok()) } #[test] fn group_can_exec_when_bit_set() { let case = TestCase::new(TestCase::BLOCK_UID, TestCase::BLOCK_GID, libc::S_IXGRP); let result = ModeAuthorizer {}.can_exec(&case.context()); assert!(result.is_ok()) } #[test] fn other_can_read_when_bit_set() { let case = TestCase::new(TestCase::BLOCK_UID, TestCase::BLOCK_GID, libc::S_IROTH); let result = ModeAuthorizer {}.can_read(&case.context()); assert!(result.is_ok()) } #[test] fn other_can_write_when_bit_set() { let case = TestCase::new(TestCase::BLOCK_UID, TestCase::BLOCK_GID, libc::S_IWOTH); let result = ModeAuthorizer {}.can_write(&case.context()); assert!(result.is_ok()) } #[test] fn other_can_exec_when_bit_set() { let case = TestCase::new(TestCase::BLOCK_UID, TestCase::BLOCK_GID, libc::S_IXOTH); let result = ModeAuthorizer {}.can_exec(&case.context()); assert!(result.is_ok()) } #[test] fn other_cant_write_even_if_user_can() { let case = TestCase::new( TestCase::BLOCK_UID + 1, TestCase::BLOCK_GID + 1, libc::S_IWUSR, ); let result = ModeAuthorizer {}.can_write(&case.context()); assert!(result.is_err()) } #[test] fn other_cant_write_even_if_group_can() { let case = TestCase::new( TestCase::BLOCK_UID + 1, TestCase::BLOCK_GID + 1, libc::S_IWGRP, ); let result = ModeAuthorizer {}.can_write(&case.context()); assert!(result.is_err()) } #[test] fn user_allowed_read_when_only_other_bit_set() { let case = TestCase::new(TestCase::BLOCK_UID, TestCase::BLOCK_GID, libc::S_IROTH); let result = ModeAuthorizer {}.can_read(&case.context()); assert!(result.is_ok()) } } struct BtTestCase { dir: 
TempDir, bt: Blocktree, } impl BtTestCase { fn new_empty() -> BtTestCase { let dir = TempDir::new("fuse").expect("failed to create temp dir"); let bt = Blocktree::new_empty(dir.path().to_owned(), 0, Self::creds(), ModeAuthorizer {}) .expect("failed to create empty blocktree"); bt.init(FsOptions::empty()).expect("init failed"); BtTestCase { dir, bt } } fn new_existing(dir: TempDir) -> BtTestCase { let bt = Blocktree::new_existing(dir.path().to_owned(), Self::creds(), ModeAuthorizer {}) .expect("failed to create blocktree from existing directory"); bt.init(FsOptions::empty()).expect("init failed"); BtTestCase { dir, bt } } fn creds() -> ConcreteCreds { test_helpers::NODE_CREDS.clone() } fn context(&self) -> Context { let (stat, ..) = self .bt .getattr(&Default::default(), SpecInodes::RootDir.into(), None) .expect("getattr failed"); Context { uid: stat.st_uid, gid: stat.st_gid, pid: 1, } } } /// Tests that a new file can be created, written to and the written data can be read from it. #[test] fn create_write_lseek_read() { let case = BtTestCase::new_empty(); let bt = &case.bt; let ctx = case.context(); let name = CString::new("README.md").unwrap(); let flags = libc::O_RDWR as u32; let (entry, handle, ..) 
= bt .create( &ctx, SpecInodes::RootDir.into(), name.as_c_str(), CreateIn { mode: libc::S_IFREG | 0o644, umask: 0, flags, fuse_flags: 0, }, ) .expect("failed to create file"); let inode = entry.inode; let handle = handle.unwrap(); const LEN: usize = 32; let mut expected = BtCursor::new([1u8; LEN]); let written = bt .write( &ctx, inode, handle, &mut expected, LEN as u32, 0, None, false, flags, 0, ) .expect("write failed"); assert_eq!(LEN, written); bt.lseek(&ctx, inode, handle, 0, 0).expect("lseek failed"); let mut actual = BtCursor::new([0u8; LEN]); let read = bt .read(&ctx, inode, handle, &mut actual, LEN as u32, 0, None, flags) .expect("failed to read"); assert_eq!(LEN, read); assert_eq!(expected, actual) } #[test] fn lookup() { let case = BtTestCase::new_empty(); let bt = &case.bt; let ctx = case.context(); let name = CString::new("README.md").unwrap(); let (expected, ..) = bt .create( &ctx, SpecInodes::RootDir.into(), name.as_c_str(), Default::default(), ) .expect("failed to create file"); let actual = bt .lookup(&Default::default(), SpecInodes::RootDir.into(), &name) .expect("lookup failed"); assert_eq!(expected.generation, actual.generation); assert_eq!(expected.inode, actual.inode); } /// Tests that data written by one instance of [Blocktree] can be read by a subsequent /// instance. #[test] fn new_existing() { const EXPECTED: &[u8] = b"cool as cucumbers"; let name = CString::new("RESIGNATION.docx").unwrap(); let case = BtTestCase::new_empty(); let bt = &case.bt; let ctx = case.context(); let flags = libc::O_RDWR as u32; { let (entry, handle, ..) 
= bt .create( &ctx, SpecInodes::RootDir.into(), name.as_c_str(), CreateIn { mode: libc::S_IFREG | 0o644, umask: 0, flags, fuse_flags: 0, }, ) .expect("failed to create file"); let inode = entry.inode; let handle = handle.unwrap(); let mut vec = Vec::with_capacity(EXPECTED.len()); vec.extend_from_slice(EXPECTED); let mut cursor = BtCursor::new(vec); let written = bt .write( &Default::default(), inode, handle, &mut cursor, EXPECTED.len() as u32, 0, None, false, flags, 0, ) .expect("write failed"); assert_eq!(EXPECTED.len(), written); bt.flush(&Default::default(), inode, handle, 0) .expect("flush failed"); } let case = BtTestCase::new_existing(case.dir); let bt = &case.bt; let entry = bt .lookup(&Default::default(), SpecInodes::RootDir.into(), &name) .expect("lookup failed"); let inode = entry.inode; let (handle, ..) = bt .open(&Default::default(), entry.inode, 0, 0) .expect("open failed"); let handle = handle.unwrap(); let mut actual = BtCursor::new([0u8; EXPECTED.len()]); let _ = bt .read( &Default::default(), inode, handle, &mut actual, EXPECTED.len() as u32, 0, None, flags, ) .expect("read failed"); assert_eq!(EXPECTED, actual.into_inner().as_slice()) } /// Tests that an error is returned by the `Blocktree::write` method if the file was opened /// read-only. #[test] fn open_read_only_write_is_error() { let name = CString::new("books.ods").unwrap(); let case = BtTestCase::new_empty(); let bt = &case.bt; let ctx = case.context(); let (entry, handle, ..) = bt .create( &ctx, SpecInodes::RootDir.into(), name.as_c_str(), CreateIn { mode: libc::S_IFREG | 0o644, umask: 0, flags: 0, fuse_flags: 0, }, ) .expect("failed to create file"); let inode = entry.inode; let handle = handle.unwrap(); bt.release(&ctx, inode, 0, handle, false, false, None) .expect("release failed"); let flags = libc::O_RDONLY as u32; let (handle, ..) 
= bt.open(&ctx, inode, flags, 0).expect("open failed"); let handle = handle.unwrap(); const LEN: usize = 32; let mut reader = BtCursor::new([1u8; LEN]); let result = bt.write( &ctx, inode, handle, &mut reader, LEN.try_into().unwrap(), 0, None, false, flags, 0, ); let err = result.err().unwrap(); assert_eq!(io::ErrorKind::PermissionDenied, err.kind()); } /// Tests that a call to `read` fails when a file is opened write only. #[test] fn open_write_only_read_is_error() { let name = CString::new("books.ods").unwrap(); let case = BtTestCase::new_empty(); let bt = &case.bt; let ctx = case.context(); let (entry, handle, ..) = bt .create( &ctx, SpecInodes::RootDir.into(), name.as_c_str(), CreateIn { mode: libc::S_IFREG | 0o644, umask: 0, flags: 0, fuse_flags: 0, }, ) .expect("failed to create file"); let inode = entry.inode; let handle = handle.unwrap(); bt.release(&ctx, inode, 0, handle, false, false, None) .expect("release failed"); let flags = libc::O_WRONLY as u32; let (handle, ..) = bt.open(&ctx, inode, flags, 0).expect("open failed"); let handle = handle.unwrap(); const LEN: usize = 32; let mut reader = BtCursor::new([1u8; LEN]); let result = bt.read( &ctx, inode, handle, &mut reader, LEN.try_into().unwrap(), 0, None, flags, ); let err = result.err().unwrap(); assert_eq!(io::ErrorKind::PermissionDenied, err.kind()); } }