1
Fork 0

Refactor ReadDir into a state machine

This commit is contained in:
Piotr Osiewicz 2024-11-26 13:45:17 +01:00
parent 529aae6fc3
commit 692c19ae80

View file

@ -27,10 +27,26 @@ pub struct FileAttr {
pub struct ReadDir { pub struct ReadDir {
inner: Arc<ReadDirInner>, inner: Arc<ReadDirInner>,
cookie: Option<wasi::Dircookie>, state: ReadDirState,
buf: Vec<u8>, }
offset: usize,
cap: usize, enum ReadDirState {
/// Next DirEntry should be read from contents of buf at `offset`
FillBuffer {
next_read_offset: wasi::Dircookie,
buf: Vec<u8>,
},
ProcessEntry {
buf: Vec<u8>,
next_read_offset: Option<wasi::Dircookie>,
offset: usize,
},
/// Do not fetch any more entries, process all entries
RunUntilExhaustion {
buf: Vec<u8>,
offset: usize,
},
Done,
} }
struct ReadDirInner { struct ReadDirInner {
@ -147,11 +163,8 @@ impl FileType {
impl ReadDir { impl ReadDir {
fn new(dir: File, root: PathBuf) -> ReadDir { fn new(dir: File, root: PathBuf) -> ReadDir {
ReadDir { ReadDir {
cookie: Some(0),
buf: vec![0; 128],
offset: 0,
cap: 0,
inner: Arc::new(ReadDirInner { dir, root }), inner: Arc::new(ReadDirInner { dir, root }),
state: ReadDirState::FillBuffer { next_read_offset: 0, buf: vec![0; 128] },
} }
} }
} }
@ -162,81 +175,99 @@ impl fmt::Debug for ReadDir {
} }
} }
impl core::iter::FusedIterator for ReadDir {}
impl Iterator for ReadDir { impl Iterator for ReadDir {
type Item = io::Result<DirEntry>; type Item = io::Result<DirEntry>;
fn next(&mut self) -> Option<io::Result<DirEntry>> { fn next(&mut self) -> Option<io::Result<DirEntry>> {
loop { match &mut self.state {
// If we've reached the capacity of our buffer then we need to read ReadDirState::FillBuffer { next_read_offset, ref mut buf } => {
// some more from the OS, otherwise we pick up at our old offset. let result = self.inner.dir.fd.readdir(buf, *next_read_offset);
let offset = if self.offset == self.cap { match result {
let cookie = self.cookie.take()?; Ok(read_bytes) => {
match self.inner.dir.fd.readdir(&mut self.buf, cookie) { if read_bytes < buf.len() {
Ok(bytes) => { buf.truncate(read_bytes);
// No more entries if we read less than buffer size self.state =
if bytes < self.buf.len() { ReadDirState::RunUntilExhaustion { buf: mem::take(buf), offset: 0 };
self.cookie = None;
if bytes == 0 {
return None;
}
} else { } else {
self.cookie = Some(cookie); debug_assert_eq!(read_bytes, buf.len());
self.state = ReadDirState::ProcessEntry {
buf: mem::take(buf),
offset: 0,
next_read_offset: Some(*next_read_offset),
};
} }
self.cap = self.buf.len(); self.next()
self.offset = 0; }
0 Err(e) => {
self.state = ReadDirState::Done;
return Some(Err(e));
} }
Err(e) => return Some(Err(e)),
} }
} else {
self.offset
};
let data = &self.buf[offset..self.cap];
// If we're not able to read a directory entry then that means it
// must have been truncated at the end of the buffer, so reset our
// offset so we can go back and reread into the buffer, picking up
// where we last left off.
let dirent_size = mem::size_of::<wasi::Dirent>();
if data.len() < dirent_size {
assert!(self.buf.len() >= dirent_size);
self.offset = self.cap;
continue;
} }
let (dirent, data) = data.split_at(dirent_size); ReadDirState::ProcessEntry { ref mut buf, next_read_offset, offset } => {
let dirent = unsafe { ptr::read_unaligned(dirent.as_ptr() as *const wasi::Dirent) }; let contents = &buf[*offset..];
const DIRENT_SIZE: usize = crate::mem::size_of::<wasi::Dirent>();
if contents.len() >= DIRENT_SIZE {
let (dirent, data) = contents.split_at(DIRENT_SIZE);
let dirent =
unsafe { ptr::read_unaligned(dirent.as_ptr() as *const wasi::Dirent) };
// If the file name was truncated, then we need to reinvoke
// `readdir` so we truncate our buffer to start over and reread this
// descriptor.
if data.len() < dirent.d_namlen as usize {
if buf.len() < dirent.d_namlen as usize + DIRENT_SIZE {
buf.resize(dirent.d_namlen as usize + DIRENT_SIZE, 0);
}
if let Some(next_read_offset) = *next_read_offset {
self.state =
ReadDirState::FillBuffer { next_read_offset, buf: mem::take(buf) };
} else {
self.state = ReadDirState::Done;
}
// If the file name was truncated, then we need to reinvoke return self.next();
// `readdir` so we truncate our buffer to start over and reread this }
// descriptor. Note that if our offset is 0 that means the file name next_read_offset.as_mut().map(|cookie| {
// is massive and we need a bigger buffer. *cookie = dirent.d_next;
if data.len() < dirent.d_namlen as usize { });
if offset == 0 { *offset = *offset + DIRENT_SIZE + dirent.d_namlen as usize;
let amt_to_add = self.buf.capacity();
self.buf.extend(iter::repeat(0).take(amt_to_add)); let name = &data[..(dirent.d_namlen as usize)];
// These names are skipped on all other platforms, so let's skip
// them here too
if name == b"." || name == b".." {
return self.next();
}
return Some(Ok(DirEntry {
meta: dirent,
name: name.to_vec(),
inner: self.inner.clone(),
}));
} else if let Some(next_read_offset) = *next_read_offset {
self.state = ReadDirState::FillBuffer { next_read_offset, buf: mem::take(buf) };
} else {
self.state = ReadDirState::Done;
} }
assert!(self.cookie.is_some()); self.next()
self.offset = self.cap;
continue;
} }
self.cookie.as_mut().map(|cookie| { ReadDirState::RunUntilExhaustion { buf, offset } => {
*cookie = dirent.d_next; if *offset >= buf.len() {
}); self.state = ReadDirState::Done;
self.offset = offset + dirent_size + dirent.d_namlen as usize; } else {
self.state = ReadDirState::ProcessEntry {
buf: mem::take(buf),
offset: *offset,
next_read_offset: None,
};
}
let name = &data[..(dirent.d_namlen as usize)]; self.next()
// These names are skipped on all other platforms, so let's skip
// them here too
if name == b"." || name == b".." {
continue;
} }
ReadDirState::Done => None,
return Some(Ok(DirEntry {
meta: dirent,
name: name.to_vec(),
inner: self.inner.clone(),
}));
} }
} }
} }