pvmfw: Implement software dirty state handling
Flush only dirty pages when dropping RW memory regions. Implement
dirty-bit handling in software: map writable regions as read-only with
DBM set and switch pages to writable-dirty when a write access causes a
permission fault.
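In outline (an illustrative sketch with simplified stand-in types, not
the code added below, which uses aarch64_paging Descriptors and
Attributes): writable pages start out mapped read-only with DBM set,
the permission fault handler clears the read-only bit on the first
write, and the flush path treats a clear read-only bit as the dirty
marker.

  // Sketch only. Bit positions follow the Arm stage-1 page/block
  // descriptor format (AP[2] = bit 7, DBM = bit 51).
  struct Pte(u64);

  const READ_ONLY: u64 = 1 << 7;
  const DBM: u64 = 1 << 51;

  impl Pte {
      /// Dirty means: tracked via DBM and the read-only bit has been cleared.
      fn is_dirty(&self) -> bool {
          self.0 & DBM != 0 && self.0 & READ_ONLY == 0
      }

      /// Called from the permission fault handler on the first write.
      fn mark_dirty(&mut self) {
          assert!(self.0 & DBM != 0, "fault on a page not tracked in software");
          self.0 &= !READ_ONLY;
          // The real handler must also invalidate the stale TLB entry
          // (TLBI; DSB; ISB), as mark_dirty_block() does below.
      }
  }

Stack and scratch memory keep plain RW mappings since they are zeroed
and flushed explicitly before jumping to the payload.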
Bug: 269738062
Test: atest MicrodroidTestApp
Change-Id: I2e73a7cc867bae8b68c2a3b68d382405327f99e8
diff --git a/pvmfw/src/entry.rs b/pvmfw/src/entry.rs
index 4d2d696..ca74740 100644
--- a/pvmfw/src/entry.rs
+++ b/pvmfw/src/entry.rs
@@ -35,6 +35,7 @@
use log::warn;
use log::LevelFilter;
use vmbase::{console, layout, logger, main, power::reboot};
+use zeroize::Zeroize;
#[derive(Debug, Clone)]
pub enum RebootReason {
@@ -82,18 +83,13 @@
}
impl<'a> MemorySlices<'a> {
- fn new(
- fdt: usize,
- kernel: usize,
- kernel_size: usize,
- memory: &mut MemoryTracker,
- ) -> Result<Self, RebootReason> {
+ fn new(fdt: usize, kernel: usize, kernel_size: usize) -> Result<Self, RebootReason> {
// SAFETY - SIZE_2MB is non-zero.
const FDT_SIZE: NonZeroUsize = unsafe { NonZeroUsize::new_unchecked(helpers::SIZE_2MB) };
// TODO - Only map the FDT as read-only, until we modify it right before jump_to_payload()
// e.g. by generating a DTBO for a template DT in main() and, on return, re-map DT as RW,
// overwrite with the template DT and apply the DTBO.
- let range = memory.alloc_mut(fdt, FDT_SIZE).map_err(|e| {
+ let range = MEMORY.lock().as_mut().unwrap().alloc_mut(fdt, FDT_SIZE).map_err(|e| {
error!("Failed to allocate the FDT range: {e}");
RebootReason::InternalError
})?;
@@ -110,13 +106,13 @@
let memory_range = info.memory_range;
debug!("Resizing MemoryTracker to range {memory_range:#x?}");
- memory.shrink(&memory_range).map_err(|_| {
- error!("Failed to use memory range value from DT: {memory_range:#x?}");
+ MEMORY.lock().as_mut().unwrap().shrink(&memory_range).map_err(|e| {
+ error!("Failed to use memory range value from DT: {memory_range:#x?}: {e}");
RebootReason::InvalidFdt
})?;
if get_hypervisor().has_cap(HypervisorCap::DYNAMIC_MEM_SHARE) {
- memory.init_dynamic_shared_pool().map_err(|e| {
+ MEMORY.lock().as_mut().unwrap().init_dynamic_shared_pool().map_err(|e| {
error!("Failed to initialize dynamically shared pool: {e}");
RebootReason::InternalError
})?;
@@ -126,14 +122,14 @@
RebootReason::InvalidFdt
})?;
- memory.init_static_shared_pool(range).map_err(|e| {
+ MEMORY.lock().as_mut().unwrap().init_static_shared_pool(range).map_err(|e| {
error!("Failed to initialize pre-shared pool {e}");
RebootReason::InvalidFdt
})?;
}
let kernel_range = if let Some(r) = info.kernel_range {
- memory.alloc_range(&r).map_err(|e| {
+ MEMORY.lock().as_mut().unwrap().alloc_range(&r).map_err(|e| {
error!("Failed to obtain the kernel range with DT range: {e}");
RebootReason::InternalError
})?
@@ -145,7 +141,7 @@
RebootReason::InvalidPayload
})?;
- memory.alloc(kernel, kernel_size).map_err(|e| {
+ MEMORY.lock().as_mut().unwrap().alloc(kernel, kernel_size).map_err(|e| {
error!("Failed to obtain the kernel range with legacy range: {e}");
RebootReason::InternalError
})?
@@ -160,7 +156,7 @@
let ramdisk = if let Some(r) = info.initrd_range {
debug!("Located ramdisk at {r:?}");
- let r = memory.alloc_range(&r).map_err(|e| {
+ let r = MEMORY.lock().as_mut().unwrap().alloc_range(&r).map_err(|e| {
error!("Failed to obtain the initrd range: {e}");
RebootReason::InvalidRamdisk
})?;
@@ -242,7 +238,7 @@
debug!("... Success!");
MEMORY.lock().replace(MemoryTracker::new(page_table));
- let slices = MemorySlices::new(fdt, payload, payload_size, MEMORY.lock().as_mut().unwrap())?;
+ let slices = MemorySlices::new(fdt, payload, payload_size)?;
rand::init().map_err(|e| {
error!("Failed to initialize rand: {e}");
@@ -252,7 +248,8 @@
// This wrapper allows main() to be blissfully ignorant of platform details.
let next_bcc = crate::main(slices.fdt, slices.kernel, slices.ramdisk, bcc_slice, debug_policy)?;
- helpers::flushed_zeroize(bcc_slice);
+ // Writable-dirty regions will be flushed when MemoryTracker is dropped.
+ bcc_slice.zeroize();
info!("Expecting a bug making MMIO_GUARD_UNMAP return NOT_SUPPORTED on success");
MEMORY.lock().as_mut().unwrap().mmio_unmap_all().map_err(|e| {
@@ -395,13 +392,10 @@
}
unsafe fn get_appended_data_slice() -> &'static mut [u8] {
- let base = helpers::align_up(layout::binary_end(), helpers::SIZE_4KB).unwrap();
- // pvmfw is contained in a 2MiB region so the payload can't be larger than the 2MiB alignment.
- let size = helpers::align_up(base, helpers::SIZE_2MB).unwrap() - base;
-
+ let range = mmu::PageTable::appended_payload_range();
// SAFETY: This region is mapped and the linker script prevents it from overlapping with other
// objects.
- unsafe { slice::from_raw_parts_mut(base as *mut u8, size) }
+ unsafe { slice::from_raw_parts_mut(range.start as *mut u8, range.len()) }
}
enum AppendedConfigType {
diff --git a/pvmfw/src/exceptions.rs b/pvmfw/src/exceptions.rs
index 39641b0..e819729 100644
--- a/pvmfw/src/exceptions.rs
+++ b/pvmfw/src/exceptions.rs
@@ -89,15 +89,27 @@
}
}
+#[inline]
+fn handle_translation_fault(far: usize) -> Result<(), HandleExceptionError> {
+ let mut guard = MEMORY.try_lock().ok_or(HandleExceptionError::PageTableUnavailable)?;
+ let memory = guard.as_mut().ok_or(HandleExceptionError::PageTableNotInitialized)?;
+ Ok(memory.handle_mmio_fault(far)?)
+}
+
+#[inline]
+fn handle_permission_fault(far: usize) -> Result<(), HandleExceptionError> {
+ let mut guard = MEMORY.try_lock().ok_or(HandleExceptionError::PageTableUnavailable)?;
+ let memory = guard.as_mut().ok_or(HandleExceptionError::PageTableNotInitialized)?;
+ Ok(memory.handle_permission_fault(far)?)
+}
+
fn handle_exception(esr: Esr, far: usize) -> Result<(), HandleExceptionError> {
// Handle all translation faults on both read and write, and MMIO guard map
// flagged invalid pages or blocks that caused the exception.
+ // Also handle permission faults on DBM-flagged entries, marking them writable-dirty on write.
match esr {
- Esr::DataAbortTranslationFault => {
- let mut locked = MEMORY.try_lock().ok_or(HandleExceptionError::PageTableUnavailable)?;
- let memory = locked.as_mut().ok_or(HandleExceptionError::PageTableNotInitialized)?;
- Ok(memory.handle_mmio_fault(far)?)
- }
+ Esr::DataAbortTranslationFault => handle_translation_fault(far),
+ Esr::DataAbortPermissionFault => handle_permission_fault(far),
_ => Err(HandleExceptionError::UnknownException),
}
}
diff --git a/pvmfw/src/helpers.rs b/pvmfw/src/helpers.rs
index 403c7e4..4b669d7 100644
--- a/pvmfw/src/helpers.rs
+++ b/pvmfw/src/helpers.rs
@@ -186,3 +186,46 @@
core::ffi::CStr::from_bytes_with_nul(concat!($str, "\0").as_bytes()).unwrap()
}};
}
+
+/// Executes a data synchronization barrier.
+#[macro_export]
+macro_rules! dsb {
+ ($option:literal) => {{
+ // Safe because this is just a memory barrier and does not affect Rust.
+ #[allow(unused_unsafe)] // In case the macro is used within an unsafe block.
+ unsafe {
+ core::arch::asm!(concat!("dsb ", $option), options(nomem, nostack, preserves_flags));
+ }
+ }};
+}
+
+/// Executes an instruction synchronization barrier.
+#[macro_export]
+macro_rules! isb {
+ () => {{
+ // Safe because this is just a memory barrier and does not affect Rust.
+ #[allow(unused_unsafe)] // In case the macro is used within an unsafe block.
+ unsafe {
+ core::arch::asm!("isb", options(nomem, nostack, preserves_flags));
+ }
+ }};
+}
+
+/// Invalidates cached leaf PTE entries by virtual address.
+#[macro_export]
+macro_rules! tlbi {
+ ($option:literal, $asid:expr, $addr:expr) => {{
+ let asid: usize = $asid;
+ let addr: usize = $addr;
+ // Safe because it invalidates TLB and doesn't affect Rust. When the address matches a
+ // block entry larger than the page size, all translations for the block are invalidated.
+ #[allow(unused_unsafe)] // In case the macro is used within an unsafe block.
+ unsafe {
+ core::arch::asm!(
+ concat!("tlbi ", $option, ", {x}"),
+ x = in(reg) (asid << 48) | (addr >> 12),
+ options(nomem, nostack, preserves_flags)
+ );
+ }
+ }};
+}
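As a usage reference for these macros, the sequence after changing a
PTE's permissions is: invalidate the stale TLB entry, wait for the
invalidation to complete, then resynchronize the instruction stream.
The ASID and address below are arbitrary example values:

  // For `tlbi vale1`, the operand packs the ASID and the page-aligned VA:
  // (1 << 48) | (0x8000_0000 >> 12) == 0x1_0000_0008_0000.
  tlbi!("vale1", 1, 0x8000_0000);
  dsb!("ish"); // Ensure the TLB invalidation has completed.
  isb!();      // Make the updated translation visible to later instructions.

mark_dirty_block() in memory.rs below uses this sequence with the page
table's ASID and the faulting virtual address.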
diff --git a/pvmfw/src/memory.rs b/pvmfw/src/memory.rs
index 1a2b4b7..4ed3072 100644
--- a/pvmfw/src/memory.rs
+++ b/pvmfw/src/memory.rs
@@ -18,6 +18,7 @@
use crate::helpers::{self, page_4kb_of, RangeExt, PVMFW_PAGE_SIZE, SIZE_4MB};
use crate::mmu;
+use crate::{dsb, isb, tlbi};
use aarch64_paging::paging::{Attributes, Descriptor, MemoryRegion as VaRange};
use alloc::alloc::alloc_zeroed;
use alloc::alloc::dealloc;
@@ -29,6 +30,7 @@
use core::cmp::max;
use core::cmp::min;
use core::fmt;
+use core::iter::once;
use core::num::NonZeroUsize;
use core::ops::Range;
use core::ptr::NonNull;
@@ -50,7 +52,7 @@
pub static MEMORY: SpinMutex<Option<MemoryTracker>> = SpinMutex::new(None);
unsafe impl Send for MemoryTracker {}
-#[derive(Clone, Copy, Debug, Default)]
+#[derive(Clone, Copy, Debug, Default, PartialEq)]
enum MemoryType {
#[default]
ReadOnly,
@@ -121,6 +123,10 @@
SharedPoolSetFailure,
/// Invalid page table entry.
InvalidPte,
+ /// Failed to flush memory region.
+ FlushRegionFailed,
+ /// Failed to set PTE dirty state.
+ SetPteDirtyFailed,
}
impl fmt::Display for MemoryTrackerError {
@@ -138,6 +144,8 @@
Self::SharedMemorySetFailure => write!(f, "Failed to set SHARED_MEMORY"),
Self::SharedPoolSetFailure => write!(f, "Failed to set SHARED_POOL"),
Self::InvalidPte => write!(f, "Page table entry is not valid"),
+ Self::FlushRegionFailed => write!(f, "Failed to flush memory region"),
+ Self::SetPteDirtyFailed => write!(f, "Failed to set PTE dirty state"),
}
}
}
@@ -386,19 +394,38 @@
// Maps a single device page, breaking up block mappings if necessary.
self.page_table.map_device(&page_range).map_err(|_| MemoryTrackerError::FailedToMap)
}
+
+ /// Flushes all memory regions marked as writable-dirty.
+ fn flush_dirty_pages(&mut self) -> Result<()> {
+ // Collect memory ranges for which dirty state is tracked.
+ let writable_regions =
+ self.regions.iter().filter(|r| r.mem_type == MemoryType::ReadWrite).map(|r| &r.range);
+ let payload_range = mmu::PageTable::appended_payload_range();
+ // Execute a barrier instruction to ensure all hardware updates to the page table have been
+ // observed before reading PTE flags to determine dirty state.
+ dsb!("ish");
+ // Now flush writable-dirty pages in those regions.
+ for range in writable_regions.chain(once(&payload_range)) {
+ self.page_table
+ .modify_range(range, &flush_dirty_range)
+ .map_err(|_| MemoryTrackerError::FlushRegionFailed)?;
+ }
+ Ok(())
+ }
+
+ /// Handles a permission fault on a read-only page or block by setting the writable-dirty state.
+ /// This is expected to be called from the exception handler when hardware management of the
+ /// dirty state is disabled or unavailable.
+ pub fn handle_permission_fault(&mut self, addr: usize) -> Result<()> {
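+ // A one-byte range is enough to select the page or block mapping containing the address.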
+ self.page_table
+ .modify_range(&(addr..addr + 1), &mark_dirty_block)
+ .map_err(|_| MemoryTrackerError::SetPteDirtyFailed)
+ }
}
impl Drop for MemoryTracker {
fn drop(&mut self) {
- for region in &self.regions {
- match region.mem_type {
- MemoryType::ReadWrite => {
- // TODO(b/269738062): Use PT's dirty bit to only flush pages that were touched.
- helpers::flush_region(region.range.start, region.range.len())
- }
- MemoryType::ReadOnly => {}
- }
- }
+ self.flush_dirty_pages().unwrap();
self.unshare_all_memory()
}
}
@@ -494,11 +521,14 @@
level: usize,
) -> result::Result<(), ()> {
let flags = desc.flags().expect("Unsupported PTE flags set");
+ if !is_leaf_pte(&flags, level) {
+ return Ok(());
+ }
// This function will be called on an address range that corresponds to a device. Only if a
// page has been accessed (written to or read from), will it contain the VALID flag and be MMIO
// guard mapped. Therefore, we can skip unmapping invalid pages, they were never MMIO guard
// mapped anyway.
- if is_leaf_pte(&flags, level) && flags.contains(Attributes::VALID) {
+ if flags.contains(Attributes::VALID) {
assert!(
flags.contains(mmu::MMIO_LAZY_MAP_FLAG),
"Attempting MMIO guard unmap for non-device pages"
@@ -519,3 +549,48 @@
}
Ok(())
}
+
+/// Flushes the range a descriptor maps if the descriptor is in the writable-dirty state.
+fn flush_dirty_range(
+ va_range: &VaRange,
+ desc: &mut Descriptor,
+ level: usize,
+) -> result::Result<(), ()> {
+ // Only flush ranges corresponding to dirty leaf PTEs.
+ let flags = desc.flags().ok_or(())?;
+ if !is_leaf_pte(&flags, level) {
+ return Ok(());
+ }
+ if !flags.contains(Attributes::READ_ONLY) {
+ helpers::flush_region(va_range.start().0, va_range.len());
+ }
+ Ok(())
+}
+
+/// Clears the read-only flag on a PTE, making it writable-dirty. Used to handle permission
+/// faults on read-only descriptors when dirty state is managed in software.
+fn mark_dirty_block(
+ va_range: &VaRange,
+ desc: &mut Descriptor,
+ level: usize,
+) -> result::Result<(), ()> {
+ let flags = desc.flags().ok_or(())?;
+ if !is_leaf_pte(&flags, level) {
+ return Ok(());
+ }
+ if flags.contains(Attributes::DBM) {
+ assert!(flags.contains(Attributes::READ_ONLY), "unexpected PTE writable state");
+ desc.modify_flags(Attributes::empty(), Attributes::READ_ONLY);
+ // Updating the read-only bit of a PTE requires TLB invalidation.
+ // A TLB maintenance instruction is only guaranteed to be complete after a DSB instruction.
+ // An ISB instruction is required to ensure the effects of completed TLB maintenance
+ // instructions are visible to instructions fetched afterwards.
+ // See ARM ARM E2.3.10 and G5.9.
+ tlbi!("vale1", mmu::PageTable::ASID, va_range.start().0);
+ dsb!("ish");
+ isb!();
+ Ok(())
+ } else {
+ Err(())
+ }
+}
diff --git a/pvmfw/src/mmu.rs b/pvmfw/src/mmu.rs
index ed9b209..c72ceea 100644
--- a/pvmfw/src/mmu.rs
+++ b/pvmfw/src/mmu.rs
@@ -36,20 +36,13 @@
const CODE: Attributes = MEMORY.union(Attributes::READ_ONLY);
const DATA: Attributes = MEMORY.union(Attributes::EXECUTE_NEVER);
const RODATA: Attributes = DATA.union(Attributes::READ_ONLY);
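+// Software dirty state management: writable data is initially mapped read-only with DBM set;
+// the permission fault handler clears READ_ONLY on the first write (see mark_dirty_block()).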
+const DATA_DBM: Attributes = RODATA.union(Attributes::DBM);
/// High-level API for managing MMU mappings.
pub struct PageTable {
idmap: IdMap,
}
-fn appended_payload_range() -> Range<usize> {
- let start = helpers::align_up(layout::binary_end(), helpers::SIZE_4KB).unwrap();
- // pvmfw is contained in a 2MiB region so the payload can't be larger than the 2MiB alignment.
- let end = helpers::align_up(start, helpers::SIZE_2MB).unwrap();
-
- start..end
-}
-
/// Region allocated for the stack.
pub fn stack_range() -> Range<usize> {
const STACK_PAGES: usize = 8;
@@ -58,18 +51,28 @@
}
impl PageTable {
- const ASID: usize = 1;
+ pub const ASID: usize = 1;
const ROOT_LEVEL: usize = 1;
+ /// Returns the memory range reserved for the appended payload.
+ pub fn appended_payload_range() -> Range<usize> {
+ let start = helpers::align_up(layout::binary_end(), helpers::SIZE_4KB).unwrap();
+ // pvmfw is contained in a 2MiB region so the payload can't be larger than the 2MiB alignment.
+ let end = helpers::align_up(start, helpers::SIZE_2MB).unwrap();
+ start..end
+ }
+
/// Creates an instance pre-populated with pvmfw's binary layout.
pub fn from_static_layout() -> Result<Self, MapError> {
let mut page_table = Self { idmap: IdMap::new(Self::ASID, Self::ROOT_LEVEL) };
+ // Stack and scratch ranges are explicitly zeroed and flushed before jumping to payload,
+ // so dirty state management can be omitted.
+ page_table.map_range(&layout::scratch_range(), DATA)?;
+ page_table.map_range(&stack_range(), DATA)?;
page_table.map_code(&layout::text_range())?;
- page_table.map_data(&layout::scratch_range())?;
- page_table.map_data(&stack_range())?;
page_table.map_rodata(&layout::rodata_range())?;
- page_table.map_data(&appended_payload_range())?;
+ page_table.map_data(&Self::appended_payload_range())?;
Ok(page_table)
}
@@ -87,7 +90,7 @@
}
pub fn map_data(&mut self, range: &Range<usize>) -> Result<(), MapError> {
- self.map_range(range, DATA)
+ self.map_range(range, DATA_DBM)
}
pub fn map_code(&mut self, range: &Range<usize>) -> Result<(), MapError> {