Merge "Add AVF debug policy documentation"
diff --git a/compos/composd/src/instance_manager.rs b/compos/composd/src/instance_manager.rs
index 2db13c7..98d4a1b 100644
--- a/compos/composd/src/instance_manager.rs
+++ b/compos/composd/src/instance_manager.rs
@@ -27,7 +27,7 @@
 use virtualizationservice::IVirtualizationService::IVirtualizationService;
 
 // Enough memory to complete odrefresh in the VM.
-const VM_MEMORY_MIB: i32 = 1280;
+const VM_MEMORY_MIB: i32 = 1024;
 
 pub struct InstanceManager {
     service: Strong<dyn IVirtualizationService>,
diff --git a/docs/debug/tracing.md b/docs/debug/tracing.md
index 7d7ea0c..ebc0ac3 100644
--- a/docs/debug/tracing.md
+++ b/docs/debug/tracing.md
@@ -32,8 +32,15 @@
 Consider first familiarizing yourself with Perfetto documentation for recording traces on Android:
 https://perfetto.dev/docs/quickstart/android-tracing.
 
-So far it is only possible to capture hypervisor trace events by providing the full trace config
-file to Perfetto. Here is the minimal
+The [record_android_trace](
+https://cs.android.com/android/platform/superproject/+/master:external/perfetto/tools/record_android_trace)
+script supports a shortcut to capture all hypervisor events that are known to Perfetto:
+
+```shell
+external/perfetto/tools/record_android_trace hyp -t 15s -b 32mb -o /tmp/hyp.pftrace
+```
+
+Alternatively, you can use a full trace config to capture hypervisor events. Example usage:
 
 ```shell
 cat<<EOF>config.pbtx
@@ -66,10 +73,65 @@
 
 #### Capturing hypervisor traces on QEMU
 
-TODO(b/271412868): fill in this section
+Perfetto supports capturing traces on Linux: https://perfetto.dev/docs/quickstart/linux-tracing.
+However, since pKVM hypervisor is only supported on arm64, you will need to cross-compile Perfetto
+binaries for linux-arm64 (unless you have an arm64 workstation).
 
-TODO(b/271412868): Stay tuned, more docs coming soon!
+1. Check out the Perfetto repository: https://perfetto.dev/docs/contributing/getting-started
+2. Follow https://perfetto.dev/docs/contributing/build-instructions#cross-compiling-for-linux-arm-64
+  to compile Perfetto binaries for arm64 architecture.
+3. Copy the tracebox binary to QEMU
+4. Run the `tracebox` binary on QEMU to capture traces; its interface is very similar to the
+`record_android_trace` script. For example, to capture all hypervisor events, run:
+```shell
+tracebox -t 15s -b 32mb hyp
+```
 
 ## Microdroid VM tracing
 
+IMPORTANT: Tracing is only supported for debuggable Microdroid VMs.
+
+### Capturing trace in Microdroid
+
+Starting with Android U, Microdroid contains Perfetto tracing binaries, which makes it possible to
+capture traces inside a Microdroid VM using the Perfetto stack. The commands used to capture traces
+on Android should work for a Microdroid VM as well, with the difference that Perfetto's tracing
+binaries are not enabled in Microdroid by default, so you need to manually start them by setting
+the `persist.traced.enable` system property to `1`.
+
+Here is a quick example of how to trace a Microdroid VM:
+
+1. First start your VM. For this example we are going to use
+`adb shell /apex/com.android.virt/bin/vm run-microdroid`.
+
+2. Set up an adb connection with the running VM:
+```shell
+adb forward tcp:9876 vsock:${CID}:5555
+adb connect localhost:9876
+adb -s localhost:9876 root
+```
+Where `${CID}` corresponds to the running Microdroid VM that you want to establish adb connection
+with. List of running VMs can be obtained by running `adb shell /apex/com.android.virt/bin/vm list`.
+Alternatively you can use `vm_shell` utility to connect to a running VM, i.e.: `vm_shell connect`.
+
+3. Start Perfetto daemons and capture trace
+```shell
+adb -s localhost:9876 shell setprop persist.traced.enable 1
+${ANDROID_BUILD_TOP}/external/perfetto/tools/record_android_trace \
+  -s localhost:9876 \
+  -o /tmp/microdroid-trace-file.pftrace \
+  -t 10s \
+  -b 32mb \
+  sched/sched_switch task/task_newtask sched/sched_process_exit
+```
+
+If you don't have the Android repo checked out, you can download the record_android_trace script by
+following these [instructions](
+https://perfetto.dev/docs/quickstart/android-tracing#recording-a-trace-through-the-cmdline)
+
+More documentation on Perfetto's tracing on Android is available here:
+https://perfetto.dev/docs/quickstart/android-tracing
+
+### Capturing Microdroid boot trace
+
 TODO(b/271412868): Stay tuned, more docs are coming soon!
diff --git a/libs/fdtpci/src/lib.rs b/libs/fdtpci/src/lib.rs
index e32e16d..96d98d6 100644
--- a/libs/fdtpci/src/lib.rs
+++ b/libs/fdtpci/src/lib.rs
@@ -197,24 +197,32 @@
     Ok(memory_address..memory_address + memory_size)
 }
 
+/// Encodes memory flags of a PCI range
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
-struct PciMemoryFlags(u32);
+pub struct PciMemoryFlags(pub u32);
 
 impl PciMemoryFlags {
+    /// Returns whether this PCI range is prefetchable
     pub fn prefetchable(self) -> bool {
         self.0 & 0x80000000 != 0
     }
 
+    /// Returns the type of this PCI range
     pub fn range_type(self) -> PciRangeType {
         PciRangeType::from((self.0 & 0x3000000) >> 24)
     }
 }
 
+/// Type of a PCI range
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
-enum PciRangeType {
+pub enum PciRangeType {
+    /// Range represents the PCI configuration space
     ConfigurationSpace,
+    /// Range is on IO space
     IoSpace,
+    /// Range is on 32-bit MMIO space
     Memory32,
+    /// Range is on 64-bit MMIO space
     Memory64,
 }
 
diff --git a/microdroid/Android.bp b/microdroid/Android.bp
index a2a4138..0abaf79 100644
--- a/microdroid/Android.bp
+++ b/microdroid/Android.bp
@@ -70,7 +70,6 @@
         "libartpalette-system",
 
         "apexd.microdroid",
-        "atrace",
         "debuggerd",
         "linker",
         "tombstoned.microdroid",
@@ -89,6 +88,12 @@
         "libvm_payload", // used by payload to interact with microdroid manager
 
         "prng_seeder_microdroid",
+
+        // Binaries required to capture traces in Microdroid.
+        "atrace",
+        "traced",
+        "traced_probes",
+        "perfetto",
     ] + microdroid_shell_and_utilities,
     multilib: {
         common: {
diff --git a/microdroid/init_debug_policy/Android.bp b/microdroid/init_debug_policy/Android.bp
index b56ef79..afc2e73 100644
--- a/microdroid/init_debug_policy/Android.bp
+++ b/microdroid/init_debug_policy/Android.bp
@@ -1,3 +1,7 @@
+package {
+    default_applicable_licenses: ["Android-Apache-2.0"],
+}
+
 rust_binary {
     name: "microdroid_init_debug_policy",
     srcs: ["src/init_debug_policy.rs"],
diff --git a/pvmfw/platform.dts b/pvmfw/platform.dts
index 056fa2f..127f69a 100644
--- a/pvmfw/platform.dts
+++ b/pvmfw/platform.dts
@@ -245,4 +245,11 @@
 		clock-names = "apb_pclk";
 		clocks = <&clk>;
 	};
+
+	vmwdt@3000 {
+		compatible = "qemu,vcpu-stall-detector";
+		reg = <0x00 0x3000 0x00 0x1000>;
+		clock-frequency = <10>;
+		timeout-sec = <8>;
+	};
 };
diff --git a/pvmfw/src/fdt.rs b/pvmfw/src/fdt.rs
index 793eaac..f4b0244 100644
--- a/pvmfw/src/fdt.rs
+++ b/pvmfw/src/fdt.rs
@@ -14,10 +14,19 @@
 
 //! High-level FDT functions.
 
+use crate::helpers::GUEST_PAGE_SIZE;
+use crate::RebootReason;
 use core::ffi::CStr;
+use core::num::NonZeroUsize;
 use core::ops::Range;
+use fdtpci::PciMemoryFlags;
+use fdtpci::PciRangeType;
+use libfdt::AddressRange;
+use libfdt::CellIterator;
 use libfdt::Fdt;
 use libfdt::FdtError;
+use log::error;
+use tinyvec::ArrayVec;
 
 /// Extract from /config the address range containing the pre-loaded kernel.
 pub fn kernel_range(fdt: &libfdt::Fdt) -> libfdt::Result<Option<Range<usize>>> {
@@ -51,6 +60,405 @@
     Ok(None)
 }
 
+/// Reads and validates the size and base address of memory, and returns the size
+fn parse_memory_node(fdt: &libfdt::Fdt) -> Result<NonZeroUsize, RebootReason> {
+    let memory_range = fdt
+        .memory()
+        // Actually, these checks are unnecessary because we read /memory node in entry.rs
+        // where exactly the same checks are done. We are repeating the same check just for
+        // extra safety (in case when the code structure changes in the future).
+        .map_err(|e| {
+            error!("Failed to get /memory from the DT: {e}");
+            RebootReason::InvalidFdt
+        })?
+        .ok_or_else(|| {
+            error!("Node /memory was found empty");
+            RebootReason::InvalidFdt
+        })?
+        .next()
+        .ok_or_else(|| {
+            error!("Failed to read memory range from the DT");
+            RebootReason::InvalidFdt
+        })?;
+
+    let base = memory_range.start;
+    if base as u64 != DeviceTreeInfo::RAM_BASE_ADDR {
+        error!("Memory base address {:#x} is not {:#x}", base, DeviceTreeInfo::RAM_BASE_ADDR);
+        return Err(RebootReason::InvalidFdt);
+    }
+
+    let size = memory_range.len(); // end is exclusive
+    if size % GUEST_PAGE_SIZE != 0 {
+        error!("Memory size {:#x} is not a multiple of page size {:#x}", size, GUEST_PAGE_SIZE);
+        return Err(RebootReason::InvalidFdt);
+    }
+    // In the u-boot implementation, we checked if base + size > u64::MAX, but we don't need that
+    // because memory() function uses checked_add when constructing the Range object. If an
+    // overflow happened, we should have gotten None from the next() call above and would have
+    // bailed already.
+
+    NonZeroUsize::new(size).ok_or_else(|| {
+        error!("Memory size can't be 0");
+        RebootReason::InvalidFdt
+    })
+}
+
+/// Read the number of CPUs
+fn parse_cpu_nodes(fdt: &libfdt::Fdt) -> Result<NonZeroUsize, RebootReason> {
+    let num = fdt
+        .compatible_nodes(CStr::from_bytes_with_nul(b"arm,arm-v8\0").unwrap())
+        .map_err(|e| {
+            error!("Failed to read compatible nodes \"arm,arm-v8\" from DT: {e}");
+            RebootReason::InvalidFdt
+        })?
+        .count();
+    NonZeroUsize::new(num).ok_or_else(|| {
+        error!("Number of CPU can't be 0");
+        RebootReason::InvalidFdt
+    })
+}
+
+#[derive(Debug)]
+#[allow(dead_code)] // TODO: remove this
+struct PciInfo {
+    ranges: [Range<u64>; 2],
+    num_irq: usize,
+}
+
+/// Read and validate PCI node
+fn parse_pci_nodes(fdt: &libfdt::Fdt) -> Result<PciInfo, RebootReason> {
+    let node = fdt
+        .compatible_nodes(CStr::from_bytes_with_nul(b"pci-host-cam-generic\0").unwrap())
+        .map_err(|e| {
+            error!("Failed to read compatible node \"pci-host-cam-generic\" from DT: {e}");
+            RebootReason::InvalidFdt
+        })?
+        .next()
+        .ok_or_else(|| {
+            // pvmfw requires at least one pci device (virtio-blk) for the instance disk. So,
+            // let's fail early.
+            error!("Compatible node \"pci-host-cam-generic\" doesn't exist");
+            RebootReason::InvalidFdt
+        })?;
+
+    let mut iter = node
+        .ranges::<(u32, u64), u64, u64>()
+        .map_err(|e| {
+            error!("Failed to read ranges from PCI node: {e}");
+            RebootReason::InvalidFdt
+        })?
+        .ok_or_else(|| {
+            error!("PCI node missing ranges property");
+            RebootReason::InvalidFdt
+        })?;
+
+    let range0 = iter.next().ok_or_else(|| {
+        error!("First range missing in PCI node");
+        RebootReason::InvalidFdt
+    })?;
+    let range0 = get_and_validate_pci_range(&range0)?;
+
+    let range1 = iter.next().ok_or_else(|| {
+        error!("Second range missing in PCI node");
+        RebootReason::InvalidFdt
+    })?;
+    let range1 = get_and_validate_pci_range(&range1)?;
+
+    let num_irq = count_and_validate_pci_irq_masks(&node)?;
+
+    validate_pci_irq_maps(&node)?;
+
+    Ok(PciInfo { ranges: [range0, range1], num_irq })
+}
+
+fn get_and_validate_pci_range(
+    range: &AddressRange<(u32, u64), u64, u64>,
+) -> Result<Range<u64>, RebootReason> {
+    let mem_flags = PciMemoryFlags(range.addr.0);
+    let range_type = mem_flags.range_type();
+    let prefetchable = mem_flags.prefetchable();
+    let bus_addr = range.addr.1;
+    let cpu_addr = range.parent_addr;
+    let size = range.size;
+    if range_type != PciRangeType::Memory64 {
+        error!("Invalid range type {:?} for bus address {:#x} in PCI node", range_type, bus_addr);
+        return Err(RebootReason::InvalidFdt);
+    }
+    if prefetchable {
+        error!("PCI bus address {:#x} in PCI node is prefetchable", bus_addr);
+        return Err(RebootReason::InvalidFdt);
+    }
+    // Enforce ID bus-to-cpu mappings, as used by crosvm.
+    if bus_addr != cpu_addr {
+        error!("PCI bus address: {:#x} is different from CPU address: {:#x}", bus_addr, cpu_addr);
+        return Err(RebootReason::InvalidFdt);
+    }
+    let bus_end = bus_addr.checked_add(size).ok_or_else(|| {
+        error!("PCI address range size {:#x} too big", size);
+        RebootReason::InvalidFdt
+    })?;
+    Ok(bus_addr..bus_end)
+}
+
+/// Iterator that takes N cells as a chunk
+struct CellChunkIterator<'a, const N: usize> {
+    cells: CellIterator<'a>,
+}
+
+impl<'a, const N: usize> CellChunkIterator<'a, N> {
+    fn new(cells: CellIterator<'a>) -> Self {
+        Self { cells }
+    }
+}
+
+impl<'a, const N: usize> Iterator for CellChunkIterator<'a, N> {
+    type Item = [u32; N];
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut ret: Self::Item = [0; N];
+        for i in ret.iter_mut() {
+            *i = self.cells.next()?;
+        }
+        Some(ret)
+    }
+}
+
+fn count_and_validate_pci_irq_masks(pci_node: &libfdt::FdtNode) -> Result<usize, RebootReason> {
+    const IRQ_MASK_CELLS: usize = 4;
+    const IRQ_MASK_ADDR_HI: u32 = 0xf800;
+    const IRQ_MASK_ADDR_ME: u32 = 0x0;
+    const IRQ_MASK_ADDR_LO: u32 = 0x0;
+    const IRQ_MASK_ANY_IRQ: u32 = 0x7;
+    const EXPECTED: [u32; IRQ_MASK_CELLS] =
+        [IRQ_MASK_ADDR_HI, IRQ_MASK_ADDR_ME, IRQ_MASK_ADDR_LO, IRQ_MASK_ANY_IRQ];
+    let name = CStr::from_bytes_with_nul(b"interrupt-map-mask\0").unwrap();
+    let mut irq_count: usize = 0;
+    for irq_mask in CellChunkIterator::<IRQ_MASK_CELLS>::new(
+        pci_node
+            .getprop_cells(name)
+            .map_err(|e| {
+                error!("Failed to read interrupt-map-mask property: {e}");
+                RebootReason::InvalidFdt
+            })?
+            .ok_or_else(|| {
+                error!("PCI node missing interrupt-map-mask property");
+                RebootReason::InvalidFdt
+            })?,
+    ) {
+        if irq_mask != EXPECTED {
+            error!("invalid irq mask {:?}", irq_mask);
+            return Err(RebootReason::InvalidFdt);
+        }
+        irq_count += 1;
+    }
+    Ok(irq_count)
+}
+
+fn validate_pci_irq_maps(pci_node: &libfdt::FdtNode) -> Result<(), RebootReason> {
+    const IRQ_MAP_CELLS: usize = 10;
+    const PCI_DEVICE_IDX: usize = 11;
+    const PCI_IRQ_ADDR_ME: u32 = 0;
+    const PCI_IRQ_ADDR_LO: u32 = 0;
+    const PCI_IRQ_INTC: u32 = 1;
+    const AARCH64_IRQ_BASE: u32 = 4; // from external/crosvm/aarch64/src/lib.rs
+    const GIC_SPI: u32 = 0;
+    const IRQ_TYPE_LEVEL_HIGH: u32 = 4;
+
+    let mut phys_hi: u32 = 0;
+    let mut irq_nr = AARCH64_IRQ_BASE;
+
+    let name = CStr::from_bytes_with_nul(b"interrupt-map\0").unwrap();
+    for irq_map in CellChunkIterator::<IRQ_MAP_CELLS>::new(
+        pci_node
+            .getprop_cells(name)
+            .map_err(|e| {
+                error!("Failed to read interrupt-map property: {e}");
+                RebootReason::InvalidFdt
+            })?
+            .ok_or_else(|| {
+                error!("PCI node missing interrupt-map property");
+                RebootReason::InvalidFdt
+            })?,
+    ) {
+        phys_hi += 0x1 << PCI_DEVICE_IDX;
+
+        let pci_addr = (irq_map[0], irq_map[1], irq_map[2]);
+        let pci_irq_number = irq_map[3];
+        let _controller_phandle = irq_map[4]; // skipped.
+        let gic_addr = (irq_map[5], irq_map[6]); // address-cells is <2> for GIC
+                                                 // interrupt-cells is <3> for GIC
+        let gic_peripheral_interrupt_type = irq_map[7];
+        let gic_irq_number = irq_map[8];
+        let gic_irq_type = irq_map[9];
+
+        let expected_pci_addr = (phys_hi, PCI_IRQ_ADDR_ME, PCI_IRQ_ADDR_LO);
+
+        if pci_addr != expected_pci_addr {
+            error!("PCI device address {:#x} {:#x} {:#x} in interrupt-map is different from expected address \
+                   {:#x} {:#x} {:#x}",
+                   pci_addr.0, pci_addr.1, pci_addr.2, expected_pci_addr.0, expected_pci_addr.1, expected_pci_addr.2);
+            return Err(RebootReason::InvalidFdt);
+        }
+        if pci_irq_number != PCI_IRQ_INTC {
+            error!(
+                "PCI INT# {:#x} in interrupt-map is different from expected value {:#x}",
+                pci_irq_number, PCI_IRQ_INTC
+            );
+            return Err(RebootReason::InvalidFdt);
+        }
+        if gic_addr != (0, 0) {
+            error!(
+                "GIC address {:#x} {:#x} in interrupt-map is different from expected address \
+                   {:#x} {:#x}",
+                gic_addr.0, gic_addr.1, 0, 0
+            );
+            return Err(RebootReason::InvalidFdt);
+        }
+        if gic_peripheral_interrupt_type != GIC_SPI {
+            error!("GIC peripheral interrupt type {:#x} in interrupt-map is different from expected value \
+                   {:#x}", gic_peripheral_interrupt_type, GIC_SPI);
+            return Err(RebootReason::InvalidFdt);
+        }
+        if gic_irq_number != irq_nr {
+            error!(
+                "GIC irq number {:#x} in interrupt-map is unexpected. Expected {:#x}",
+                gic_irq_number, irq_nr
+            );
+            return Err(RebootReason::InvalidFdt);
+        }
+        irq_nr += 1; // move to next irq
+        if gic_irq_type != IRQ_TYPE_LEVEL_HIGH {
+            error!(
+                "IRQ type in {:#x} is invalid. Must be LEVEL_HIGH {:#x}",
+                gic_irq_type, IRQ_TYPE_LEVEL_HIGH
+            );
+            return Err(RebootReason::InvalidFdt);
+        }
+    }
+    Ok(())
+}
+
+#[derive(Default, Debug)]
+#[allow(dead_code)] // TODO: remove this
+pub struct SerialInfo {
+    addrs: ArrayVec<[u64; Self::SERIAL_MAX_COUNT]>,
+}
+
+impl SerialInfo {
+    const SERIAL_MAX_COUNT: usize = 4;
+}
+
+fn parse_serial_nodes(fdt: &libfdt::Fdt) -> Result<SerialInfo, RebootReason> {
+    let mut ret: SerialInfo = Default::default();
+    for (i, node) in fdt
+        .compatible_nodes(CStr::from_bytes_with_nul(b"ns16550a\0").unwrap())
+        .map_err(|e| {
+            error!("Failed to read compatible nodes \"ns16550a\" from DT: {e}");
+            RebootReason::InvalidFdt
+        })?
+        .enumerate()
+    {
+        if i >= ret.addrs.capacity() {
+            error!("Too many serials: {i}");
+            return Err(RebootReason::InvalidFdt);
+        }
+        let reg = node
+            .reg()
+            .map_err(|e| {
+                error!("Failed to read reg property from \"ns16550a\" node: {e}");
+                RebootReason::InvalidFdt
+            })?
+            .ok_or_else(|| {
+                error!("No reg property in \"ns16550a\" node");
+                RebootReason::InvalidFdt
+            })?
+            .next()
+            .ok_or_else(|| {
+                error!("No value in reg property of \"ns16550a\" node");
+                RebootReason::InvalidFdt
+            })?;
+        ret.addrs.push(reg.addr);
+    }
+    Ok(ret)
+}
+
+#[derive(Debug)]
+#[allow(dead_code)] // TODO: remove this
+pub struct SwiotlbInfo {
+    size: u64,
+    align: u64,
+}
+
+fn parse_swiotlb_nodes(fdt: &libfdt::Fdt) -> Result<SwiotlbInfo, RebootReason> {
+    let node = fdt
+        .compatible_nodes(CStr::from_bytes_with_nul(b"restricted-dma-pool\0").unwrap())
+        .map_err(|e| {
+            error!("Failed to read compatible nodes \"restricted-dma-pool\" from DT: {e}");
+            RebootReason::InvalidFdt
+        })?
+        .next()
+        .ok_or_else(|| {
+            error!("No compatible node \"restricted-dma-pool\" in DT");
+            RebootReason::InvalidFdt
+        })?;
+    let size = node
+        .getprop_u64(CStr::from_bytes_with_nul(b"size\0").unwrap())
+        .map_err(|e| {
+            error!("Failed to read \"size\" property of \"restricted-dma-pool\": {e}");
+            RebootReason::InvalidFdt
+        })?
+        .ok_or_else(|| {
+            error!("No \"size\" property in \"restricted-dma-pool\"");
+            RebootReason::InvalidFdt
+        })?;
+
+    let align = node
+        .getprop_u64(CStr::from_bytes_with_nul(b"alignment\0").unwrap())
+        .map_err(|e| {
+            error!("Failed to read \"alignment\" property of \"restricted-dma-pool\": {e}");
+            RebootReason::InvalidFdt
+        })?
+        .ok_or_else(|| {
+            error!("No \"alignment\" property in \"restricted-dma-pool\"");
+            RebootReason::InvalidFdt
+        })?;
+
+    if size == 0 || (size % GUEST_PAGE_SIZE as u64) != 0 {
+        error!("Invalid swiotlb size {:#x}", size);
+        return Err(RebootReason::InvalidFdt);
+    }
+
+    if (align % GUEST_PAGE_SIZE as u64) != 0 {
+        error!("Invalid swiotlb alignment {:#x}", align);
+        return Err(RebootReason::InvalidFdt);
+    }
+
+    Ok(SwiotlbInfo { size, align })
+}
+
+#[derive(Debug)]
+#[allow(dead_code)] // TODO: remove this
+pub struct DeviceTreeInfo {
+    memory_size: NonZeroUsize,
+    num_cpu: NonZeroUsize,
+    pci_info: PciInfo,
+    serial_info: SerialInfo,
+    swiotlb_info: SwiotlbInfo,
+}
+
+impl DeviceTreeInfo {
+    const RAM_BASE_ADDR: u64 = 0x8000_0000;
+}
+
+pub fn parse_device_tree(fdt: &libfdt::Fdt) -> Result<DeviceTreeInfo, RebootReason> {
+    Ok(DeviceTreeInfo {
+        memory_size: parse_memory_node(fdt)?,
+        num_cpu: parse_cpu_nodes(fdt)?,
+        pci_info: parse_pci_nodes(fdt)?,
+        serial_info: parse_serial_nodes(fdt)?,
+        swiotlb_info: parse_swiotlb_nodes(fdt)?,
+    })
+}
+
 /// Modifies the input DT according to the fields of the configuration.
 pub fn modify_for_next_stage(
     fdt: &mut Fdt,
diff --git a/pvmfw/src/heap.rs b/pvmfw/src/heap.rs
index 435a6ff..eea2e98 100644
--- a/pvmfw/src/heap.rs
+++ b/pvmfw/src/heap.rs
@@ -53,7 +53,15 @@
 
 #[no_mangle]
 unsafe extern "C" fn malloc(size: usize) -> *mut c_void {
-    malloc_(size).map_or(ptr::null_mut(), |p| p.cast::<c_void>().as_ptr())
+    malloc_(size, false).map_or(ptr::null_mut(), |p| p.cast::<c_void>().as_ptr())
+}
+
+#[no_mangle]
+unsafe extern "C" fn calloc(nmemb: usize, size: usize) -> *mut c_void {
+    let Some(size) = nmemb.checked_mul(size) else {
+        return ptr::null_mut()
+    };
+    malloc_(size, true).map_or(ptr::null_mut(), |p| p.cast::<c_void>().as_ptr())
 }
 
 #[no_mangle]
@@ -67,9 +75,11 @@
     }
 }
 
-unsafe fn malloc_(size: usize) -> Option<NonNull<usize>> {
+unsafe fn malloc_(size: usize, zeroed: bool) -> Option<NonNull<usize>> {
     let size = NonZeroUsize::new(size)?.checked_add(mem::size_of::<usize>())?;
-    let ptr = HEAP_ALLOCATOR.alloc(malloc_layout(size)?);
+    let layout = malloc_layout(size)?;
+    let ptr =
+        if zeroed { HEAP_ALLOCATOR.alloc_zeroed(layout) } else { HEAP_ALLOCATOR.alloc(layout) };
     let ptr = NonNull::new(ptr)?.cast::<usize>().as_ptr();
     *ptr = size.get();
     NonNull::new(ptr.offset(1))
diff --git a/pvmfw/src/main.rs b/pvmfw/src/main.rs
index 577ad6e..e1ecac4 100644
--- a/pvmfw/src/main.rs
+++ b/pvmfw/src/main.rs
@@ -45,6 +45,7 @@
 use crate::dice::PartialInputs;
 use crate::entry::RebootReason;
 use crate::fdt::modify_for_next_stage;
+use crate::fdt::parse_device_tree;
 use crate::helpers::flush;
 use crate::helpers::GUEST_PAGE_SIZE;
 use crate::instance::get_or_generate_instance_salt;
@@ -83,6 +84,11 @@
     })?;
     trace!("BCC: {bcc_handover:x?}");
 
+    // This parsing step includes validation. So this effectively ensures that the DT can't be
+    // abused by the host to attack pvmfw in pci::initialize below.
+    let device_tree_info = parse_device_tree(fdt)?;
+    debug!("Device tree info: {:?}", device_tree_info);
+
     // Set up PCI bus for VirtIO devices.
     let pci_info = PciInfo::from_fdt(fdt).map_err(handle_pci_error)?;
     debug!("PCI: {:#x?}", pci_info);