VirtualizationService notifies its client when a ramdump is available

When a kernel panic occurs in a pVM and the ramdump is enabled there, a
ramdump file is generated.

This file should eventually be consumed by the client (the owner of the
VM) for further analysis. VirtualizationService lets its client know
that a ramdump has been created and provides access to it.

Specifically, the end-to-end flow is as follows:

1) When starting a VM, an empty file is created under the VM-specific
   directory (`/data/misc/virtualizationservice/<cid>/ramdump`).

2) The file becomes a backing store for a virtio-console device
   (/dev/hvc3).

3) When a kernel panic occurs, the `crashdump` binary is executed and
   `/proc/vmcore` is written to `/dev/hvc3`. After the dump is done,
   the VM triggers a reboot, which kills crosvm.

4) VirtualizationService is notified of the exit of crosvm. It then
   checks the size of the ramdump file. If the file is not empty, it
   can assume that a ramdump was generated in the pVM.

5) VirtualizationService then notifies the client of the event via
   `IVirtualMachineCallback.onRamdump(ParcelFileDescriptor)`, where the
   parcel file descriptor is the handle to the ramdump file.

6) Client reads the ramdump file.

This change also adds a `--ramdump` option to the `vm` tool to designate
the path where the ramdump is saved.

Bug: 238278104
Test: follow the steps. Automated tests will be added in a followup CL
1) Run a pVM:
adb shell /apex/com.android.virt/bin/vm run-app --debug full --mem 300 \
--ramdump /data/local/tmp/virt/myramdump \
/data/local/tmp/virt/MicrodroidDemoApp.apk \
/data/local/tmp/virt/apk.idsig /data/local/tmp/virt/instance.img \
assets/vm_config.json

2) Adb shell into the VM
adb forward tcp:8000 vsock:10:5555
adb connect localhost:8000
adb -s localhost:8000 root
adb -s localhost:8000 shell

3) Load the crashdump kernel
/system/bin/kexec \
/system/etc/microdroid_crashdump_kernel \
/system/etc/microdroid_crashdump_initrd.img \
"1 rdinit=/bin/crashdump nr_cpus=1 reset_devices console=hvc0 earlycon=uart8250,mmio,0x3f8"

4) Trigger a crash
echo c > /proc/sysrq-trigger

5) Check the ramdump at /data/local/tmp/virt/myramdump

Change-Id: I1f90537961632708ca5a889cdd53390030518bb8
diff --git a/compos/common/compos_client.rs b/compos/common/compos_client.rs
index 1fd939d..23cd505 100644
--- a/compos/common/compos_client.rs
+++ b/compos/common/compos_client.rs
@@ -235,6 +235,11 @@
         log::warn!("VM error, cid = {}, error code = {}, message = {}", cid, error_code, message,);
         Ok(())
     }
+
+    fn onRamdump(&self, _cid: i32, _ramdump: &ParcelFileDescriptor) -> BinderResult<()> {
+        // TODO(b/238295267) send this to tombstone?
+        Ok(())
+    }
 }
 
 fn start_logging(pfd: &ParcelFileDescriptor) -> Result<()> {
diff --git a/demo/java/com/android/microdroid/demo/MainActivity.java b/demo/java/com/android/microdroid/demo/MainActivity.java
index e53f95d..747e98c 100644
--- a/demo/java/com/android/microdroid/demo/MainActivity.java
+++ b/demo/java/com/android/microdroid/demo/MainActivity.java
@@ -278,6 +278,13 @@
                             mService.shutdownNow();
                             mStatus.postValue(VirtualMachine.Status.STOPPED);
                         }
+
+                        @Override
+                        public void onRamdump(VirtualMachine vm, ParcelFileDescriptor ramdump) {
+                            if (!mService.isShutdown()) {
+                                mPayloadOutput.postValue("(Kernel panic. Ramdump created)");
+                            }
+                        }
                     };
 
             try {
diff --git a/javalib/src/android/system/virtualmachine/VirtualMachine.java b/javalib/src/android/system/virtualmachine/VirtualMachine.java
index de44b63..8d74f5e 100644
--- a/javalib/src/android/system/virtualmachine/VirtualMachine.java
+++ b/javalib/src/android/system/virtualmachine/VirtualMachine.java
@@ -445,6 +445,11 @@
                                 executeCallback((cb) -> cb.onDied(VirtualMachine.this, reason));
                             }
                         }
+                        @Override
+                        public void onRamdump(int cid, ParcelFileDescriptor ramdump) {
+                            executeCallback(
+                                    (cb) -> cb.onRamdump(VirtualMachine.this, ramdump));
+                        }
                     }
             );
             service.asBinder().linkToDeath(deathRecipient, 0);
diff --git a/javalib/src/android/system/virtualmachine/VirtualMachineCallback.java b/javalib/src/android/system/virtualmachine/VirtualMachineCallback.java
index 54d0701..a37c15b 100644
--- a/javalib/src/android/system/virtualmachine/VirtualMachineCallback.java
+++ b/javalib/src/android/system/virtualmachine/VirtualMachineCallback.java
@@ -142,4 +142,7 @@
 
     /** Called when the VM died. */
     void onDied(@NonNull VirtualMachine vm, @DeathReason int reason);
+
+    /** Called when kernel panic occurs and as a result ramdump is generated from the VM. */
+    void onRamdump(@NonNull VirtualMachine vm, @NonNull ParcelFileDescriptor ramdump);
 }
diff --git a/tests/benchmark/src/java/com/android/microdroid/benchmark/MicrodroidBenchmarks.java b/tests/benchmark/src/java/com/android/microdroid/benchmark/MicrodroidBenchmarks.java
index 5999af7..864d2d5 100644
--- a/tests/benchmark/src/java/com/android/microdroid/benchmark/MicrodroidBenchmarks.java
+++ b/tests/benchmark/src/java/com/android/microdroid/benchmark/MicrodroidBenchmarks.java
@@ -201,6 +201,9 @@
         public void onDied(VirtualMachine vm, @DeathReason int reason) {
             mExecutorService.shutdown();
         }
+
+        @Override
+        public void onRamdump(VirtualMachine vm, ParcelFileDescriptor ramdump) {}
     }
 
     private static class BootResult {
diff --git a/tests/testapk/src/java/com/android/microdroid/test/MicrodroidTests.java b/tests/testapk/src/java/com/android/microdroid/test/MicrodroidTests.java
index 1f0e107..3a874c4 100644
--- a/tests/testapk/src/java/com/android/microdroid/test/MicrodroidTests.java
+++ b/tests/testapk/src/java/com/android/microdroid/test/MicrodroidTests.java
@@ -204,6 +204,9 @@
         public void onDied(VirtualMachine vm, @DeathReason int reason) {
             mExecutorService.shutdown();
         }
+
+        @Override
+        public void onRamdump(VirtualMachine vm, ParcelFileDescriptor ramdump) {}
     }
 
     private static final int MIN_MEM_ARM64 = 150;
diff --git a/virtualizationservice/aidl/android/system/virtualizationservice/IVirtualMachineCallback.aidl b/virtualizationservice/aidl/android/system/virtualizationservice/IVirtualMachineCallback.aidl
index 12a056c..6c8eb4a 100644
--- a/virtualizationservice/aidl/android/system/virtualizationservice/IVirtualMachineCallback.aidl
+++ b/virtualizationservice/aidl/android/system/virtualizationservice/IVirtualMachineCallback.aidl
@@ -53,4 +53,9 @@
      * also use `link_to_death` to handle that.
      */
     void onDied(int cid, in DeathReason reason);
+
+    /**
+     * Called when kernel panic occurs and as a result ramdump is generated from the VM.
+     */
+    void onRamdump(int cid, in ParcelFileDescriptor ramdump);
 }
diff --git a/virtualizationservice/src/aidl.rs b/virtualizationservice/src/aidl.rs
index d8f0b2e..af5029a 100644
--- a/virtualizationservice/src/aidl.rs
+++ b/virtualizationservice/src/aidl.rs
@@ -464,6 +464,19 @@
             })
             .collect::<Result<Vec<DiskFile>, _>>()?;
 
+        // Creating this ramdump file unconditionally is not harmful as ramdump will be created
+        // only when the VM is configured as such. `ramdump_write` is sent to crosvm and will
+        // be the backing store for the /dev/hvc3 where VM will emit ramdump to. `ramdump_read`
+        // will be sent back to the client (i.e. the VM owner) for readout.
+        let ramdump_path = temporary_directory.join("ramdump");
+        let ramdump = prepare_ramdump_file(&ramdump_path).map_err(|e| {
+            error!("Failed to prepare ramdump file: {}", e);
+            new_binder_exception(
+                ExceptionCode::SERVICE_SPECIFIC,
+                format!("Failed to prepare ramdump file: {}", e),
+            )
+        })?;
+
         // Actually start the VM.
         let crosvm_config = CrosvmConfig {
             cid,
@@ -479,6 +492,7 @@
             task_profiles: config.taskProfiles.clone(),
             console_fd,
             log_fd,
+            ramdump: Some(ramdump),
             indirect_files,
             platform_version: parse_platform_version_req(&config.platformVersion)?,
             detect_hangup: is_app_config,
@@ -558,6 +572,11 @@
     part.flush()
 }
 
+fn prepare_ramdump_file(ramdump_path: &Path) -> Result<File> {
+    File::create(&ramdump_path)
+        .context(format!("Failed to create ramdump file {:?}", &ramdump_path))
+}
+
 /// Given the configuration for a disk image, assembles the `DiskFile` to pass to crosvm.
 ///
 /// This may involve assembling a composite disk from a set of partition images.
@@ -883,6 +902,17 @@
         }
     }
 
+    /// Call all registered callbacks to say that there was a ramdump to download.
+    pub fn callback_on_ramdump(&self, cid: Cid, ramdump: File) {
+        let callbacks = &*self.0.lock().unwrap();
+        let pfd = ParcelFileDescriptor::new(ramdump);
+        for callback in callbacks {
+            if let Err(e) = callback.onRamdump(cid as i32, &pfd) {
+                error!("Error notifying ramdump of VM CID {}: {}", cid, e);
+            }
+        }
+    }
+
     /// Add a new callback to the set.
     fn add(&self, callback: Strong<dyn IVirtualMachineCallback>) {
         self.0.lock().unwrap().push(callback);
diff --git a/virtualizationservice/src/crosvm.rs b/virtualizationservice/src/crosvm.rs
index 23719a7..4c0b25e 100644
--- a/virtualizationservice/src/crosvm.rs
+++ b/virtualizationservice/src/crosvm.rs
@@ -16,7 +16,7 @@
 
 use crate::aidl::VirtualMachineCallbacks;
 use crate::Cid;
-use anyhow::{bail, Error};
+use anyhow::{bail, Context, Error};
 use command_fds::CommandFdExt;
 use lazy_static::lazy_static;
 use log::{debug, error, info};
@@ -81,6 +81,7 @@
     pub task_profiles: Vec<String>,
     pub console_fd: Option<File>,
     pub log_fd: Option<File>,
+    pub ramdump: Option<File>,
     pub indirect_files: Vec<File>,
     pub platform_version: VersionReq,
     pub detect_hangup: bool,
@@ -272,6 +273,7 @@
             Cow::from(s)
         };
 
+        self.handle_ramdump().unwrap_or_else(|e| error!("Error handling ramdump: {}", e));
         self.callbacks.callback_on_died(self.cid, death_reason(&result, &failure_string));
 
         // Delete temporary files.
@@ -313,6 +315,18 @@
             }
         }
     }
+
+    /// Checks if ramdump has been created. If so, send a notification to the user with the handle
+    /// to read the ramdump.
+    fn handle_ramdump(&self) -> Result<(), Error> {
+        let ramdump_path = self.temporary_directory.join("ramdump");
+        if std::fs::metadata(&ramdump_path)?.len() > 0 {
+            let ramdump = File::open(&ramdump_path)
+                .context(format!("Failed to open ramdump {:?} for reading", &ramdump_path))?;
+            self.callbacks.callback_on_ramdump(self.cid, ramdump);
+        }
+        Ok(())
+    }
 }
 
 fn death_reason(result: &Result<ExitStatus, io::Error>, failure_reason: &str) -> DeathReason {
@@ -412,6 +426,7 @@
     let console_arg = format_serial_arg(&mut preserved_fds, &config.console_fd);
     let log_arg = format_serial_arg(&mut preserved_fds, &config.log_fd);
     let failure_serial_path = add_preserved_fd(&mut preserved_fds, &failure_pipe_write);
+    let ramdump_arg = format_serial_arg(&mut preserved_fds, &config.ramdump);
 
     // Warning: Adding more serial devices requires you to shift the PCI device ID of the boot
     // disks in bootconfig.x86_64. This is because x86 crosvm puts serial devices and the block
@@ -427,6 +442,8 @@
     command.arg("--serial=type=sink,hardware=virtio-console,num=2");
     // /dev/hvc2
     command.arg(format!("--serial={},hardware=virtio-console,num=3", &log_arg));
+    // /dev/hvc3
+    command.arg(format!("--serial={},hardware=virtio-console,num=4", &ramdump_arg));
 
     if let Some(bootloader) = &config.bootloader {
         command.arg("--bios").arg(add_preserved_fd(&mut preserved_fds, bootloader));
diff --git a/vm/src/main.rs b/vm/src/main.rs
index 8450b41..60786ac 100644
--- a/vm/src/main.rs
+++ b/vm/src/main.rs
@@ -67,6 +67,10 @@
         #[structopt(long)]
         log: Option<PathBuf>,
 
+        /// Path to file where ramdump is recorded on kernel panic
+        #[structopt(long)]
+        ramdump: Option<PathBuf>,
+
         /// Debug level of the VM. Supported values: "none" (default), "app_only", and "full".
         #[structopt(long, default_value = "none", parse(try_from_str=parse_debug_level))]
         debug: DebugLevel,
@@ -198,6 +202,7 @@
             daemonize,
             console,
             log,
+            ramdump,
             debug,
             protected,
             mem,
@@ -214,6 +219,7 @@
             daemonize,
             console.as_deref(),
             log.as_deref(),
+            ramdump.as_deref(),
             debug,
             protected,
             mem,
diff --git a/vm/src/run.rs b/vm/src/run.rs
index ca71665..44eb27a 100644
--- a/vm/src/run.rs
+++ b/vm/src/run.rs
@@ -49,6 +49,7 @@
     daemonize: bool,
     console_path: Option<&Path>,
     log_path: Option<&Path>,
+    ramdump_path: Option<&Path>,
     debug_level: DebugLevel,
     protected: bool,
     mem: Option<u32>,
@@ -115,6 +116,7 @@
         daemonize,
         console_path,
         log_path,
+        ramdump_path,
     )
 }
 
@@ -149,6 +151,7 @@
         daemonize,
         console_path,
         log_path,
+        /* ramdump_path */ None,
     )
 }
 
@@ -171,6 +174,7 @@
     daemonize: bool,
     console_path: Option<&Path>,
     log_path: Option<&Path>,
+    ramdump_path: Option<&Path>,
 ) -> Result<(), Error> {
     let console = if let Some(console_path) = console_path {
         Some(
@@ -214,12 +218,27 @@
         // Wait until the VM or VirtualizationService dies. If we just returned immediately then the
         // IVirtualMachine Binder object would be dropped and the VM would be killed.
         let death_reason = vm.wait_for_death();
+
+        if let Some(path) = ramdump_path {
+            save_ramdump_if_available(path, &vm)?;
+        }
         println!("{}", death_reason);
     }
 
     Ok(())
 }
 
+fn save_ramdump_if_available(path: &Path, vm: &VmInstance) -> Result<(), Error> {
+    if let Some(mut ramdump) = vm.get_ramdump() {
+        let mut file =
+            File::create(path).context(format!("Failed to create ramdump file {:?}", path))?;
+        let size = std::io::copy(&mut ramdump, &mut file)
+            .context(format!("Failed to save ramdump to file {:?}", path))?;
+        eprintln!("Ramdump ({} bytes) saved to {:?}", size, path);
+    }
+    Ok(())
+}
+
 fn parse_extra_apk_list(apk: &Path, config_path: &str) -> Result<Vec<String>, Error> {
     let mut archive = ZipArchive::new(File::open(apk)?)?;
     let config_file = archive.by_name(config_path)?;
@@ -268,6 +287,11 @@
         Ok(())
     }
 
+    fn onRamdump(&self, _cid: i32, _stream: &ParcelFileDescriptor) -> BinderResult<()> {
+        // Do nothing. We get ramdump from the vmclient library.
+        Ok(())
+    }
+
     fn onDied(&self, _cid: i32, _reason: DeathReason) -> BinderResult<()> {
         Ok(())
     }
diff --git a/vmclient/src/lib.rs b/vmclient/src/lib.rs
index d182b60..867c3a7 100644
--- a/vmclient/src/lib.rs
+++ b/vmclient/src/lib.rs
@@ -143,6 +143,11 @@
 
         FromIBinder::try_from(ibinder).map_err(GetServiceError::WrongServiceType)
     }
+
+    /// Get ramdump
+    pub fn get_ramdump(&self) -> Option<File> {
+        self.state.get_ramdump()
+    }
 }
 
 impl Debug for VmInstance {
@@ -170,6 +175,7 @@
 struct VmState {
     death_reason: Option<DeathReason>,
     reported_state: VirtualMachineState,
+    ramdump: Option<File>,
 }
 
 impl Monitor<VmState> {
@@ -186,6 +192,14 @@
         self.state.lock().unwrap().reported_state = state;
         self.cv.notify_all();
     }
+
+    fn set_ramdump(&self, ramdump: File) {
+        self.state.lock().unwrap().ramdump = Some(ramdump);
+    }
+
+    fn get_ramdump(&self) -> Option<File> {
+        self.state.lock().unwrap().ramdump.as_ref().and_then(|f| f.try_clone().ok())
+    }
 }
 
 #[derive(Debug)]
@@ -220,6 +234,12 @@
         Ok(())
     }
 
+    fn onRamdump(&self, _cid: i32, ramdump: &ParcelFileDescriptor) -> BinderResult<()> {
+        let ramdump: File = ramdump.as_ref().try_clone().unwrap();
+        self.state.set_ramdump(ramdump);
+        Ok(())
+    }
+
     fn onDied(&self, _cid: i32, reason: AidlDeathReason) -> BinderResult<()> {
         self.state.notify_death(reason.into());
         Ok(())