Move virtmgr files to their own directory

Create virtualizationmanager/ folder, move the corresponding files into
it and create build rules.

We also remove now unused dependencies of virtualizationservice.

Bug: 245727626
Test: atest -p packages/modules/Virtualization:avf-presubmit
Change-Id: If33e88bdcdac89e1d451589463ce21e11d6aafc8
diff --git a/virtualizationmanager/Android.bp b/virtualizationmanager/Android.bp
new file mode 100644
index 0000000..a436cea
--- /dev/null
+++ b/virtualizationmanager/Android.bp
@@ -0,0 +1,82 @@
+package {
+    default_applicable_licenses: ["Android-Apache-2.0"],
+}
+
+rust_defaults {
+    name: "virtualizationmanager_defaults",
+    crate_name: "virtualizationmanager",
+    edition: "2021",
+    // Only build on targets which crosvm builds on.
+    enabled: false,
+    target: {
+        android64: {
+            compile_multilib: "64",
+            enabled: true,
+        },
+        linux_bionic_arm64: {
+            enabled: true,
+        },
+    },
+    prefer_rlib: true,
+    rustlibs: [
+        "android.system.virtualizationcommon-rust",
+        "android.system.virtualizationservice-rust",
+        "android.system.virtualizationservice_internal-rust",
+        "android.system.virtualmachineservice-rust",
+        "android.os.permissions_aidl-rust",
+        "libandroid_logger",
+        "libanyhow",
+        "libapkverify",
+        "libbase_rust",
+        "libbinder_rs",
+        "libclap",
+        "libcommand_fds",
+        "libdisk",
+        "liblazy_static",
+        "liblibc",
+        "liblog_rust",
+        "libmicrodroid_metadata",
+        "libmicrodroid_payload_config",
+        "libnested_virt",
+        "libnix",
+        "libonce_cell",
+        "libregex",
+        "librpcbinder_rs",
+        "librustutils",
+        "libsemver",
+        "libselinux_bindgen",
+        "libserde",
+        "libserde_json",
+        "libserde_xml_rs",
+        "libshared_child",
+        "libstatslog_virtualization_rust",
+        "libtombstoned_client_rust",
+        "libvm_control",
+        "libvmconfig",
+        "libzip",
+        "libvsock",
+        // TODO(b/202115393) stabilize the interface
+        "packagemanager_aidl-rust",
+    ],
+    shared_libs: [
+        "libbinder_rpc_unstable",
+        "libselinux",
+    ],
+}
+
+rust_binary {
+    name: "virtmgr",
+    defaults: ["virtualizationmanager_defaults"],
+    srcs: ["src/main.rs"],
+    apex_available: ["com.android.virt"],
+}
+
+rust_test {
+    name: "virtualizationmanager_device_test",
+    srcs: ["src/main.rs"],
+    defaults: ["virtualizationmanager_defaults"],
+    rustlibs: [
+        "libtempfile",
+    ],
+    test_suites: ["general-tests"],
+}
diff --git a/virtualizationmanager/TEST_MAPPING b/virtualizationmanager/TEST_MAPPING
new file mode 100644
index 0000000..a680f6d
--- /dev/null
+++ b/virtualizationmanager/TEST_MAPPING
@@ -0,0 +1,7 @@
+{
+  "avf-presubmit": [
+    {
+      "name": "virtualizationmanager_device_test"
+    }
+  ]
+}
diff --git a/virtualizationmanager/src/aidl.rs b/virtualizationmanager/src/aidl.rs
new file mode 100644
index 0000000..c827c2e
--- /dev/null
+++ b/virtualizationmanager/src/aidl.rs
@@ -0,0 +1,1178 @@
+// Copyright 2021, The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Implementation of the AIDL interface of the VirtualizationService.
+
+use crate::{get_calling_pid, get_calling_uid};
+use crate::atom::{
+    write_vm_booted_stats, write_vm_creation_stats};
+use crate::composite::make_composite_image;
+use crate::crosvm::{CrosvmConfig, DiskFile, PayloadState, VmContext, VmInstance, VmState};
+use crate::payload::{add_microdroid_payload_images, add_microdroid_system_images};
+use crate::selinux::{getfilecon, SeContext};
+use android_os_permissions_aidl::aidl::android::os::IPermissionController;
+use android_system_virtualizationcommon::aidl::android::system::virtualizationcommon::{
+    DeathReason::DeathReason,
+    ErrorCode::ErrorCode,
+};
+use android_system_virtualizationservice::aidl::android::system::virtualizationservice::{
+    DiskImage::DiskImage,
+    IVirtualMachine::{BnVirtualMachine, IVirtualMachine},
+    IVirtualMachineCallback::IVirtualMachineCallback,
+    IVirtualizationService::IVirtualizationService,
+    MemoryTrimLevel::MemoryTrimLevel,
+    Partition::Partition,
+    PartitionType::PartitionType,
+    VirtualMachineAppConfig::{DebugLevel::DebugLevel, Payload::Payload, VirtualMachineAppConfig},
+    VirtualMachineConfig::VirtualMachineConfig,
+    VirtualMachineDebugInfo::VirtualMachineDebugInfo,
+    VirtualMachinePayloadConfig::VirtualMachinePayloadConfig,
+    VirtualMachineRawConfig::VirtualMachineRawConfig,
+    VirtualMachineState::VirtualMachineState,
+};
+use android_system_virtualizationservice_internal::aidl::android::system::virtualizationservice_internal::IVirtualizationServiceInternal::IVirtualizationServiceInternal;
+use android_system_virtualmachineservice::aidl::android::system::virtualmachineservice::IVirtualMachineService::{
+        BnVirtualMachineService, IVirtualMachineService,
+};
+use anyhow::{bail, Context, Result};
+use apkverify::{HashAlgorithm, V4Signature};
+use binder::{
+    self, wait_for_interface, BinderFeatures, ExceptionCode, Interface, ParcelFileDescriptor,
+    Status, StatusCode, Strong,
+};
+use disk::QcowFile;
+use lazy_static::lazy_static;
+use log::{debug, error, info, warn};
+use microdroid_payload_config::{OsConfig, Task, TaskType, VmPayloadConfig};
+use nix::unistd::pipe;
+use rpcbinder::RpcServer;
+use semver::VersionReq;
+use std::convert::TryInto;
+use std::ffi::CStr;
+use std::fs::{read_dir, remove_file, File, OpenOptions};
+use std::io::{BufRead, BufReader, Error, ErrorKind, Write};
+use std::num::NonZeroU32;
+use std::os::unix::io::{FromRawFd, IntoRawFd};
+use std::os::unix::raw::pid_t;
+use std::path::{Path, PathBuf};
+use std::sync::{Arc, Mutex, Weak};
+use vmconfig::VmConfig;
+use vsock::VsockStream;
+use zip::ZipArchive;
+
+/// The unique ID of a VM used (together with a port number) for vsock communication.
+pub type Cid = u32;
+
+pub const BINDER_SERVICE_IDENTIFIER: &str = "android.system.virtualizationservice";
+
+/// The size of zero.img.
+/// Gaps in composite disk images are filled with a shared zero.img.
+const ZERO_FILLER_SIZE: u64 = 4096;
+
+/// Magic string for the instance image
+const ANDROID_VM_INSTANCE_MAGIC: &str = "Android-VM-instance";
+
+/// Version of the instance image format
+const ANDROID_VM_INSTANCE_VERSION: u16 = 1;
+
+const MICRODROID_OS_NAME: &str = "microdroid";
+
+const UNFORMATTED_STORAGE_MAGIC: &str = "UNFORMATTED-STORAGE";
+
+lazy_static! {
+    pub static ref GLOBAL_SERVICE: Strong<dyn IVirtualizationServiceInternal> =
+        wait_for_interface(BINDER_SERVICE_IDENTIFIER)
+            .expect("Could not connect to VirtualizationServiceInternal");
+}
+
+fn create_or_update_idsig_file(
+    input_fd: &ParcelFileDescriptor,
+    idsig_fd: &ParcelFileDescriptor,
+) -> Result<()> {
+    let mut input = clone_file(input_fd)?;
+    let metadata = input.metadata().context("failed to get input metadata")?;
+    if !metadata.is_file() {
+        bail!("input is not a regular file");
+    }
+    let mut sig = V4Signature::create(&mut input, 4096, &[], HashAlgorithm::SHA256)
+        .context("failed to create idsig")?;
+
+    let mut output = clone_file(idsig_fd)?;
+    output.set_len(0).context("failed to set_len on the idsig output")?;
+    sig.write_into(&mut output).context("failed to write idsig")?;
+    Ok(())
+}
+
+pub fn remove_temporary_files(path: &PathBuf) -> Result<()> {
+    for dir_entry in read_dir(path)? {
+        remove_file(dir_entry?.path())?;
+    }
+    Ok(())
+}
+
+/// Implementation of `IVirtualizationService`, the entry point of the AIDL service.
+#[derive(Debug, Default)]
+pub struct VirtualizationService {
+    state: Arc<Mutex<State>>,
+}
+
+impl Interface for VirtualizationService {
+    fn dump(&self, mut file: &File, _args: &[&CStr]) -> Result<(), StatusCode> {
+        check_permission("android.permission.DUMP").or(Err(StatusCode::PERMISSION_DENIED))?;
+        let state = &mut *self.state.lock().unwrap();
+        let vms = state.vms();
+        writeln!(file, "Running {0} VMs:", vms.len()).or(Err(StatusCode::UNKNOWN_ERROR))?;
+        for vm in vms {
+            writeln!(file, "VM CID: {}", vm.cid).or(Err(StatusCode::UNKNOWN_ERROR))?;
+            writeln!(file, "\tState: {:?}", vm.vm_state.lock().unwrap())
+                .or(Err(StatusCode::UNKNOWN_ERROR))?;
+            writeln!(file, "\tPayload state {:?}", vm.payload_state())
+                .or(Err(StatusCode::UNKNOWN_ERROR))?;
+            writeln!(file, "\tProtected: {}", vm.protected).or(Err(StatusCode::UNKNOWN_ERROR))?;
+            writeln!(file, "\ttemporary_directory: {}", vm.temporary_directory.to_string_lossy())
+                .or(Err(StatusCode::UNKNOWN_ERROR))?;
+            writeln!(file, "\trequester_uid: {}", vm.requester_uid)
+                .or(Err(StatusCode::UNKNOWN_ERROR))?;
+            writeln!(file, "\trequester_debug_pid: {}", vm.requester_debug_pid)
+                .or(Err(StatusCode::UNKNOWN_ERROR))?;
+        }
+        Ok(())
+    }
+}
+
+impl IVirtualizationService for VirtualizationService {
+    /// Creates (but does not start) a new VM with the given configuration, assigning it the next
+    /// available CID.
+    ///
+    /// Returns a binder `IVirtualMachine` object referring to it, as a handle for the client.
+    fn createVm(
+        &self,
+        config: &VirtualMachineConfig,
+        console_fd: Option<&ParcelFileDescriptor>,
+        log_fd: Option<&ParcelFileDescriptor>,
+    ) -> binder::Result<Strong<dyn IVirtualMachine>> {
+        let mut is_protected = false;
+        let ret = self.create_vm_internal(config, console_fd, log_fd, &mut is_protected);
+        write_vm_creation_stats(config, is_protected, &ret);
+        ret
+    }
+
+    /// Initialise an empty partition image of the given size to be used as a writable partition.
+    fn initializeWritablePartition(
+        &self,
+        image_fd: &ParcelFileDescriptor,
+        size: i64,
+        partition_type: PartitionType,
+    ) -> binder::Result<()> {
+        check_manage_access()?;
+        let size = size.try_into().map_err(|e| {
+            Status::new_exception_str(
+                ExceptionCode::ILLEGAL_ARGUMENT,
+                Some(format!("Invalid size {}: {:?}", size, e)),
+            )
+        })?;
+        let image = clone_file(image_fd)?;
+        // initialize the file. Any data in the file will be erased.
+        image.set_len(0).map_err(|e| {
+            Status::new_service_specific_error_str(
+                -1,
+                Some(format!("Failed to reset a file: {:?}", e)),
+            )
+        })?;
+        let mut part = QcowFile::new(image, size).map_err(|e| {
+            Status::new_service_specific_error_str(
+                -1,
+                Some(format!("Failed to create QCOW2 image: {:?}", e)),
+            )
+        })?;
+
+        match partition_type {
+            PartitionType::RAW => Ok(()),
+            PartitionType::ANDROID_VM_INSTANCE => format_as_android_vm_instance(&mut part),
+            PartitionType::ENCRYPTEDSTORE => format_as_encryptedstore(&mut part),
+            _ => Err(Error::new(
+                ErrorKind::Unsupported,
+                format!("Unsupported partition type {:?}", partition_type),
+            )),
+        }
+        .map_err(|e| {
+            Status::new_service_specific_error_str(
+                -1,
+                Some(format!("Failed to initialize partition as {:?}: {:?}", partition_type, e)),
+            )
+        })?;
+
+        Ok(())
+    }
+
+    /// Creates or update the idsig file by digesting the input APK file.
+    fn createOrUpdateIdsigFile(
+        &self,
+        input_fd: &ParcelFileDescriptor,
+        idsig_fd: &ParcelFileDescriptor,
+    ) -> binder::Result<()> {
+        // TODO(b/193504400): do this only when (1) idsig_fd is empty or (2) the APK digest in
+        // idsig_fd is different from APK digest in input_fd
+
+        check_manage_access()?;
+
+        create_or_update_idsig_file(input_fd, idsig_fd)
+            .map_err(|e| Status::new_service_specific_error_str(-1, Some(format!("{:?}", e))))?;
+        Ok(())
+    }
+
+    /// Get a list of all currently running VMs. This method is only intended for debug purposes,
+    /// and as such is only permitted from the shell user.
+    fn debugListVms(&self) -> binder::Result<Vec<VirtualMachineDebugInfo>> {
+        // Delegate to the global service, including checking the debug permission.
+        GLOBAL_SERVICE.debugListVms()
+    }
+}
+
+impl VirtualizationService {
+    pub fn init() -> VirtualizationService {
+        VirtualizationService::default()
+    }
+
+    fn create_vm_context(
+        &self,
+        requester_debug_pid: pid_t,
+    ) -> binder::Result<(VmContext, Cid, PathBuf)> {
+        const NUM_ATTEMPTS: usize = 5;
+
+        for _ in 0..NUM_ATTEMPTS {
+            let vm_context = GLOBAL_SERVICE.allocateGlobalVmContext(requester_debug_pid as i32)?;
+            let cid = vm_context.getCid()? as Cid;
+            let temp_dir: PathBuf = vm_context.getTemporaryDirectory()?.into();
+            let service = VirtualMachineService::new_binder(self.state.clone(), cid).as_binder();
+
+            // Start VM service listening for connections from the new CID on port=CID.
+            let port = cid;
+            match RpcServer::new_vsock(service, cid, port) {
+                Ok(vm_server) => {
+                    vm_server.start();
+                    return Ok((VmContext::new(vm_context, vm_server), cid, temp_dir));
+                }
+                Err(err) => {
+                    warn!("Could not start RpcServer on port {}: {}", port, err);
+                }
+            }
+        }
+        Err(Status::new_service_specific_error_str(
+            -1,
+            Some("Too many attempts to create VM context failed."),
+        ))
+    }
+
+    fn create_vm_internal(
+        &self,
+        config: &VirtualMachineConfig,
+        console_fd: Option<&ParcelFileDescriptor>,
+        log_fd: Option<&ParcelFileDescriptor>,
+        is_protected: &mut bool,
+    ) -> binder::Result<Strong<dyn IVirtualMachine>> {
+        let requester_uid = get_calling_uid();
+        let requester_debug_pid = get_calling_pid();
+
+        // Allocating VM context checks the MANAGE_VIRTUAL_MACHINE permission.
+        let (vm_context, cid, temporary_directory) = self.create_vm_context(requester_debug_pid)?;
+
+        let is_custom = match config {
+            VirtualMachineConfig::RawConfig(_) => true,
+            VirtualMachineConfig::AppConfig(config) => {
+                // Some features are reserved for platform apps only, even when using
+                // VirtualMachineAppConfig:
+                // - controlling CPUs;
+                // - specifying a config file in the APK.
+                !config.taskProfiles.is_empty() || matches!(config.payload, Payload::ConfigPath(_))
+            }
+        };
+        if is_custom {
+            check_use_custom_virtual_machine()?;
+        }
+
+        let state = &mut *self.state.lock().unwrap();
+        let console_fd =
+            clone_or_prepare_logger_fd(config, console_fd, format!("Console({})", cid))?;
+        let log_fd = clone_or_prepare_logger_fd(config, log_fd, format!("Log({})", cid))?;
+
+        // Counter to generate unique IDs for temporary image files.
+        let mut next_temporary_image_id = 0;
+        // Files which are referred to from composite images. These must be mapped to the crosvm
+        // child process, and not closed before it is started.
+        let mut indirect_files = vec![];
+
+        let (is_app_config, config) = match config {
+            VirtualMachineConfig::RawConfig(config) => (false, BorrowedOrOwned::Borrowed(config)),
+            VirtualMachineConfig::AppConfig(config) => {
+                let config = load_app_config(config, &temporary_directory).map_err(|e| {
+                    *is_protected = config.protectedVm;
+                    let message = format!("Failed to load app config: {:?}", e);
+                    error!("{}", message);
+                    Status::new_service_specific_error_str(-1, Some(message))
+                })?;
+                (true, BorrowedOrOwned::Owned(config))
+            }
+        };
+        let config = config.as_ref();
+        *is_protected = config.protectedVm;
+
+        // Check if partition images are labeled incorrectly. This is to prevent random images
+        // which are not protected by the Android Verified Boot (e.g. bits downloaded by apps) from
+        // being loaded in a pVM. This applies to everything in the raw config, and everything but
+        // the non-executable, generated partitions in the app config.
+        config
+            .disks
+            .iter()
+            .flat_map(|disk| disk.partitions.iter())
+            .filter(|partition| {
+                if is_app_config {
+                    !is_safe_app_partition(&partition.label)
+                } else {
+                    true // all partitions are checked
+                }
+            })
+            .try_for_each(check_label_for_partition)
+            .map_err(|e| Status::new_service_specific_error_str(-1, Some(format!("{:?}", e))))?;
+
+        let kernel = maybe_clone_file(&config.kernel)?;
+        let initrd = maybe_clone_file(&config.initrd)?;
+
+        // In a protected VM, we require custom kernels to come from a trusted source (b/237054515).
+        if config.protectedVm {
+            check_label_for_kernel_files(&kernel, &initrd).map_err(|e| {
+                Status::new_service_specific_error_str(-1, Some(format!("{:?}", e)))
+            })?;
+        }
+
+        let zero_filler_path = temporary_directory.join("zero.img");
+        write_zero_filler(&zero_filler_path).map_err(|e| {
+            error!("Failed to make composite image: {:?}", e);
+            Status::new_service_specific_error_str(
+                -1,
+                Some(format!("Failed to make composite image: {:?}", e)),
+            )
+        })?;
+
+        // Assemble disk images if needed.
+        let disks = config
+            .disks
+            .iter()
+            .map(|disk| {
+                assemble_disk_image(
+                    disk,
+                    &zero_filler_path,
+                    &temporary_directory,
+                    &mut next_temporary_image_id,
+                    &mut indirect_files,
+                )
+            })
+            .collect::<Result<Vec<DiskFile>, _>>()?;
+
+        // Creating this ramdump file unconditionally is not harmful as ramdump will be created
+        // only when the VM is configured as such. `ramdump_write` is sent to crosvm and will
+        // be the backing store for the /dev/hvc1 where VM will emit ramdump to. `ramdump_read`
+        // will be sent back to the client (i.e. the VM owner) for readout.
+        let ramdump_path = temporary_directory.join("ramdump");
+        let ramdump = prepare_ramdump_file(&ramdump_path).map_err(|e| {
+            error!("Failed to prepare ramdump file: {:?}", e);
+            Status::new_service_specific_error_str(
+                -1,
+                Some(format!("Failed to prepare ramdump file: {:?}", e)),
+            )
+        })?;
+
+        // Actually start the VM.
+        let crosvm_config = CrosvmConfig {
+            cid,
+            name: config.name.clone(),
+            bootloader: maybe_clone_file(&config.bootloader)?,
+            kernel,
+            initrd,
+            disks,
+            params: config.params.to_owned(),
+            protected: *is_protected,
+            memory_mib: config.memoryMib.try_into().ok().and_then(NonZeroU32::new),
+            cpus: config.numCpus.try_into().ok().and_then(NonZeroU32::new),
+            task_profiles: config.taskProfiles.clone(),
+            console_fd,
+            log_fd,
+            ramdump: Some(ramdump),
+            indirect_files,
+            platform_version: parse_platform_version_req(&config.platformVersion)?,
+            detect_hangup: is_app_config,
+        };
+        let instance = Arc::new(
+            VmInstance::new(
+                crosvm_config,
+                temporary_directory,
+                requester_uid,
+                requester_debug_pid,
+                vm_context,
+            )
+            .map_err(|e| {
+                error!("Failed to create VM with config {:?}: {:?}", config, e);
+                Status::new_service_specific_error_str(
+                    -1,
+                    Some(format!("Failed to create VM: {:?}", e)),
+                )
+            })?,
+        );
+        state.add_vm(Arc::downgrade(&instance));
+        Ok(VirtualMachine::create(instance))
+    }
+}
+
+fn write_zero_filler(zero_filler_path: &Path) -> Result<()> {
+    let file = OpenOptions::new()
+        .create_new(true)
+        .read(true)
+        .write(true)
+        .open(zero_filler_path)
+        .with_context(|| "Failed to create zero.img")?;
+    file.set_len(ZERO_FILLER_SIZE)?;
+    Ok(())
+}
+
+fn format_as_android_vm_instance(part: &mut dyn Write) -> std::io::Result<()> {
+    part.write_all(ANDROID_VM_INSTANCE_MAGIC.as_bytes())?;
+    part.write_all(&ANDROID_VM_INSTANCE_VERSION.to_le_bytes())?;
+    part.flush()
+}
+
+fn format_as_encryptedstore(part: &mut dyn Write) -> std::io::Result<()> {
+    part.write_all(UNFORMATTED_STORAGE_MAGIC.as_bytes())?;
+    part.flush()
+}
+
+fn prepare_ramdump_file(ramdump_path: &Path) -> Result<File> {
+    File::create(ramdump_path).context(format!("Failed to create ramdump file {:?}", &ramdump_path))
+}
+
+/// Given the configuration for a disk image, assembles the `DiskFile` to pass to crosvm.
+///
+/// This may involve assembling a composite disk from a set of partition images.
+fn assemble_disk_image(
+    disk: &DiskImage,
+    zero_filler_path: &Path,
+    temporary_directory: &Path,
+    next_temporary_image_id: &mut u64,
+    indirect_files: &mut Vec<File>,
+) -> Result<DiskFile, Status> {
+    let image = if !disk.partitions.is_empty() {
+        if disk.image.is_some() {
+            warn!("DiskImage {:?} contains both image and partitions.", disk);
+            return Err(Status::new_exception_str(
+                ExceptionCode::ILLEGAL_ARGUMENT,
+                Some("DiskImage contains both image and partitions."),
+            ));
+        }
+
+        let composite_image_filenames =
+            make_composite_image_filenames(temporary_directory, next_temporary_image_id);
+        let (image, partition_files) = make_composite_image(
+            &disk.partitions,
+            zero_filler_path,
+            &composite_image_filenames.composite,
+            &composite_image_filenames.header,
+            &composite_image_filenames.footer,
+        )
+        .map_err(|e| {
+            error!("Failed to make composite image with config {:?}: {:?}", disk, e);
+            Status::new_service_specific_error_str(
+                -1,
+                Some(format!("Failed to make composite image: {:?}", e)),
+            )
+        })?;
+
+        // Pass the file descriptors for the various partition files to crosvm when it
+        // is run.
+        indirect_files.extend(partition_files);
+
+        image
+    } else if let Some(image) = &disk.image {
+        clone_file(image)?
+    } else {
+        warn!("DiskImage {:?} didn't contain image or partitions.", disk);
+        return Err(Status::new_exception_str(
+            ExceptionCode::ILLEGAL_ARGUMENT,
+            Some("DiskImage didn't contain image or partitions."),
+        ));
+    };
+
+    Ok(DiskFile { image, writable: disk.writable })
+}
+
+fn load_app_config(
+    config: &VirtualMachineAppConfig,
+    temporary_directory: &Path,
+) -> Result<VirtualMachineRawConfig> {
+    let apk_file = clone_file(config.apk.as_ref().unwrap())?;
+    let idsig_file = clone_file(config.idsig.as_ref().unwrap())?;
+    let instance_file = clone_file(config.instanceImage.as_ref().unwrap())?;
+
+    let storage_image = if let Some(file) = config.encryptedStorageImage.as_ref() {
+        Some(clone_file(file)?)
+    } else {
+        None
+    };
+
+    let vm_payload_config = match &config.payload {
+        Payload::ConfigPath(config_path) => {
+            load_vm_payload_config_from_file(&apk_file, config_path.as_str())
+                .with_context(|| format!("Couldn't read config from {}", config_path))?
+        }
+        Payload::PayloadConfig(payload_config) => create_vm_payload_config(payload_config)?,
+    };
+
+    // For now, the only supported OS is Microdroid
+    let os_name = vm_payload_config.os.name.as_str();
+    if os_name != MICRODROID_OS_NAME {
+        bail!("Unknown OS \"{}\"", os_name);
+    }
+
+    // It is safe to construct a filename based on the os_name because we've already checked that it
+    // is one of the allowed values.
+    let vm_config_path = PathBuf::from(format!("/apex/com.android.virt/etc/{}.json", os_name));
+    let vm_config_file = File::open(vm_config_path)?;
+    let mut vm_config = VmConfig::load(&vm_config_file)?.to_parcelable()?;
+
+    if config.memoryMib > 0 {
+        vm_config.memoryMib = config.memoryMib;
+    }
+
+    vm_config.name = config.name.clone();
+    vm_config.protectedVm = config.protectedVm;
+    vm_config.numCpus = config.numCpus;
+    vm_config.taskProfiles = config.taskProfiles.clone();
+
+    // Microdroid takes additional init ramdisk & (optionally) storage image
+    add_microdroid_system_images(config, instance_file, storage_image, &mut vm_config)?;
+
+    // Include Microdroid payload disk (contains apks, idsigs) in vm config
+    add_microdroid_payload_images(
+        config,
+        temporary_directory,
+        apk_file,
+        idsig_file,
+        &vm_payload_config,
+        &mut vm_config,
+    )?;
+
+    Ok(vm_config)
+}
+
+fn load_vm_payload_config_from_file(apk_file: &File, config_path: &str) -> Result<VmPayloadConfig> {
+    let mut apk_zip = ZipArchive::new(apk_file)?;
+    let config_file = apk_zip.by_name(config_path)?;
+    Ok(serde_json::from_reader(config_file)?)
+}
+
+fn create_vm_payload_config(
+    payload_config: &VirtualMachinePayloadConfig,
+) -> Result<VmPayloadConfig> {
+    // There isn't an actual config file. Construct a synthetic VmPayloadConfig from the explicit
+    // parameters we've been given. Microdroid will do something equivalent inside the VM using the
+    // payload config that we send it via the metadata file.
+
+    let payload_binary_name = &payload_config.payloadBinaryName;
+    if payload_binary_name.contains('/') {
+        bail!("Payload binary name must not specify a path: {payload_binary_name}");
+    }
+
+    let task = Task { type_: TaskType::MicrodroidLauncher, command: payload_binary_name.clone() };
+    Ok(VmPayloadConfig {
+        os: OsConfig { name: MICRODROID_OS_NAME.to_owned() },
+        task: Some(task),
+        apexes: vec![],
+        extra_apks: vec![],
+        prefer_staged: false,
+        export_tombstones: false,
+        enable_authfs: false,
+    })
+}
+
+/// Generates a unique filename to use for a composite disk image.
+fn make_composite_image_filenames(
+    temporary_directory: &Path,
+    next_temporary_image_id: &mut u64,
+) -> CompositeImageFilenames {
+    let id = *next_temporary_image_id;
+    *next_temporary_image_id += 1;
+    CompositeImageFilenames {
+        composite: temporary_directory.join(format!("composite-{}.img", id)),
+        header: temporary_directory.join(format!("composite-{}-header.img", id)),
+        footer: temporary_directory.join(format!("composite-{}-footer.img", id)),
+    }
+}
+
+/// Filenames for a composite disk image, including header and footer partitions.
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct CompositeImageFilenames {
+    /// The composite disk image itself.
+    composite: PathBuf,
+    /// The header partition image.
+    header: PathBuf,
+    /// The footer partition image.
+    footer: PathBuf,
+}
+
+/// Checks whether the caller has a specific permission
+fn check_permission(perm: &str) -> binder::Result<()> {
+    let calling_pid = get_calling_pid();
+    let calling_uid = get_calling_uid();
+    // Root can do anything
+    if calling_uid == 0 {
+        return Ok(());
+    }
+    let perm_svc: Strong<dyn IPermissionController::IPermissionController> =
+        binder::get_interface("permission")?;
+    if perm_svc.checkPermission(perm, calling_pid, calling_uid as i32)? {
+        Ok(())
+    } else {
+        Err(Status::new_exception_str(
+            ExceptionCode::SECURITY,
+            Some(format!("does not have the {} permission", perm)),
+        ))
+    }
+}
+
+/// Check whether the caller of the current Binder method is allowed to manage VMs
+fn check_manage_access() -> binder::Result<()> {
+    check_permission("android.permission.MANAGE_VIRTUAL_MACHINE")
+}
+
+/// Check whether the caller of the current Binder method is allowed to create custom VMs
+fn check_use_custom_virtual_machine() -> binder::Result<()> {
+    check_permission("android.permission.USE_CUSTOM_VIRTUAL_MACHINE")
+}
+
+/// Return whether a partition is exempt from selinux label checks, because we know that it does
+/// not contain code and is likely to be generated in an app-writable directory.
+fn is_safe_app_partition(label: &str) -> bool {
+    // See add_microdroid_system_images & add_microdroid_payload_images in payload.rs.
+    label == "vm-instance"
+        || label == "encryptedstore"
+        || label == "microdroid-apk-idsig"
+        || label == "payload-metadata"
+        || label.starts_with("extra-idsig-")
+}
+
+/// Check that a file SELinux label is acceptable.
+///
+/// We only want to allow code in a VM to be sourced from places that apps, and the
+/// system, do not have write access to.
+///
+/// Note that sepolicy must also grant read access for these types to both virtualization
+/// service and crosvm.
+///
+/// App private data files are deliberately excluded, to avoid arbitrary payloads being run on
+/// user devices (W^X).
+fn check_label_is_allowed(context: &SeContext) -> Result<()> {
+    match context.selinux_type()? {
+        | "system_file" // immutable dm-verity protected partition
+        | "apk_data_file" // APKs of an installed app
+        | "staging_data_file" // updated/staged APEX images
+        | "shell_data_file" // test files created via adb shell
+         => Ok(()),
+        _ => bail!("Label {} is not allowed", context),
+    }
+}
+
+fn check_label_for_partition(partition: &Partition) -> Result<()> {
+    let file = partition.image.as_ref().unwrap().as_ref();
+    check_label_is_allowed(&getfilecon(file)?)
+        .with_context(|| format!("Partition {} invalid", &partition.label))
+}
+
+fn check_label_for_kernel_files(kernel: &Option<File>, initrd: &Option<File>) -> Result<()> {
+    if let Some(f) = kernel {
+        check_label_for_file(f, "kernel")?;
+    }
+    if let Some(f) = initrd {
+        check_label_for_file(f, "initrd")?;
+    }
+    Ok(())
+}
+fn check_label_for_file(file: &File, name: &str) -> Result<()> {
+    check_label_is_allowed(&getfilecon(file)?).with_context(|| format!("{} file invalid", name))
+}
+
+/// Implementation of the AIDL `IVirtualMachine` interface. Used as a handle to a VM.
+#[derive(Debug)]
+struct VirtualMachine {
+    instance: Arc<VmInstance>,
+}
+
+impl VirtualMachine {
+    fn create(instance: Arc<VmInstance>) -> Strong<dyn IVirtualMachine> {
+        BnVirtualMachine::new_binder(VirtualMachine { instance }, BinderFeatures::default())
+    }
+}
+
+impl Interface for VirtualMachine {}
+
+impl IVirtualMachine for VirtualMachine {
+    fn getCid(&self) -> binder::Result<i32> {
+        // Don't check permission. The owner of the VM might have passed this binder object to
+        // others.
+        Ok(self.instance.cid as i32)
+    }
+
+    fn getState(&self) -> binder::Result<VirtualMachineState> {
+        // Don't check permission. The owner of the VM might have passed this binder object to
+        // others.
+        Ok(get_state(&self.instance))
+    }
+
+    fn registerCallback(
+        &self,
+        callback: &Strong<dyn IVirtualMachineCallback>,
+    ) -> binder::Result<()> {
+        // Don't check permission. The owner of the VM might have passed this binder object to
+        // others.
+        //
+        // TODO: Should this give an error if the VM is already dead?
+        self.instance.callbacks.add(callback.clone());
+        Ok(())
+    }
+
+    fn start(&self) -> binder::Result<()> {
+        self.instance.start().map_err(|e| {
+            error!("Error starting VM with CID {}: {:?}", self.instance.cid, e);
+            Status::new_service_specific_error_str(-1, Some(e.to_string()))
+        })
+    }
+
+    fn stop(&self) -> binder::Result<()> {
+        self.instance.kill().map_err(|e| {
+            error!("Error stopping VM with CID {}: {:?}", self.instance.cid, e);
+            Status::new_service_specific_error_str(-1, Some(e.to_string()))
+        })
+    }
+
+    fn onTrimMemory(&self, level: MemoryTrimLevel) -> binder::Result<()> {
+        self.instance.trim_memory(level).map_err(|e| {
+            error!("Error trimming VM with CID {}: {:?}", self.instance.cid, e);
+            Status::new_service_specific_error_str(-1, Some(e.to_string()))
+        })
+    }
+
+    fn connectVsock(&self, port: i32) -> binder::Result<ParcelFileDescriptor> {
+        if !matches!(&*self.instance.vm_state.lock().unwrap(), VmState::Running { .. }) {
+            return Err(Status::new_service_specific_error_str(-1, Some("VM is not running")));
+        }
+        let port = port as u32;
+        if port < 1024 {
+            return Err(Status::new_service_specific_error_str(
+                -1,
+                Some(format!("Can't connect to privileged port {port}")),
+            ));
+        }
+        let stream = VsockStream::connect_with_cid_port(self.instance.cid, port).map_err(|e| {
+            Status::new_service_specific_error_str(-1, Some(format!("Failed to connect: {:?}", e)))
+        })?;
+        Ok(vsock_stream_to_pfd(stream))
+    }
+}
+
+impl Drop for VirtualMachine {
+    fn drop(&mut self) {
+        debug!("Dropping {:?}", self);
+        if let Err(e) = self.instance.kill() {
+            debug!("Error stopping dropped VM with CID {}: {:?}", self.instance.cid, e);
+        }
+    }
+}
+
+/// A set of Binders to be called back in response to various events on the VM, such as when it
+/// dies.
+#[derive(Debug, Default)]
+pub struct VirtualMachineCallbacks(Mutex<Vec<Strong<dyn IVirtualMachineCallback>>>);
+
+impl VirtualMachineCallbacks {
+    /// Call all registered callbacks to notify that the payload has started.
+    pub fn notify_payload_started(&self, cid: Cid) {
+        let callbacks = &*self.0.lock().unwrap();
+        for callback in callbacks {
+            if let Err(e) = callback.onPayloadStarted(cid as i32) {
+                error!("Error notifying payload start event from VM CID {}: {:?}", cid, e);
+            }
+        }
+    }
+
+    /// Call all registered callbacks to notify that the payload is ready to serve.
+    pub fn notify_payload_ready(&self, cid: Cid) {
+        let callbacks = &*self.0.lock().unwrap();
+        for callback in callbacks {
+            if let Err(e) = callback.onPayloadReady(cid as i32) {
+                error!("Error notifying payload ready event from VM CID {}: {:?}", cid, e);
+            }
+        }
+    }
+
+    /// Call all registered callbacks to notify that the payload has finished.
+    pub fn notify_payload_finished(&self, cid: Cid, exit_code: i32) {
+        let callbacks = &*self.0.lock().unwrap();
+        for callback in callbacks {
+            if let Err(e) = callback.onPayloadFinished(cid as i32, exit_code) {
+                error!("Error notifying payload finish event from VM CID {}: {:?}", cid, e);
+            }
+        }
+    }
+
+    /// Call all registered callbacks to say that the VM encountered an error.
+    pub fn notify_error(&self, cid: Cid, error_code: ErrorCode, message: &str) {
+        let callbacks = &*self.0.lock().unwrap();
+        for callback in callbacks {
+            if let Err(e) = callback.onError(cid as i32, error_code, message) {
+                error!("Error notifying error event from VM CID {}: {:?}", cid, e);
+            }
+        }
+    }
+
+    /// Call all registered callbacks to say that the VM has died.
+    pub fn callback_on_died(&self, cid: Cid, reason: DeathReason) {
+        let callbacks = &*self.0.lock().unwrap();
+        for callback in callbacks {
+            if let Err(e) = callback.onDied(cid as i32, reason) {
+                error!("Error notifying exit of VM CID {}: {:?}", cid, e);
+            }
+        }
+    }
+
+    /// Add a new callback to the set.
+    fn add(&self, callback: Strong<dyn IVirtualMachineCallback>) {
+        self.0.lock().unwrap().push(callback);
+    }
+}
+
+/// The mutable state of the VirtualizationService. There should only be one instance of this
+/// struct.
+#[derive(Debug, Default)]
+struct State {
+    /// The VMs which have been started. When VMs are started a weak reference is added to this list
+    /// while a strong reference is returned to the caller over Binder. Once all copies of the
+    /// Binder client are dropped the weak reference here will become invalid, and will be removed
+    /// from the list opportunistically the next time `add_vm` is called.
+    vms: Vec<Weak<VmInstance>>,
+}
+
+impl State {
+    /// Get a list of VMs which still have Binder references to them.
+    fn vms(&self) -> Vec<Arc<VmInstance>> {
+        // Attempt to upgrade the weak pointers to strong pointers.
+        self.vms.iter().filter_map(Weak::upgrade).collect()
+    }
+
+    /// Add a new VM to the list.
+    fn add_vm(&mut self, vm: Weak<VmInstance>) {
+        // Garbage collect any entries from the stored list which no longer exist.
+        self.vms.retain(|vm| vm.strong_count() > 0);
+
+        // Actually add the new VM.
+        self.vms.push(vm);
+    }
+
+    /// Get a VM that corresponds to the given cid
+    fn get_vm(&self, cid: Cid) -> Option<Arc<VmInstance>> {
+        self.vms().into_iter().find(|vm| vm.cid == cid)
+    }
+}
+
+/// Gets the `VirtualMachineState` of the given `VmInstance`.
+fn get_state(instance: &VmInstance) -> VirtualMachineState {
+    match &*instance.vm_state.lock().unwrap() {
+        VmState::NotStarted { .. } => VirtualMachineState::NOT_STARTED,
+        VmState::Running { .. } => match instance.payload_state() {
+            PayloadState::Starting => VirtualMachineState::STARTING,
+            PayloadState::Started => VirtualMachineState::STARTED,
+            PayloadState::Ready => VirtualMachineState::READY,
+            PayloadState::Finished => VirtualMachineState::FINISHED,
+            PayloadState::Hangup => VirtualMachineState::DEAD,
+        },
+        VmState::Dead => VirtualMachineState::DEAD,
+        VmState::Failed => VirtualMachineState::DEAD,
+    }
+}
+
+/// Converts a `&ParcelFileDescriptor` to a `File` by cloning the file.
+pub fn clone_file(file: &ParcelFileDescriptor) -> Result<File, Status> {
+    file.as_ref().try_clone().map_err(|e| {
+        Status::new_exception_str(
+            ExceptionCode::BAD_PARCELABLE,
+            Some(format!("Failed to clone File from ParcelFileDescriptor: {:?}", e)),
+        )
+    })
+}
+
+/// Converts an `&Option<ParcelFileDescriptor>` to an `Option<File>` by cloning the file.
+fn maybe_clone_file(file: &Option<ParcelFileDescriptor>) -> Result<Option<File>, Status> {
+    file.as_ref().map(clone_file).transpose()
+}
+
+/// Converts a `VsockStream` to a `ParcelFileDescriptor`.
+fn vsock_stream_to_pfd(stream: VsockStream) -> ParcelFileDescriptor {
+    // SAFETY: ownership is transferred from stream to f
+    let f = unsafe { File::from_raw_fd(stream.into_raw_fd()) };
+    ParcelFileDescriptor::new(f)
+}
+
+/// Parses the platform version requirement string.
+fn parse_platform_version_req(s: &str) -> Result<VersionReq, Status> {
+    VersionReq::parse(s).map_err(|e| {
+        Status::new_exception_str(
+            ExceptionCode::BAD_PARCELABLE,
+            Some(format!("Invalid platform version requirement {}: {:?}", s, e)),
+        )
+    })
+}
+
+fn is_debuggable(config: &VirtualMachineConfig) -> bool {
+    match config {
+        VirtualMachineConfig::AppConfig(config) => config.debugLevel != DebugLevel::NONE,
+        _ => false,
+    }
+}
+
+fn clone_or_prepare_logger_fd(
+    config: &VirtualMachineConfig,
+    fd: Option<&ParcelFileDescriptor>,
+    tag: String,
+) -> Result<Option<File>, Status> {
+    if let Some(fd) = fd {
+        return Ok(Some(clone_file(fd)?));
+    }
+
+    if !is_debuggable(config) {
+        return Ok(None);
+    }
+
+    let (raw_read_fd, raw_write_fd) = pipe().map_err(|e| {
+        Status::new_service_specific_error_str(-1, Some(format!("Failed to create pipe: {:?}", e)))
+    })?;
+
+    // SAFETY: We are the sole owners of these fds as they were just created.
+    let mut reader = BufReader::new(unsafe { File::from_raw_fd(raw_read_fd) });
+    let write_fd = unsafe { File::from_raw_fd(raw_write_fd) };
+
+    std::thread::spawn(move || loop {
+        let mut buf = vec![];
+        match reader.read_until(b'\n', &mut buf) {
+            Ok(0) => {
+                // EOF
+                return;
+            }
+            Ok(size) => {
+                if buf[size - 1] == b'\n' {
+                    buf.pop();
+                }
+                info!("{}: {}", &tag, &String::from_utf8_lossy(&buf));
+            }
+            Err(e) => {
+                error!("Could not read console pipe: {:?}", e);
+                return;
+            }
+        };
+    });
+
+    Ok(Some(write_fd))
+}
+
+/// Simple utility for referencing Borrowed or Owned. Similar to std::borrow::Cow, but
+/// it doesn't require that T implements Clone.
+enum BorrowedOrOwned<'a, T> {
+    Borrowed(&'a T),
+    Owned(T),
+}
+
+impl<'a, T> AsRef<T> for BorrowedOrOwned<'a, T> {
+    fn as_ref(&self) -> &T {
+        match self {
+            Self::Borrowed(b) => b,
+            Self::Owned(o) => o,
+        }
+    }
+}
+
+/// Implementation of `IVirtualMachineService`, the entry point of the AIDL service.
+#[derive(Debug, Default)]
+struct VirtualMachineService {
+    state: Arc<Mutex<State>>,
+    cid: Cid,
+}
+
+impl Interface for VirtualMachineService {}
+
+impl IVirtualMachineService for VirtualMachineService {
+    fn notifyPayloadStarted(&self) -> binder::Result<()> {
+        let cid = self.cid;
+        if let Some(vm) = self.state.lock().unwrap().get_vm(cid) {
+            info!("VM with CID {} started payload", cid);
+            vm.update_payload_state(PayloadState::Started).map_err(|e| {
+                Status::new_exception_str(ExceptionCode::ILLEGAL_STATE, Some(e.to_string()))
+            })?;
+            vm.callbacks.notify_payload_started(cid);
+
+            let vm_start_timestamp = vm.vm_metric.lock().unwrap().start_timestamp;
+            write_vm_booted_stats(vm.requester_uid as i32, &vm.name, vm_start_timestamp);
+            Ok(())
+        } else {
+            error!("notifyPayloadStarted is called from an unknown CID {}", cid);
+            Err(Status::new_service_specific_error_str(
+                -1,
+                Some(format!("cannot find a VM with CID {}", cid)),
+            ))
+        }
+    }
+
+    fn notifyPayloadReady(&self) -> binder::Result<()> {
+        let cid = self.cid;
+        if let Some(vm) = self.state.lock().unwrap().get_vm(cid) {
+            info!("VM with CID {} reported payload is ready", cid);
+            vm.update_payload_state(PayloadState::Ready).map_err(|e| {
+                Status::new_exception_str(ExceptionCode::ILLEGAL_STATE, Some(e.to_string()))
+            })?;
+            vm.callbacks.notify_payload_ready(cid);
+            Ok(())
+        } else {
+            error!("notifyPayloadReady is called from an unknown CID {}", cid);
+            Err(Status::new_service_specific_error_str(
+                -1,
+                Some(format!("cannot find a VM with CID {}", cid)),
+            ))
+        }
+    }
+
+    fn notifyPayloadFinished(&self, exit_code: i32) -> binder::Result<()> {
+        let cid = self.cid;
+        if let Some(vm) = self.state.lock().unwrap().get_vm(cid) {
+            info!("VM with CID {} finished payload", cid);
+            vm.update_payload_state(PayloadState::Finished).map_err(|e| {
+                Status::new_exception_str(ExceptionCode::ILLEGAL_STATE, Some(e.to_string()))
+            })?;
+            vm.callbacks.notify_payload_finished(cid, exit_code);
+            Ok(())
+        } else {
+            error!("notifyPayloadFinished is called from an unknown CID {}", cid);
+            Err(Status::new_service_specific_error_str(
+                -1,
+                Some(format!("cannot find a VM with CID {}", cid)),
+            ))
+        }
+    }
+
+    fn notifyError(&self, error_code: ErrorCode, message: &str) -> binder::Result<()> {
+        let cid = self.cid;
+        if let Some(vm) = self.state.lock().unwrap().get_vm(cid) {
+            info!("VM with CID {} encountered an error", cid);
+            vm.update_payload_state(PayloadState::Finished).map_err(|e| {
+                Status::new_exception_str(ExceptionCode::ILLEGAL_STATE, Some(e.to_string()))
+            })?;
+            vm.callbacks.notify_error(cid, error_code, message);
+            Ok(())
+        } else {
+            error!("notifyError is called from an unknown CID {}", cid);
+            Err(Status::new_service_specific_error_str(
+                -1,
+                Some(format!("cannot find a VM with CID {}", cid)),
+            ))
+        }
+    }
+}
+
+impl VirtualMachineService {
+    fn new_binder(state: Arc<Mutex<State>>, cid: Cid) -> Strong<dyn IVirtualMachineService> {
+        BnVirtualMachineService::new_binder(
+            VirtualMachineService { state, cid },
+            BinderFeatures::default(),
+        )
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_is_allowed_label_for_partition() -> Result<()> {
+        let expected_results = vec![
+            ("u:object_r:system_file:s0", true),
+            ("u:object_r:apk_data_file:s0", true),
+            ("u:object_r:app_data_file:s0", false),
+            ("u:object_r:app_data_file:s0:c512,c768", false),
+            ("u:object_r:privapp_data_file:s0:c512,c768", false),
+            ("invalid", false),
+            ("user:role:apk_data_file:severity:categories", true),
+            ("user:role:apk_data_file:severity:categories:extraneous", false),
+        ];
+
+        for (label, expected_valid) in expected_results {
+            let context = SeContext::new(label)?;
+            let result = check_label_is_allowed(&context);
+            if expected_valid {
+                assert!(result.is_ok(), "Expected label {} to be allowed, got {:?}", label, result);
+            } else if result.is_ok() {
+                bail!("Expected label {} to be disallowed", label);
+            }
+        }
+        Ok(())
+    }
+
+    #[test]
+    fn test_create_or_update_idsig_file_empty_apk() -> Result<()> {
+        let apk = tempfile::tempfile().unwrap();
+        let idsig = tempfile::tempfile().unwrap();
+
+        let ret = create_or_update_idsig_file(
+            &ParcelFileDescriptor::new(apk),
+            &ParcelFileDescriptor::new(idsig),
+        );
+        assert!(ret.is_err(), "should fail");
+        Ok(())
+    }
+
+    #[test]
+    fn test_create_or_update_idsig_dir_instead_of_file_for_apk() -> Result<()> {
+        let tmp_dir = tempfile::TempDir::new().unwrap();
+        let apk = File::open(tmp_dir.path()).unwrap();
+        let idsig = tempfile::tempfile().unwrap();
+
+        let ret = create_or_update_idsig_file(
+            &ParcelFileDescriptor::new(apk),
+            &ParcelFileDescriptor::new(idsig),
+        );
+        assert!(ret.is_err(), "should fail");
+        Ok(())
+    }
+
+    /// Verifies that create_or_update_idsig_file won't oom if a fd that corresponds to a directory
+    /// on ext4 filesystem is passed.
+    /// On ext4 lseek on a directory fd will return (off_t)-1 (see:
+    /// https://bugzilla.kernel.org/show_bug.cgi?id=200043), which will result in
+    /// create_or_update_idsig_file ooming while attempting to allocate petabytes of memory.
+    #[test]
+    fn test_create_or_update_idsig_does_not_crash_dir_on_ext4() -> Result<()> {
+        // APEXes are backed by the ext4.
+        let apk = File::open("/apex/com.android.virt/").unwrap();
+        let idsig = tempfile::tempfile().unwrap();
+
+        let ret = create_or_update_idsig_file(
+            &ParcelFileDescriptor::new(apk),
+            &ParcelFileDescriptor::new(idsig),
+        );
+        assert!(ret.is_err(), "should fail");
+        Ok(())
+    }
+}
diff --git a/virtualizationmanager/src/atom.rs b/virtualizationmanager/src/atom.rs
new file mode 100644
index 0000000..c33f262
--- /dev/null
+++ b/virtualizationmanager/src/atom.rs
@@ -0,0 +1,186 @@
+// Copyright 2022, The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Functions for creating and collecting atoms.
+
+use crate::aidl::{clone_file, GLOBAL_SERVICE};
+use crate::crosvm::VmMetric;
+use crate::get_calling_uid;
+use android_system_virtualizationcommon::aidl::android::system::virtualizationcommon::DeathReason::DeathReason;
+use android_system_virtualizationservice::aidl::android::system::virtualizationservice::{
+    IVirtualMachine::IVirtualMachine,
+    VirtualMachineAppConfig::{Payload::Payload, VirtualMachineAppConfig},
+    VirtualMachineConfig::VirtualMachineConfig,
+};
+use android_system_virtualizationservice::binder::{Status, Strong};
+use android_system_virtualizationservice_internal::aidl::android::system::virtualizationservice_internal::{
+    AtomVmBooted::AtomVmBooted,
+    AtomVmCreationRequested::AtomVmCreationRequested,
+    AtomVmExited::AtomVmExited,
+};
+use anyhow::{anyhow, Result};
+use binder::ParcelFileDescriptor;
+use log::warn;
+use microdroid_payload_config::VmPayloadConfig;
+use statslog_virtualization_rust::vm_creation_requested;
+use std::thread;
+use std::time::{Duration, SystemTime};
+use zip::ZipArchive;
+
+fn get_apex_list(config: &VirtualMachineAppConfig) -> String {
+    match &config.payload {
+        Payload::PayloadConfig(_) => String::new(),
+        Payload::ConfigPath(config_path) => {
+            let vm_payload_config = get_vm_payload_config(&config.apk, config_path);
+            if let Ok(vm_payload_config) = vm_payload_config {
+                vm_payload_config
+                    .apexes
+                    .iter()
+                    .map(|x| x.name.clone())
+                    .collect::<Vec<String>>()
+                    .join(":")
+            } else {
+                "INFO: Can't get VmPayloadConfig".to_owned()
+            }
+        }
+    }
+}
+
+fn get_vm_payload_config(
+    apk_fd: &Option<ParcelFileDescriptor>,
+    config_path: &str,
+) -> Result<VmPayloadConfig> {
+    let apk = apk_fd.as_ref().ok_or_else(|| anyhow!("APK is none"))?;
+    let apk_file = clone_file(apk)?;
+    let mut apk_zip = ZipArchive::new(&apk_file)?;
+    let config_file = apk_zip.by_name(config_path)?;
+    let vm_payload_config: VmPayloadConfig = serde_json::from_reader(config_file)?;
+    Ok(vm_payload_config)
+}
+
+fn get_duration(vm_start_timestamp: Option<SystemTime>) -> Duration {
+    match vm_start_timestamp {
+        Some(vm_start_timestamp) => vm_start_timestamp.elapsed().unwrap_or_default(),
+        None => Duration::default(),
+    }
+}
+
+/// Write the stats of VMCreation to statsd
+pub fn write_vm_creation_stats(
+    config: &VirtualMachineConfig,
+    is_protected: bool,
+    ret: &binder::Result<Strong<dyn IVirtualMachine>>,
+) {
+    let creation_succeeded;
+    let binder_exception_code;
+    match ret {
+        Ok(_) => {
+            creation_succeeded = true;
+            binder_exception_code = Status::ok().exception_code() as i32;
+        }
+        Err(ref e) => {
+            creation_succeeded = false;
+            binder_exception_code = e.exception_code() as i32;
+        }
+    }
+    let (vm_identifier, config_type, num_cpus, memory_mib, apexes) = match config {
+        VirtualMachineConfig::AppConfig(config) => (
+            config.name.clone(),
+            vm_creation_requested::ConfigType::VirtualMachineAppConfig,
+            config.numCpus,
+            config.memoryMib,
+            get_apex_list(config),
+        ),
+        VirtualMachineConfig::RawConfig(config) => (
+            config.name.clone(),
+            vm_creation_requested::ConfigType::VirtualMachineRawConfig,
+            config.numCpus,
+            config.memoryMib,
+            String::new(),
+        ),
+    };
+
+    let atom = AtomVmCreationRequested {
+        uid: get_calling_uid() as i32,
+        vmIdentifier: vm_identifier,
+        isProtected: is_protected,
+        creationSucceeded: creation_succeeded,
+        binderExceptionCode: binder_exception_code,
+        configType: config_type as i32,
+        numCpus: num_cpus,
+        memoryMib: memory_mib,
+        apexes,
+    };
+
+    thread::spawn(move || {
+        GLOBAL_SERVICE.atomVmCreationRequested(&atom).unwrap_or_else(|e| {
+            warn!("Failed to write VmCreationRequested atom: {e}");
+        });
+    });
+}
+
+/// Write the stats of VM boot to statsd
+/// The function creates a separate thread which waits fro statsd to start to push atom
+pub fn write_vm_booted_stats(
+    uid: i32,
+    vm_identifier: &str,
+    vm_start_timestamp: Option<SystemTime>,
+) {
+    let vm_identifier = vm_identifier.to_owned();
+    let duration = get_duration(vm_start_timestamp);
+
+    let atom = AtomVmBooted {
+        uid,
+        vmIdentifier: vm_identifier,
+        elapsedTimeMillis: duration.as_millis() as i64,
+    };
+
+    thread::spawn(move || {
+        GLOBAL_SERVICE.atomVmBooted(&atom).unwrap_or_else(|e| {
+            warn!("Failed to write VmCreationRequested atom: {e}");
+        });
+    });
+}
+
+/// Write the stats of VM exit to statsd
+/// The function creates a separate thread which waits fro statsd to start to push atom
+pub fn write_vm_exited_stats(
+    uid: i32,
+    vm_identifier: &str,
+    reason: DeathReason,
+    exit_signal: Option<i32>,
+    vm_metric: &VmMetric,
+) {
+    let vm_identifier = vm_identifier.to_owned();
+    let elapsed_time_millis = get_duration(vm_metric.start_timestamp).as_millis() as i64;
+    let guest_time_millis = vm_metric.cpu_guest_time.unwrap_or_default();
+    let rss = vm_metric.rss.unwrap_or_default();
+
+    let atom = AtomVmExited {
+        uid,
+        vmIdentifier: vm_identifier,
+        elapsedTimeMillis: elapsed_time_millis,
+        deathReason: reason,
+        guestTimeMillis: guest_time_millis,
+        rssVmKb: rss.vm,
+        rssCrosvmKb: rss.crosvm,
+        exitSignal: exit_signal.unwrap_or_default(),
+    };
+
+    thread::spawn(move || {
+        GLOBAL_SERVICE.atomVmExited(&atom).unwrap_or_else(|e| {
+            warn!("Failed to write VmExited atom: {e}");
+        });
+    });
+}
diff --git a/virtualizationmanager/src/composite.rs b/virtualizationmanager/src/composite.rs
new file mode 100644
index 0000000..fe17ff4
--- /dev/null
+++ b/virtualizationmanager/src/composite.rs
@@ -0,0 +1,133 @@
+// Copyright 2021, The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Functions for creating a composite disk image.
+
+use android_system_virtualizationservice::aidl::android::system::virtualizationservice::Partition::Partition;
+use anyhow::{anyhow, Context, Error};
+use disk::{
+    create_composite_disk, create_disk_file, ImagePartitionType, PartitionInfo, MAX_NESTING_DEPTH,
+};
+use std::fs::{File, OpenOptions};
+use std::os::unix::io::AsRawFd;
+use std::path::{Path, PathBuf};
+
+/// Constructs a composite disk image for the given list of partitions, and opens it ready to use.
+///
+/// Returns the composite disk image file, and a list of files whose file descriptors must be passed
+/// to any process which wants to use it. This is necessary because the composite image contains
+/// paths of the form `/proc/self/fd/N` for the partition images.
+pub fn make_composite_image(
+    partitions: &[Partition],
+    zero_filler_path: &Path,
+    output_path: &Path,
+    header_path: &Path,
+    footer_path: &Path,
+) -> Result<(File, Vec<File>), Error> {
+    let (partitions, mut files) = convert_partitions(partitions)?;
+
+    let mut composite_image = OpenOptions::new()
+        .create_new(true)
+        .read(true)
+        .write(true)
+        .open(output_path)
+        .with_context(|| format!("Failed to create composite image {:?}", output_path))?;
+    let mut header_file =
+        OpenOptions::new().create_new(true).read(true).write(true).open(header_path).with_context(
+            || format!("Failed to create composite image header {:?}", header_path),
+        )?;
+    let mut footer_file =
+        OpenOptions::new().create_new(true).read(true).write(true).open(footer_path).with_context(
+            || format!("Failed to create composite image header {:?}", footer_path),
+        )?;
+    let zero_filler_file = File::open(zero_filler_path).with_context(|| {
+        format!("Failed to open composite image zero filler {:?}", zero_filler_path)
+    })?;
+
+    create_composite_disk(
+        &partitions,
+        &fd_path_for_file(&zero_filler_file),
+        &fd_path_for_file(&header_file),
+        &mut header_file,
+        &fd_path_for_file(&footer_file),
+        &mut footer_file,
+        &mut composite_image,
+    )?;
+
+    // Re-open the composite image as read-only.
+    let composite_image = File::open(output_path)
+        .with_context(|| format!("Failed to open composite image {:?}", output_path))?;
+
+    files.push(header_file);
+    files.push(footer_file);
+    files.push(zero_filler_file);
+
+    Ok((composite_image, files))
+}
+
+/// Given the AIDL config containing a list of partitions, with a [`ParcelFileDescriptor`] for each
+/// partition, returns the corresponding list of PartitionInfo and the list of files whose file
+/// descriptors must be passed to any process using the composite image.
+fn convert_partitions(partitions: &[Partition]) -> Result<(Vec<PartitionInfo>, Vec<File>), Error> {
+    // File descriptors to pass to child process.
+    let mut files = vec![];
+
+    let partitions = partitions
+        .iter()
+        .map(|partition| {
+            // TODO(b/187187765): This shouldn't be an Option.
+            let file = partition
+                .image
+                .as_ref()
+                .context("Invalid partition image file descriptor")?
+                .as_ref()
+                .try_clone()
+                .context("Failed to clone partition image file descriptor")?;
+            let path = fd_path_for_file(&file);
+            let size = get_partition_size(&file, &path)?;
+            files.push(file);
+
+            Ok(PartitionInfo {
+                label: partition.label.to_owned(),
+                path,
+                partition_type: ImagePartitionType::LinuxFilesystem,
+                writable: partition.writable,
+                size,
+            })
+        })
+        .collect::<Result<_, Error>>()?;
+
+    Ok((partitions, files))
+}
+
+fn fd_path_for_file(file: &File) -> PathBuf {
+    let fd = file.as_raw_fd();
+    format!("/proc/self/fd/{}", fd).into()
+}
+
+/// Find the size of the partition image in the given file by parsing the header.
+///
+/// This will work for raw, QCOW2, composite and Android sparse images.
+fn get_partition_size(partition: &File, path: &Path) -> Result<u64, Error> {
+    // TODO: Use `context` once disk::Error implements std::error::Error.
+    // TODO: Add check for is_sparse_file
+    Ok(create_disk_file(
+        partition.try_clone()?,
+        /* is_sparse_file */ false,
+        MAX_NESTING_DEPTH,
+        path,
+    )
+    .map_err(|e| anyhow!("Failed to open partition image: {}", e))?
+    .get_len()?)
+}
diff --git a/virtualizationmanager/src/crosvm.rs b/virtualizationmanager/src/crosvm.rs
new file mode 100644
index 0000000..98e7d99
--- /dev/null
+++ b/virtualizationmanager/src/crosvm.rs
@@ -0,0 +1,861 @@
+// Copyright 2021, The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Functions for running instances of `crosvm`.
+
+use crate::aidl::{remove_temporary_files, Cid, VirtualMachineCallbacks};
+use crate::atom::write_vm_exited_stats;
+use anyhow::{anyhow, bail, Context, Error, Result};
+use command_fds::CommandFdExt;
+use lazy_static::lazy_static;
+use libc::{sysconf, _SC_CLK_TCK};
+use log::{debug, error, info};
+use semver::{Version, VersionReq};
+use nix::{fcntl::OFlag, unistd::pipe2, unistd::Uid, unistd::User};
+use regex::{Captures, Regex};
+use rustutils::system_properties;
+use shared_child::SharedChild;
+use std::borrow::Cow;
+use std::cmp::max;
+use std::fmt;
+use std::fs::{read_to_string, File};
+use std::io::{self, Read};
+use std::mem;
+use std::num::NonZeroU32;
+use std::os::unix::io::{AsRawFd, RawFd, FromRawFd};
+use std::os::unix::process::ExitStatusExt;
+use std::path::{Path, PathBuf};
+use std::process::{Command, ExitStatus};
+use std::sync::{Arc, Condvar, Mutex};
+use std::time::{Duration, SystemTime};
+use std::thread::{self, JoinHandle};
+use android_system_virtualizationcommon::aidl::android::system::virtualizationcommon::DeathReason::DeathReason;
+use android_system_virtualizationservice::aidl::android::system::virtualizationservice::MemoryTrimLevel::MemoryTrimLevel;
+use android_system_virtualizationservice_internal::aidl::android::system::virtualizationservice_internal::IGlobalVmContext::IGlobalVmContext;
+use binder::Strong;
+use android_system_virtualmachineservice::aidl::android::system::virtualmachineservice::IVirtualMachineService::IVirtualMachineService;
+use tombstoned_client::{TombstonedConnection, DebuggerdDumpType};
+use rpcbinder::RpcServer;
+
+/// external/crosvm
+use base::UnixSeqpacketListener;
+use vm_control::{BalloonControlCommand, VmRequest, VmResponse};
+
+const CROSVM_PATH: &str = "/apex/com.android.virt/bin/crosvm";
+
+/// Version of the platform that crosvm currently implements. The format follows SemVer. This
+/// should be updated when there is a platform change in the crosvm side. Having this value here is
+/// fine because virtualizationservice and crosvm are supposed to be updated together in the virt
+/// APEX.
+const CROSVM_PLATFORM_VERSION: &str = "1.0.0";
+
+/// The exit status which crosvm returns when it has an error starting a VM.
+const CROSVM_START_ERROR_STATUS: i32 = 1;
+/// The exit status which crosvm returns when a VM requests a reboot.
+const CROSVM_REBOOT_STATUS: i32 = 32;
+/// The exit status which crosvm returns when it crashes due to an error.
+const CROSVM_CRASH_STATUS: i32 = 33;
+/// The exit status which crosvm returns when vcpu is stalled.
+const CROSVM_WATCHDOG_REBOOT_STATUS: i32 = 36;
+
+const MILLIS_PER_SEC: i64 = 1000;
+
+const SYSPROP_CUSTOM_PVMFW_PATH: &str = "hypervisor.pvmfw.path";
+
+lazy_static! {
+    /// If the VM doesn't move to the Started state within this amount time, a hang-up error is
+    /// triggered.
+    static ref BOOT_HANGUP_TIMEOUT: Duration = if nested_virt::is_nested_virtualization().unwrap() {
+        // Nested virtualization is slow, so we need a longer timeout.
+        Duration::from_secs(100)
+    } else {
+        Duration::from_secs(10)
+    };
+}
+
+/// Configuration for a VM to run with crosvm.
+#[derive(Debug)]
+pub struct CrosvmConfig {
+    pub cid: Cid,
+    pub name: String,
+    pub bootloader: Option<File>,
+    pub kernel: Option<File>,
+    pub initrd: Option<File>,
+    pub disks: Vec<DiskFile>,
+    pub params: Option<String>,
+    pub protected: bool,
+    pub memory_mib: Option<NonZeroU32>,
+    pub cpus: Option<NonZeroU32>,
+    pub task_profiles: Vec<String>,
+    pub console_fd: Option<File>,
+    pub log_fd: Option<File>,
+    pub ramdump: Option<File>,
+    pub indirect_files: Vec<File>,
+    pub platform_version: VersionReq,
+    pub detect_hangup: bool,
+}
+
+/// A disk image to pass to crosvm for a VM.
+#[derive(Debug)]
+pub struct DiskFile {
+    pub image: File,
+    pub writable: bool,
+}
+
+/// The lifecycle state which the payload in the VM has reported itself to be in.
+///
+/// Note that the order of enum variants is significant; only forward transitions are allowed by
+/// [`VmInstance::update_payload_state`].
+#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
+pub enum PayloadState {
+    Starting,
+    Started,
+    Ready,
+    Finished,
+    Hangup, // Hasn't reached to Ready before timeout expires
+}
+
+/// The current state of the VM itself.
+#[derive(Debug)]
+pub enum VmState {
+    /// The VM has not yet tried to start.
+    NotStarted {
+        ///The configuration needed to start the VM, if it has not yet been started.
+        config: CrosvmConfig,
+    },
+    /// The VM has been started.
+    Running {
+        /// The crosvm child process.
+        child: Arc<SharedChild>,
+        /// The thread waiting for crosvm to finish.
+        monitor_vm_exit_thread: Option<JoinHandle<()>>,
+    },
+    /// The VM died or was killed.
+    Dead,
+    /// The VM failed to start.
+    Failed,
+}
+
+/// RSS values of VM and CrosVM process itself.
+#[derive(Copy, Clone, Debug, Default)]
+pub struct Rss {
+    pub vm: i64,
+    pub crosvm: i64,
+}
+
+/// Metrics regarding the VM.
+#[derive(Debug, Default)]
+pub struct VmMetric {
+    /// Recorded timestamp when the VM is started.
+    pub start_timestamp: Option<SystemTime>,
+    /// Update most recent guest_time periodically from /proc/[crosvm pid]/stat while VM is running.
+    pub cpu_guest_time: Option<i64>,
+    /// Update maximum RSS values periodically from /proc/[crosvm pid]/smaps while VM is running.
+    pub rss: Option<Rss>,
+}
+
+impl VmState {
+    /// Tries to start the VM, if it is in the `NotStarted` state.
+    ///
+    /// Returns an error if the VM is in the wrong state, or fails to start.
+    fn start(&mut self, instance: Arc<VmInstance>) -> Result<(), Error> {
+        let state = mem::replace(self, VmState::Failed);
+        if let VmState::NotStarted { config } = state {
+            let detect_hangup = config.detect_hangup;
+            let (failure_pipe_read, failure_pipe_write) = create_pipe()?;
+
+            // If this fails and returns an error, `self` will be left in the `Failed` state.
+            let child =
+                Arc::new(run_vm(config, &instance.crosvm_control_socket_path, failure_pipe_write)?);
+
+            let instance_monitor_status = instance.clone();
+            let child_monitor_status = child.clone();
+            thread::spawn(move || {
+                instance_monitor_status.clone().monitor_vm_status(child_monitor_status);
+            });
+
+            let child_clone = child.clone();
+            let instance_clone = instance.clone();
+            let monitor_vm_exit_thread = Some(thread::spawn(move || {
+                instance_clone.monitor_vm_exit(child_clone, failure_pipe_read);
+            }));
+
+            if detect_hangup {
+                let child_clone = child.clone();
+                thread::spawn(move || {
+                    instance.monitor_payload_hangup(child_clone);
+                });
+            }
+
+            // If it started correctly, update the state.
+            *self = VmState::Running { child, monitor_vm_exit_thread };
+            Ok(())
+        } else {
+            *self = state;
+            bail!("VM already started or failed")
+        }
+    }
+}
+
+/// Internal struct that holds the handles to globally unique resources of a VM.
+#[derive(Debug)]
+pub struct VmContext {
+    #[allow(dead_code)] // Keeps the global context alive
+    global_context: Strong<dyn IGlobalVmContext>,
+    #[allow(dead_code)] // Keeps the server alive
+    vm_server: RpcServer,
+}
+
+impl VmContext {
+    /// Construct new VmContext.
+    pub fn new(global_context: Strong<dyn IGlobalVmContext>, vm_server: RpcServer) -> VmContext {
+        VmContext { global_context, vm_server }
+    }
+}
+
+/// Information about a particular instance of a VM which may be running.
+#[derive(Debug)]
+pub struct VmInstance {
+    /// The current state of the VM.
+    pub vm_state: Mutex<VmState>,
+    /// Global resources allocated for this VM.
+    #[allow(dead_code)] // Keeps the context alive
+    vm_context: VmContext,
+    /// The CID assigned to the VM for vsock communication.
+    pub cid: Cid,
+    /// Path to crosvm control socket
+    crosvm_control_socket_path: PathBuf,
+    /// The name of the VM.
+    pub name: String,
+    /// Whether the VM is a protected VM.
+    pub protected: bool,
+    /// Directory of temporary files used by the VM while it is running.
+    pub temporary_directory: PathBuf,
+    /// The UID of the process which requested the VM.
+    pub requester_uid: u32,
+    /// The PID of the process which requested the VM. Note that this process may no longer exist
+    /// and the PID may have been reused for a different process, so this should not be trusted.
+    pub requester_debug_pid: i32,
+    /// Callbacks to clients of the VM.
+    pub callbacks: VirtualMachineCallbacks,
+    /// VirtualMachineService binder object for the VM.
+    pub vm_service: Mutex<Option<Strong<dyn IVirtualMachineService>>>,
+    /// Recorded metrics of VM such as timestamp or cpu / memory usage.
+    pub vm_metric: Mutex<VmMetric>,
+    /// The latest lifecycle state which the payload reported itself to be in.
+    payload_state: Mutex<PayloadState>,
+    /// Represents the condition that payload_state was updated
+    payload_state_updated: Condvar,
+    /// The human readable name of requester_uid
+    requester_uid_name: String,
+}
+
+impl fmt::Display for VmInstance {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let adj = if self.protected { "Protected" } else { "Non-protected" };
+        write!(
+            f,
+            "{} virtual machine \"{}\" (owner: {}, cid: {})",
+            adj, self.name, self.requester_uid_name, self.cid
+        )
+    }
+}
+
+impl VmInstance {
+    /// Validates the given config and creates a new `VmInstance` but doesn't start running it.
+    pub fn new(
+        config: CrosvmConfig,
+        temporary_directory: PathBuf,
+        requester_uid: u32,
+        requester_debug_pid: i32,
+        vm_context: VmContext,
+    ) -> Result<VmInstance, Error> {
+        validate_config(&config)?;
+        let cid = config.cid;
+        let name = config.name.clone();
+        let protected = config.protected;
+        let requester_uid_name = User::from_uid(Uid::from_raw(requester_uid))
+            .ok()
+            .flatten()
+            .map_or_else(|| format!("{}", requester_uid), |u| u.name);
+        let instance = VmInstance {
+            vm_state: Mutex::new(VmState::NotStarted { config }),
+            vm_context,
+            cid,
+            crosvm_control_socket_path: temporary_directory.join("crosvm.sock"),
+            name,
+            protected,
+            temporary_directory,
+            requester_uid,
+            requester_debug_pid,
+            callbacks: Default::default(),
+            vm_service: Mutex::new(None),
+            vm_metric: Mutex::new(Default::default()),
+            payload_state: Mutex::new(PayloadState::Starting),
+            payload_state_updated: Condvar::new(),
+            requester_uid_name,
+        };
+        info!("{} created", &instance);
+        Ok(instance)
+    }
+
+    /// Starts an instance of `crosvm` to manage the VM. The `crosvm` instance will be killed when
+    /// the `VmInstance` is dropped.
+    pub fn start(self: &Arc<Self>) -> Result<(), Error> {
+        let mut vm_metric = self.vm_metric.lock().unwrap();
+        vm_metric.start_timestamp = Some(SystemTime::now());
+        let ret = self.vm_state.lock().unwrap().start(self.clone());
+        if ret.is_ok() {
+            info!("{} started", &self);
+        }
+        ret.with_context(|| format!("{} failed to start", &self))
+    }
+
+    /// Monitors the exit of the VM (i.e. termination of the `child` process). When that happens,
+    /// handles the event by updating the state, noityfing the event to clients by calling
+    /// callbacks, and removing temporary files for the VM.
+    fn monitor_vm_exit(&self, child: Arc<SharedChild>, mut failure_pipe_read: File) {
+        let result = child.wait();
+        match &result {
+            Err(e) => error!("Error waiting for crosvm({}) instance to die: {}", child.id(), e),
+            Ok(status) => {
+                info!("crosvm({}) exited with status {}", child.id(), status);
+                if let Some(exit_status_code) = status.code() {
+                    if exit_status_code == CROSVM_WATCHDOG_REBOOT_STATUS {
+                        info!("detected vcpu stall on crosvm");
+                    }
+                }
+            }
+        }
+
+        let mut vm_state = self.vm_state.lock().unwrap();
+        *vm_state = VmState::Dead;
+        // Ensure that the mutex is released before calling the callbacks.
+        drop(vm_state);
+        info!("{} exited", &self);
+
+        // Read the pipe to see if any failure reason is written
+        let mut failure_reason = String::new();
+        match failure_pipe_read.read_to_string(&mut failure_reason) {
+            Err(e) => error!("Error reading VM failure reason from pipe: {}", e),
+            Ok(len) if len > 0 => info!("VM returned failure reason '{}'", &failure_reason),
+            _ => (),
+        };
+
+        // In case of hangup, the pipe doesn't give us any information because the hangup can't be
+        // detected on the VM side (otherwise, it isn't a hangup), but in the
+        // monitor_payload_hangup function below which updates the payload state to Hangup.
+        let failure_reason =
+            if failure_reason.is_empty() && self.payload_state() == PayloadState::Hangup {
+                Cow::from("HANGUP")
+            } else {
+                Cow::from(failure_reason)
+            };
+
+        self.handle_ramdump().unwrap_or_else(|e| error!("Error handling ramdump: {}", e));
+
+        let death_reason = death_reason(&result, &failure_reason);
+        let exit_signal = exit_signal(&result);
+
+        self.callbacks.callback_on_died(self.cid, death_reason);
+
+        let vm_metric = self.vm_metric.lock().unwrap();
+        write_vm_exited_stats(
+            self.requester_uid as i32,
+            &self.name,
+            death_reason,
+            exit_signal,
+            &*vm_metric,
+        );
+
+        // Delete temporary files. The folder itself is removed by VirtualizationServiceInternal.
+        remove_temporary_files(&self.temporary_directory).unwrap_or_else(|e| {
+            error!("Error removing temporary files from {:?}: {}", self.temporary_directory, e);
+        });
+    }
+
+    /// Waits until payload is started, or timeout expires. When timeout occurs, kill
+    /// the VM to prevent indefinite hangup and update the payload_state accordingly.
+    fn monitor_payload_hangup(&self, child: Arc<SharedChild>) {
+        debug!("Starting to monitor hangup for Microdroid({})", child.id());
+        let (_, result) = self
+            .payload_state_updated
+            .wait_timeout_while(self.payload_state.lock().unwrap(), *BOOT_HANGUP_TIMEOUT, |s| {
+                *s < PayloadState::Started
+            })
+            .unwrap();
+        let child_still_running = child.try_wait().ok() == Some(None);
+        if result.timed_out() && child_still_running {
+            error!(
+                "Microdroid({}) failed to start payload within {} secs timeout. Shutting down.",
+                child.id(),
+                BOOT_HANGUP_TIMEOUT.as_secs()
+            );
+            self.update_payload_state(PayloadState::Hangup).unwrap();
+            if let Err(e) = self.kill() {
+                error!("Error stopping timed-out VM with CID {}: {:?}", child.id(), e);
+            }
+        }
+    }
+
+    fn monitor_vm_status(&self, child: Arc<SharedChild>) {
+        let pid = child.id();
+
+        loop {
+            {
+                // Check VM state
+                let vm_state = &*self.vm_state.lock().unwrap();
+                if let VmState::Dead = vm_state {
+                    break;
+                }
+
+                let mut vm_metric = self.vm_metric.lock().unwrap();
+
+                // Get CPU Information
+                if let Ok(guest_time) = get_guest_time(pid) {
+                    vm_metric.cpu_guest_time = Some(guest_time);
+                } else {
+                    error!("Failed to parse /proc/[pid]/stat");
+                }
+
+                // Get Memory Information
+                if let Ok(rss) = get_rss(pid) {
+                    vm_metric.rss = match &vm_metric.rss {
+                        Some(x) => Some(Rss::extract_max(x, &rss)),
+                        None => Some(rss),
+                    }
+                } else {
+                    error!("Failed to parse /proc/[pid]/smaps");
+                }
+            }
+
+            thread::sleep(Duration::from_secs(1));
+        }
+    }
+
+    /// Returns the last reported state of the VM payload.
+    pub fn payload_state(&self) -> PayloadState {
+        *self.payload_state.lock().unwrap()
+    }
+
+    /// Updates the payload state to the given value, if it is a valid state transition.
+    pub fn update_payload_state(&self, new_state: PayloadState) -> Result<(), Error> {
+        let mut state_locked = self.payload_state.lock().unwrap();
+        // Only allow forward transitions, e.g. from starting to started or finished, not back in
+        // the other direction.
+        if new_state > *state_locked {
+            *state_locked = new_state;
+            self.payload_state_updated.notify_all();
+            Ok(())
+        } else {
+            bail!("Invalid payload state transition from {:?} to {:?}", *state_locked, new_state)
+        }
+    }
+
+    /// Kills the crosvm instance, if it is running.
+    pub fn kill(&self) -> Result<(), Error> {
+        let monitor_vm_exit_thread = {
+            let vm_state = &mut *self.vm_state.lock().unwrap();
+            if let VmState::Running { child, monitor_vm_exit_thread } = vm_state {
+                let id = child.id();
+                debug!("Killing crosvm({})", id);
+                // TODO: Talk to crosvm to shutdown cleanly.
+                child.kill().with_context(|| format!("Error killing crosvm({id}) instance"))?;
+                monitor_vm_exit_thread.take()
+            } else {
+                bail!("VM is not running")
+            }
+        };
+
+        // Wait for monitor_vm_exit() to finish. Must release vm_state lock
+        // first, as monitor_vm_exit() takes it as well.
+        monitor_vm_exit_thread.map(JoinHandle::join);
+
+        Ok(())
+    }
+
+    /// Responds to memory-trimming notifications by inflating the virtio
+    /// balloon to reclaim guest memory.
+    pub fn trim_memory(&self, level: MemoryTrimLevel) -> Result<(), Error> {
+        let request = VmRequest::BalloonCommand(BalloonControlCommand::Stats {});
+        match vm_control::client::handle_request(&request, &self.crosvm_control_socket_path) {
+            Ok(VmResponse::BalloonStats { stats, balloon_actual: _ }) => {
+                if let Some(total_memory) = stats.total_memory {
+                    // Reclaim up to 50% of total memory assuming worst case
+                    // most memory is anonymous and must be swapped to zram
+                    // with an approximate 2:1 compression ratio.
+                    let pct = match level {
+                        MemoryTrimLevel::TRIM_MEMORY_RUNNING_CRITICAL => 50,
+                        MemoryTrimLevel::TRIM_MEMORY_RUNNING_LOW => 30,
+                        MemoryTrimLevel::TRIM_MEMORY_RUNNING_MODERATE => 10,
+                        _ => bail!("Invalid memory trim level {:?}", level),
+                    };
+                    let command =
+                        BalloonControlCommand::Adjust { num_bytes: total_memory * pct / 100 };
+                    if let Err(e) = vm_control::client::handle_request(
+                        &VmRequest::BalloonCommand(command),
+                        &self.crosvm_control_socket_path,
+                    ) {
+                        bail!("Error sending balloon adjustment: {:?}", e);
+                    }
+                }
+            }
+            Ok(VmResponse::Err(e)) => {
+                // ENOTSUP is returned when the balloon protocol is not initialised. This
+                // can occur for numerous reasons: Guest is still booting, guest doesn't
+                // support ballooning, host doesn't support ballooning. We don't log or
+                // raise an error in this case: trim is just a hint and we can ignore it.
+                if e.errno() != libc::ENOTSUP {
+                    bail!("Errno return when requesting balloon stats: {}", e.errno())
+                }
+            }
+            e => bail!("Error requesting balloon stats: {:?}", e),
+        }
+        Ok(())
+    }
+
+    /// Checks if ramdump has been created. If so, send it to tombstoned.
+    fn handle_ramdump(&self) -> Result<(), Error> {
+        let ramdump_path = self.temporary_directory.join("ramdump");
+        if std::fs::metadata(&ramdump_path)?.len() > 0 {
+            Self::send_ramdump_to_tombstoned(&ramdump_path)?;
+        }
+        Ok(())
+    }
+
+    fn send_ramdump_to_tombstoned(ramdump_path: &Path) -> Result<(), Error> {
+        let mut input = File::open(ramdump_path)
+            .context(format!("Failed to open ramdump {:?} for reading", ramdump_path))?;
+
+        let pid = std::process::id() as i32;
+        let conn = TombstonedConnection::connect(pid, DebuggerdDumpType::Tombstone)
+            .context("Failed to connect to tombstoned")?;
+        let mut output = conn
+            .text_output
+            .as_ref()
+            .ok_or_else(|| anyhow!("Could not get file to write the tombstones on"))?;
+
+        std::io::copy(&mut input, &mut output).context("Failed to send ramdump to tombstoned")?;
+        info!("Ramdump {:?} sent to tombstoned", ramdump_path);
+
+        conn.notify_completion()?;
+        Ok(())
+    }
+}
+
+impl Rss {
+    fn extract_max(x: &Rss, y: &Rss) -> Rss {
+        Rss { vm: max(x.vm, y.vm), crosvm: max(x.crosvm, y.crosvm) }
+    }
+}
+
+// Get guest time from /proc/[crosvm pid]/stat
+fn get_guest_time(pid: u32) -> Result<i64> {
+    let file = read_to_string(format!("/proc/{}/stat", pid))?;
+    let data_list: Vec<_> = file.split_whitespace().collect();
+
+    // Information about guest_time is at 43th place of the file split with the whitespace.
+    // Example of /proc/[pid]/stat :
+    // 6603 (kworker/104:1H-kblockd) I 2 0 0 0 -1 69238880 0 0 0 0 0 88 0 0 0 -20 1 0 1845 0 0
+    // 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 104 0 0 0 0 0 0 0 0 0 0 0 0 0
+    if data_list.len() < 43 {
+        bail!("Failed to parse command result for getting guest time : {}", file);
+    }
+
+    let guest_time_ticks = data_list[42].parse::<i64>()?;
+    // SAFETY : It just returns an integer about CPU tick information.
+    let ticks_per_sec = unsafe { sysconf(_SC_CLK_TCK) } as i64;
+    Ok(guest_time_ticks * MILLIS_PER_SEC / ticks_per_sec)
+}
+
+// Get rss from /proc/[crosvm pid]/smaps
+fn get_rss(pid: u32) -> Result<Rss> {
+    let file = read_to_string(format!("/proc/{}/smaps", pid))?;
+    let lines: Vec<_> = file.split('\n').collect();
+
+    let mut rss_vm_total = 0i64;
+    let mut rss_crosvm_total = 0i64;
+    let mut is_vm = false;
+    for line in lines {
+        if line.contains("crosvm_guest") {
+            is_vm = true;
+        } else if line.contains("Rss:") {
+            let data_list: Vec<_> = line.split_whitespace().collect();
+            if data_list.len() < 2 {
+                bail!("Failed to parse command result for getting rss :\n{}", line);
+            }
+            let rss = data_list[1].parse::<i64>()?;
+
+            if is_vm {
+                rss_vm_total += rss;
+                is_vm = false;
+            }
+            rss_crosvm_total += rss;
+        }
+    }
+
+    Ok(Rss { vm: rss_vm_total, crosvm: rss_crosvm_total })
+}
+
+fn death_reason(result: &Result<ExitStatus, io::Error>, mut failure_reason: &str) -> DeathReason {
+    if let Some(position) = failure_reason.find('|') {
+        // Separator indicates extra context information is present after the failure name.
+        error!("Failure info: {}", &failure_reason[(position + 1)..]);
+        failure_reason = &failure_reason[..position];
+    }
+    if let Ok(status) = result {
+        match failure_reason {
+            "PVM_FIRMWARE_PUBLIC_KEY_MISMATCH" => {
+                return DeathReason::PVM_FIRMWARE_PUBLIC_KEY_MISMATCH
+            }
+            "PVM_FIRMWARE_INSTANCE_IMAGE_CHANGED" => {
+                return DeathReason::PVM_FIRMWARE_INSTANCE_IMAGE_CHANGED
+            }
+            "BOOTLOADER_PUBLIC_KEY_MISMATCH" => return DeathReason::BOOTLOADER_PUBLIC_KEY_MISMATCH,
+            "BOOTLOADER_INSTANCE_IMAGE_CHANGED" => {
+                return DeathReason::BOOTLOADER_INSTANCE_IMAGE_CHANGED
+            }
+            "MICRODROID_FAILED_TO_CONNECT_TO_VIRTUALIZATION_SERVICE" => {
+                return DeathReason::MICRODROID_FAILED_TO_CONNECT_TO_VIRTUALIZATION_SERVICE
+            }
+            "MICRODROID_PAYLOAD_HAS_CHANGED" => return DeathReason::MICRODROID_PAYLOAD_HAS_CHANGED,
+            "MICRODROID_PAYLOAD_VERIFICATION_FAILED" => {
+                return DeathReason::MICRODROID_PAYLOAD_VERIFICATION_FAILED
+            }
+            "MICRODROID_INVALID_PAYLOAD_CONFIG" => {
+                return DeathReason::MICRODROID_INVALID_PAYLOAD_CONFIG
+            }
+            "MICRODROID_UNKNOWN_RUNTIME_ERROR" => {
+                return DeathReason::MICRODROID_UNKNOWN_RUNTIME_ERROR
+            }
+            "HANGUP" => return DeathReason::HANGUP,
+            _ => {}
+        }
+        match status.code() {
+            None => DeathReason::KILLED,
+            Some(0) => DeathReason::SHUTDOWN,
+            Some(CROSVM_START_ERROR_STATUS) => DeathReason::START_FAILED,
+            Some(CROSVM_REBOOT_STATUS) => DeathReason::REBOOT,
+            Some(CROSVM_CRASH_STATUS) => DeathReason::CRASH,
+            Some(CROSVM_WATCHDOG_REBOOT_STATUS) => DeathReason::WATCHDOG_REBOOT,
+            Some(_) => DeathReason::UNKNOWN,
+        }
+    } else {
+        DeathReason::INFRASTRUCTURE_ERROR
+    }
+}
+
+fn exit_signal(result: &Result<ExitStatus, io::Error>) -> Option<i32> {
+    match result {
+        Ok(status) => status.signal(),
+        Err(_) => None,
+    }
+}
+
+/// Starts an instance of `crosvm` to manage a new VM.
+fn run_vm(
+    config: CrosvmConfig,
+    crosvm_control_socket_path: &Path,
+    failure_pipe_write: File,
+) -> Result<SharedChild, Error> {
+    validate_config(&config)?;
+
+    let mut command = Command::new(CROSVM_PATH);
+    // TODO(qwandor): Remove --disable-sandbox.
+    command
+        .arg("--extended-status")
+        // Configure the logger for the crosvm process to silence logs from the disk crate which
+        // don't provide much information to us (but do spamming us).
+        .arg("--log-level")
+        .arg("info,disk=off")
+        .arg("run")
+        .arg("--disable-sandbox")
+        .arg("--cid")
+        .arg(config.cid.to_string());
+
+    if system_properties::read_bool("hypervisor.memory_reclaim.supported", false)? {
+        command.arg("--balloon-page-reporting");
+    } else {
+        command.arg("--no-balloon");
+    }
+
+    if config.protected {
+        match system_properties::read(SYSPROP_CUSTOM_PVMFW_PATH)? {
+            Some(pvmfw_path) if !pvmfw_path.is_empty() => {
+                command.arg("--protected-vm-with-firmware").arg(pvmfw_path)
+            }
+            _ => command.arg("--protected-vm"),
+        };
+
+        // 3 virtio-console devices + vsock = 4.
+        let virtio_pci_device_count = 4 + config.disks.len();
+        // crosvm virtio queue has 256 entries, so 2 MiB per device (2 pages per entry) should be
+        // enough.
+        let swiotlb_size_mib = 2 * virtio_pci_device_count;
+        command.arg("--swiotlb").arg(swiotlb_size_mib.to_string());
+    }
+
+    if let Some(memory_mib) = config.memory_mib {
+        command.arg("--mem").arg(memory_mib.to_string());
+    }
+
+    if let Some(cpus) = config.cpus {
+        command.arg("--cpus").arg(cpus.to_string());
+    }
+
+    if !config.task_profiles.is_empty() {
+        command.arg("--task-profiles").arg(config.task_profiles.join(","));
+    }
+
+    // Keep track of what file descriptors should be mapped to the crosvm process.
+    let mut preserved_fds = config.indirect_files.iter().map(|file| file.as_raw_fd()).collect();
+
+    // Setup the serial devices.
+    // 1. uart device: used as the output device by bootloaders and as early console by linux
+    // 2. uart device: used to report the reason for the VM failing.
+    // 3. virtio-console device: used as the console device where kmsg is redirected to
+    // 4. virtio-console device: used as the ramdump output
+    // 5. virtio-console device: used as the logcat output
+    //
+    // When [console|log]_fd is not specified, the devices are attached to sink, which means what's
+    // written there is discarded.
+    let console_arg = format_serial_arg(&mut preserved_fds, &config.console_fd);
+    let log_arg = format_serial_arg(&mut preserved_fds, &config.log_fd);
+    let failure_serial_path = add_preserved_fd(&mut preserved_fds, &failure_pipe_write);
+    let ramdump_arg = format_serial_arg(&mut preserved_fds, &config.ramdump);
+
+    // Warning: Adding more serial devices requires you to shift the PCI device ID of the boot
+    // disks in bootconfig.x86_64. This is because x86 crosvm puts serial devices and the block
+    // devices in the same PCI bus and serial devices comes before the block devices. Arm crosvm
+    // doesn't have the issue.
+    // /dev/ttyS0
+    command.arg(format!("--serial={},hardware=serial,num=1", &console_arg));
+    // /dev/ttyS1
+    command.arg(format!("--serial=type=file,path={},hardware=serial,num=2", &failure_serial_path));
+    // /dev/hvc0
+    command.arg(format!("--serial={},hardware=virtio-console,num=1", &console_arg));
+    // /dev/hvc1
+    command.arg(format!("--serial={},hardware=virtio-console,num=2", &ramdump_arg));
+    // /dev/hvc2
+    command.arg(format!("--serial={},hardware=virtio-console,num=3", &log_arg));
+
+    if let Some(bootloader) = &config.bootloader {
+        command.arg("--bios").arg(add_preserved_fd(&mut preserved_fds, bootloader));
+    }
+
+    if let Some(initrd) = &config.initrd {
+        command.arg("--initrd").arg(add_preserved_fd(&mut preserved_fds, initrd));
+    }
+
+    if let Some(params) = &config.params {
+        command.arg("--params").arg(params);
+    }
+
+    for disk in &config.disks {
+        command
+            .arg(if disk.writable { "--rwdisk" } else { "--disk" })
+            .arg(add_preserved_fd(&mut preserved_fds, &disk.image));
+    }
+
+    if let Some(kernel) = &config.kernel {
+        command.arg(add_preserved_fd(&mut preserved_fds, kernel));
+    }
+
+    let control_server_socket = UnixSeqpacketListener::bind(crosvm_control_socket_path)
+        .context("failed to create control server")?;
+    command.arg("--socket").arg(add_preserved_fd(&mut preserved_fds, &control_server_socket));
+
+    debug!("Preserving FDs {:?}", preserved_fds);
+    command.preserved_fds(preserved_fds);
+
+    command.arg("--params").arg("crashkernel=17M");
+    print_crosvm_args(&command);
+
+    let result = SharedChild::spawn(&mut command)?;
+    debug!("Spawned crosvm({}).", result.id());
+    Ok(result)
+}
+
+/// Ensure that the configuration has a valid combination of fields set, or return an error if not.
+fn validate_config(config: &CrosvmConfig) -> Result<(), Error> {
+    if config.bootloader.is_none() && config.kernel.is_none() {
+        bail!("VM must have either a bootloader or a kernel image.");
+    }
+    if config.bootloader.is_some() && (config.kernel.is_some() || config.initrd.is_some()) {
+        bail!("Can't have both bootloader and kernel/initrd image.");
+    }
+    let version = Version::parse(CROSVM_PLATFORM_VERSION).unwrap();
+    if !config.platform_version.matches(&version) {
+        bail!(
+            "Incompatible platform version. The config is compatible with platform version(s) \
+              {}, but the actual platform version is {}",
+            config.platform_version,
+            version
+        );
+    }
+
+    Ok(())
+}
+
+/// Print arguments of the crosvm command. In doing so, /proc/self/fd/XX is annotated with the
+/// actual file path if the FD is backed by a regular file. If not, the /proc path is printed
+/// unmodified.
+fn print_crosvm_args(command: &Command) {
+    let re = Regex::new(r"/proc/self/fd/[\d]+").unwrap();
+    info!(
+        "Running crosvm with args: {:?}",
+        command
+            .get_args()
+            .map(|s| s.to_string_lossy())
+            .map(|s| {
+                re.replace_all(&s, |caps: &Captures| {
+                    let path = &caps[0];
+                    if let Ok(realpath) = std::fs::canonicalize(path) {
+                        format!("{} ({})", path, realpath.to_string_lossy())
+                    } else {
+                        path.to_owned()
+                    }
+                })
+                .into_owned()
+            })
+            .collect::<Vec<_>>()
+    );
+}
+
+/// Adds the file descriptor for `file` to `preserved_fds`, and returns a string of the form
+/// "/proc/self/fd/N" where N is the file descriptor.
+fn add_preserved_fd(preserved_fds: &mut Vec<RawFd>, file: &dyn AsRawFd) -> String {
+    let fd = file.as_raw_fd();
+    preserved_fds.push(fd);
+    format!("/proc/self/fd/{}", fd)
+}
+
+/// Adds the file descriptor for `file` (if any) to `preserved_fds`, and returns the appropriate
+/// string for a crosvm `--serial` flag. If `file` is none, creates a dummy sink device.
+fn format_serial_arg(preserved_fds: &mut Vec<RawFd>, file: &Option<File>) -> String {
+    if let Some(file) = file {
+        format!("type=file,path={}", add_preserved_fd(preserved_fds, file))
+    } else {
+        "type=sink".to_string()
+    }
+}
+
+/// Creates a new pipe with the `O_CLOEXEC` flag set, and returns the read side and write side.
+fn create_pipe() -> Result<(File, File), Error> {
+    let (raw_read, raw_write) = pipe2(OFlag::O_CLOEXEC)?;
+    // SAFETY: We are the sole owners of these fds as they were just created.
+    let read_fd = unsafe { File::from_raw_fd(raw_read) };
+    let write_fd = unsafe { File::from_raw_fd(raw_write) };
+    Ok((read_fd, write_fd))
+}
diff --git a/virtualizationmanager/src/main.rs b/virtualizationmanager/src/main.rs
new file mode 100644
index 0000000..dca64cb
--- /dev/null
+++ b/virtualizationmanager/src/main.rs
@@ -0,0 +1,145 @@
+// Copyright 2022, The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Android Virtualization Manager
+
+mod aidl;
+mod atom;
+mod composite;
+mod crosvm;
+mod payload;
+mod selinux;
+
+use crate::aidl::{GLOBAL_SERVICE, VirtualizationService};
+use android_system_virtualizationservice::aidl::android::system::virtualizationservice::IVirtualizationService::BnVirtualizationService;
+use anyhow::{bail, Context};
+use binder::{BinderFeatures, ProcessState};
+use lazy_static::lazy_static;
+use log::{info, Level};
+use rpcbinder::{FileDescriptorTransportMode, RpcServer};
+use std::os::unix::io::{FromRawFd, OwnedFd, RawFd};
+use clap::Parser;
+use nix::fcntl::{fcntl, F_GETFD, F_SETFD, FdFlag};
+use nix::unistd::{Pid, Uid};
+use std::os::unix::raw::{pid_t, uid_t};
+use rustutils::system_properties;
+
+const LOG_TAG: &str = "virtmgr";
+
+lazy_static! {
+    static ref PID_PARENT: Pid = Pid::parent();
+    static ref UID_CURRENT: Uid = Uid::current();
+}
+
+fn get_calling_pid() -> pid_t {
+    // The caller is the parent of this process.
+    PID_PARENT.as_raw()
+}
+
+fn get_calling_uid() -> uid_t {
+    // The caller and this process share the same UID.
+    UID_CURRENT.as_raw()
+}
+
+#[derive(Parser)]
+struct Args {
+    /// File descriptor inherited from the caller to run RpcBinder server on.
+    /// This should be one end of a socketpair() compatible with RpcBinder's
+    /// UDS bootstrap transport.
+    #[clap(long)]
+    rpc_server_fd: RawFd,
+    /// File descriptor inherited from the caller to signal RpcBinder server
+    /// readiness. This should be one end of pipe() and the caller should be
+    /// waiting for HUP on the other end.
+    #[clap(long)]
+    ready_fd: RawFd,
+}
+
+fn take_fd_ownership(raw_fd: RawFd, owned_fds: &mut Vec<RawFd>) -> Result<OwnedFd, anyhow::Error> {
+    // Basic check that the integer value does correspond to a file descriptor.
+    fcntl(raw_fd, F_GETFD).with_context(|| format!("Invalid file descriptor {raw_fd}"))?;
+
+    // The file descriptor had CLOEXEC disabled to be inherited from the parent.
+    // Re-enable it to make sure it is not accidentally inherited further.
+    fcntl(raw_fd, F_SETFD(FdFlag::FD_CLOEXEC))
+        .with_context(|| format!("Could not set CLOEXEC on file descriptor {raw_fd}"))?;
+
+    // Creating OwnedFd for stdio FDs is not safe.
+    if [libc::STDIN_FILENO, libc::STDOUT_FILENO, libc::STDERR_FILENO].contains(&raw_fd) {
+        bail!("File descriptor {raw_fd} is standard I/O descriptor");
+    }
+
+    // Reject RawFds that already have a corresponding OwnedFd.
+    if owned_fds.contains(&raw_fd) {
+        bail!("File descriptor {raw_fd} already owned");
+    }
+    owned_fds.push(raw_fd);
+
+    // SAFETY - Initializing OwnedFd for a RawFd provided in cmdline arguments.
+    // We checked that the integer value corresponds to a valid FD and that this
+    // is the first argument to claim its ownership.
+    Ok(unsafe { OwnedFd::from_raw_fd(raw_fd) })
+}
+
+fn is_property_set(name: &str) -> bool {
+    system_properties::read_bool(name, false)
+        .unwrap_or_else(|e| panic!("Failed to read {name}: {e:?}"))
+}
+
+fn main() {
+    android_logger::init_once(
+        android_logger::Config::default()
+            .with_tag(LOG_TAG)
+            .with_min_level(Level::Info)
+            .with_log_id(android_logger::LogId::System),
+    );
+
+    let non_protected_vm_supported = is_property_set("ro.boot.hypervisor.vm.supported");
+    let protected_vm_supported = is_property_set("ro.boot.hypervisor.protected_vm.supported");
+    if !non_protected_vm_supported && !protected_vm_supported {
+        // This should never happen, it indicates a misconfigured device where the virt APEX
+        // is present but VMs are not supported. If it does happen, fail fast to avoid wasting
+        // resources trying.
+        panic!("Device doesn't support protected or unprotected VMs");
+    }
+
+    let args = Args::parse();
+
+    let mut owned_fds = vec![];
+    let rpc_server_fd = take_fd_ownership(args.rpc_server_fd, &mut owned_fds)
+        .expect("Failed to take ownership of rpc_server_fd");
+    let ready_fd = take_fd_ownership(args.ready_fd, &mut owned_fds)
+        .expect("Failed to take ownership of ready_fd");
+
+    // Start thread pool for kernel Binder connection to VirtualizationServiceInternal.
+    ProcessState::start_thread_pool();
+
+    GLOBAL_SERVICE.removeMemlockRlimit().expect("Failed to remove memlock rlimit");
+
+    let service = VirtualizationService::init();
+    let service =
+        BnVirtualizationService::new_binder(service, BinderFeatures::default()).as_binder();
+
+    let server = RpcServer::new_unix_domain_bootstrap(service, rpc_server_fd)
+        .expect("Failed to start RpcServer");
+    server.set_supported_file_descriptor_transport_modes(&[FileDescriptorTransportMode::Unix]);
+
+    info!("Started VirtualizationService RpcServer. Ready to accept connections");
+
+    // Signal readiness to the caller by closing our end of the pipe.
+    drop(ready_fd);
+
+    server.join();
+    info!("Shutting down VirtualizationService RpcServer");
+}
diff --git a/virtualizationmanager/src/payload.rs b/virtualizationmanager/src/payload.rs
new file mode 100644
index 0000000..02e8f8e
--- /dev/null
+++ b/virtualizationmanager/src/payload.rs
@@ -0,0 +1,684 @@
+// Copyright 2021, The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Payload disk image
+
+use android_system_virtualizationservice::aidl::android::system::virtualizationservice::{
+    DiskImage::DiskImage,
+    Partition::Partition,
+    VirtualMachineAppConfig::DebugLevel::DebugLevel,
+    VirtualMachineAppConfig::{Payload::Payload, VirtualMachineAppConfig},
+    VirtualMachineRawConfig::VirtualMachineRawConfig,
+};
+use anyhow::{anyhow, bail, Context, Result};
+use binder::{wait_for_interface, ParcelFileDescriptor};
+use log::{info, warn};
+use microdroid_metadata::{ApexPayload, ApkPayload, Metadata, PayloadConfig, PayloadMetadata};
+use microdroid_payload_config::{ApexConfig, VmPayloadConfig};
+use once_cell::sync::OnceCell;
+use packagemanager_aidl::aidl::android::content::pm::{
+    IPackageManagerNative::IPackageManagerNative, StagedApexInfo::StagedApexInfo,
+};
+use regex::Regex;
+use serde::Deserialize;
+use serde_xml_rs::from_reader;
+use std::collections::HashSet;
+use std::fs::{metadata, File, OpenOptions};
+use std::path::{Path, PathBuf};
+use std::process::Command;
+use std::time::SystemTime;
+use vmconfig::open_parcel_file;
+
+/// The list of APEXes which microdroid requires.
+// TODO(b/192200378) move this to microdroid.json?
+const MICRODROID_REQUIRED_APEXES: [&str; 1] = ["com.android.os.statsd"];
+const MICRODROID_REQUIRED_APEXES_DEBUG: [&str; 1] = ["com.android.adbd"];
+
+const APEX_INFO_LIST_PATH: &str = "/apex/apex-info-list.xml";
+
+const PACKAGE_MANAGER_NATIVE_SERVICE: &str = "package_native";
+
+/// Represents the list of APEXes
+#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
+struct ApexInfoList {
+    #[serde(rename = "apex-info")]
+    list: Vec<ApexInfo>,
+}
+
+#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq)]
+struct ApexInfo {
+    #[serde(rename = "moduleName")]
+    name: String,
+    #[serde(rename = "versionCode")]
+    version: u64,
+    #[serde(rename = "modulePath")]
+    path: PathBuf,
+
+    #[serde(default)]
+    has_classpath_jar: bool,
+
+    // The field claims to be milliseconds but is actually seconds.
+    #[serde(rename = "lastUpdateMillis")]
+    last_update_seconds: u64,
+
+    #[serde(rename = "isFactory")]
+    is_factory: bool,
+
+    #[serde(rename = "isActive")]
+    is_active: bool,
+
+    #[serde(rename = "provideSharedApexLibs")]
+    provide_shared_apex_libs: bool,
+}
+
+impl ApexInfoList {
+    /// Loads ApexInfoList
+    fn load() -> Result<&'static ApexInfoList> {
+        static INSTANCE: OnceCell<ApexInfoList> = OnceCell::new();
+        INSTANCE.get_or_try_init(|| {
+            let apex_info_list = File::open(APEX_INFO_LIST_PATH)
+                .context(format!("Failed to open {}", APEX_INFO_LIST_PATH))?;
+            let mut apex_info_list: ApexInfoList = from_reader(apex_info_list)
+                .context(format!("Failed to parse {}", APEX_INFO_LIST_PATH))?;
+
+            // For active APEXes, we run derive_classpath and parse its output to see if it
+            // contributes to the classpath(s). (This allows us to handle any new classpath env
+            // vars seamlessly.)
+            let classpath_vars = run_derive_classpath()?;
+            let classpath_apexes = find_apex_names_in_classpath(&classpath_vars)?;
+
+            for apex_info in apex_info_list.list.iter_mut() {
+                apex_info.has_classpath_jar = classpath_apexes.contains(&apex_info.name);
+            }
+
+            Ok(apex_info_list)
+        })
+    }
+
+    // Override apex info with the staged one
+    fn override_staged_apex(&mut self, staged_apex_info: &StagedApexInfo) -> Result<()> {
+        let mut need_to_add: Option<ApexInfo> = None;
+        for apex_info in self.list.iter_mut() {
+            if staged_apex_info.moduleName == apex_info.name {
+                if apex_info.is_active && apex_info.is_factory {
+                    // Copy the entry to the end as factory/non-active after the loop
+                    // to keep the factory version. Typically this step is unncessary,
+                    // but some apexes (like sharedlibs) need to be kept even if it's inactive.
+                    need_to_add.replace(ApexInfo { is_active: false, ..apex_info.clone() });
+                    // And make this one as non-factory. Note that this one is still active
+                    // and overridden right below.
+                    apex_info.is_factory = false;
+                }
+                // Active one is overridden with the staged one.
+                if apex_info.is_active {
+                    apex_info.version = staged_apex_info.versionCode as u64;
+                    apex_info.path = PathBuf::from(&staged_apex_info.diskImagePath);
+                    apex_info.has_classpath_jar = staged_apex_info.hasClassPathJars;
+                    apex_info.last_update_seconds = last_updated(&apex_info.path)?;
+                }
+            }
+        }
+        if let Some(info) = need_to_add {
+            self.list.push(info);
+        }
+        Ok(())
+    }
+}
+
+fn last_updated<P: AsRef<Path>>(path: P) -> Result<u64> {
+    let metadata = metadata(path)?;
+    Ok(metadata.modified()?.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
+}
+
+impl ApexInfo {
+    fn matches(&self, apex_config: &ApexConfig) -> bool {
+        // Match with pseudo name "{CLASSPATH}" which represents APEXes contributing
+        // to any derive_classpath environment variable
+        if apex_config.name == "{CLASSPATH}" && self.has_classpath_jar {
+            return true;
+        }
+        if apex_config.name == self.name {
+            return true;
+        }
+        false
+    }
+}
+
+struct PackageManager {
+    apex_info_list: &'static ApexInfoList,
+}
+
+impl PackageManager {
+    fn new() -> Result<Self> {
+        let apex_info_list = ApexInfoList::load()?;
+        Ok(Self { apex_info_list })
+    }
+
+    fn get_apex_list(&self, prefer_staged: bool) -> Result<ApexInfoList> {
+        // get the list of active apexes
+        let mut list = self.apex_info_list.clone();
+        // When prefer_staged, we override ApexInfo by consulting "package_native"
+        if prefer_staged {
+            let pm =
+                wait_for_interface::<dyn IPackageManagerNative>(PACKAGE_MANAGER_NATIVE_SERVICE)
+                    .context("Failed to get service when prefer_staged is set.")?;
+            let staged =
+                pm.getStagedApexModuleNames().context("getStagedApexModuleNames failed")?;
+            for name in staged {
+                if let Some(staged_apex_info) =
+                    pm.getStagedApexInfo(&name).context("getStagedApexInfo failed")?
+                {
+                    list.override_staged_apex(&staged_apex_info)?;
+                }
+            }
+        }
+        Ok(list)
+    }
+}
+
+fn make_metadata_file(
+    app_config: &VirtualMachineAppConfig,
+    apex_infos: &[&ApexInfo],
+    temporary_directory: &Path,
+) -> Result<ParcelFileDescriptor> {
+    let payload_metadata = match &app_config.payload {
+        Payload::PayloadConfig(payload_config) => PayloadMetadata::config(PayloadConfig {
+            payload_binary_name: payload_config.payloadBinaryName.clone(),
+            ..Default::default()
+        }),
+        Payload::ConfigPath(config_path) => {
+            PayloadMetadata::config_path(format!("/mnt/apk/{}", config_path))
+        }
+    };
+
+    let metadata = Metadata {
+        version: 1,
+        apexes: apex_infos
+            .iter()
+            .enumerate()
+            .map(|(i, apex_info)| {
+                Ok(ApexPayload {
+                    name: apex_info.name.clone(),
+                    partition_name: format!("microdroid-apex-{}", i),
+                    last_update_seconds: apex_info.last_update_seconds,
+                    is_factory: apex_info.is_factory,
+                    ..Default::default()
+                })
+            })
+            .collect::<Result<_>>()?,
+        apk: Some(ApkPayload {
+            name: "apk".to_owned(),
+            payload_partition_name: "microdroid-apk".to_owned(),
+            idsig_partition_name: "microdroid-apk-idsig".to_owned(),
+            ..Default::default()
+        })
+        .into(),
+        payload: Some(payload_metadata),
+        ..Default::default()
+    };
+
+    // Write metadata to file.
+    let metadata_path = temporary_directory.join("metadata");
+    let mut metadata_file = OpenOptions::new()
+        .create_new(true)
+        .read(true)
+        .write(true)
+        .open(&metadata_path)
+        .with_context(|| format!("Failed to open metadata file {:?}", metadata_path))?;
+    microdroid_metadata::write_metadata(&metadata, &mut metadata_file)?;
+
+    // Re-open the metadata file as read-only.
+    open_parcel_file(&metadata_path, false)
+}
+
+/// Creates a DiskImage with partitions:
+///   payload-metadata: metadata
+///   microdroid-apex-0: apex 0
+///   microdroid-apex-1: apex 1
+///   ..
+///   microdroid-apk: apk
+///   microdroid-apk-idsig: idsig
+///   extra-apk-0:   additional apk 0
+///   extra-idsig-0: additional idsig 0
+///   extra-apk-1:   additional apk 1
+///   extra-idsig-1: additional idsig 1
+///   ..
+fn make_payload_disk(
+    app_config: &VirtualMachineAppConfig,
+    apk_file: File,
+    idsig_file: File,
+    vm_payload_config: &VmPayloadConfig,
+    temporary_directory: &Path,
+) -> Result<DiskImage> {
+    if vm_payload_config.extra_apks.len() != app_config.extraIdsigs.len() {
+        bail!(
+            "payload config has {} apks, but app config has {} idsigs",
+            vm_payload_config.extra_apks.len(),
+            app_config.extraIdsigs.len()
+        );
+    }
+
+    let pm = PackageManager::new()?;
+    let apex_list = pm.get_apex_list(vm_payload_config.prefer_staged)?;
+
+    // collect APEXes from config
+    let mut apex_infos =
+        collect_apex_infos(&apex_list, &vm_payload_config.apexes, app_config.debugLevel);
+
+    // Pass sorted list of apexes. Sorting key shouldn't use `path` because it will change after
+    // reboot with prefer_staged. `last_update_seconds` is added to distinguish "samegrade"
+    // update.
+    apex_infos.sort_by_key(|info| (&info.name, &info.version, &info.last_update_seconds));
+    info!("Microdroid payload APEXes: {:?}", apex_infos.iter().map(|ai| &ai.name));
+
+    let metadata_file = make_metadata_file(app_config, &apex_infos, temporary_directory)?;
+    // put metadata at the first partition
+    let mut partitions = vec![Partition {
+        label: "payload-metadata".to_owned(),
+        image: Some(metadata_file),
+        writable: false,
+    }];
+
+    for (i, apex_info) in apex_infos.iter().enumerate() {
+        let apex_file = open_parcel_file(&apex_info.path, false)?;
+        partitions.push(Partition {
+            label: format!("microdroid-apex-{}", i),
+            image: Some(apex_file),
+            writable: false,
+        });
+    }
+    partitions.push(Partition {
+        label: "microdroid-apk".to_owned(),
+        image: Some(ParcelFileDescriptor::new(apk_file)),
+        writable: false,
+    });
+    partitions.push(Partition {
+        label: "microdroid-apk-idsig".to_owned(),
+        image: Some(ParcelFileDescriptor::new(idsig_file)),
+        writable: false,
+    });
+
+    // we've already checked that extra_apks and extraIdsigs are in the same size.
+    let extra_apks = &vm_payload_config.extra_apks;
+    let extra_idsigs = &app_config.extraIdsigs;
+    for (i, (extra_apk, extra_idsig)) in extra_apks.iter().zip(extra_idsigs.iter()).enumerate() {
+        partitions.push(Partition {
+            label: format!("extra-apk-{}", i),
+            image: Some(ParcelFileDescriptor::new(
+                File::open(PathBuf::from(&extra_apk.path)).with_context(|| {
+                    format!("Failed to open the extra apk #{} {}", i, extra_apk.path)
+                })?,
+            )),
+            writable: false,
+        });
+
+        partitions.push(Partition {
+            label: format!("extra-idsig-{}", i),
+            image: Some(ParcelFileDescriptor::new(
+                extra_idsig
+                    .as_ref()
+                    .try_clone()
+                    .with_context(|| format!("Failed to clone the extra idsig #{}", i))?,
+            )),
+            writable: false,
+        });
+    }
+
+    Ok(DiskImage { image: None, partitions, writable: false })
+}
+
+fn run_derive_classpath() -> Result<String> {
+    let result = Command::new("/apex/com.android.sdkext/bin/derive_classpath")
+        .arg("/proc/self/fd/1")
+        .output()
+        .context("Failed to run derive_classpath")?;
+
+    if !result.status.success() {
+        bail!("derive_classpath returned {}", result.status);
+    }
+
+    String::from_utf8(result.stdout).context("Converting derive_classpath output")
+}
+
+fn find_apex_names_in_classpath(classpath_vars: &str) -> Result<HashSet<String>> {
+    // Each line should be in the format "export <var name> <paths>", where <paths> is a
+    // colon-separated list of paths to JARs. We don't care about the var names, and we're only
+    // interested in paths that look like "/apex/<apex name>/<anything>" so we know which APEXes
+    // contribute to at least one var.
+    let mut apexes = HashSet::new();
+
+    let pattern = Regex::new(r"^export [^ ]+ ([^ ]+)$").context("Failed to construct Regex")?;
+    for line in classpath_vars.lines() {
+        if let Some(captures) = pattern.captures(line) {
+            if let Some(paths) = captures.get(1) {
+                apexes.extend(paths.as_str().split(':').filter_map(|path| {
+                    let path = path.strip_prefix("/apex/")?;
+                    Some(path[..path.find('/')?].to_owned())
+                }));
+                continue;
+            }
+        }
+        warn!("Malformed line from derive_classpath: {}", line);
+    }
+
+    Ok(apexes)
+}
+
+// Collect ApexInfos from VM config
+fn collect_apex_infos<'a>(
+    apex_list: &'a ApexInfoList,
+    apex_configs: &[ApexConfig],
+    debug_level: DebugLevel,
+) -> Vec<&'a ApexInfo> {
+    let mut additional_apexes: Vec<&str> = MICRODROID_REQUIRED_APEXES.to_vec();
+    if debug_level != DebugLevel::NONE {
+        additional_apexes.extend(MICRODROID_REQUIRED_APEXES_DEBUG.to_vec());
+    }
+
+    apex_list
+        .list
+        .iter()
+        .filter(|ai| {
+            apex_configs.iter().any(|cfg| ai.matches(cfg) && ai.is_active)
+                || additional_apexes.iter().any(|name| name == &ai.name && ai.is_active)
+                || ai.provide_shared_apex_libs
+        })
+        .collect()
+}
+
+pub fn add_microdroid_system_images(
+    config: &VirtualMachineAppConfig,
+    instance_file: File,
+    storage_image: Option<File>,
+    vm_config: &mut VirtualMachineRawConfig,
+) -> Result<()> {
+    let debug_suffix = match config.debugLevel {
+        DebugLevel::NONE => "normal",
+        DebugLevel::FULL => "debuggable",
+        _ => return Err(anyhow!("unsupported debug level: {:?}", config.debugLevel)),
+    };
+    let initrd = format!("/apex/com.android.virt/etc/microdroid_initrd_{}.img", debug_suffix);
+    vm_config.initrd = Some(open_parcel_file(Path::new(&initrd), false)?);
+
+    let mut writable_partitions = vec![Partition {
+        label: "vm-instance".to_owned(),
+        image: Some(ParcelFileDescriptor::new(instance_file)),
+        writable: true,
+    }];
+
+    if let Some(file) = storage_image {
+        writable_partitions.push(Partition {
+            label: "encryptedstore".to_owned(),
+            image: Some(ParcelFileDescriptor::new(file)),
+            writable: true,
+        });
+    }
+
+    vm_config.disks.push(DiskImage {
+        image: None,
+        partitions: writable_partitions,
+        writable: true,
+    });
+
+    Ok(())
+}
+
+pub fn add_microdroid_payload_images(
+    config: &VirtualMachineAppConfig,
+    temporary_directory: &Path,
+    apk_file: File,
+    idsig_file: File,
+    vm_payload_config: &VmPayloadConfig,
+    vm_config: &mut VirtualMachineRawConfig,
+) -> Result<()> {
+    vm_config.disks.push(make_payload_disk(
+        config,
+        apk_file,
+        idsig_file,
+        vm_payload_config,
+        temporary_directory,
+    )?);
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn test_find_apex_names_in_classpath() {
+        let vars = r#"
+export FOO /apex/unterminated
+export BAR /apex/valid.apex/something
+wrong
+export EMPTY
+export OTHER /foo/bar:/baz:/apex/second.valid.apex/:gibberish:"#;
+        let expected = vec!["valid.apex", "second.valid.apex"];
+        let expected: HashSet<_> = expected.into_iter().map(ToString::to_string).collect();
+
+        assert_eq!(find_apex_names_in_classpath(vars).unwrap(), expected);
+    }
+
+    #[test]
+    fn test_collect_apexes() {
+        let apex_info_list = ApexInfoList {
+            list: vec![
+                ApexInfo {
+                    // 0
+                    name: "com.android.adbd".to_string(),
+                    path: PathBuf::from("adbd"),
+                    has_classpath_jar: false,
+                    last_update_seconds: 12345678,
+                    is_factory: true,
+                    is_active: true,
+                    ..Default::default()
+                },
+                ApexInfo {
+                    // 1
+                    name: "com.android.os.statsd".to_string(),
+                    path: PathBuf::from("statsd"),
+                    has_classpath_jar: false,
+                    last_update_seconds: 12345678,
+                    is_factory: true,
+                    is_active: false,
+                    ..Default::default()
+                },
+                ApexInfo {
+                    // 2
+                    name: "com.android.os.statsd".to_string(),
+                    path: PathBuf::from("statsd/updated"),
+                    has_classpath_jar: false,
+                    last_update_seconds: 12345678 + 1,
+                    is_factory: false,
+                    is_active: true,
+                    ..Default::default()
+                },
+                ApexInfo {
+                    // 3
+                    name: "no_classpath".to_string(),
+                    path: PathBuf::from("no_classpath"),
+                    has_classpath_jar: false,
+                    last_update_seconds: 12345678,
+                    is_factory: true,
+                    is_active: true,
+                    ..Default::default()
+                },
+                ApexInfo {
+                    // 4
+                    name: "has_classpath".to_string(),
+                    path: PathBuf::from("has_classpath"),
+                    has_classpath_jar: true,
+                    last_update_seconds: 87654321,
+                    is_factory: true,
+                    is_active: false,
+                    ..Default::default()
+                },
+                ApexInfo {
+                    // 5
+                    name: "has_classpath".to_string(),
+                    path: PathBuf::from("has_classpath/updated"),
+                    has_classpath_jar: true,
+                    last_update_seconds: 87654321 + 1,
+                    is_factory: false,
+                    is_active: true,
+                    ..Default::default()
+                },
+                ApexInfo {
+                    // 6
+                    name: "apex-foo".to_string(),
+                    path: PathBuf::from("apex-foo"),
+                    has_classpath_jar: false,
+                    last_update_seconds: 87654321,
+                    is_factory: true,
+                    is_active: false,
+                    ..Default::default()
+                },
+                ApexInfo {
+                    // 7
+                    name: "apex-foo".to_string(),
+                    path: PathBuf::from("apex-foo/updated"),
+                    has_classpath_jar: false,
+                    last_update_seconds: 87654321 + 1,
+                    is_factory: false,
+                    is_active: true,
+                    ..Default::default()
+                },
+                ApexInfo {
+                    // 8
+                    name: "sharedlibs".to_string(),
+                    path: PathBuf::from("apex-foo"),
+                    last_update_seconds: 87654321,
+                    is_factory: true,
+                    provide_shared_apex_libs: true,
+                    ..Default::default()
+                },
+                ApexInfo {
+                    // 9
+                    name: "sharedlibs".to_string(),
+                    path: PathBuf::from("apex-foo/updated"),
+                    last_update_seconds: 87654321 + 1,
+                    is_active: true,
+                    provide_shared_apex_libs: true,
+                    ..Default::default()
+                },
+            ],
+        };
+        let apex_configs = vec![
+            ApexConfig { name: "apex-foo".to_string() },
+            ApexConfig { name: "{CLASSPATH}".to_string() },
+        ];
+        assert_eq!(
+            collect_apex_infos(&apex_info_list, &apex_configs, DebugLevel::FULL),
+            vec![
+                // Pass active/required APEXes
+                &apex_info_list.list[0],
+                &apex_info_list.list[2],
+                // Pass active APEXes specified in the config
+                &apex_info_list.list[5],
+                &apex_info_list.list[7],
+                // Pass both preinstalled(inactive) and updated(active) for "sharedlibs" APEXes
+                &apex_info_list.list[8],
+                &apex_info_list.list[9],
+            ]
+        );
+    }
+
+    #[test]
+    fn test_prefer_staged_apex_with_factory_active_apex() {
+        let single_apex = ApexInfo {
+            name: "foo".to_string(),
+            version: 1,
+            path: PathBuf::from("foo.apex"),
+            is_factory: true,
+            is_active: true,
+            ..Default::default()
+        };
+        let mut apex_info_list = ApexInfoList { list: vec![single_apex.clone()] };
+
+        let staged = NamedTempFile::new().unwrap();
+        apex_info_list
+            .override_staged_apex(&StagedApexInfo {
+                moduleName: "foo".to_string(),
+                versionCode: 2,
+                diskImagePath: staged.path().to_string_lossy().to_string(),
+                ..Default::default()
+            })
+            .expect("should be ok");
+
+        assert_eq!(
+            apex_info_list,
+            ApexInfoList {
+                list: vec![
+                    ApexInfo {
+                        version: 2,
+                        is_factory: false,
+                        path: staged.path().to_owned(),
+                        last_update_seconds: last_updated(staged.path()).unwrap(),
+                        ..single_apex.clone()
+                    },
+                    ApexInfo { is_active: false, ..single_apex },
+                ],
+            }
+        );
+    }
+
+    #[test]
+    fn test_prefer_staged_apex_with_factory_and_inactive_apex() {
+        let factory_apex = ApexInfo {
+            name: "foo".to_string(),
+            version: 1,
+            path: PathBuf::from("foo.apex"),
+            is_factory: true,
+            ..Default::default()
+        };
+        let active_apex = ApexInfo {
+            name: "foo".to_string(),
+            version: 2,
+            path: PathBuf::from("foo.downloaded.apex"),
+            is_active: true,
+            ..Default::default()
+        };
+        let mut apex_info_list =
+            ApexInfoList { list: vec![factory_apex.clone(), active_apex.clone()] };
+
+        let staged = NamedTempFile::new().unwrap();
+        apex_info_list
+            .override_staged_apex(&StagedApexInfo {
+                moduleName: "foo".to_string(),
+                versionCode: 3,
+                diskImagePath: staged.path().to_string_lossy().to_string(),
+                ..Default::default()
+            })
+            .expect("should be ok");
+
+        assert_eq!(
+            apex_info_list,
+            ApexInfoList {
+                list: vec![
+                    // factory apex isn't touched
+                    factory_apex,
+                    // update active one
+                    ApexInfo {
+                        version: 3,
+                        path: staged.path().to_owned(),
+                        last_update_seconds: last_updated(staged.path()).unwrap(),
+                        ..active_apex
+                    },
+                ],
+            }
+        );
+    }
+}
diff --git a/virtualizationmanager/src/selinux.rs b/virtualizationmanager/src/selinux.rs
new file mode 100644
index 0000000..0485943
--- /dev/null
+++ b/virtualizationmanager/src/selinux.rs
@@ -0,0 +1,120 @@
+// Copyright 2021, The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Wrapper to libselinux
+
+use anyhow::{anyhow, bail, Context, Result};
+use std::ffi::{CStr, CString};
+use std::fmt;
+use std::fs::File;
+use std::io;
+use std::ops::Deref;
+use std::os::raw::c_char;
+use std::os::unix::io::AsRawFd;
+use std::ptr;
+
+// Partially copied from system/security/keystore2/selinux/src/lib.rs
+/// SeContext represents an SELinux context string. It can take ownership of a raw
+/// s-string as allocated by `getcon` or `selabel_lookup`. In this case it uses
+/// `freecon` to free the resources when dropped. In its second variant it stores
+/// an `std::ffi::CString` that can be initialized from a Rust string slice.
+#[derive(Debug)]
+#[allow(dead_code)] // CString variant is used in tests
+pub enum SeContext {
+    /// Wraps a raw context c-string as returned by libselinux.
+    Raw(*mut ::std::os::raw::c_char),
+    /// Stores a context string as `std::ffi::CString`.
+    CString(CString),
+}
+
+impl PartialEq for SeContext {
+    fn eq(&self, other: &Self) -> bool {
+        // We dereference both and thereby delegate the comparison
+        // to `CStr`'s implementation of `PartialEq`.
+        **self == **other
+    }
+}
+
+impl Eq for SeContext {}
+
+impl fmt::Display for SeContext {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.to_str().unwrap_or("Invalid context"))
+    }
+}
+
+impl Drop for SeContext {
+    fn drop(&mut self) {
+        if let Self::Raw(p) = self {
+            // SAFETY: SeContext::Raw is created only with a pointer that is set by libselinux and
+            // has to be freed with freecon.
+            unsafe { selinux_bindgen::freecon(*p) };
+        }
+    }
+}
+
+impl Deref for SeContext {
+    type Target = CStr;
+
+    fn deref(&self) -> &Self::Target {
+        match self {
+            // SAFETY: the non-owned C string pointed by `p` is guaranteed to be valid (non-null
+            // and shorter than i32::MAX). It is freed when SeContext is dropped.
+            Self::Raw(p) => unsafe { CStr::from_ptr(*p) },
+            Self::CString(cstr) => cstr,
+        }
+    }
+}
+
+impl SeContext {
+    /// Initializes the `SeContext::CString` variant from a Rust string slice.
+    #[allow(dead_code)] // Used in tests
+    pub fn new(con: &str) -> Result<Self> {
+        Ok(Self::CString(
+            CString::new(con)
+                .with_context(|| format!("Failed to create SeContext with \"{}\"", con))?,
+        ))
+    }
+
+    pub fn selinux_type(&self) -> Result<&str> {
+        let context = self.deref().to_str().context("Label is not valid UTF8")?;
+
+        // The syntax is user:role:type:sensitivity[:category,...],
+        // ignoring security level ranges, which don't occur on Android. See
+        // https://github.com/SELinuxProject/selinux-notebook/blob/main/src/security_context.md
+        // We only want the type.
+        let fields: Vec<_> = context.split(':').collect();
+        if fields.len() < 4 || fields.len() > 5 {
+            bail!("Syntactically invalid label {}", self);
+        }
+        Ok(fields[2])
+    }
+}
+
+pub fn getfilecon(file: &File) -> Result<SeContext> {
+    let fd = file.as_raw_fd();
+    let mut con: *mut c_char = ptr::null_mut();
+    // SAFETY: the returned pointer `con` is wrapped in SeContext::Raw which is freed with
+    // `freecon` when it is dropped.
+    match unsafe { selinux_bindgen::fgetfilecon(fd, &mut con) } {
+        1.. => {
+            if !con.is_null() {
+                Ok(SeContext::Raw(con))
+            } else {
+                Err(anyhow!("fgetfilecon returned a NULL context"))
+            }
+        }
+        _ => Err(anyhow!(io::Error::last_os_error())).context("fgetfilecon failed"),
+    }
+}