virtmgr: Pass hugepages to crosvm

Now that crosvm applies block alignment ("arm64: Align RAM region with
the block size") when mmapping the RAM region for a VM, it is possible to
back this memory with THP (transparent huge pages). This is controlled by
either the VM config option "hugepages" or the "bin/vm" option
"--hugepages".

Enabling --hugepages makes crosvm "madvise(MADV_HUGEPAGE)" that region.
At the moment this has no effect on Android, as the default value for
/sys/kernel/mm/transparent_hugepage/shmem_enabled is [never]. However,
it enables OEMs to turn on the feature by tweaking that knob.

Using THP brings a significant performance improvement by reducing the
number of faults necessary when using a VM (divided by 512 on an Arm
machine with 4K pages) and reducing the pressure on the TLB (for both
stage-1 and stage-2). However, finding huge pages might be a difficult
task when the system has been running for a long time and the memory is
quite fragmented. khugepaged helps promote pages to huge pages, but
running it has a cost and might delay the memory allocation depending on
the chosen defrag policy.

Bug: 278011447
Change-Id: I954f93df4f08ad015958d36d115d9f9e0c3547b5
diff --git a/README.md b/README.md
index 4905b56..7560a45 100644
--- a/README.md
+++ b/README.md
@@ -32,3 +32,4 @@
 * [Debugging](docs/debug)
 * [Using custom VM](docs/custom_vm.md)
 * [Device assignment](docs/device_assignment.md)
+* [Huge Pages](docs/hugepages.md)
diff --git a/docs/hugepages.md b/docs/hugepages.md
new file mode 100644
index 0000000..b379e9b
--- /dev/null
+++ b/docs/hugepages.md
@@ -0,0 +1,47 @@
+# Huge Pages
+
+From Android 15, the pKVM hypervisor supports Transparent Hugepages. This is a
+Linux feature which allows the kernel to allocate, when possible, a huge-page
+(typically, 2MiB on a 4K system). This huge-page being the size of a block,
+the hypervisor can leverage this allocation to also use a block mapping
+in the stage-2 page tables, instead of 512 individual contiguous single page
+mappings.
+
+Using block mappings brings a significant performance improvement by reducing
+the number of stage-2 page faults as well as the TLB pressure. However, finding
+a huge-page can be difficult on a system where the memory is fragmented.
+
+By default, huge-pages are disabled.
+
+## Enabling THP
+
+### 1. Sysfs configuration
+
+The sysfs configuration file that will enable THP for AVF is:
+
+```
+/sys/kernel/mm/transparent_hugepage/shmem_enabled
+```
+
+This always defaults to `never`. It is recommended to set it to `advise` to
+benefit from the THP performance improvement.
+
+THPs can have an impact on the system depending on the chosen policy. The
+policy is configured with the following sysfs file:
+
+```
+/sys/kernel/mm/transparent_hugepage/defrag
+```
+
+The recommended policy is `never` as this has zero impact on the system. THPs
+would be used only if some are available.
+
+More information can be found in the Linux
+[admin guide](https://docs.kernel.org/admin-guide/mm/transhuge.html).
+
+### 2. AVF configuration
+
+The guest VM configuration can select huge-pages with the `vm_config.json`
+option `"hugepages": true`.
+
+Alternatively, the `vm` command can also pass `--hugepages`.
diff --git a/microdroid/payload/config/src/lib.rs b/microdroid/payload/config/src/lib.rs
index d6f65bd..28c3c70 100644
--- a/microdroid/payload/config/src/lib.rs
+++ b/microdroid/payload/config/src/lib.rs
@@ -49,6 +49,13 @@
     /// files with integrity checking, but not confidentiality.
     #[serde(default)]
     pub enable_authfs: bool,
+
+    /// Ask the kernel for transparent huge-pages (THP). This is only a hint and
+    /// the kernel will allocate THP-backed memory only if globally enabled by
+    /// the system and if any can be found. See
+    /// https://docs.kernel.org/admin-guide/mm/transhuge.html
+    #[serde(default)]
+    pub hugepages: bool,
 }
 
 /// OS config
diff --git a/microdroid_manager/src/main.rs b/microdroid_manager/src/main.rs
index 7da9ea4..990d27a 100644
--- a/microdroid_manager/src/main.rs
+++ b/microdroid_manager/src/main.rs
@@ -629,6 +629,7 @@
                 prefer_staged: false,
                 export_tombstones: None,
                 enable_authfs: false,
+                hugepages: false,
             })
         }
         _ => bail!("Failed to match config against a config type."),
diff --git a/virtualizationmanager/src/aidl.rs b/virtualizationmanager/src/aidl.rs
index f1509e2..a245e11 100644
--- a/virtualizationmanager/src/aidl.rs
+++ b/virtualizationmanager/src/aidl.rs
@@ -633,6 +633,7 @@
             device_tree_overlay,
             display_config,
             input_device_options,
+            hugepages: config.hugePages,
         };
         let instance = Arc::new(
             VmInstance::new(
@@ -918,6 +919,7 @@
     vm_config.name.clone_from(&config.name);
     vm_config.protectedVm = config.protectedVm;
     vm_config.cpuTopology = config.cpuTopology;
+    vm_config.hugePages = config.hugePages || vm_payload_config.hugepages;
 
     // Microdroid takes additional init ramdisk & (optionally) storage image
     add_microdroid_system_images(config, instance_file, storage_image, os_name, &mut vm_config)?;
diff --git a/virtualizationmanager/src/crosvm.rs b/virtualizationmanager/src/crosvm.rs
index 040e552..b426051 100644
--- a/virtualizationmanager/src/crosvm.rs
+++ b/virtualizationmanager/src/crosvm.rs
@@ -121,6 +121,7 @@
     pub device_tree_overlay: Option<File>,
     pub display_config: Option<DisplayConfig>,
     pub input_device_options: Vec<InputDeviceOption>,
+    pub hugepages: bool,
 }
 
 #[derive(Debug)]
@@ -1005,6 +1006,11 @@
             });
         }
     }
+
+    if config.hugepages {
+        command.arg("--hugepages");
+    }
+
     append_platform_devices(&mut command, &mut preserved_fds, &config)?;
 
     debug!("Preserving FDs {:?}", preserved_fds);
diff --git a/virtualizationservice/aidl/android/system/virtualizationservice/VirtualMachineAppConfig.aidl b/virtualizationservice/aidl/android/system/virtualizationservice/VirtualMachineAppConfig.aidl
index 417d5d3..9951bfd 100644
--- a/virtualizationservice/aidl/android/system/virtualizationservice/VirtualMachineAppConfig.aidl
+++ b/virtualizationservice/aidl/android/system/virtualizationservice/VirtualMachineAppConfig.aidl
@@ -128,4 +128,12 @@
 
     /** Configuration parameters guarded by android.permission.USE_CUSTOM_VIRTUAL_MACHINE */
     @nullable CustomConfig customConfig;
+
+    /**
+     *  Ask the kernel for transparent huge-pages (THP). This is only a hint and
+     *  the kernel will allocate THP-backed memory only if globally enabled by
+     *  the system and if any can be found. See
+     *  https://docs.kernel.org/admin-guide/mm/transhuge.html
+     */
+    boolean hugePages;
 }
diff --git a/virtualizationservice/aidl/android/system/virtualizationservice/VirtualMachineRawConfig.aidl b/virtualizationservice/aidl/android/system/virtualizationservice/VirtualMachineRawConfig.aidl
index 86e26da..cf9d25a 100644
--- a/virtualizationservice/aidl/android/system/virtualizationservice/VirtualMachineRawConfig.aidl
+++ b/virtualizationservice/aidl/android/system/virtualizationservice/VirtualMachineRawConfig.aidl
@@ -70,6 +70,14 @@
      */
     int gdbPort = 0;
 
+    /**
+     *  Ask the kernel for transparent huge-pages (THP). This is only a hint and
+     *  the kernel will allocate THP-backed memory only if globally enabled by
+     *  the system and if any can be found. See
+     *  https://docs.kernel.org/admin-guide/mm/transhuge.html
+     */
+    boolean hugePages;
+
     /** List of SysFS nodes of devices to be assigned */
     String[] devices;
 
diff --git a/vm/src/main.rs b/vm/src/main.rs
index b60f2db..d6ee3a5 100644
--- a/vm/src/main.rs
+++ b/vm/src/main.rs
@@ -53,6 +53,13 @@
     /// Run VM in protected mode.
     #[arg(short, long)]
     protected: bool,
+
+    /// Ask the kernel for transparent huge-pages (THP). This is only a hint and
+    /// the kernel will allocate THP-backed memory only if globally enabled by
+    /// the system and if any can be found. See
+    /// https://docs.kernel.org/admin-guide/mm/transhuge.html
+    #[arg(short, long)]
+    hugepages: bool,
 }
 
 #[derive(Args, Default)]
diff --git a/vm/src/run.rs b/vm/src/run.rs
index f3a5987..5e797f8 100644
--- a/vm/src/run.rs
+++ b/vm/src/run.rs
@@ -177,6 +177,7 @@
         cpuTopology: config.common.cpu_topology,
         customConfig: Some(custom_config),
         osName: os_name,
+        hugePages: config.common.hugepages,
     });
     run(
         service.as_ref(),
@@ -257,6 +258,7 @@
         vm_config.gdbPort = gdb.get() as i32;
     }
     vm_config.cpuTopology = config.common.cpu_topology;
+    vm_config.hugePages = config.common.hugepages;
     run(
         get_service()?.as_ref(),
         &VirtualMachineConfig::RawConfig(vm_config),