Don't start authfs until fd_server is ready
We pass a pipe fd to fd_server which it closes when it is ready to
accept connections. pvm_exec waits for this before triggering
compilation, which will cause authfs in the VM to connect back to
fd_server.
Also improved a bunch of error handling/error logging code, since I
managed to hit a lot of failure cases.
Fix: 201764092
Bug: 186126194
Test: atest ComposTestCase
Change-Id: I99ea5ba4fb390d47485aef3cbaaccedd046ae9fe
diff --git a/authfs/fd_server/src/main.rs b/authfs/fd_server/src/main.rs
index 12f013c..32ffc0a 100644
--- a/authfs/fd_server/src/main.rs
+++ b/authfs/fd_server/src/main.rs
@@ -32,6 +32,7 @@
use std::convert::TryInto;
use std::fs::File;
use std::io;
+use std::os::raw;
use std::os::unix::fs::FileExt;
use std::os::unix::io::{AsRawFd, FromRawFd};
@@ -292,7 +293,12 @@
Ok((fd, FdConfig::ReadWrite(file)))
}
-fn parse_args() -> Result<BTreeMap<i32, FdConfig>> {
+struct Args {
+ fd_pool: BTreeMap<i32, FdConfig>,
+ ready_fd: Option<File>,
+}
+
+fn parse_args() -> Result<Args> {
#[rustfmt::skip]
let matches = clap::App::new("fd_server")
.arg(clap::Arg::with_name("ro-fds")
@@ -303,6 +309,9 @@
.long("rw-fds")
.multiple(true)
.number_of_values(1))
+ .arg(clap::Arg::with_name("ready-fd")
+ .long("ready-fd")
+ .takes_value(true))
.get_matches();
let mut fd_pool = BTreeMap::new();
@@ -318,8 +327,13 @@
fd_pool.insert(fd, config);
}
}
-
- Ok(fd_pool)
+ let ready_fd = if let Some(arg) = matches.value_of("ready-fd") {
+ let fd = arg.parse::<i32>()?;
+ Some(fd_to_file(fd)?)
+ } else {
+ None
+ };
+ Ok(Args { fd_pool, ready_fd })
}
fn main() -> Result<()> {
@@ -327,16 +341,22 @@
android_logger::Config::default().with_tag("fd_server").with_min_level(log::Level::Debug),
);
- let fd_pool = parse_args()?;
+ let args = parse_args()?;
- let mut service = FdService::new_binder(fd_pool).as_binder();
+ let mut service = FdService::new_binder(args.fd_pool).as_binder();
+ let mut ready_notifier = ReadyNotifier(args.ready_fd);
+
debug!("fd_server is starting as a rpc service.");
// SAFETY: Service ownership is transferring to the server and won't be valid afterward.
// Plus the binder objects are threadsafe.
+ // RunRpcServerCallback does not retain a reference to ready_callback, and only ever
+ // calls it with the param we provide during the lifetime of ready_notifier.
let retval = unsafe {
- binder_rpc_unstable_bindgen::RunRpcServer(
+ binder_rpc_unstable_bindgen::RunRpcServerCallback(
service.as_native_mut() as *mut binder_rpc_unstable_bindgen::AIBinder,
RPC_SERVICE_PORT,
+ Some(ReadyNotifier::ready_callback),
+ ready_notifier.as_void_ptr(),
)
};
if retval {
@@ -346,3 +366,25 @@
bail!("Premature termination of RPC server");
}
}
+
+struct ReadyNotifier(Option<File>);
+
+impl ReadyNotifier {
+ fn notify(&mut self) {
+ debug!("fd_server is ready");
+ // Close the ready-fd if we were given one to signal our readiness.
+ drop(self.0.take());
+ }
+
+ fn as_void_ptr(&mut self) -> *mut raw::c_void {
+ self as *mut _ as *mut raw::c_void
+ }
+
+ unsafe extern "C" fn ready_callback(param: *mut raw::c_void) {
+ // SAFETY: This is only ever called by RunRpcServerCallback, within the lifetime of the
+ // ReadyNotifier, with param taking the value returned by as_void_ptr (so a properly aligned
+ // non-null pointer to an initialized instance).
+ let ready_notifier = param as *mut Self;
+ ready_notifier.as_mut().unwrap().notify()
+ }
+}
diff --git a/authfs/service/src/authfs.rs b/authfs/service/src/authfs.rs
index 5601738..6d87243 100644
--- a/authfs/service/src/authfs.rs
+++ b/authfs/service/src/authfs.rs
@@ -82,8 +82,11 @@
&config.outputFdAnnotations,
debuggable,
)?;
- wait_until_authfs_ready(&mountpoint).map_err(|e| {
- debug!("Wait for authfs: {:?}", child.wait());
+ wait_until_authfs_ready(&child, &mountpoint).map_err(|e| {
+ match child.wait() {
+ Ok(status) => debug!("Wait for authfs: {}", status),
+ Err(e) => warn!("Failed to wait for child: {}", e),
+ }
e
})?;
@@ -144,13 +147,18 @@
SharedChild::spawn(&mut command).context("Spawn authfs")
}
-fn wait_until_authfs_ready(mountpoint: &OsStr) -> Result<()> {
+fn wait_until_authfs_ready(child: &SharedChild, mountpoint: &OsStr) -> Result<()> {
let start_time = Instant::now();
loop {
if is_fuse(mountpoint)? {
break;
}
+ if let Some(exit_status) = child.try_wait()? {
+ // If the child has exited, we will never become ready.
+ bail!("Child has exited: {}", exit_status);
+ }
if start_time.elapsed() > AUTHFS_SETUP_TIMEOUT_SEC {
+ let _ = child.kill();
bail!("Time out mounting authfs");
}
sleep(AUTHFS_SETUP_POLL_INTERVAL_MS);
diff --git a/authfs/service/src/main.rs b/authfs/service/src/main.rs
index af8c7f9..890e108 100644
--- a/authfs/service/src/main.rs
+++ b/authfs/service/src/main.rs
@@ -59,7 +59,7 @@
create_dir(&mountpoint).map_err(|e| {
new_binder_exception(
ExceptionCode::SERVICE_SPECIFIC,
- format!("Cannot create mount directory {:?}: {}", &mountpoint, e),
+ format!("Cannot create mount directory {:?}: {:?}", &mountpoint, e),
)
})?;
@@ -109,7 +109,7 @@
Ok(())
}
-fn main() -> Result<()> {
+fn try_main() -> Result<()> {
let debuggable = env!("TARGET_BUILD_VARIANT") != "user";
let log_level = if debuggable { log::Level::Trace } else { log::Level::Info };
android_logger::init_once(
@@ -128,3 +128,10 @@
ProcessState::join_thread_pool();
bail!("Unexpected exit after join_thread_pool")
}
+
+fn main() {
+ if let Err(e) = try_main() {
+ error!("failed with {:?}", e);
+ std::process::exit(1);
+ }
+}
diff --git a/authfs/src/main.rs b/authfs/src/main.rs
index c85d801..ecb0e68 100644
--- a/authfs/src/main.rs
+++ b/authfs/src/main.rs
@@ -28,6 +28,7 @@
//! e.g. /mountpoint/42.
use anyhow::{bail, Context, Result};
+use log::error;
use std::collections::BTreeMap;
use std::convert::TryInto;
use std::fs::File;
@@ -325,7 +326,7 @@
Ok(file_pool)
}
-fn main() -> Result<()> {
+fn try_main() -> Result<()> {
let args = Args::from_args();
let log_level = if args.debug { log::Level::Debug } else { log::Level::Info };
@@ -337,3 +338,10 @@
fusefs::loop_forever(file_pool, &args.mount_point, &args.extra_options)?;
bail!("Unexpected exit after the handler loop")
}
+
+fn main() {
+ if let Err(e) = try_main() {
+ error!("failed with {:?}", e);
+ std::process::exit(1);
+ }
+}
diff --git a/compos/src/compsvc_main.rs b/compos/src/compsvc_main.rs
index 388e79b..d0c920a 100644
--- a/compos/src/compsvc_main.rs
+++ b/compos/src/compsvc_main.rs
@@ -43,7 +43,14 @@
/// The CID representing the host VM
const VMADDR_CID_HOST: u32 = 2;
-fn main() -> Result<()> {
+fn main() {
+ if let Err(e) = try_main() {
+ error!("failed with {:?}", e);
+ std::process::exit(1);
+ }
+}
+
+fn try_main() -> Result<()> {
let args = clap::App::new("compsvc")
.arg(clap::Arg::with_name("log_to_stderr").long("log_to_stderr"))
.get_matches();
diff --git a/compos/src/pvm_exec.rs b/compos/src/pvm_exec.rs
index fdd9c57..8ce27db 100644
--- a/compos/src/pvm_exec.rs
+++ b/compos/src/pvm_exec.rs
@@ -32,8 +32,11 @@
use clap::{value_t, App, Arg};
use log::{debug, error, warn};
use minijail::Minijail;
-use nix::fcntl::{fcntl, FcntlArg::F_GETFD};
-use std::os::unix::io::RawFd;
+use nix::fcntl::{fcntl, FcntlArg::F_GETFD, OFlag};
+use nix::unistd::pipe2;
+use std::fs::File;
+use std::io::Read;
+use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::path::Path;
use std::process::exit;
@@ -69,7 +72,11 @@
}
}
-fn spawn_fd_server(fd_annotation: &FdAnnotation, debuggable: bool) -> Result<Minijail> {
+fn spawn_fd_server(
+ fd_annotation: &FdAnnotation,
+ ready_file: File,
+ debuggable: bool,
+) -> Result<Minijail> {
let mut inheritable_fds = if debuggable {
vec![1, 2] // inherit/redirect stdout/stderr for debugging
} else {
@@ -87,6 +94,10 @@
args.push(fd.to_string());
inheritable_fds.push(*fd);
}
+ let ready_fd = ready_file.as_raw_fd();
+ args.push("--ready-fd".to_string());
+ args.push(ready_fd.to_string());
+ inheritable_fds.push(ready_fd);
let jail = Minijail::new()?;
let _pid = jail.run(Path::new(FD_SERVER_BIN), &inheritable_fds, &args)?;
@@ -153,12 +164,31 @@
Ok(Config { args, fd_annotation: FdAnnotation { input_fds, output_fds }, cid, debuggable })
}
+fn create_pipe() -> Result<(File, File)> {
+ let (raw_read, raw_write) = pipe2(OFlag::O_CLOEXEC)?;
+ // SAFETY: We are the sole owners of these fds as they were just created.
+ let read_fd = unsafe { File::from_raw_fd(raw_read) };
+ let write_fd = unsafe { File::from_raw_fd(raw_write) };
+ Ok((read_fd, write_fd))
+}
+
+fn wait_for_fd_server_ready(mut ready_fd: File) -> Result<()> {
+ let mut buffer = [0];
+ // When fd_server is ready it closes its end of the pipe. And if it exits, the pipe is also
+ // closed. Either way this read will return 0 bytes at that point, and there's no point waiting
+ // any longer.
+ let _ = ready_fd.read(&mut buffer).context("Waiting for fd_server to be ready")?;
+ debug!("fd_server is ready");
+ Ok(())
+}
+
fn try_main() -> Result<()> {
// 1. Parse the command line arguments for collect execution data.
let Config { args, fd_annotation, cid, debuggable } = parse_args()?;
// 2. Spawn and configure a fd_server to serve remote read/write requests.
- let fd_server_jail = spawn_fd_server(&fd_annotation, debuggable)?;
+ let (ready_read_fd, ready_write_fd) = create_pipe()?;
+ let fd_server_jail = spawn_fd_server(&fd_annotation, ready_write_fd, debuggable)?;
let fd_server_lifetime = scopeguard::guard(fd_server_jail, |fd_server_jail| {
if let Err(e) = fd_server_jail.kill() {
if !matches!(e, minijail::Error::Killed(_)) {
@@ -171,10 +201,12 @@
let result = if cid == VMADDR_CID_ANY {
// Sentinel value that indicates we should use composd
let composd = get_composd()?;
+ wait_for_fd_server_ready(ready_read_fd)?;
composd.compile(&args, &fd_annotation)
} else {
// Call directly into the VM
let compos_vm = get_rpc_binder(cid)?;
+ wait_for_fd_server_ready(ready_read_fd)?;
compos_vm.compile(&args, &fd_annotation)
};
let result = result.context("Binder call failed")?;
@@ -190,7 +222,7 @@
// Be explicit about the lifetime, which should last at least until the task is finished.
drop(fd_server_lifetime);
- if result.exitCode > 0 {
+ if result.exitCode != 0 {
error!("remote execution failed with exit code {}", result.exitCode);
exit(result.exitCode as i32);
}