init: non-crashing service can restart immediately am: 555d2393b6
Original change: https://googleplex-android-review.googlesource.com/c/platform/system/core/+/28218458
Change-Id: I7073b0c1ef253661c44ee89872539a71ac84a833
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/init/README.md b/init/README.md
index 13c6ebd..888c65b 100644
--- a/init/README.md
+++ b/init/README.md
@@ -316,11 +316,14 @@
intended to be used with the `exec_start` builtin for any must-have checks during boot.
`restart_period <seconds>`
-> If a non-oneshot service exits, it will be restarted at its start time plus
- this period. It defaults to 5s to rate limit crashing services.
- This can be increased for services that are meant to run periodically. For
- example, it may be set to 3600 to indicate that the service should run every hour
- or 86400 to indicate that the service should run every day.
+> If a non-oneshot service exits, it will be restarted at its previous start time plus this period.
+ The default value is 5s. This can be used to implement periodic services together with the
+ `timeout_period` command below. For example, it may be set to 3600 to indicate that the service
+ should run every hour or 86400 to indicate that the service should run every day. This can be set
+ to a value shorter than 5s for example 0, but the minimum 5s delay is enforced if the restart was
+ due to a crash. This is to rate limit persistentally crashing services. In other words,
+ `<seconds>` smaller than 5 is respected only when the service exits deliverately and successfully
+ (i.e. by calling exit(0)).
`rlimit <resource> <cur> <max>`
> This applies the given rlimit to the service. rlimits are inherited by child
diff --git a/init/service.cpp b/init/service.cpp
index bd704cf..7d83d60 100644
--- a/init/service.cpp
+++ b/init/service.cpp
@@ -298,6 +298,7 @@
pid_ = 0;
flags_ &= (~SVC_RUNNING);
start_order_ = 0;
+ was_last_exit_ok_ = siginfo.si_code == CLD_EXITED && siginfo.si_status == 0;
// Oneshot processes go into the disabled state on exit,
// except when manually restarted.
@@ -321,7 +322,8 @@
// If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed,
// reboot into bootloader or set crashing property
boot_clock::time_point now = boot_clock::now();
- if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART)) {
+ if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART) &&
+ !was_last_exit_ok_) {
bool boot_completed = GetBoolProperty("sys.boot_completed", false);
if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) {
if (++crash_count_ > 4) {
diff --git a/init/service.h b/init/service.h
index c314aa1..f5f3619 100644
--- a/init/service.h
+++ b/init/service.h
@@ -19,6 +19,7 @@
#include <signal.h>
#include <sys/types.h>
+#include <algorithm>
#include <chrono>
#include <memory>
#include <optional>
@@ -113,6 +114,7 @@
pid_t pid() const { return pid_; }
android::base::boot_clock::time_point time_started() const { return time_started_; }
int crash_count() const { return crash_count_; }
+ int was_last_exit_ok() const { return was_last_exit_ok_; }
uid_t uid() const { return proc_attr_.uid; }
gid_t gid() const { return proc_attr_.gid; }
int namespace_flags() const { return namespaces_.flags; }
@@ -128,7 +130,15 @@
bool process_cgroup_empty() const { return process_cgroup_empty_; }
unsigned long start_order() const { return start_order_; }
void set_sigstop(bool value) { sigstop_ = value; }
- std::chrono::seconds restart_period() const { return restart_period_; }
+ std::chrono::seconds restart_period() const {
+ // If the service exited abnormally or due to timeout, late limit the restart even if
+ // restart_period is set to a very short value.
+ // If not, i.e. restart after a deliberate and successful exit, respect the period.
+ if (!was_last_exit_ok_) {
+ return std::max(restart_period_, default_restart_period_);
+ }
+ return restart_period_;
+ }
std::optional<std::chrono::seconds> timeout_period() const { return timeout_period_; }
const std::vector<std::string>& args() const { return args_; }
bool is_updatable() const { return updatable_; }
@@ -171,6 +181,8 @@
int crash_count_; // number of times crashed within window
std::chrono::minutes fatal_crash_window_ = 4min; // fatal() when more than 4 crashes in it
std::optional<std::string> fatal_reboot_target_; // reboot target of fatal handler
+ bool was_last_exit_ok_ =
+ true; // true if the service never exited, or exited with status code 0
std::optional<CapSet> capabilities_;
ProcessAttributes proc_attr_;
@@ -211,7 +223,8 @@
bool sigstop_ = false;
- std::chrono::seconds restart_period_ = 5s;
+ const std::chrono::seconds default_restart_period_ = 5s;
+ std::chrono::seconds restart_period_ = default_restart_period_;
std::optional<std::chrono::seconds> timeout_period_;
bool updatable_ = false;
diff --git a/init/service_parser.cpp b/init/service_parser.cpp
index 9e914ee..08d82c1 100644
--- a/init/service_parser.cpp
+++ b/init/service_parser.cpp
@@ -364,8 +364,8 @@
Result<void> ServiceParser::ParseRestartPeriod(std::vector<std::string>&& args) {
int period;
- if (!ParseInt(args[1], &period, 5)) {
- return Error() << "restart_period value must be an integer >= 5";
+ if (!ParseInt(args[1], &period, 0)) {
+ return Error() << "restart_period value must be an integer >= 0";
}
service_->restart_period_ = std::chrono::seconds(period);
return {};