llkd: bootstat: propagate detailed livelock canonical boot reason

Report kernel_panic,sysrq,livelock,<state> reboot reason via last
dmesg (pstore console).  Add ro.llk.killtest property, which will
allow reliable ABA platforms to drop kill test and go directly
to kernel panic.  This should also allow some manual unit testing
of the canonical boot reason report.

New canonical boot reasons from llkd are:
- kernel_panic,sysrq,livelock,alarm llkd itself locked up (Hail Mary)
- kernel_panic,sysrq,livelock,driver uninterruptible D state
- kernel_panic,sysrq,livelock,zombie uninterruptible Z state

Manual test assumptions:
- llkd is built by the platform and landed on system partition
- unit test is built and landed in /data/nativetest (could
  land in /data/nativetest64, adjust test correspondingly)
- llkd is not enabled; ro.llk.enable and ro.llk.killtest
  are not set by the platform, allowing the test to adjust all
  the configuration properties and start llkd.
- or, llkd is enabled (ro.llk.enable is true) and killtest is
  disabled (ro.llk.killtest is false), as set up by the platform.
  This breaks the go/apct generic operation of the unit tests
  llkd.zombie and llkd.driver, because a kernel panic results,
  which requires manual intervention.  If the test moves to
  go/apct, then we will be forced to bypass these tests under
  this condition (but allow them to run if ro.llk.killtest
  is "off" so the specific testing above/below can be run).

for i in driver zombie; do
        adb shell su root setprop ro.llk.killtest off
        adb shell /data/nativetest/llkd_unit_test/llkd_unit_test --gtest_filter=llkd.${i}
        adb wait-for-device
        adb shell su root setprop ro.llk.killtest off
        sleep 60
        adb shell getprop sys.boot.reason
        adb shell /data/nativetest/llkd_unit_test/llkd_unit_test --gtest_filter=llkd.${i}
done

Test: llkd_unit_test (see test assumptions)
Bug: 33808187
Bug: 72838192
Change-Id: I2b24875376ddfdbc282ba3da5c5b3567de85dbc0
diff --git a/llkd/libllkd.cpp b/llkd/libllkd.cpp
index d828105..f357cc2 100644
--- a/llkd/libllkd.cpp
+++ b/llkd/libllkd.cpp
@@ -70,6 +70,7 @@
 bool llkEnable = LLK_ENABLE_DEFAULT;                 // llk daemon enabled
 bool llkRunning = false;                             // thread is running
 bool llkMlockall = LLK_MLOCKALL_DEFAULT;             // run mlocked
+bool llkTestWithKill = LLK_KILLTEST_DEFAULT;         // issue test kills
 milliseconds llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;  // default timeout
 enum { llkStateD, llkStateZ, llkNumStates };         // state indexes
 milliseconds llkStateTimeoutMs[llkNumStates];        // timeout override for each detection state
@@ -292,7 +293,7 @@
           exeMissingValid(false),
           cmdlineValid(false),
           updated(true),
-          killed(false) {
+          killed(!llkTestWithKill) {
         memset(comm, '\0', sizeof(comm));
         setComm(_comm);
     }
@@ -475,8 +476,8 @@
     return android::base::Trim(content) == string;
 }
 
-void llkPanicKernel(bool dump, pid_t tid) __noreturn;
-void llkPanicKernel(bool dump, pid_t tid) {
+void llkPanicKernel(bool dump, pid_t tid, const char* state) __noreturn;
+void llkPanicKernel(bool dump, pid_t tid, const char* state) {
     auto sysrqTriggerFd = llkFileToWriteFd("/proc/sysrq-trigger");
     if (sysrqTriggerFd < 0) {
         // DYB
@@ -496,6 +497,8 @@
         }
         ::usleep(200000);  // let everything settle
     }
+    llkWriteStringToFile(std::string("SysRq : Trigger a crash : 'livelock,") + state + "'\n",
+                         "/dev/kmsg");
     android::base::WriteStringToFd("c", sysrqTriggerFd);
     // NOTREACHED
     // DYB
@@ -507,7 +510,7 @@
 }
 
 void llkAlarmHandler(int) {
-    llkPanicKernel(false, ::getpid());
+    llkPanicKernel(false, ::getpid(), "alarm");
 }
 
 milliseconds GetUintProperty(const std::string& key, milliseconds def) {
@@ -686,7 +689,7 @@
             (val != procp->nrSwitches)) {
             procp->nrSwitches = val;
             procp->count = 0ms;
-            procp->killed = false;
+            procp->killed = !llkTestWithKill;
         }
         return;
     }
@@ -700,7 +703,7 @@
         if (schedUpdate != procp->schedUpdate) {
             procp->schedUpdate = schedUpdate;
             procp->count = 0ms;
-            procp->killed = false;
+            procp->killed = !llkTestWithKill;
         }
     }
 
@@ -709,7 +712,7 @@
         if (static_cast<uint64_t>(val) != procp->nrSwitches) {
             procp->nrSwitches = val;
             procp->count = 0ms;
-            procp->killed = false;
+            procp->killed = !llkTestWithKill;
         }
     }
 }
@@ -719,6 +722,7 @@
               << LLK_ENABLE_PROPERTY "=" << llkFormat(llkEnable) << "\n"
               << KHT_ENABLE_PROPERTY "=" << llkFormat(khtEnable) << "\n"
               << LLK_MLOCKALL_PROPERTY "=" << llkFormat(llkMlockall) << "\n"
+              << LLK_KILLTEST_PROPERTY "=" << llkFormat(llkTestWithKill) << "\n"
               << KHT_TIMEOUT_PROPERTY "=" << llkFormat(khtTimeout) << "\n"
               << LLK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkTimeoutMs) << "\n"
               << LLK_D_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateD]) << "\n"
@@ -869,7 +873,7 @@
                 procp->time = utime + stime;
                 if (procp->state != state) {
                     procp->count = 0ms;
-                    procp->killed = false;
+                    procp->killed = !llkTestWithKill;
                     procp->state = state;
                 } else {
                     procp->count += llkCycle;
@@ -973,7 +977,7 @@
             // We are here because we have confirmed kernel live-lock
             LOG(ERROR) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->" << pid
                        << "->" << tid << ' ' << procp->getComm() << " [panic]";
-            llkPanicKernel(true, tid);
+            llkPanicKernel(true, tid, (state == 'Z') ? "zombie" : "driver");
         }
         LOG(VERBOSE) << "+closedir()";
     }
@@ -1045,6 +1049,7 @@
     }
     khtEnable = android::base::GetBoolProperty(KHT_ENABLE_PROPERTY, khtEnable);
     llkMlockall = android::base::GetBoolProperty(LLK_MLOCKALL_PROPERTY, llkMlockall);
+    llkTestWithKill = android::base::GetBoolProperty(LLK_KILLTEST_PROPERTY, llkTestWithKill);
     // if LLK_TIMOUT_MS_PROPERTY was not set, we will use a set
     // KHT_TIMEOUT_PROPERTY as co-operative guidance for the default value.
     khtTimeout = GetUintProperty(KHT_TIMEOUT_PROPERTY, khtTimeout);