lmkd: Isolate statslog related code from lmkd code
Move statsd-related code out of lmkd.c to minimize the ifdefs sprinkled around
the code and make it more maintainable.
Bug: 74119935
Test: lmkd_unit_test
Change-Id: Ib22f90fd380b9a31e09ab18ef16787bc07415ddf
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
diff --git a/lmkd/statslog.c b/lmkd/statslog.c
index f3a6e55..c0fd6df 100644
--- a/lmkd/statslog.c
+++ b/lmkd/statslog.c
@@ -18,11 +18,21 @@
#include <errno.h>
#include <log/log_id.h>
#include <stats_event_list.h>
+#include <statslog.h>
+#include <stdlib.h>
+#include <string.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
+#ifdef LMKD_LOG_STATS
+
#define LINE_MAX 128
+/* Two-level expansion so that a macro argument (e.g. LINE_MAX) is replaced by its value before it is stringified. */
+#define STRINGIFY(x) STRINGIFY_INTERNAL(x)
+#define STRINGIFY_INTERNAL(x) #x
+
+/* Set by statslog_init() from the "ro.lmk.log_stats" property; checked by the logging entry points in this file. */
+static bool enable_stats_log;
+/* Logger context created by statslog_init() when logging is enabled and released by statslog_destroy(). */
+static android_log_context log_ctx;
struct proc {
int pid;
@@ -41,34 +51,53 @@
return (int64_t)t.tv_sec * 1000000000LL + t.tv_nsec;
}
+/**
+ * Reads the "ro.lmk.log_stats" property into enable_stats_log and, when the
+ * property is set, creates the logger context used by the stats_write_*
+ * functions.
+ */
+void statslog_init() {
+    enable_stats_log = property_get_bool("ro.lmk.log_stats", false);
+    if (!enable_stats_log) {
+        return;
+    }
+
+    log_ctx = create_android_logger(kStatsEventTag);
+}
+
+/**
+ * Releases the logger context held in log_ctx, if there is one.
+ */
+void statslog_destroy() {
+    if (!log_ctx) {
+        return;
+    }
+
+    android_log_destroy(&log_ctx);
+}
+
/**
* Logs the change in LMKD state which is used as start/stop boundaries for logging
* LMK_KILL_OCCURRED event.
* Code: LMK_STATE_CHANGED = 54
*/
int
-stats_write_lmk_state_changed(android_log_context ctx, int32_t code, int32_t state) {
- assert(ctx != NULL);
+stats_write_lmk_state_changed(int32_t code, int32_t state) {
int ret = -EINVAL;
- if (!ctx) {
+
+ /* Logging disabled (enable_stats_log is false): return -EINVAL without touching the logger. */
+ if (!enable_stats_log) {
return ret;
}
- reset_log_context(ctx);
-
- if ((ret = android_log_write_int64(ctx, getElapsedRealTimeNs())) < 0) {
+ /* The explicit NULL check still guards builds in which assert() is compiled out. */
+ assert(log_ctx != NULL);
+ if (!log_ctx) {
return ret;
}
- if ((ret = android_log_write_int32(ctx, code)) < 0) {
+ /* Payload is written in order: getElapsedRealTimeNs() timestamp, code, state. */
+ reset_log_context(log_ctx);
+
+ if ((ret = android_log_write_int64(log_ctx, getElapsedRealTimeNs())) < 0) {
return ret;
}
- if ((ret = android_log_write_int32(ctx, state)) < 0) {
+ if ((ret = android_log_write_int32(log_ctx, code)) < 0) {
return ret;
}
- return write_to_logger(ctx, LOG_ID_STATS);
+ if ((ret = android_log_write_int32(log_ctx, state)) < 0) {
+ return ret;
+ }
+
+ return write_to_logger(log_ctx, LOG_ID_STATS);
}
static struct proc* pid_lookup(int pid) {
@@ -87,92 +116,261 @@
* Code: LMK_KILL_OCCURRED = 51
*/
int
-stats_write_lmk_kill_occurred(android_log_context ctx, int32_t code, int32_t uid,
- char const* process_name, int32_t oom_score, int64_t pgfault,
- int64_t pgmajfault, int64_t rss_in_bytes, int64_t cache_in_bytes,
- int64_t swap_in_bytes, int64_t process_start_time_ns,
- int32_t min_oom_score) {
- assert(ctx != NULL);
+stats_write_lmk_kill_occurred(int32_t code, int32_t uid, char const* process_name,
+ int32_t oom_score, int32_t min_oom_score, int tasksize,
+ struct memory_stat *mem_st) {
+ /*
+  * mem_st may be NULL: the per-process counters are then logged as -1 and
+  * the RSS field falls back to tasksize * BYTES_IN_KILOBYTE.
+  * Returns -EINVAL when logging is disabled or no logger context exists,
+  * the first negative result of an android_log_write_* call, or otherwise
+  * the result of write_to_logger().
+  */
int ret = -EINVAL;
- if (!ctx) {
+ if (!enable_stats_log) {
return ret;
}
- reset_log_context(ctx);
+ if (!log_ctx) {
+ return ret;
+ }
+ reset_log_context(log_ctx);
- if ((ret = android_log_write_int64(ctx, getElapsedRealTimeNs())) < 0) {
+ if ((ret = android_log_write_int64(log_ctx, getElapsedRealTimeNs())) < 0) {
return ret;
}
- if ((ret = android_log_write_int32(ctx, code)) < 0) {
+ if ((ret = android_log_write_int32(log_ctx, code)) < 0) {
return ret;
}
- if ((ret = android_log_write_int32(ctx, uid)) < 0) {
+ if ((ret = android_log_write_int32(log_ctx, uid)) < 0) {
return ret;
}
- if ((ret = android_log_write_string8(ctx, (process_name == NULL) ? "" : process_name)) < 0) {
+ if ((ret = android_log_write_string8(log_ctx, (process_name == NULL) ? "" : process_name)) < 0) {
return ret;
}
- if ((ret = android_log_write_int32(ctx, oom_score)) < 0) {
+ if ((ret = android_log_write_int32(log_ctx, oom_score)) < 0) {
return ret;
}
- if ((ret = android_log_write_int64(ctx, pgfault)) < 0) {
+ if ((ret = android_log_write_int64(log_ctx, mem_st ? mem_st->pgfault : -1)) < 0) {
return ret;
}
- if ((ret = android_log_write_int64(ctx, pgmajfault)) < 0) {
+ if ((ret = android_log_write_int64(log_ctx, mem_st ? mem_st->pgmajfault : -1)) < 0) {
return ret;
}
- if ((ret = android_log_write_int64(ctx, rss_in_bytes)) < 0) {
+ /* Widen tasksize before multiplying so a large value cannot overflow int arithmetic. */
+ if ((ret = android_log_write_int64(log_ctx, mem_st ? mem_st->rss_in_bytes
+ : (int64_t)tasksize * BYTES_IN_KILOBYTE)) < 0) {
return ret;
}
- if ((ret = android_log_write_int64(ctx, cache_in_bytes)) < 0) {
+ if ((ret = android_log_write_int64(log_ctx, mem_st ? mem_st->cache_in_bytes : -1)) < 0) {
return ret;
}
- if ((ret = android_log_write_int64(ctx, swap_in_bytes)) < 0) {
+ if ((ret = android_log_write_int64(log_ctx, mem_st ? mem_st->swap_in_bytes : -1)) < 0) {
return ret;
}
- if ((ret = android_log_write_int64(ctx, process_start_time_ns)) < 0) {
+ if ((ret = android_log_write_int64(log_ctx, mem_st ? mem_st->process_start_time_ns
+ : -1)) < 0) {
return ret;
}
- if ((ret = android_log_write_int32(ctx, min_oom_score)) < 0) {
+ if ((ret = android_log_write_int32(log_ctx, min_oom_score)) < 0) {
return ret;
}
- return write_to_logger(ctx, LOG_ID_STATS);
+ return write_to_logger(log_ctx, LOG_ID_STATS);
}
-int stats_write_lmk_kill_occurred_pid(android_log_context ctx, int32_t code, int32_t uid, int pid,
- int32_t oom_score, int64_t pgfault, int64_t pgmajfault,
- int64_t rss_in_bytes, int64_t cache_in_bytes,
- int64_t swap_in_bytes, int64_t process_start_time_ns,
- int32_t min_oom_score) {
+/*
+ * Variant of stats_write_lmk_kill_occurred() that takes a pid instead of a
+ * process name: the name is the taskname of the entry pid_lookup() returns.
+ * Returns -EINVAL when pid_lookup() finds no entry for pid.
+ */
+static int stats_write_lmk_kill_occurred_pid(int32_t code, int32_t uid, int pid,
+ int32_t oom_score, int32_t min_oom_score, int tasksize,
+ struct memory_stat *mem_st) {
struct proc* proc = pid_lookup(pid);
if (!proc) return -EINVAL;
- return stats_write_lmk_kill_occurred(ctx, code, uid, proc->taskname, oom_score, pgfault,
- pgmajfault, rss_in_bytes, cache_in_bytes, swap_in_bytes,
- process_start_time_ns, min_oom_score);
+ return stats_write_lmk_kill_occurred(code, uid, proc->taskname, oom_score, min_oom_score,
+ tasksize, mem_st);
+}
+
+/**
+ * Parses one "<key> <value>" line and stores the value in the matching field
+ * of mem_st.
+ *
+ * Only the keys total_pgfault, total_pgmajfault, total_rss, total_cache and
+ * total_swap are recognized. Any other key, and any line that does not parse
+ * as a key followed by an integer, leaves mem_st untouched.
+ */
+static void memory_stat_parse_line(char* line, struct memory_stat* mem_st) {
+    char key[LINE_MAX + 1];
+    int64_t value;
+
+    /* Reject malformed lines so key and value are never read uninitialized. */
+    if (sscanf(line, "%" STRINGIFY(LINE_MAX) "s %" SCNd64 "", key, &value) != 2) {
+        return;
+    }
+
+    /* Cheap prefix filter: every key of interest starts with "total_". */
+    if (strncmp(key, "total_", strlen("total_")) != 0) {
+        return;
+    }
+
+    if (!strcmp(key, "total_pgfault")) {
+        mem_st->pgfault = value;
+    } else if (!strcmp(key, "total_pgmajfault")) {
+        mem_st->pgmajfault = value;
+    } else if (!strcmp(key, "total_rss")) {
+        mem_st->rss_in_bytes = value;
+    } else if (!strcmp(key, "total_cache")) {
+        mem_st->cache_in_bytes = value;
+    } else if (!strcmp(key, "total_swap")) {
+        mem_st->swap_in_bytes = value;
+    }
+}
+
+/**
+ * Fills mem_st from the stat file whose path is built from
+ * MEMCG_PROCESS_MEMORY_STAT_PATH with uid and pid, feeding every line to
+ * memory_stat_parse_line().
+ *
+ * Returns 0 once the file has been read, or -1 if it could not be opened.
+ */
+static int memory_stat_from_cgroup(struct memory_stat* mem_st, int pid, uid_t uid) {
+    FILE *fp;
+    char buf[PATH_MAX];
+
+    snprintf(buf, sizeof(buf), MEMCG_PROCESS_MEMORY_STAT_PATH, uid, pid);
+
+    fp = fopen(buf, "r");
+    if (fp == NULL) {
+        return -1;
+    }
+
+    /* buf doubles as the line buffer, so bound fgets() by its real size. */
+    while (fgets(buf, sizeof(buf), fp) != NULL) {
+        memory_stat_parse_line(buf, mem_st);
+    }
+    fclose(fp);
+
+    return 0;
+}
+
+/**
+ * Fills pgfault, pgmajfault, rss_in_bytes and process_start_time_ns of mem_st
+ * from the file whose path is built from PROC_STAT_FILE_PATH with pid.
+ *
+ * Returns 0 on success. Returns -1, leaving mem_st untouched, if the file
+ * cannot be opened or read, or if the four expected fields cannot be parsed.
+ */
+static int memory_stat_from_procfs(struct memory_stat* mem_st, int pid) {
+    char path[PATH_MAX];
+    char buffer[PROC_STAT_BUFFER_SIZE];
+    int fd, ret;
+
+    snprintf(path, sizeof(path), PROC_STAT_FILE_PATH, pid);
+    if ((fd = open(path, O_RDONLY | O_CLOEXEC)) < 0) {
+        return -1;
+    }
+
+    /* Keep one byte free for the terminator added below. */
+    ret = read(fd, buffer, sizeof(buffer) - 1);
+    close(fd);
+    if (ret < 0) {
+        return -1;
+    }
+    /* read() does not NUL-terminate, and sscanf() needs a C string. */
+    buffer[ret] = '\0';
+
+    // field 10 is pgfault
+    // field 12 is pgmajfault
+    // field 22 is starttime
+    // field 24 is rss_in_pages
+    int64_t pgfault = 0, pgmajfault = 0, starttime = 0, rss_in_pages = 0;
+    if (sscanf(buffer,
+               "%*u %*s %*s %*d %*d %*d %*d %*d %*d %" SCNd64 " %*d "
+               "%" SCNd64 " %*d %*u %*u %*d %*d %*d %*d %*d %*d "
+               "%" SCNd64 " %*d %" SCNd64 "",
+               &pgfault, &pgmajfault, &starttime, &rss_in_pages) != 4) {
+        return -1;
+    }
+    mem_st->pgfault = pgfault;
+    mem_st->pgmajfault = pgmajfault;
+    mem_st->rss_in_bytes = (rss_in_pages * PAGE_SIZE);
+    /* starttime is scaled from sysconf(_SC_CLK_TCK) ticks to nanoseconds. */
+    mem_st->process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK));
+
+    return 0;
+}
+
+/**
+ * Collects memory statistics for pid, from the per-app memcg stat file when
+ * per_app_memcg is true and from the procfs stat file otherwise.
+ *
+ * Returns a pointer to a function-local static struct that is overwritten by
+ * the next call, or NULL when stats logging is disabled or the selected
+ * source could not be read.
+ */
+struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid) {
+    static struct memory_stat mem_st = {};
+    int err;
+
+    if (!enable_stats_log) {
+        return NULL;
+    }
+
+    /*
+     * Neither source fills every field, so clear the shared struct first;
+     * otherwise values from a previously queried process would leak into
+     * this result.
+     */
+    memset(&mem_st, 0, sizeof(mem_st));
+
+    err = per_app_memcg ? memory_stat_from_cgroup(&mem_st, pid, uid)
+                        : memory_stat_from_procfs(&mem_st, pid);
+
+    return err == 0 ? &mem_st : NULL;
+}
+
+/**
+ * Reads kill records from poll_fd until pread() returns no data and logs an
+ * LMK_KILL_OCCURRED event for each record that describes a group leader.
+ *
+ * A record is expected to hold nine integers (pid, uid, group leader pid,
+ * pgfault, pgmajfault, rss in pages, oom_score_adj, min_score_adj,
+ * starttime) followed by the task name on the next line. Records that do not
+ * yield all ten fields are skipped. Does nothing when poll_fd is -1.
+ */
+static void poll_kernel(int poll_fd) {
+    if (poll_fd == -1) {
+        // not waiting
+        return;
+    }
+
+    while (1) {
+        char rd_buf[256];
+        /* Read at most size - 1 bytes so the terminator below stays in bounds. */
+        int bytes_read =
+            TEMP_FAILURE_RETRY(pread(poll_fd, (void*)rd_buf, sizeof(rd_buf) - 1, 0));
+        if (bytes_read <= 0) break;
+        rd_buf[bytes_read] = '\0';
+
+        int64_t pid;
+        int64_t uid;
+        int64_t group_leader_pid;
+        int64_t rss_in_pages;
+        struct memory_stat mem_st = {};
+        int16_t oom_score_adj;
+        int16_t min_score_adj;
+        int64_t starttime;
+        /* Allocated by the %m conversion below; released at the end of the iteration. */
+        char* taskname = NULL;
+
+        int fields_read = sscanf(rd_buf,
+                                 "%" SCNd64 " %" SCNd64 " %" SCNd64 " %" SCNd64 " %" SCNd64
+                                 " %" SCNd64 " %" SCNd16 " %" SCNd16 " %" SCNd64 "\n%m[^\n]",
+                                 &pid, &uid, &group_leader_pid, &mem_st.pgfault,
+                                 &mem_st.pgmajfault, &rss_in_pages, &oom_score_adj,
+                                 &min_score_adj, &starttime, &taskname);
+
+        /* only the death of the group leader process is logged */
+        if (fields_read == 10 && group_leader_pid == pid) {
+            mem_st.process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK));
+            mem_st.rss_in_bytes = rss_in_pages * PAGE_SIZE;
+            stats_write_lmk_kill_occurred_pid(LMK_KILL_OCCURRED, uid, pid, oom_score_adj,
+                                              min_score_adj, 0, &mem_st);
+        }
+
+        free(taskname);
+    }
+}
+
+/**
+ * Opens "/proc/lowmemorykiller" for non-blocking reads and registers
+ * poll_kernel() as the handler in poll_info.
+ *
+ * Returns true on success. Returns false when stats logging is disabled
+ * (poll_info is then left untouched) or when the file cannot be opened (the
+ * failed descriptor value is still stored in poll_info->poll_fd).
+ */
+bool init_poll_kernel(struct kernel_poll_info *poll_info) {
+    if (!enable_stats_log) {
+        return false;
+    }
+
+    poll_info->poll_fd =
+        TEMP_FAILURE_RETRY(open("/proc/lowmemorykiller", O_RDONLY | O_NONBLOCK | O_CLOEXEC));
+
+    if (poll_info->poll_fd >= 0) {
+        poll_info->handler = poll_kernel;
+        return true;
+    }
+
+    ALOGE("kernel lmk event file could not be opened; errno=%d", errno);
+    return false;
}
static void proc_insert(struct proc* procp) {
- if (!pidhash)
+ /*
+  * Allocate the bucket array on first use.
+  * NOTE(review): the calloc() result is not checked before it is indexed below.
+  */
+ if (!pidhash) {
pidhash = calloc(PIDHASH_SZ, sizeof(struct proc));
+ }
+
+ /* Push procp onto the front of the chain for its hash bucket. */
int hval = pid_hashfn(procp->pid);
procp->pidhash_next = pidhash[hval];
pidhash[hval] = procp;
}
-void stats_remove_taskname(int pid) {
- if (!pidhash) return;
+void stats_remove_taskname(int pid, int poll_fd) {
+ if (!enable_stats_log || !pidhash) {
+ return;
+ }
+
+ /*
+ * Perform an extra check before the pid is removed, after which it
+ * will be impossible for poll_kernel to get the taskname. poll_kernel()
+ * is potentially a long-running blocking function; however this method
+ * handles AMS requests but does not block AMS.
+ */
+ poll_kernel(poll_fd);
int hval = pid_hashfn(pid);
struct proc* procp;
@@ -193,12 +391,19 @@
free(procp);
}
-void stats_store_taskname(int pid, const char* taskname) {
- struct proc* procp = pid_lookup(pid);
- if (procp != NULL && strcmp(procp->taskname, taskname) == 0)
+void stats_store_taskname(int pid, const char* taskname, int poll_fd) {
+ if (!enable_stats_log) {
return;
+ }
+
+ struct proc* procp = pid_lookup(pid);
+ if (procp != NULL) {
+ if (strcmp(procp->taskname, taskname) == 0) {
+ return;
+ }
+ stats_remove_taskname(pid, poll_fd);
+ }
procp = malloc(sizeof(struct proc));
- stats_remove_taskname(pid);
procp->pid = pid;
strncpy(procp->taskname, taskname, LINE_MAX - 1);
procp->taskname[LINE_MAX - 1] = '\0';
@@ -206,7 +411,10 @@
}
void stats_purge_tasknames() {
- if (!pidhash) return;
+ if (!enable_stats_log || !pidhash) {
+ return;
+ }
+
struct proc* procp;
struct proc* next;
int i;
@@ -220,3 +428,5 @@
}
memset(pidhash, 0, PIDHASH_SZ * sizeof(struct proc));
}
+
+#endif /* LMKD_LOG_STATS */