blob: 28381199d67a0ac3cf46a502d1848bf53bf1b3fb [file] [log] [blame]
Bertrand SIMONNET52e5b992015-08-10 15:18:00 -07001/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Darin Petkov65b01462010-04-14 13:32:20 -070016
Bertrand SIMONNET4b915ae2015-07-28 15:38:14 -070017#include "metrics_daemon.h"
Darin Petkov65b01462010-04-14 13:32:20 -070018
Luigi Semenzatoc88e42d2011-02-17 10:21:16 -080019#include <fcntl.h>
Luigi Semenzato4a6c9422014-06-30 18:12:28 -070020#include <inttypes.h>
Luigi Semenzato8accd332011-05-17 16:37:18 -070021#include <math.h>
Ken Mixter4c5daa42010-08-26 18:35:06 -070022#include <string.h>
Steve Funge86591e2014-12-01 13:38:21 -080023#include <sysexits.h>
Luigi Semenzato8accd332011-05-17 16:37:18 -070024#include <time.h>
Darin Petkov65b01462010-04-14 13:32:20 -070025
Bertrand SIMONNET4b915ae2015-07-28 15:38:14 -070026#include <base/bind.h>
Luigi Semenzato859b3f02014-02-05 15:33:19 -080027#include <base/files/file_path.h>
Ben Chan51bf92a2014-09-05 08:21:06 -070028#include <base/files/file_util.h>
Luigi Semenzato859b3f02014-02-05 15:33:19 -080029#include <base/hash.h>
Darin Petkov65b01462010-04-14 13:32:20 -070030#include <base/logging.h>
Ben Chan2e6543d2014-02-05 23:26:25 -080031#include <base/strings/string_number_conversions.h>
32#include <base/strings/string_split.h>
33#include <base/strings/string_util.h>
34#include <base/strings/stringprintf.h>
Bertrand SIMONNET26993622015-08-20 14:08:41 -070035#include <cutils/properties.h>
Steve Funge86591e2014-12-01 13:38:21 -080036#include <dbus/dbus.h>
37#include <dbus/message.h>
Bertrand SIMONNETbae5dcc2015-08-04 14:12:10 -070038
39#include "constants.h"
Bertrand SIMONNET46b49da2014-06-25 14:38:07 -070040#include "uploader/upload_service.h"
Darin Petkov65b01462010-04-14 13:32:20 -070041
Ben Chan2e6543d2014-02-05 23:26:25 -080042using base::FilePath;
43using base::StringPrintf;
Darin Petkovf27f0362010-06-04 13:14:19 -070044using base::Time;
45using base::TimeDelta;
46using base::TimeTicks;
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -080047using chromeos_metrics::PersistentInteger;
Luigi Semenzato8accd332011-05-17 16:37:18 -070048using std::map;
Darin Petkov38d5cb02010-06-24 12:10:26 -070049using std::string;
Luigi Semenzato8accd332011-05-17 16:37:18 -070050using std::vector;
51
namespace {

// D-Bus identifiers for the crash reporter's user-crash signal.
const char kCrashReporterInterface[] = "org.chromium.CrashReporter";
const char kCrashReporterUserCrashSignal[] = "UserCrash";
// printf-style template for the match rule; the two %s placeholders take the
// interface name and the signal (member) name, in that order.
const char kCrashReporterMatchRule[] =
    "type='signal',interface='%s',path='/',member='%s'";

// Time unit conversions.
const int kSecondsPerMinute = 60;
const int kMinutesPerHour = 60;
const int kHoursPerDay = 24;
const int kMinutesPerDay = kHoursPerDay * kMinutesPerHour;
const int kSecondsPerDay = kSecondsPerMinute * kMinutesPerDay;
const int kDaysPerWeek = 7;
const int kSecondsPerWeek = kSecondsPerDay * kDaysPerWeek;

// Interval between calls to UpdateStats(), in milliseconds.
const uint32_t kUpdateStatsIntervalMs = 300000;

// Marker files whose presence is checked (and consumed) at startup.
const char kKernelCrashDetectedFile[] = "/var/run/kernel-crash-detected";
const char kUncleanShutdownDetectedFile[] =
    "/var/run/unclean-shutdown-detected";

// Disk stats metrics.
//
// The {Read,Write}Sectors numbers are in sectors/second.
// A sector is usually 512 bytes.
const char kMetricReadSectorsLongName[] = "Platform.ReadSectorsLong";
const char kMetricWriteSectorsLongName[] = "Platform.WriteSectorsLong";
const char kMetricReadSectorsShortName[] = "Platform.ReadSectorsShort";
const char kMetricWriteSectorsShortName[] = "Platform.WriteSectorsShort";

const int kMetricStatsShortInterval = 1;   // seconds
const int kMetricStatsLongInterval = 30;   // seconds

// Assume a max rate of 250 MB/s for reads (worse for writes) and 512 byte
// sectors.
const int kMetricSectorsIOMax = 500000;    // sectors/second
const int kMetricSectorsBuckets = 50;      // buckets
// Page size is 4k, sector size is 0.5k, hence the factor of 8.  We're not
// interested in page fault rates that the disk cannot sustain.
const int kMetricPageFaultsMax = kMetricSectorsIOMax / 8;
const int kMetricPageFaultsBuckets = 50;

// Major page faults, i.e. the ones that require data to be read from disk.
const char kMetricPageFaultsLongName[] = "Platform.PageFaultsLong";
const char kMetricPageFaultsShortName[] = "Platform.PageFaultsShort";

// Swap in and swap out.
const char kMetricSwapInLongName[] = "Platform.SwapInLong";
const char kMetricSwapInShortName[] = "Platform.SwapInShort";

const char kMetricSwapOutLongName[] = "Platform.SwapOutLong";
const char kMetricSwapOutShortName[] = "Platform.SwapOutShort";

// procfs files read by the daemon.
const char kMetricsProcStatFileName[] = "/proc/stat";
const char kVmStatFileName[] = "/proc/vmstat";
const char kMeminfoFileName[] = "/proc/meminfo";
// Number of whitespace-separated items expected on the first line of
// /proc/stat (the "cpu" label plus its counters).
const int kMetricsProcStatFirstLineItemsCount = 11;

// Thermal CPU throttling.
const char kMetricScaledCpuFrequencyName[] =
    "Platform.CpuFrequencyThermalScaling";

}  // namespace
120
Luigi Semenzato96360192014-06-04 10:53:35 -0700121// Zram sysfs entries.
122
123const char MetricsDaemon::kComprDataSizeName[] = "compr_data_size";
124const char MetricsDaemon::kOrigDataSizeName[] = "orig_data_size";
125const char MetricsDaemon::kZeroPagesName[] = "zero_pages";
126
Luigi Semenzato8accd332011-05-17 16:37:18 -0700127// Memory use stats collection intervals. We collect some memory use interval
128// at these intervals after boot, and we stop collecting after the last one,
129// with the assumption that in most cases the memory use won't change much
130// after that.
131static const int kMemuseIntervals[] = {
132 1 * kSecondsPerMinute, // 1 minute mark
133 4 * kSecondsPerMinute, // 5 minute mark
134 25 * kSecondsPerMinute, // 0.5 hour mark
135 120 * kSecondsPerMinute, // 2.5 hour mark
136 600 * kSecondsPerMinute, // 12.5 hour mark
137};
138
Darin Petkovf1e85e42010-06-10 15:59:53 -0700139MetricsDaemon::MetricsDaemon()
Steve Funge86591e2014-12-01 13:38:21 -0800140 : memuse_final_time_(0),
Luigi Semenzato8accd332011-05-17 16:37:18 -0700141 memuse_interval_index_(0),
142 read_sectors_(0),
143 write_sectors_(0),
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700144 vmstats_(),
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700145 stats_state_(kStatsShort),
Luigi Semenzatoba0c65d2014-03-17 12:28:38 -0700146 stats_initial_time_(0),
147 ticks_per_second_(0),
148 latest_cpu_use_ticks_(0) {}
Darin Petkovf1e85e42010-06-10 15:59:53 -0700149
Ken Mixter4c5daa42010-08-26 18:35:06 -0700150MetricsDaemon::~MetricsDaemon() {
Ken Mixter4c5daa42010-08-26 18:35:06 -0700151}
152
Luigi Semenzato8accd332011-05-17 16:37:18 -0700153double MetricsDaemon::GetActiveTime() {
154 struct timespec ts;
155 int r = clock_gettime(CLOCK_MONOTONIC, &ts);
156 if (r < 0) {
157 PLOG(WARNING) << "clock_gettime(CLOCK_MONOTONIC) failed";
158 return 0;
159 } else {
Luigi Semenzato4a6c9422014-06-30 18:12:28 -0700160 return ts.tv_sec + static_cast<double>(ts.tv_nsec) / (1000 * 1000 * 1000);
Luigi Semenzato8accd332011-05-17 16:37:18 -0700161 }
162}
163
Steve Funge86591e2014-12-01 13:38:21 -0800164int MetricsDaemon::Run() {
Ken Mixterccd84c02010-08-16 19:57:13 -0700165 if (CheckSystemCrash(kKernelCrashDetectedFile)) {
166 ProcessKernelCrash();
167 }
168
169 if (CheckSystemCrash(kUncleanShutdownDetectedFile)) {
170 ProcessUncleanShutdown();
171 }
172
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800173 // On OS version change, clear version stats (which are reported daily).
Ben Chanf05ab402014-08-07 00:54:59 -0700174 int32_t version = GetOsVersionHash();
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800175 if (version_cycle_->Get() != version) {
176 version_cycle_->Set(version);
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800177 kernel_crashes_version_count_->Set(0);
Luigi Semenzatoe5883fa2014-04-18 17:00:35 -0700178 version_cumulative_active_use_->Set(0);
Luigi Semenzatoba0c65d2014-03-17 12:28:38 -0700179 version_cumulative_cpu_use_->Set(0);
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800180 }
181
Steve Funge86591e2014-12-01 13:38:21 -0800182 return chromeos::DBusDaemon::Run();
Darin Petkov65b01462010-04-14 13:32:20 -0700183}
184
Bertrand SIMONNET46b49da2014-06-25 14:38:07 -0700185void MetricsDaemon::RunUploaderTest() {
Bertrand SIMONNET12531862015-08-31 11:11:57 -0700186 upload_service_.reset(new UploadService(
187 new SystemProfileCache(true, base::FilePath(config_root_)),
188 metrics_lib_,
189 server_));
Bertrand SIMONNETcac74e12014-10-09 10:14:13 -0700190 upload_service_->Init(upload_interval_, metrics_file_);
Bertrand SIMONNET46b49da2014-06-25 14:38:07 -0700191 upload_service_->UploadEvent();
192}
193
Ben Chanf05ab402014-08-07 00:54:59 -0700194uint32_t MetricsDaemon::GetOsVersionHash() {
195 static uint32_t cached_version_hash = 0;
Luigi Semenzato859b3f02014-02-05 15:33:19 -0800196 static bool version_hash_is_cached = false;
197 if (version_hash_is_cached)
198 return cached_version_hash;
199 version_hash_is_cached = true;
Bertrand SIMONNET26993622015-08-20 14:08:41 -0700200
201 char version[PROPERTY_VALUE_MAX];
Bertrand SIMONNETbae5dcc2015-08-04 14:12:10 -0700202 // The version might not be set for development devices. In this case, use the
203 // zero version.
Bertrand SIMONNET26993622015-08-20 14:08:41 -0700204 property_get(metrics::kProductVersionProperty, version,
205 metrics::kDefaultVersion);
206
Bertrand SIMONNETbae5dcc2015-08-04 14:12:10 -0700207 cached_version_hash = base::Hash(version);
208 if (testing_) {
Luigi Semenzato859b3f02014-02-05 15:33:19 -0800209 cached_version_hash = 42; // return any plausible value for the hash
Luigi Semenzato859b3f02014-02-05 15:33:19 -0800210 }
211 return cached_version_hash;
212}
213
Bertrand SIMONNET46b49da2014-06-25 14:38:07 -0700214void MetricsDaemon::Init(bool testing,
215 bool uploader_active,
Bertrand SIMONNETfec4d2c2015-08-05 16:04:14 -0700216 bool dbus_enabled,
Bertrand SIMONNET46b49da2014-06-25 14:38:07 -0700217 MetricsLibraryInterface* metrics_lib,
Luigi Semenzatofb3a8212013-05-07 16:55:00 -0700218 const string& scaling_max_freq_path,
Steve Fung67906c62014-10-06 15:15:30 -0700219 const string& cpuinfo_max_freq_path,
Bertrand SIMONNETcac74e12014-10-09 10:14:13 -0700220 const base::TimeDelta& upload_interval,
Steve Fung67906c62014-10-06 15:15:30 -0700221 const string& server,
Bertrand SIMONNET71a62ef2014-10-07 11:26:25 -0700222 const string& metrics_file,
223 const string& config_root) {
Bertrand SIMONNET675a10c2015-08-25 14:11:43 -0700224 CHECK(metrics_lib);
Darin Petkov65b01462010-04-14 13:32:20 -0700225 testing_ = testing;
Steve Funge86591e2014-12-01 13:38:21 -0800226 uploader_active_ = uploader_active;
Bertrand SIMONNETfec4d2c2015-08-05 16:04:14 -0700227 dbus_enabled_ = dbus_enabled;
Bertrand SIMONNET71a62ef2014-10-07 11:26:25 -0700228 config_root_ = config_root;
Darin Petkovfc91b422010-05-12 13:05:45 -0700229 metrics_lib_ = metrics_lib;
Darin Petkov38d5cb02010-06-24 12:10:26 -0700230
Bertrand SIMONNETcac74e12014-10-09 10:14:13 -0700231 upload_interval_ = upload_interval;
Steve Fung67906c62014-10-06 15:15:30 -0700232 server_ = server;
233 metrics_file_ = metrics_file;
234
Luigi Semenzatoba0c65d2014-03-17 12:28:38 -0700235 // Get ticks per second (HZ) on this system.
236 // Sysconf cannot fail, so no sanity checks are needed.
237 ticks_per_second_ = sysconf(_SC_CLK_TCK);
238
Luigi Semenzatoe5883fa2014-04-18 17:00:35 -0700239 daily_active_use_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700240 new PersistentInteger("Platform.DailyUseTime"));
Luigi Semenzatoe5883fa2014-04-18 17:00:35 -0700241 version_cumulative_active_use_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700242 new PersistentInteger("Platform.CumulativeDailyUseTime"));
Luigi Semenzatoba0c65d2014-03-17 12:28:38 -0700243 version_cumulative_cpu_use_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700244 new PersistentInteger("Platform.CumulativeCpuTime"));
Darin Petkov38d5cb02010-06-24 12:10:26 -0700245
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800246 kernel_crash_interval_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700247 new PersistentInteger("Platform.KernelCrashInterval"));
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800248 unclean_shutdown_interval_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700249 new PersistentInteger("Platform.UncleanShutdownInterval"));
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800250 user_crash_interval_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700251 new PersistentInteger("Platform.UserCrashInterval"));
Darin Petkov2ccef012010-05-05 16:06:37 -0700252
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800253 any_crashes_daily_count_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700254 new PersistentInteger("Platform.AnyCrashesDaily"));
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800255 any_crashes_weekly_count_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700256 new PersistentInteger("Platform.AnyCrashesWeekly"));
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800257 user_crashes_daily_count_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700258 new PersistentInteger("Platform.UserCrashesDaily"));
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800259 user_crashes_weekly_count_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700260 new PersistentInteger("Platform.UserCrashesWeekly"));
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800261 kernel_crashes_daily_count_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700262 new PersistentInteger("Platform.KernelCrashesDaily"));
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800263 kernel_crashes_weekly_count_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700264 new PersistentInteger("Platform.KernelCrashesWeekly"));
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800265 kernel_crashes_version_count_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700266 new PersistentInteger("Platform.KernelCrashesSinceUpdate"));
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800267 unclean_shutdowns_daily_count_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700268 new PersistentInteger("Platform.UncleanShutdownsDaily"));
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800269 unclean_shutdowns_weekly_count_.reset(
Luigi Semenzatodc865892015-07-09 08:28:08 -0700270 new PersistentInteger("Platform.UncleanShutdownsWeekly"));
Darin Petkov38d5cb02010-06-24 12:10:26 -0700271
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800272 daily_cycle_.reset(new PersistentInteger("daily.cycle"));
273 weekly_cycle_.reset(new PersistentInteger("weekly.cycle"));
274 version_cycle_.reset(new PersistentInteger("version.cycle"));
Luigi Semenzato859b3f02014-02-05 15:33:19 -0800275
Luigi Semenzatofb3a8212013-05-07 16:55:00 -0700276 scaling_max_freq_path_ = scaling_max_freq_path;
277 cpuinfo_max_freq_path_ = cpuinfo_max_freq_path;
Steve Funge86591e2014-12-01 13:38:21 -0800278}
279
280int MetricsDaemon::OnInit() {
Bertrand SIMONNETfec4d2c2015-08-05 16:04:14 -0700281 int return_code = dbus_enabled_ ? chromeos::DBusDaemon::OnInit() :
282 chromeos::Daemon::OnInit();
Steve Funge86591e2014-12-01 13:38:21 -0800283 if (return_code != EX_OK)
284 return return_code;
285
Steve Funge86591e2014-12-01 13:38:21 -0800286 if (testing_)
287 return EX_OK;
Darin Petkov65b01462010-04-14 13:32:20 -0700288
Bertrand SIMONNETfec4d2c2015-08-05 16:04:14 -0700289 if (dbus_enabled_) {
290 bus_->AssertOnDBusThread();
291 CHECK(bus_->SetUpAsyncOperations());
Darin Petkov65b01462010-04-14 13:32:20 -0700292
Bertrand SIMONNETfec4d2c2015-08-05 16:04:14 -0700293 if (bus_->is_connected()) {
294 const std::string match_rule =
295 base::StringPrintf(kCrashReporterMatchRule,
296 kCrashReporterInterface,
297 kCrashReporterUserCrashSignal);
Darin Petkov65b01462010-04-14 13:32:20 -0700298
Bertrand SIMONNETfec4d2c2015-08-05 16:04:14 -0700299 bus_->AddFilterFunction(&MetricsDaemon::MessageFilter, this);
Darin Petkov65b01462010-04-14 13:32:20 -0700300
Bertrand SIMONNETfec4d2c2015-08-05 16:04:14 -0700301 DBusError error;
302 dbus_error_init(&error);
303 bus_->AddMatch(match_rule, &error);
Darin Petkov65b01462010-04-14 13:32:20 -0700304
Bertrand SIMONNETfec4d2c2015-08-05 16:04:14 -0700305 if (dbus_error_is_set(&error)) {
306 LOG(ERROR) << "Failed to add match rule \"" << match_rule << "\". Got "
307 << error.name << ": " << error.message;
308 return EX_SOFTWARE;
309 }
310 } else {
311 LOG(ERROR) << "DBus isn't connected.";
312 return EX_UNAVAILABLE;
Steve Funge86591e2014-12-01 13:38:21 -0800313 }
Darin Petkov703ec972010-04-27 11:02:18 -0700314 }
315
Steve Funge86591e2014-12-01 13:38:21 -0800316 if (uploader_active_) {
Bertrand SIMONNETbae5dcc2015-08-04 14:12:10 -0700317 upload_service_.reset(
318 new UploadService(new SystemProfileCache(), metrics_lib_, server_));
319 upload_service_->Init(upload_interval_, metrics_file_);
Bertrand SIMONNET46b49da2014-06-25 14:38:07 -0700320 }
Steve Funge86591e2014-12-01 13:38:21 -0800321
322 return EX_OK;
Darin Petkov65b01462010-04-14 13:32:20 -0700323}
324
Steve Funge86591e2014-12-01 13:38:21 -0800325void MetricsDaemon::OnShutdown(int* return_code) {
Bertrand SIMONNETfec4d2c2015-08-05 16:04:14 -0700326 if (!testing_ && dbus_enabled_ && bus_->is_connected()) {
Steve Funge86591e2014-12-01 13:38:21 -0800327 const std::string match_rule =
328 base::StringPrintf(kCrashReporterMatchRule,
329 kCrashReporterInterface,
330 kCrashReporterUserCrashSignal);
331
332 bus_->RemoveFilterFunction(&MetricsDaemon::MessageFilter, this);
333
334 DBusError error;
335 dbus_error_init(&error);
336 bus_->RemoveMatch(match_rule, &error);
337
338 if (dbus_error_is_set(&error)) {
339 LOG(ERROR) << "Failed to remove match rule \"" << match_rule << "\". Got "
340 << error.name << ": " << error.message;
341 }
342 }
343 chromeos::DBusDaemon::OnShutdown(return_code);
Darin Petkov65b01462010-04-14 13:32:20 -0700344}
345
Darin Petkov703ec972010-04-27 11:02:18 -0700346// static
347DBusHandlerResult MetricsDaemon::MessageFilter(DBusConnection* connection,
348 DBusMessage* message,
349 void* user_data) {
Darin Petkov703ec972010-04-27 11:02:18 -0700350 int message_type = dbus_message_get_type(message);
351 if (message_type != DBUS_MESSAGE_TYPE_SIGNAL) {
Darin Petkov41e06232010-05-03 16:45:37 -0700352 DLOG(WARNING) << "unexpected message type " << message_type;
Darin Petkov703ec972010-04-27 11:02:18 -0700353 return DBUS_HANDLER_RESULT_NOT_YET_HANDLED;
354 }
355
356 // Signal messages always have interfaces.
Daniel Eratc83975a2014-04-04 08:53:44 -0700357 const std::string interface(dbus_message_get_interface(message));
358 const std::string member(dbus_message_get_member(message));
359 DLOG(INFO) << "Got " << interface << "." << member << " D-Bus signal";
Darin Petkov703ec972010-04-27 11:02:18 -0700360
361 MetricsDaemon* daemon = static_cast<MetricsDaemon*>(user_data);
362
363 DBusMessageIter iter;
364 dbus_message_iter_init(message, &iter);
Daniel Eratc83975a2014-04-04 08:53:44 -0700365 if (interface == kCrashReporterInterface) {
366 CHECK_EQ(member, kCrashReporterUserCrashSignal);
Darin Petkov1bb904e2010-06-16 15:58:06 -0700367 daemon->ProcessUserCrash();
Darin Petkov703ec972010-04-27 11:02:18 -0700368 } else {
Daniel Eratc83975a2014-04-04 08:53:44 -0700369 // Ignore messages from the bus itself.
Darin Petkov703ec972010-04-27 11:02:18 -0700370 return DBUS_HANDLER_RESULT_NOT_YET_HANDLED;
371 }
372
373 return DBUS_HANDLER_RESULT_HANDLED;
Darin Petkov65b01462010-04-14 13:32:20 -0700374}
375
Luigi Semenzatoba0c65d2014-03-17 12:28:38 -0700376// One might argue that parts of this should go into
377// chromium/src/base/sys_info_chromeos.c instead, but put it here for now.
378
379TimeDelta MetricsDaemon::GetIncrementalCpuUse() {
Luigi Semenzatoba0c65d2014-03-17 12:28:38 -0700380 FilePath proc_stat_path = FilePath(kMetricsProcStatFileName);
381 std::string proc_stat_string;
382 if (!base::ReadFileToString(proc_stat_path, &proc_stat_string)) {
383 LOG(WARNING) << "cannot open " << kMetricsProcStatFileName;
384 return TimeDelta();
385 }
386
387 std::vector<std::string> proc_stat_lines;
388 base::SplitString(proc_stat_string, '\n', &proc_stat_lines);
389 if (proc_stat_lines.empty()) {
390 LOG(WARNING) << "cannot parse " << kMetricsProcStatFileName
391 << ": " << proc_stat_string;
392 return TimeDelta();
393 }
394 std::vector<std::string> proc_stat_totals;
395 base::SplitStringAlongWhitespace(proc_stat_lines[0], &proc_stat_totals);
396
Ben Chanf05ab402014-08-07 00:54:59 -0700397 uint64_t user_ticks, user_nice_ticks, system_ticks;
Luigi Semenzatoba0c65d2014-03-17 12:28:38 -0700398 if (proc_stat_totals.size() != kMetricsProcStatFirstLineItemsCount ||
399 proc_stat_totals[0] != "cpu" ||
400 !base::StringToUint64(proc_stat_totals[1], &user_ticks) ||
401 !base::StringToUint64(proc_stat_totals[2], &user_nice_ticks) ||
402 !base::StringToUint64(proc_stat_totals[3], &system_ticks)) {
403 LOG(WARNING) << "cannot parse first line: " << proc_stat_lines[0];
404 return TimeDelta(base::TimeDelta::FromSeconds(0));
405 }
406
Ben Chanf05ab402014-08-07 00:54:59 -0700407 uint64_t total_cpu_use_ticks = user_ticks + user_nice_ticks + system_ticks;
Luigi Semenzatoba0c65d2014-03-17 12:28:38 -0700408
409 // Sanity check.
410 if (total_cpu_use_ticks < latest_cpu_use_ticks_) {
411 LOG(WARNING) << "CPU time decreasing from " << latest_cpu_use_ticks_
412 << " to " << total_cpu_use_ticks;
413 return TimeDelta();
414 }
415
Ben Chanf05ab402014-08-07 00:54:59 -0700416 uint64_t diff = total_cpu_use_ticks - latest_cpu_use_ticks_;
Luigi Semenzatoba0c65d2014-03-17 12:28:38 -0700417 latest_cpu_use_ticks_ = total_cpu_use_ticks;
418 // Use microseconds to avoid significant truncations.
419 return base::TimeDelta::FromMicroseconds(
420 diff * 1000 * 1000 / ticks_per_second_);
421}
422
Darin Petkov1bb904e2010-06-16 15:58:06 -0700423void MetricsDaemon::ProcessUserCrash() {
Daniel Eratc83975a2014-04-04 08:53:44 -0700424 // Counts the active time up to now.
425 UpdateStats(TimeTicks::Now(), Time::Now());
Darin Petkov1bb904e2010-06-16 15:58:06 -0700426
427 // Reports the active use time since the last crash and resets it.
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800428 SendCrashIntervalSample(user_crash_interval_);
Ken Mixterccd84c02010-08-16 19:57:13 -0700429
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800430 any_crashes_daily_count_->Add(1);
431 any_crashes_weekly_count_->Add(1);
432 user_crashes_daily_count_->Add(1);
433 user_crashes_weekly_count_->Add(1);
Darin Petkov1bb904e2010-06-16 15:58:06 -0700434}
435
Darin Petkov38d5cb02010-06-24 12:10:26 -0700436void MetricsDaemon::ProcessKernelCrash() {
Daniel Eratc83975a2014-04-04 08:53:44 -0700437 // Counts the active time up to now.
438 UpdateStats(TimeTicks::Now(), Time::Now());
Darin Petkov38d5cb02010-06-24 12:10:26 -0700439
440 // Reports the active use time since the last crash and resets it.
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800441 SendCrashIntervalSample(kernel_crash_interval_);
Ken Mixterccd84c02010-08-16 19:57:13 -0700442
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800443 any_crashes_daily_count_->Add(1);
444 any_crashes_weekly_count_->Add(1);
445 kernel_crashes_daily_count_->Add(1);
446 kernel_crashes_weekly_count_->Add(1);
Luigi Semenzato859b3f02014-02-05 15:33:19 -0800447
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800448 kernel_crashes_version_count_->Add(1);
Darin Petkov38d5cb02010-06-24 12:10:26 -0700449}
450
Ken Mixterccd84c02010-08-16 19:57:13 -0700451void MetricsDaemon::ProcessUncleanShutdown() {
Daniel Eratc83975a2014-04-04 08:53:44 -0700452 // Counts the active time up to now.
453 UpdateStats(TimeTicks::Now(), Time::Now());
Ken Mixterccd84c02010-08-16 19:57:13 -0700454
455 // Reports the active use time since the last crash and resets it.
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800456 SendCrashIntervalSample(unclean_shutdown_interval_);
Ken Mixterccd84c02010-08-16 19:57:13 -0700457
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800458 unclean_shutdowns_daily_count_->Add(1);
459 unclean_shutdowns_weekly_count_->Add(1);
460 any_crashes_daily_count_->Add(1);
461 any_crashes_weekly_count_->Add(1);
Ken Mixterccd84c02010-08-16 19:57:13 -0700462}
463
Luigi Semenzato8accd332011-05-17 16:37:18 -0700464bool MetricsDaemon::CheckSystemCrash(const string& crash_file) {
Darin Petkov38d5cb02010-06-24 12:10:26 -0700465 FilePath crash_detected(crash_file);
Ben Chan2e6543d2014-02-05 23:26:25 -0800466 if (!base::PathExists(crash_detected))
Ken Mixterccd84c02010-08-16 19:57:13 -0700467 return false;
Darin Petkov38d5cb02010-06-24 12:10:26 -0700468
469 // Deletes the crash-detected file so that the daemon doesn't report
470 // another kernel crash in case it's restarted.
Luigi Semenzato859b3f02014-02-05 15:33:19 -0800471 base::DeleteFile(crash_detected, false); // not recursive
Ken Mixterccd84c02010-08-16 19:57:13 -0700472 return true;
Darin Petkov38d5cb02010-06-24 12:10:26 -0700473}
474
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700475void MetricsDaemon::StatsReporterInit() {
Luigi Semenzatoc88e42d2011-02-17 10:21:16 -0800476 DiskStatsReadStats(&read_sectors_, &write_sectors_);
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700477 VmStatsReadStats(&vmstats_);
Luigi Semenzatoc88e42d2011-02-17 10:21:16 -0800478 // The first time around just run the long stat, so we don't delay boot.
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700479 stats_state_ = kStatsLong;
480 stats_initial_time_ = GetActiveTime();
481 if (stats_initial_time_ < 0) {
Luigi Semenzato8accd332011-05-17 16:37:18 -0700482 LOG(WARNING) << "not collecting disk stats";
483 } else {
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700484 ScheduleStatsCallback(kMetricStatsLongInterval);
Luigi Semenzato8accd332011-05-17 16:37:18 -0700485 }
Luigi Semenzatoc88e42d2011-02-17 10:21:16 -0800486}
487
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700488void MetricsDaemon::ScheduleStatsCallback(int wait) {
Luigi Semenzatoc88e42d2011-02-17 10:21:16 -0800489 if (testing_) {
490 return;
491 }
Steve Funge86591e2014-12-01 13:38:21 -0800492 base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
493 base::Bind(&MetricsDaemon::StatsCallback, base::Unretained(this)),
494 base::TimeDelta::FromSeconds(wait));
Luigi Semenzatoc88e42d2011-02-17 10:21:16 -0800495}
496
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700497bool MetricsDaemon::VmStatsParseStats(const char* stats,
498 struct VmstatRecord* record) {
Bertrand SIMONNET675a10c2015-08-25 14:11:43 -0700499 CHECK(stats);
500 CHECK(record);
501 base::StringPairs pairs;
502 base::SplitStringIntoKeyValuePairs(stats, ' ', '\n', &pairs);
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700503
Bertrand SIMONNET675a10c2015-08-25 14:11:43 -0700504 for (base::StringPairs::iterator it = pairs.begin(); it != pairs.end(); ++it) {
505 if (it->first == "pgmajfault" &&
506 !base::StringToUint64(it->second, &record->page_faults_)) {
507 return false;
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700508 }
Bertrand SIMONNET675a10c2015-08-25 14:11:43 -0700509 if (it->first == "pswpin" &&
510 !base::StringToUint64(it->second, &record->swap_in_)) {
511 return false;
512 }
513 if (it->first == "pswpout" &&
514 !base::StringToUint64(it->second, &record->swap_out_)) {
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700515 return false;
516 }
517 }
518 return true;
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700519}
520
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700521bool MetricsDaemon::VmStatsReadStats(struct VmstatRecord* stats) {
Bertrand SIMONNET675a10c2015-08-25 14:11:43 -0700522 CHECK(stats);
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700523 string value_string;
Bertrand SIMONNET675a10c2015-08-25 14:11:43 -0700524 if (!base::ReadFileToString(base::FilePath(kVmStatFileName), &value_string)) {
525 LOG(WARNING) << "cannot read " << kVmStatFileName;
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700526 return false;
527 }
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700528 return VmStatsParseStats(value_string.c_str(), stats);
Luigi Semenzatoc88e42d2011-02-17 10:21:16 -0800529}
530
Luigi Semenzatofb3a8212013-05-07 16:55:00 -0700531bool MetricsDaemon::ReadFreqToInt(const string& sysfs_file_name, int* value) {
Luigi Semenzatod92d18c2013-06-04 13:24:21 -0700532 const FilePath sysfs_path(sysfs_file_name);
Luigi Semenzatofb3a8212013-05-07 16:55:00 -0700533 string value_string;
Ben Chan2e6543d2014-02-05 23:26:25 -0800534 if (!base::ReadFileToString(sysfs_path, &value_string)) {
Luigi Semenzatofb3a8212013-05-07 16:55:00 -0700535 LOG(WARNING) << "cannot read " << sysfs_path.value().c_str();
536 return false;
537 }
Ben Chan2e6543d2014-02-05 23:26:25 -0800538 if (!base::RemoveChars(value_string, "\n", &value_string)) {
Luigi Semenzatofb3a8212013-05-07 16:55:00 -0700539 LOG(WARNING) << "no newline in " << value_string;
540 // Continue even though the lack of newline is suspicious.
541 }
542 if (!base::StringToInt(value_string, value)) {
543 LOG(WARNING) << "cannot convert " << value_string << " to int";
544 return false;
545 }
546 return true;
547}
548
549void MetricsDaemon::SendCpuThrottleMetrics() {
550 // |max_freq| is 0 only the first time through.
551 static int max_freq = 0;
552 if (max_freq == -1)
553 // Give up, as sysfs did not report max_freq correctly.
554 return;
555 if (max_freq == 0 || testing_) {
556 // One-time initialization of max_freq. (Every time when testing.)
557 if (!ReadFreqToInt(cpuinfo_max_freq_path_, &max_freq)) {
558 max_freq = -1;
559 return;
560 }
561 if (max_freq == 0) {
562 LOG(WARNING) << "sysfs reports 0 max CPU frequency\n";
563 max_freq = -1;
564 return;
565 }
566 if (max_freq % 10000 == 1000) {
567 // Special case: system has turbo mode, and max non-turbo frequency is
568 // max_freq - 1000. This relies on "normal" (non-turbo) frequencies
569 // being multiples of (at least) 10 MHz. Although there is no guarantee
570 // of this, it seems a fairly reasonable assumption. Otherwise we should
571 // read scaling_available_frequencies, sort the frequencies, compare the
572 // two highest ones, and check if they differ by 1000 (kHz) (and that's a
573 // hack too, no telling when it will change).
574 max_freq -= 1000;
575 }
576 }
577 int scaled_freq = 0;
578 if (!ReadFreqToInt(scaling_max_freq_path_, &scaled_freq))
579 return;
580 // Frequencies are in kHz. If scaled_freq > max_freq, turbo is on, but
581 // scaled_freq is not the actual turbo frequency. We indicate this situation
582 // with a 101% value.
583 int percent = scaled_freq > max_freq ? 101 : scaled_freq / (max_freq / 100);
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800584 SendLinearSample(kMetricScaledCpuFrequencyName, percent, 101, 102);
Luigi Semenzatofb3a8212013-05-07 16:55:00 -0700585}
586
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700587// Collects disk and vm stats alternating over a short and a long interval.
Luigi Semenzato8accd332011-05-17 16:37:18 -0700588
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700589void MetricsDaemon::StatsCallback() {
Ben Chanf05ab402014-08-07 00:54:59 -0700590 uint64_t read_sectors_now, write_sectors_now;
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700591 struct VmstatRecord vmstats_now;
Luigi Semenzato8accd332011-05-17 16:37:18 -0700592 double time_now = GetActiveTime();
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700593 double delta_time = time_now - stats_initial_time_;
Luigi Semenzato8accd332011-05-17 16:37:18 -0700594 if (testing_) {
595 // Fake the time when testing.
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700596 delta_time = stats_state_ == kStatsShort ?
597 kMetricStatsShortInterval : kMetricStatsLongInterval;
Luigi Semenzato8accd332011-05-17 16:37:18 -0700598 }
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700599 bool diskstats_success = DiskStatsReadStats(&read_sectors_now,
600 &write_sectors_now);
Luigi Semenzato8accd332011-05-17 16:37:18 -0700601 int delta_read = read_sectors_now - read_sectors_;
602 int delta_write = write_sectors_now - write_sectors_;
603 int read_sectors_per_second = delta_read / delta_time;
604 int write_sectors_per_second = delta_write / delta_time;
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700605 bool vmstats_success = VmStatsReadStats(&vmstats_now);
606 uint64_t delta_faults = vmstats_now.page_faults_ - vmstats_.page_faults_;
607 uint64_t delta_swap_in = vmstats_now.swap_in_ - vmstats_.swap_in_;
608 uint64_t delta_swap_out = vmstats_now.swap_out_ - vmstats_.swap_out_;
609 uint64_t page_faults_per_second = delta_faults / delta_time;
610 uint64_t swap_in_per_second = delta_swap_in / delta_time;
611 uint64_t swap_out_per_second = delta_swap_out / delta_time;
Luigi Semenzatoc88e42d2011-02-17 10:21:16 -0800612
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700613 switch (stats_state_) {
614 case kStatsShort:
615 if (diskstats_success) {
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800616 SendSample(kMetricReadSectorsShortName,
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700617 read_sectors_per_second,
618 1,
619 kMetricSectorsIOMax,
620 kMetricSectorsBuckets);
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800621 SendSample(kMetricWriteSectorsShortName,
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700622 write_sectors_per_second,
623 1,
624 kMetricSectorsIOMax,
625 kMetricSectorsBuckets);
626 }
627 if (vmstats_success) {
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800628 SendSample(kMetricPageFaultsShortName,
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700629 page_faults_per_second,
630 1,
631 kMetricPageFaultsMax,
632 kMetricPageFaultsBuckets);
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800633 SendSample(kMetricSwapInShortName,
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700634 swap_in_per_second,
635 1,
636 kMetricPageFaultsMax,
637 kMetricPageFaultsBuckets);
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800638 SendSample(kMetricSwapOutShortName,
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700639 swap_out_per_second,
640 1,
641 kMetricPageFaultsMax,
642 kMetricPageFaultsBuckets);
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700643 }
Luigi Semenzatoc88e42d2011-02-17 10:21:16 -0800644 // Schedule long callback.
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700645 stats_state_ = kStatsLong;
646 ScheduleStatsCallback(kMetricStatsLongInterval -
647 kMetricStatsShortInterval);
Luigi Semenzatoc88e42d2011-02-17 10:21:16 -0800648 break;
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700649 case kStatsLong:
650 if (diskstats_success) {
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800651 SendSample(kMetricReadSectorsLongName,
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700652 read_sectors_per_second,
653 1,
654 kMetricSectorsIOMax,
655 kMetricSectorsBuckets);
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800656 SendSample(kMetricWriteSectorsLongName,
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700657 write_sectors_per_second,
658 1,
659 kMetricSectorsIOMax,
660 kMetricSectorsBuckets);
661 // Reset sector counters.
662 read_sectors_ = read_sectors_now;
663 write_sectors_ = write_sectors_now;
664 }
665 if (vmstats_success) {
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800666 SendSample(kMetricPageFaultsLongName,
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700667 page_faults_per_second,
668 1,
669 kMetricPageFaultsMax,
670 kMetricPageFaultsBuckets);
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800671 SendSample(kMetricSwapInLongName,
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700672 swap_in_per_second,
673 1,
674 kMetricPageFaultsMax,
675 kMetricPageFaultsBuckets);
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800676 SendSample(kMetricSwapOutLongName,
Sonny Rao4b8aebb2013-07-31 23:18:31 -0700677 swap_out_per_second,
678 1,
679 kMetricPageFaultsMax,
680 kMetricPageFaultsBuckets);
681
682 vmstats_ = vmstats_now;
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700683 }
Luigi Semenzatofb3a8212013-05-07 16:55:00 -0700684 SendCpuThrottleMetrics();
Luigi Semenzato8accd332011-05-17 16:37:18 -0700685 // Set start time for new cycle.
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700686 stats_initial_time_ = time_now;
Luigi Semenzatoc88e42d2011-02-17 10:21:16 -0800687 // Schedule short callback.
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700688 stats_state_ = kStatsShort;
689 ScheduleStatsCallback(kMetricStatsShortInterval);
Luigi Semenzatoc88e42d2011-02-17 10:21:16 -0800690 break;
691 default:
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700692 LOG(FATAL) << "Invalid stats state";
Luigi Semenzatoc88e42d2011-02-17 10:21:16 -0800693 }
694}
695
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700696void MetricsDaemon::ScheduleMeminfoCallback(int wait) {
697 if (testing_) {
698 return;
699 }
Steve Funge86591e2014-12-01 13:38:21 -0800700 base::TimeDelta waitDelta = base::TimeDelta::FromSeconds(wait);
701 base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
702 base::Bind(&MetricsDaemon::MeminfoCallback, base::Unretained(this),
Steve Fung8ab89c52015-01-05 13:48:30 -0800703 waitDelta),
Steve Funge86591e2014-12-01 13:38:21 -0800704 waitDelta);
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700705}
706
Steve Funge86591e2014-12-01 13:38:21 -0800707void MetricsDaemon::MeminfoCallback(base::TimeDelta wait) {
Luigi Semenzato8accd332011-05-17 16:37:18 -0700708 string meminfo_raw;
Bertrand SIMONNET675a10c2015-08-25 14:11:43 -0700709 const FilePath meminfo_path(kMeminfoFileName);
Ben Chan2e6543d2014-02-05 23:26:25 -0800710 if (!base::ReadFileToString(meminfo_path, &meminfo_raw)) {
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700711 LOG(WARNING) << "cannot read " << meminfo_path.value().c_str();
Steve Funge86591e2014-12-01 13:38:21 -0800712 return;
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700713 }
Luigi Semenzato96360192014-06-04 10:53:35 -0700714 // Make both calls even if the first one fails.
715 bool success = ProcessMeminfo(meminfo_raw);
Bill Yi6d5822e2015-09-03 19:18:56 +0000716 bool reschedule =
717 ReportZram(base::FilePath(FILE_PATH_LITERAL("/sys/block/zram0"))) &&
718 success;
719 if (reschedule) {
Steve Funge86591e2014-12-01 13:38:21 -0800720 base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
721 base::Bind(&MetricsDaemon::MeminfoCallback, base::Unretained(this),
Steve Fung8ab89c52015-01-05 13:48:30 -0800722 wait),
Steve Funge86591e2014-12-01 13:38:21 -0800723 wait);
724 }
Luigi Semenzato96360192014-06-04 10:53:35 -0700725}
726
727// static
728bool MetricsDaemon::ReadFileToUint64(const base::FilePath& path,
Ben Chanf05ab402014-08-07 00:54:59 -0700729 uint64_t* value) {
Luigi Semenzato96360192014-06-04 10:53:35 -0700730 std::string content;
731 if (!base::ReadFileToString(path, &content)) {
732 PLOG(WARNING) << "cannot read " << path.MaybeAsASCII();
733 return false;
734 }
Luigi Semenzato4a6c9422014-06-30 18:12:28 -0700735 // Remove final newline.
736 base::TrimWhitespaceASCII(content, base::TRIM_TRAILING, &content);
Luigi Semenzato96360192014-06-04 10:53:35 -0700737 if (!base::StringToUint64(content, value)) {
738 LOG(WARNING) << "invalid integer: " << content;
739 return false;
740 }
741 return true;
742}
743
744bool MetricsDaemon::ReportZram(const base::FilePath& zram_dir) {
745 // Data sizes are in bytes. |zero_pages| is in number of pages.
Ben Chanf05ab402014-08-07 00:54:59 -0700746 uint64_t compr_data_size, orig_data_size, zero_pages;
Luigi Semenzato96360192014-06-04 10:53:35 -0700747 const size_t page_size = 4096;
748
749 if (!ReadFileToUint64(zram_dir.Append(kComprDataSizeName),
750 &compr_data_size) ||
751 !ReadFileToUint64(zram_dir.Append(kOrigDataSizeName), &orig_data_size) ||
752 !ReadFileToUint64(zram_dir.Append(kZeroPagesName), &zero_pages)) {
753 return false;
754 }
755
756 // |orig_data_size| does not include zero-filled pages.
757 orig_data_size += zero_pages * page_size;
758
759 const int compr_data_size_mb = compr_data_size >> 20;
760 const int savings_mb = (orig_data_size - compr_data_size) >> 20;
761 const int zero_ratio_percent = zero_pages * page_size * 100 / orig_data_size;
762
763 // Report compressed size in megabytes. 100 MB or less has little impact.
764 SendSample("Platform.ZramCompressedSize", compr_data_size_mb, 100, 4000, 50);
765 SendSample("Platform.ZramSavings", savings_mb, 100, 4000, 50);
766 // The compression ratio is multiplied by 100 for better resolution. The
767 // ratios of interest are between 1 and 6 (100% and 600% as reported). We
768 // don't want samples when very little memory is being compressed.
769 if (compr_data_size_mb >= 1) {
770 SendSample("Platform.ZramCompressionRatioPercent",
771 orig_data_size * 100 / compr_data_size, 100, 600, 50);
772 }
773 // The values of interest for zero_pages are between 1MB and 1GB. The units
774 // are number of pages.
775 SendSample("Platform.ZramZeroPages", zero_pages, 256, 256 * 1024, 50);
776 SendSample("Platform.ZramZeroRatioPercent", zero_ratio_percent, 1, 50, 50);
777
778 return true;
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700779}
780
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700781bool MetricsDaemon::ProcessMeminfo(const string& meminfo_raw) {
Luigi Semenzato8accd332011-05-17 16:37:18 -0700782 static const MeminfoRecord fields_array[] = {
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700783 { "MemTotal", "MemTotal" }, // SPECIAL CASE: total system memory
784 { "MemFree", "MemFree" },
785 { "Buffers", "Buffers" },
786 { "Cached", "Cached" },
787 // { "SwapCached", "SwapCached" },
788 { "Active", "Active" },
789 { "Inactive", "Inactive" },
790 { "ActiveAnon", "Active(anon)" },
791 { "InactiveAnon", "Inactive(anon)" },
792 { "ActiveFile" , "Active(file)" },
793 { "InactiveFile", "Inactive(file)" },
Luigi Semenzato942cbab2013-02-12 13:17:07 -0800794 { "Unevictable", "Unevictable", kMeminfoOp_HistLog },
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700795 // { "Mlocked", "Mlocked" },
Luigi Semenzato942cbab2013-02-12 13:17:07 -0800796 { "SwapTotal", "SwapTotal", kMeminfoOp_SwapTotal },
797 { "SwapFree", "SwapFree", kMeminfoOp_SwapFree },
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700798 // { "Dirty", "Dirty" },
799 // { "Writeback", "Writeback" },
800 { "AnonPages", "AnonPages" },
801 { "Mapped", "Mapped" },
Luigi Semenzato942cbab2013-02-12 13:17:07 -0800802 { "Shmem", "Shmem", kMeminfoOp_HistLog },
803 { "Slab", "Slab", kMeminfoOp_HistLog },
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700804 // { "SReclaimable", "SReclaimable" },
805 // { "SUnreclaim", "SUnreclaim" },
806 };
Luigi Semenzato8accd332011-05-17 16:37:18 -0700807 vector<MeminfoRecord> fields(fields_array,
808 fields_array + arraysize(fields_array));
809 if (!FillMeminfo(meminfo_raw, &fields)) {
810 return false;
811 }
812 int total_memory = fields[0].value;
813 if (total_memory == 0) {
814 // this "cannot happen"
815 LOG(WARNING) << "borked meminfo parser";
816 return false;
817 }
Luigi Semenzato942cbab2013-02-12 13:17:07 -0800818 int swap_total = 0;
819 int swap_free = 0;
Luigi Semenzato8accd332011-05-17 16:37:18 -0700820 // Send all fields retrieved, except total memory.
821 for (unsigned int i = 1; i < fields.size(); i++) {
Luigi Semenzato859b3f02014-02-05 15:33:19 -0800822 string metrics_name = base::StringPrintf("Platform.Meminfo%s",
823 fields[i].name);
Luigi Semenzato3ccca062013-02-04 19:50:45 -0800824 int percent;
Luigi Semenzato942cbab2013-02-12 13:17:07 -0800825 switch (fields[i].op) {
826 case kMeminfoOp_HistPercent:
Luigi Semenzato3ccca062013-02-04 19:50:45 -0800827 // report value as percent of total memory
828 percent = fields[i].value * 100 / total_memory;
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800829 SendLinearSample(metrics_name, percent, 100, 101);
Luigi Semenzato3ccca062013-02-04 19:50:45 -0800830 break;
Luigi Semenzato942cbab2013-02-12 13:17:07 -0800831 case kMeminfoOp_HistLog:
Luigi Semenzato3ccca062013-02-04 19:50:45 -0800832 // report value in kbytes, log scale, 4Gb max
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800833 SendSample(metrics_name, fields[i].value, 1, 4 * 1000 * 1000, 100);
Luigi Semenzato3ccca062013-02-04 19:50:45 -0800834 break;
Luigi Semenzato942cbab2013-02-12 13:17:07 -0800835 case kMeminfoOp_SwapTotal:
836 swap_total = fields[i].value;
837 case kMeminfoOp_SwapFree:
838 swap_free = fields[i].value;
Luigi Semenzato3ccca062013-02-04 19:50:45 -0800839 break;
Luigi Semenzato8accd332011-05-17 16:37:18 -0700840 }
841 }
Luigi Semenzato942cbab2013-02-12 13:17:07 -0800842 if (swap_total > 0) {
843 int swap_used = swap_total - swap_free;
844 int swap_used_percent = swap_used * 100 / swap_total;
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800845 SendSample("Platform.MeminfoSwapUsed", swap_used, 1, 8 * 1000 * 1000, 100);
846 SendLinearSample("Platform.MeminfoSwapUsedPercent", swap_used_percent,
Luigi Semenzato942cbab2013-02-12 13:17:07 -0800847 100, 101);
848 }
Luigi Semenzato8accd332011-05-17 16:37:18 -0700849 return true;
850}
851
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700852bool MetricsDaemon::FillMeminfo(const string& meminfo_raw,
853 vector<MeminfoRecord>* fields) {
Luigi Semenzato8accd332011-05-17 16:37:18 -0700854 vector<string> lines;
855 unsigned int nlines = Tokenize(meminfo_raw, "\n", &lines);
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700856
857 // Scan meminfo output and collect field values. Each field name has to
858 // match a meminfo entry (case insensitive) after removing non-alpha
859 // characters from the entry.
Luigi Semenzato8accd332011-05-17 16:37:18 -0700860 unsigned int ifield = 0;
861 for (unsigned int iline = 0;
862 iline < nlines && ifield < fields->size();
863 iline++) {
864 vector<string> tokens;
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700865 Tokenize(lines[iline], ": ", &tokens);
Luigi Semenzato8accd332011-05-17 16:37:18 -0700866 if (strcmp((*fields)[ifield].match, tokens[0].c_str()) == 0) {
867 // Name matches. Parse value and save.
Bertrand SIMONNET675a10c2015-08-25 14:11:43 -0700868 if (!base::StringToInt(tokens[1], &(*fields)[ifield].value)) {
869 LOG(WARNING) << "Cound not convert " << tokens[1] << " to int";
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700870 return false;
871 }
Luigi Semenzato8accd332011-05-17 16:37:18 -0700872 ifield++;
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700873 }
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700874 }
Luigi Semenzato8accd332011-05-17 16:37:18 -0700875 if (ifield < fields->size()) {
876 // End of input reached while scanning.
877 LOG(WARNING) << "cannot find field " << (*fields)[ifield].match
878 << " and following";
879 return false;
880 }
881 return true;
882}
883
Luigi Semenzato0d9a9c92013-12-05 15:55:12 -0800884void MetricsDaemon::ScheduleMemuseCallback(double interval) {
Luigi Semenzato8accd332011-05-17 16:37:18 -0700885 if (testing_) {
886 return;
887 }
Steve Funge86591e2014-12-01 13:38:21 -0800888 base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
889 base::Bind(&MetricsDaemon::MemuseCallback, base::Unretained(this)),
890 base::TimeDelta::FromSeconds(interval));
Luigi Semenzato8accd332011-05-17 16:37:18 -0700891}
892
893void MetricsDaemon::MemuseCallback() {
894 // Since we only care about active time (i.e. uptime minus sleep time) but
895 // the callbacks are driven by real time (uptime), we check if we should
896 // reschedule this callback due to intervening sleep periods.
897 double now = GetActiveTime();
Luigi Semenzato0d9a9c92013-12-05 15:55:12 -0800898 // Avoid intervals of less than one second.
899 double remaining_time = ceil(memuse_final_time_ - now);
900 if (remaining_time > 0) {
901 ScheduleMemuseCallback(remaining_time);
Luigi Semenzato8accd332011-05-17 16:37:18 -0700902 } else {
Luigi Semenzato0d9a9c92013-12-05 15:55:12 -0800903 // Report stats and advance the measurement interval unless there are
904 // errors or we've completed the last interval.
Luigi Semenzato8accd332011-05-17 16:37:18 -0700905 if (MemuseCallbackWork() &&
Luigi Semenzato0d9a9c92013-12-05 15:55:12 -0800906 memuse_interval_index_ < arraysize(kMemuseIntervals)) {
907 double interval = kMemuseIntervals[memuse_interval_index_++];
908 memuse_final_time_ = now + interval;
909 ScheduleMemuseCallback(interval);
Luigi Semenzato8accd332011-05-17 16:37:18 -0700910 }
911 }
912}
913
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700914bool MetricsDaemon::MemuseCallbackWork() {
Luigi Semenzato8accd332011-05-17 16:37:18 -0700915 string meminfo_raw;
Bertrand SIMONNET675a10c2015-08-25 14:11:43 -0700916 const FilePath meminfo_path(kMeminfoFileName);
Ben Chan2e6543d2014-02-05 23:26:25 -0800917 if (!base::ReadFileToString(meminfo_path, &meminfo_raw)) {
Luigi Semenzato8accd332011-05-17 16:37:18 -0700918 LOG(WARNING) << "cannot read " << meminfo_path.value().c_str();
919 return false;
920 }
921 return ProcessMemuse(meminfo_raw);
922}
923
Luigi Semenzato5bd764f2011-10-14 12:03:35 -0700924bool MetricsDaemon::ProcessMemuse(const string& meminfo_raw) {
Luigi Semenzato8accd332011-05-17 16:37:18 -0700925 static const MeminfoRecord fields_array[] = {
926 { "MemTotal", "MemTotal" }, // SPECIAL CASE: total system memory
927 { "ActiveAnon", "Active(anon)" },
928 { "InactiveAnon", "Inactive(anon)" },
929 };
930 vector<MeminfoRecord> fields(fields_array,
931 fields_array + arraysize(fields_array));
932 if (!FillMeminfo(meminfo_raw, &fields)) {
933 return false;
934 }
935 int total = fields[0].value;
936 int active_anon = fields[1].value;
937 int inactive_anon = fields[2].value;
938 if (total == 0) {
939 // this "cannot happen"
940 LOG(WARNING) << "borked meminfo parser";
941 return false;
942 }
Luigi Semenzato859b3f02014-02-05 15:33:19 -0800943 string metrics_name = base::StringPrintf("Platform.MemuseAnon%d",
944 memuse_interval_index_);
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800945 SendLinearSample(metrics_name, (active_anon + inactive_anon) * 100 / total,
Luigi Semenzato8accd332011-05-17 16:37:18 -0700946 100, 101);
947 return true;
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700948}
949
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800950void MetricsDaemon::SendSample(const string& name, int sample,
Darin Petkov11b8eb32010-05-18 11:00:59 -0700951 int min, int max, int nbuckets) {
Darin Petkovfc91b422010-05-12 13:05:45 -0700952 metrics_lib_->SendToUMA(name, sample, min, max, nbuckets);
Darin Petkov65b01462010-04-14 13:32:20 -0700953}
Luigi Semenzato29c7ef92011-04-12 14:12:35 -0700954
Luigi Semenzatoe5883fa2014-04-18 17:00:35 -0700955void MetricsDaemon::SendKernelCrashesCumulativeCountStats() {
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800956 // Report the number of crashes for this OS version, but don't clear the
957 // counter. It is cleared elsewhere on version change.
Ben Chanf05ab402014-08-07 00:54:59 -0700958 int64_t crashes_count = kernel_crashes_version_count_->Get();
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800959 SendSample(kernel_crashes_version_count_->Name(),
Luigi Semenzatoba0c65d2014-03-17 12:28:38 -0700960 crashes_count,
961 1, // value of first bucket
962 500, // value of last bucket
963 100); // number of buckets
964
965
Ben Chanf05ab402014-08-07 00:54:59 -0700966 int64_t cpu_use_ms = version_cumulative_cpu_use_->Get();
Luigi Semenzatoba0c65d2014-03-17 12:28:38 -0700967 SendSample(version_cumulative_cpu_use_->Name(),
968 cpu_use_ms / 1000, // stat is in seconds
969 1, // device may be used very little...
970 8 * 1000 * 1000, // ... or a lot (a little over 90 days)
971 100);
972
973 // On the first run after an autoupdate, cpu_use_ms and active_use_seconds
974 // can be zero. Avoid division by zero.
975 if (cpu_use_ms > 0) {
976 // Send the crash frequency since update in number of crashes per CPU year.
977 SendSample("Logging.KernelCrashesPerCpuYear",
978 crashes_count * kSecondsPerDay * 365 * 1000 / cpu_use_ms,
979 1,
980 1000 * 1000, // about one crash every 30s of CPU time
981 100);
982 }
983
Ben Chanf05ab402014-08-07 00:54:59 -0700984 int64_t active_use_seconds = version_cumulative_active_use_->Get();
Luigi Semenzatoba0c65d2014-03-17 12:28:38 -0700985 if (active_use_seconds > 0) {
Luigi Semenzatoe5883fa2014-04-18 17:00:35 -0700986 SendSample(version_cumulative_active_use_->Name(),
987 active_use_seconds / 1000, // stat is in seconds
988 1, // device may be used very little...
989 8 * 1000 * 1000, // ... or a lot (about 90 days)
990 100);
Luigi Semenzatoba0c65d2014-03-17 12:28:38 -0700991 // Same as above, but per year of active time.
992 SendSample("Logging.KernelCrashesPerActiveYear",
993 crashes_count * kSecondsPerDay * 365 / active_use_seconds,
994 1,
995 1000 * 1000, // about one crash every 30s of active time
996 100);
997 }
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -0800998}
999
Luigi Semenzatoe5883fa2014-04-18 17:00:35 -07001000void MetricsDaemon::SendDailyUseSample(
1001 const scoped_ptr<PersistentInteger>& use) {
1002 SendSample(use->Name(),
1003 use->GetAndClear(),
1004 1, // value of first bucket
1005 kSecondsPerDay, // value of last bucket
1006 50); // number of buckets
1007}
1008
Luigi Semenzato2fd51cc2014-02-26 11:53:16 -08001009void MetricsDaemon::SendCrashIntervalSample(
1010 const scoped_ptr<PersistentInteger>& interval) {
1011 SendSample(interval->Name(),
1012 interval->GetAndClear(),
1013 1, // value of first bucket
1014 4 * kSecondsPerWeek, // value of last bucket
1015 50); // number of buckets
1016}
1017
1018void MetricsDaemon::SendCrashFrequencySample(
1019 const scoped_ptr<PersistentInteger>& frequency) {
1020 SendSample(frequency->Name(),
1021 frequency->GetAndClear(),
1022 1, // value of first bucket
1023 100, // value of last bucket
1024 50); // number of buckets
1025}
1026
1027void MetricsDaemon::SendLinearSample(const string& name, int sample,
Luigi Semenzato29c7ef92011-04-12 14:12:35 -07001028 int max, int nbuckets) {
Luigi Semenzato29c7ef92011-04-12 14:12:35 -07001029 // TODO(semenzato): add a proper linear histogram to the Chrome external
1030 // metrics API.
1031 LOG_IF(FATAL, nbuckets != max + 1) << "unsupported histogram scale";
1032 metrics_lib_->SendEnumToUMA(name, sample, max);
1033}
Daniel Eratc83975a2014-04-04 08:53:44 -07001034
1035void MetricsDaemon::UpdateStats(TimeTicks now_ticks,
1036 Time now_wall_time) {
1037 const int elapsed_seconds = (now_ticks - last_update_stats_time_).InSeconds();
Luigi Semenzatoe5883fa2014-04-18 17:00:35 -07001038 daily_active_use_->Add(elapsed_seconds);
1039 version_cumulative_active_use_->Add(elapsed_seconds);
Daniel Eratc83975a2014-04-04 08:53:44 -07001040 user_crash_interval_->Add(elapsed_seconds);
1041 kernel_crash_interval_->Add(elapsed_seconds);
1042 version_cumulative_cpu_use_->Add(GetIncrementalCpuUse().InMilliseconds());
1043 last_update_stats_time_ = now_ticks;
1044
1045 const TimeDelta since_epoch = now_wall_time - Time::UnixEpoch();
1046 const int day = since_epoch.InDays();
1047 const int week = day / 7;
1048
1049 if (daily_cycle_->Get() != day) {
1050 daily_cycle_->Set(day);
Luigi Semenzatoe5883fa2014-04-18 17:00:35 -07001051 SendDailyUseSample(daily_active_use_);
1052 SendDailyUseSample(version_cumulative_active_use_);
Daniel Eratc83975a2014-04-04 08:53:44 -07001053 SendCrashFrequencySample(any_crashes_daily_count_);
1054 SendCrashFrequencySample(user_crashes_daily_count_);
1055 SendCrashFrequencySample(kernel_crashes_daily_count_);
1056 SendCrashFrequencySample(unclean_shutdowns_daily_count_);
Luigi Semenzatoe5883fa2014-04-18 17:00:35 -07001057 SendKernelCrashesCumulativeCountStats();
Daniel Eratc83975a2014-04-04 08:53:44 -07001058 }
1059
1060 if (weekly_cycle_->Get() != week) {
1061 weekly_cycle_->Set(week);
1062 SendCrashFrequencySample(any_crashes_weekly_count_);
1063 SendCrashFrequencySample(user_crashes_weekly_count_);
1064 SendCrashFrequencySample(kernel_crashes_weekly_count_);
1065 SendCrashFrequencySample(unclean_shutdowns_weekly_count_);
1066 }
1067}
1068
Steve Funge86591e2014-12-01 13:38:21 -08001069void MetricsDaemon::HandleUpdateStatsTimeout() {
1070 UpdateStats(TimeTicks::Now(), Time::Now());
1071 base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
1072 base::Bind(&MetricsDaemon::HandleUpdateStatsTimeout,
1073 base::Unretained(this)),
1074 base::TimeDelta::FromMilliseconds(kUpdateStatsIntervalMs));
Daniel Eratc83975a2014-04-04 08:53:44 -07001075}