| Pierre Imai | 6b3f0d6 | 2016-02-22 17:50:41 +0900 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (C) 2016 The Android Open Source Project | 
|  | 3 | * | 
|  | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 5 | * you may not use this file except in compliance with the License. | 
|  | 6 | * You may obtain a copy of the License at | 
|  | 7 | * | 
|  | 8 | *      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 9 | * | 
|  | 10 | * Unless required by applicable law or agreed to in writing, software | 
|  | 11 | * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 13 | * See the License for the specific language governing permissions and | 
|  | 14 | * limitations under the License. | 
|  | 15 | */ | 
|  | 16 |  | 
|  | 17 | #include <stdbool.h> | 
|  | 18 | #include <arpa/nameser.h> | 
|  | 19 | #include <string.h> | 
|  | 20 |  | 
| Christopher Ferris | 7a3681e | 2017-04-24 17:48:32 -0700 | [diff] [blame] | 21 | #include <async_safe/log.h> | 
|  | 22 |  | 
| Pierre Imai | 6b3f0d6 | 2016-02-22 17:50:41 +0900 | [diff] [blame] | 23 | #include "isc/eventlib.h" | 
| Christopher Ferris | 7a3681e | 2017-04-24 17:48:32 -0700 | [diff] [blame] | 24 | #include "resolv_stats.h" | 
| Pierre Imai | 6b3f0d6 | 2016-02-22 17:50:41 +0900 | [diff] [blame] | 25 |  | 
| Pierre Imai | 360800d | 2016-04-06 11:23:47 +0900 | [diff] [blame] | 26 | #define DBG 0 | 
| Pierre Imai | 6b3f0d6 | 2016-02-22 17:50:41 +0900 | [diff] [blame] | 27 |  | 
/*
 * Calculate the round-trip-time in milliseconds from start time t0 and end time t1.
 *
 * Each timestamp is truncated to whole milliseconds before the subtraction, so the
 * result is negative when t1 lies before t0.
 *
 * The seconds are subtracted before being scaled to milliseconds. Scaling each
 * absolute tv_sec by 1000 first can overflow a 32-bit long/time_t, which is undefined
 * behavior for signed types; the difference between two nearby timestamps is small
 * enough to scale safely.
 */
int
_res_stats_calculate_rtt(const struct timespec* t1, const struct timespec* t0) {
    // Whole seconds elapsed, expressed in ms.
    long sec_delta_ms = (long) (t1->tv_sec - t0->tv_sec) * 1000;
    // Sub-second parts, each truncated to ms on its own (divide ns by one million).
    long nsec_delta_ms = t1->tv_nsec / 1000000 - t0->tv_nsec / 1000000;
    return (int) (sec_delta_ms + nsec_delta_ms);
}
|  | 36 |  | 
|  | 37 | /* Create a sample for calculating server reachability statistics. */ | 
|  | 38 | void | 
|  | 39 | _res_stats_set_sample(struct __res_sample* sample, time_t now, int rcode, int rtt) | 
|  | 40 | { | 
|  | 41 | if (DBG) { | 
| Christopher Ferris | 7a3681e | 2017-04-24 17:48:32 -0700 | [diff] [blame] | 42 | async_safe_format_log(ANDROID_LOG_INFO, "libc", "rcode = %d, sec = %d", rcode, rtt); | 
| Pierre Imai | 6b3f0d6 | 2016-02-22 17:50:41 +0900 | [diff] [blame] | 43 | } | 
|  | 44 | sample->at = now; | 
|  | 45 | sample->rcode = rcode; | 
|  | 46 | sample->rtt = rtt; | 
|  | 47 | } | 
|  | 48 |  | 
|  | 49 | /* Clears all stored samples for the given server. */ | 
|  | 50 | void | 
|  | 51 | _res_stats_clear_samples(struct __res_stats* stats) | 
|  | 52 | { | 
|  | 53 | stats->sample_count = stats->sample_next = 0; | 
|  | 54 | } | 
|  | 55 |  | 
|  | 56 | /* Aggregates the reachability statistics for the given server based on on the stored samples. */ | 
|  | 57 | void | 
| Pierre Imai | 97c9d73 | 2016-04-18 12:00:12 +0900 | [diff] [blame] | 58 | android_net_res_stats_aggregate(struct __res_stats* stats, int* successes, int* errors, | 
|  | 59 | int* timeouts, int* internal_errors, int* rtt_avg, time_t* last_sample_time) | 
| Pierre Imai | 6b3f0d6 | 2016-02-22 17:50:41 +0900 | [diff] [blame] | 60 | { | 
|  | 61 | int s = 0;   // successes | 
|  | 62 | int e = 0;   // errors | 
|  | 63 | int t = 0;   // timouts | 
|  | 64 | int ie = 0;  // internal errors | 
|  | 65 | long rtt_sum = 0; | 
|  | 66 | time_t last = 0; | 
|  | 67 | int rtt_count = 0; | 
|  | 68 | for (int i = 0 ; i < stats->sample_count ; ++i) { | 
|  | 69 | // Treat everything as an error that the code in send_dg() already considers a | 
|  | 70 | // rejection by the server, i.e. SERVFAIL, NOTIMP and REFUSED. Assume that NXDOMAIN | 
|  | 71 | // and NOTAUTH can actually occur for user queries. NOERROR with empty answer section | 
|  | 72 | // is not treated as an error here either. FORMERR seems to sometimes be returned by | 
|  | 73 | // some versions of BIND in response to DNSSEC or EDNS0. Whether to treat such responses | 
|  | 74 | // as an indication of a broken server is unclear, though. For now treat such responses, | 
|  | 75 | // as well as unknown codes as errors. | 
|  | 76 | switch (stats->samples[i].rcode) { | 
|  | 77 | case NOERROR: | 
|  | 78 | case NOTAUTH: | 
|  | 79 | case NXDOMAIN: | 
|  | 80 | ++s; | 
|  | 81 | rtt_sum += stats->samples[i].rtt; | 
|  | 82 | ++rtt_count; | 
|  | 83 | break; | 
|  | 84 | case RCODE_TIMEOUT: | 
|  | 85 | ++t; | 
|  | 86 | break; | 
|  | 87 | case RCODE_INTERNAL_ERROR: | 
|  | 88 | ++ie; | 
|  | 89 | break; | 
|  | 90 | case SERVFAIL: | 
|  | 91 | case NOTIMP: | 
|  | 92 | case REFUSED: | 
|  | 93 | default: | 
|  | 94 | ++e; | 
|  | 95 | break; | 
|  | 96 | } | 
|  | 97 | } | 
|  | 98 | *successes = s; | 
|  | 99 | *errors = e; | 
|  | 100 | *timeouts = t; | 
|  | 101 | *internal_errors = ie; | 
|  | 102 | /* If there was at least one successful sample, calculate average RTT. */ | 
|  | 103 | if (rtt_count) { | 
|  | 104 | *rtt_avg = rtt_sum / rtt_count; | 
|  | 105 | } else { | 
|  | 106 | *rtt_avg = -1; | 
|  | 107 | } | 
|  | 108 | /* If we had at least one sample, populate last sample time. */ | 
|  | 109 | if (stats->sample_count > 0) { | 
|  | 110 | if (stats->sample_next > 0) { | 
|  | 111 | last = stats->samples[stats->sample_next - 1].at; | 
|  | 112 | } else { | 
|  | 113 | last = stats->samples[stats->sample_count - 1].at; | 
|  | 114 | } | 
|  | 115 | } | 
|  | 116 | *last_sample_time = last; | 
|  | 117 | } | 
|  | 118 |  | 
|  | 119 | bool | 
|  | 120 | _res_stats_usable_server(const struct __res_params* params, struct __res_stats* stats) { | 
|  | 121 | int successes = -1; | 
|  | 122 | int errors = -1; | 
|  | 123 | int timeouts = -1; | 
|  | 124 | int internal_errors = -1; | 
|  | 125 | int rtt_avg = -1; | 
|  | 126 | time_t last_sample_time = 0; | 
| Pierre Imai | 97c9d73 | 2016-04-18 12:00:12 +0900 | [diff] [blame] | 127 | android_net_res_stats_aggregate(stats, &successes, &errors, &timeouts, &internal_errors, | 
|  | 128 | &rtt_avg, &last_sample_time); | 
| Pierre Imai | 6b3f0d6 | 2016-02-22 17:50:41 +0900 | [diff] [blame] | 129 | if (successes >= 0 && errors >= 0 && timeouts >= 0) { | 
|  | 130 | int total = successes + errors + timeouts; | 
|  | 131 | if (DBG) { | 
| Christopher Ferris | 7a3681e | 2017-04-24 17:48:32 -0700 | [diff] [blame] | 132 | async_safe_format_log(ANDROID_LOG_DEBUG, "libc", "NS stats: S %d + E %d + T %d + I %d " | 
| Pierre Imai | 6b3f0d6 | 2016-02-22 17:50:41 +0900 | [diff] [blame] | 133 | "= %d, rtt = %d, min_samples = %d\n", successes, errors, timeouts, internal_errors, | 
|  | 134 | total, rtt_avg, params->min_samples); | 
|  | 135 | } | 
|  | 136 | if (total >= params->min_samples && (errors > 0 || timeouts > 0)) { | 
|  | 137 | int success_rate = successes * 100 / total; | 
|  | 138 | if (DBG) { | 
| Christopher Ferris | 7a3681e | 2017-04-24 17:48:32 -0700 | [diff] [blame] | 139 | async_safe_format_log(ANDROID_LOG_DEBUG, "libc", "success rate %d%%\n", | 
|  | 140 | success_rate); | 
| Pierre Imai | 6b3f0d6 | 2016-02-22 17:50:41 +0900 | [diff] [blame] | 141 | } | 
|  | 142 | if (success_rate < params->success_threshold) { | 
|  | 143 | // evNowTime() is used here instead of time() to stay consistent with the rest of | 
|  | 144 | // the code base | 
|  | 145 | time_t now = evNowTime().tv_sec; | 
|  | 146 | if (now - last_sample_time > params->sample_validity) { | 
|  | 147 | // Note: It might be worth considering to expire old servers after their expiry | 
|  | 148 | // date has been reached, however the code for returning the ring buffer to its | 
|  | 149 | // previous non-circular state would induce additional complexity. | 
|  | 150 | if (DBG) { | 
| Christopher Ferris | 7a3681e | 2017-04-24 17:48:32 -0700 | [diff] [blame] | 151 | async_safe_format_log(ANDROID_LOG_INFO, "libc", | 
| Pierre Imai | 6b3f0d6 | 2016-02-22 17:50:41 +0900 | [diff] [blame] | 152 | "samples stale, retrying server\n"); | 
|  | 153 | } | 
|  | 154 | _res_stats_clear_samples(stats); | 
|  | 155 | } else { | 
|  | 156 | if (DBG) { | 
| Christopher Ferris | 7a3681e | 2017-04-24 17:48:32 -0700 | [diff] [blame] | 157 | async_safe_format_log(ANDROID_LOG_INFO, "libc", | 
| Pierre Imai | 6b3f0d6 | 2016-02-22 17:50:41 +0900 | [diff] [blame] | 158 | "too many resolution errors, ignoring server\n"); | 
|  | 159 | } | 
|  | 160 | return 0; | 
|  | 161 | } | 
|  | 162 | } | 
|  | 163 | } | 
|  | 164 | } | 
|  | 165 | return 1; | 
|  | 166 | } | 
|  | 167 |  | 
|  | 168 | void | 
| Pierre Imai | 97c9d73 | 2016-04-18 12:00:12 +0900 | [diff] [blame] | 169 | android_net_res_stats_get_usable_servers(const struct __res_params* params, | 
|  | 170 | struct __res_stats stats[], int nscount, bool usable_servers[]) { | 
| Pierre Imai | 6b3f0d6 | 2016-02-22 17:50:41 +0900 | [diff] [blame] | 171 | unsigned usable_servers_found = 0; | 
|  | 172 | for (int ns = 0; ns < nscount; ns++) { | 
|  | 173 | bool usable = _res_stats_usable_server(params, &stats[ns]); | 
|  | 174 | if (usable) { | 
|  | 175 | ++usable_servers_found; | 
|  | 176 | } | 
|  | 177 | usable_servers[ns] = usable; | 
|  | 178 | } | 
|  | 179 | // If there are no usable servers, consider all of them usable. | 
|  | 180 | // TODO: Explore other possibilities, such as enabling only the best N servers, etc. | 
|  | 181 | if (usable_servers_found == 0) { | 
|  | 182 | for (int ns = 0; ns < nscount; ns++) { | 
|  | 183 | usable_servers[ns] = true; | 
|  | 184 | } | 
|  | 185 | } | 
|  | 186 | } |