blob: a2f7a256c622582831deae28e12cb5039ba42df3 [file] [log] [blame]
Pierre Ossmanc09e5582015-12-11 20:23:17 +01001/* Copyright 2009-2015 Pierre Ossman for Cendio AB
2 *
3 * This is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This software is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this software; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
16 * USA.
17 */
18
19/*
20 * This code implements congestion control in the same way as TCP in
21 * order to avoid excessive latency in the transport. This is needed
22 * because "buffer bloat" is unfortunately still a very real problem.
23 *
24 * The basic principle is TCP Congestion Control (RFC 5618), with the
25 * addition of using the TCP Vegas algorithm. The reason we use Vegas
26 * is that we run on top of a reliable transport so we need a latency
Pierre Ossmana99d14d2015-12-13 15:43:46 +010027 * based algorithm rather than a loss based one. There is also a lot of
28 * interpolation of values. This is because we have rather horrible
29 * granularity in our measurements.
Pierre Ossmanda8904c2015-12-21 07:59:20 +010030 *
31 * We use a simplistic form of slow start in order to ramp up quickly
32 * from an idle state. We do not have any persistent threshold though
33 * as we have too much noise for it to be reliable.
Pierre Ossmanc09e5582015-12-11 20:23:17 +010034 */
35
Pierre Ossmana99d14d2015-12-13 15:43:46 +010036#include <assert.h>
Pierre Ossmanc09e5582015-12-11 20:23:17 +010037#include <sys/time.h>
38
Pierre Ossman8cf71632016-03-11 09:38:08 +010039#ifdef __linux__
40#include <sys/ioctl.h>
41#include <sys/socket.h>
42#include <netinet/in.h>
43#include <netinet/tcp.h>
44#include <linux/sockios.h>
45#endif
46
Pierre Ossmanc09e5582015-12-11 20:23:17 +010047#include <rfb/Congestion.h>
Pierre Ossmana99d14d2015-12-13 15:43:46 +010048#include <rfb/LogWriter.h>
Pierre Ossmanc09e5582015-12-11 20:23:17 +010049#include <rfb/util.h>
50
51// Debug output on what the congestion control is up to
52#undef CONGESTION_DEBUG
53
Pierre Ossman8cf71632016-03-11 09:38:08 +010054// Dump socket congestion window debug trace to disk
55#undef CONGESTION_TRACE
56
Pierre Ossmanc09e5582015-12-11 20:23:17 +010057using namespace rfb;
58
59// This window should get us going fairly fast on a decent bandwidth network.
60// If it's too high, it will rapidly be reduced and stay low.
61static const unsigned INITIAL_WINDOW = 16384;
62
63// TCP's minimal window is 3*MSS. But since we don't know the MSS, we
64// make a guess at 4 KiB (it's probably a bit higher).
65static const unsigned MINIMUM_WINDOW = 4096;
66
67// The current default maximum window for Linux (4 MiB). Should be a good
68// limit for now...
69static const unsigned MAXIMUM_WINDOW = 4194304;
70
Pierre Ossmana99d14d2015-12-13 15:43:46 +010071static LogWriter vlog("Congestion");
Pierre Ossmanc09e5582015-12-11 20:23:17 +010072
73Congestion::Congestion() :
Pierre Ossmana99d14d2015-12-13 15:43:46 +010074 lastPosition(0), extraBuffer(0),
Pierre Ossmanda8904c2015-12-21 07:59:20 +010075 baseRTT(-1), congWindow(INITIAL_WINDOW), inSlowStart(true),
Pierre Ossmana99d14d2015-12-13 15:43:46 +010076 measurements(0), minRTT(-1), minCongestedRTT(-1)
Pierre Ossmanc09e5582015-12-11 20:23:17 +010077{
Pierre Ossmana99d14d2015-12-13 15:43:46 +010078 gettimeofday(&lastUpdate, NULL);
79 gettimeofday(&lastSent, NULL);
80 memset(&lastPong, 0, sizeof(lastPong));
81 gettimeofday(&lastPongArrival, NULL);
82 gettimeofday(&lastAdjustment, NULL);
Pierre Ossmanc09e5582015-12-11 20:23:17 +010083}
84
85Congestion::~Congestion()
86{
87}
88
Pierre Ossmana99d14d2015-12-13 15:43:46 +010089void Congestion::updatePosition(unsigned pos)
90{
91 struct timeval now;
92 unsigned delta, consumed;
Pierre Ossmanc09e5582015-12-11 20:23:17 +010093
Pierre Ossmana99d14d2015-12-13 15:43:46 +010094 gettimeofday(&now, NULL);
95
96 delta = pos - lastPosition;
97 if ((delta > 0) || (extraBuffer > 0))
98 lastSent = now;
99
100 // Idle for too long?
101 // We use a very crude RTO calculation in order to keep things simple
102 // FIXME: should implement RFC 2861
103 if (msBetween(&lastSent, &now) > __rfbmax(baseRTT*2, 100)) {
104
105#ifdef CONGESTION_DEBUG
106 vlog.debug("Connection idle for %d ms, resetting congestion control",
107 msBetween(&lastSent, &now));
108#endif
109
110 // Close congestion window and redo wire latency measurement
111 congWindow = __rfbmin(INITIAL_WINDOW, congWindow);
112 baseRTT = -1;
113 measurements = 0;
114 gettimeofday(&lastAdjustment, NULL);
115 minRTT = minCongestedRTT = -1;
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100116 inSlowStart = true;
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100117 }
118
119 // Commonly we will be in a state of overbuffering. We need to
120 // estimate the extra delay that causes so we can separate it from
121 // the delay caused by an incorrect congestion window.
122 // (we cannot do this until we have a RTT measurement though)
123 if (baseRTT != (unsigned)-1) {
124 extraBuffer += delta;
125 consumed = msBetween(&lastUpdate, &now) * congWindow / baseRTT;
126 if (extraBuffer < consumed)
127 extraBuffer = 0;
128 else
129 extraBuffer -= consumed;
130 }
131
132 lastPosition = pos;
133 lastUpdate = now;
134}
135
136void Congestion::sentPing()
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100137{
138 struct RTTInfo rttInfo;
139
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100140 memset(&rttInfo, 0, sizeof(struct RTTInfo));
141
142 gettimeofday(&rttInfo.tv, NULL);
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100143 rttInfo.pos = lastPosition;
144 rttInfo.extra = getExtraBuffer();
145 rttInfo.congested = isCongested();
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100146
147 pings.push_back(rttInfo);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100148}
149
150void Congestion::gotPong()
151{
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100152 struct timeval now;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100153 struct RTTInfo rttInfo;
154 unsigned rtt, delay;
155
156 if (pings.empty())
157 return;
158
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100159 gettimeofday(&now, NULL);
160
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100161 rttInfo = pings.front();
162 pings.pop_front();
163
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100164 lastPong = rttInfo;
165 lastPongArrival = now;
166
167 rtt = msBetween(&rttInfo.tv, &now);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100168 if (rtt < 1)
169 rtt = 1;
170
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100171 // Try to estimate wire latency by tracking lowest seen latency
172 if (rtt < baseRTT)
173 baseRTT = rtt;
174
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100175 // Pings sent before the last adjustment aren't interesting as they
176 // aren't a measurement of the current congestion window
177 if (isBefore(&rttInfo.tv, &lastAdjustment))
178 return;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100179
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100180 // Estimate added delay because of overtaxed buffers (see above)
181 delay = rttInfo.extra * baseRTT / congWindow;
182 if (delay < rtt)
183 rtt -= delay;
184 else
185 rtt = 1;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100186
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100187 // A latency less than the wire latency means that we've
188 // understimated the congestion window. We can't really determine
189 // how much, so pretend that we got no buffer latency at all.
190 if (rtt < baseRTT)
191 rtt = baseRTT;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100192
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100193 // Record the minimum seen delay (hopefully ignores jitter) and let
194 // the congestion control do its thing.
195 //
196 // Note: We are delay based rather than loss based, which means we
197 // need to look at pongs even if they weren't limited by the
198 // current window ("congested"). Otherwise we will fail to
199 // detect increasing congestion until the application exceeds
200 // the congestion window.
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100201 if (rtt < minRTT)
202 minRTT = rtt;
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100203 if (rttInfo.congested) {
204 if (rtt < minCongestedRTT)
205 minCongestedRTT = rtt;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100206 }
207
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100208 measurements++;
209 updateCongestion();
210}
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100211
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100212bool Congestion::isCongested()
213{
214 if (getInFlight() < congWindow)
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100215 return false;
216
217 return true;
218}
219
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100220int Congestion::getUncongestedETA()
221{
222 unsigned targetAcked;
223
224 const struct RTTInfo* prevPing;
225 unsigned eta, elapsed;
226 unsigned etaNext, delay;
227
228 std::list<struct RTTInfo>::const_iterator iter;
229
230 targetAcked = lastPosition - congWindow;
231
232 // Simple case?
233 if (lastPong.pos > targetAcked)
234 return 0;
235
236 // No measurements yet?
237 if (baseRTT == (unsigned)-1)
238 return -1;
239
240 prevPing = &lastPong;
241 eta = 0;
242 elapsed = msSince(&lastPongArrival);
243
244 // Walk the ping queue and figure out which one we are waiting for to
245 // get to an uncongested state
246
247 for (iter = pings.begin(); ;++iter) {
248 struct RTTInfo curPing;
249
250 // If we aren't waiting for a pong that will clear the congested
251 // state then we have to estimate the final bit by pretending that
252 // we had a ping just after the last position update.
253 if (iter == pings.end()) {
254 curPing.tv = lastUpdate;
255 curPing.pos = lastPosition;
256 curPing.extra = extraBuffer;
257 } else {
258 curPing = *iter;
259 }
260
261 etaNext = msBetween(&prevPing->tv, &curPing.tv);
262 // Compensate for buffering delays
263 delay = curPing.extra * baseRTT / congWindow;
264 etaNext += delay;
265 delay = prevPing->extra * baseRTT / congWindow;
266 if (delay >= etaNext)
267 etaNext = 0;
268 else
269 etaNext -= delay;
270
271 // Found it?
272 if (curPing.pos > targetAcked) {
273 eta += etaNext * (curPing.pos - targetAcked) / (curPing.pos - prevPing->pos);
274 if (elapsed > eta)
275 return 0;
276 else
277 return eta - elapsed;
278 }
279
280 assert(iter != pings.end());
281
282 eta += etaNext;
283 prevPing = &*iter;
284 }
285}
286
Pierre Ossman8cf71632016-03-11 09:38:08 +0100287void Congestion::debugTrace(const char* filename, int fd)
288{
289#ifdef CONGESTION_TRACE
290#ifdef __linux__
291 FILE *f;
292 f = fopen(filename, "ab");
293 if (f != NULL) {
294 struct tcp_info info;
295 int buffered;
296 socklen_t len;
297 len = sizeof(info);
298 if ((getsockopt(fd, IPPROTO_TCP,
299 TCP_INFO, &info, &len) == 0) &&
300 (ioctl(fd, SIOCOUTQ, &buffered) == 0)) {
301 struct timeval now;
302 gettimeofday(&now, NULL);
303 fprintf(f, "%u.%06u,%u,%u,%u,%u\n",
304 (unsigned)now.tv_sec, (unsigned)now.tv_usec,
305 congWindow, info.tcpi_snd_cwnd * info.tcpi_snd_mss,
306 getInFlight(), buffered);
307 }
308 fclose(f);
309 }
310#endif
311#endif
312}
313
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100314unsigned Congestion::getExtraBuffer()
315{
316 unsigned elapsed;
317 unsigned consumed;
318
319 if (baseRTT == (unsigned)-1)
320 return 0;
321
322 elapsed = msSince(&lastUpdate);
323 consumed = elapsed * congWindow / baseRTT;
324
325 if (consumed >= extraBuffer)
326 return 0;
327 else
328 return extraBuffer - consumed;
329}
330
331unsigned Congestion::getInFlight()
332{
333 struct RTTInfo nextPong;
334 unsigned etaNext, delay, elapsed, acked;
335
336 // Simple case?
337 if (lastPosition == lastPong.pos)
338 return 0;
339
340 // No measurements yet?
341 if (baseRTT == (unsigned)-1) {
342 if (!pings.empty())
343 return lastPosition - pings.front().pos;
344 return 0;
345 }
346
347 // If we aren't waiting for any pong then we have to estimate things
348 // by pretending that we had a ping just after the last position
349 // update.
350 if (pings.empty()) {
351 nextPong.tv = lastUpdate;
352 nextPong.pos = lastPosition;
353 nextPong.extra = extraBuffer;
354 } else {
355 nextPong = pings.front();
356 }
357
358 // First we need to estimate how many bytes have made it through
359 // completely. Look at the next ping that should arrive and figure
360 // out how far behind it should be and interpolate the positions.
361
362 etaNext = msBetween(&lastPong.tv, &nextPong.tv);
363 // Compensate for buffering delays
364 delay = nextPong.extra * baseRTT / congWindow;
365 etaNext += delay;
366 delay = lastPong.extra * baseRTT / congWindow;
367 if (delay >= etaNext)
368 etaNext = 0;
369 else
370 etaNext -= delay;
371
372 elapsed = msSince(&lastPongArrival);
373
374 // The pong should be here any second. Be optimistic and assume
375 // we can already use its value.
376 if (etaNext <= elapsed)
377 acked = nextPong.pos;
378 else {
379 acked = lastPong.pos;
380 acked += (nextPong.pos - lastPong.pos) * elapsed / etaNext;
381 }
382
383 return lastPosition - acked;
384}
385
386void Congestion::updateCongestion()
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100387{
388 unsigned diff;
389
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100390 // We want at least three measurements to avoid noise
391 if (measurements < 3)
392 return;
393
394 assert(minRTT >= baseRTT);
395 assert(minCongestedRTT >= baseRTT);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100396
397 // The goal is to have a slightly too large congestion window since
398 // a "perfect" one cannot be distinguished from a too small one. This
399 // translates to a goal of a few extra milliseconds of delay.
400
401 diff = minRTT - baseRTT;
402
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100403 if (diff > __rfbmax(100, baseRTT/2)) {
404 // We have no way of detecting loss, so assume massive latency
405 // spike means packet loss. Adjust the window and go directly
406 // to congestion avoidance.
407#ifdef CONGESTION_DEBUG
408 vlog.debug("Latency spike! Backing off...");
409#endif
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100410 congWindow = congWindow * baseRTT / minRTT;
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100411 inSlowStart = false;
412 }
413
414 if (inSlowStart) {
415 // Slow start. Aggressive growth until we see congestion.
416
417 if (diff > 25) {
418 // If we see an increased latency then we assume we've hit the
419 // limit and it's time to leave slow start and switch to
420 // congestion avoidance
421 congWindow = congWindow * baseRTT / minRTT;
422 inSlowStart = false;
423 } else {
424 // It's not safe to increase unless we actually used the entire
425 // congestion window, hence we look at minCongestedRTT and not
426 // minRTT
427
428 diff = minCongestedRTT - baseRTT;
429 if (diff < 25)
430 congWindow *= 2;
431 }
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100432 } else {
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100433 // Congestion avoidance (VEGAS)
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100434
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100435 if (diff > 50) {
436 // Slightly too fast
437 congWindow -= 4096;
438 } else {
439 // Only the "congested" pongs are checked to see if the
440 // window is too small.
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100441
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100442 diff = minCongestedRTT - baseRTT;
443
444 if (diff < 5) {
445 // Way too slow
446 congWindow += 8192;
447 } else if (diff < 25) {
448 // Too slow
449 congWindow += 4096;
450 }
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100451 }
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100452 }
453
454 if (congWindow < MINIMUM_WINDOW)
455 congWindow = MINIMUM_WINDOW;
456 if (congWindow > MAXIMUM_WINDOW)
457 congWindow = MAXIMUM_WINDOW;
458
459#ifdef CONGESTION_DEBUG
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100460 vlog.debug("RTT: %d/%d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps%s",
461 minRTT, minCongestedRTT, baseRTT, congWindow / 1024,
462 congWindow * 8.0 / baseRTT / 1000.0,
463 inSlowStart ? " (slow start)" : "");
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100464#endif
465
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100466 measurements = 0;
467 gettimeofday(&lastAdjustment, NULL);
468 minRTT = minCongestedRTT = -1;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100469}
470