blob: f3f9ceea8eef8d1fe75935414f5a04ab80190db1 [file] [log] [blame]
Pierre Ossmana2b80d62018-03-23 09:30:09 +01001/* Copyright 2009-2018 Pierre Ossman for Cendio AB
Pierre Ossmanc09e5582015-12-11 20:23:17 +01002 *
3 * This is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This software is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this software; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
16 * USA.
17 */
18
19/*
20 * This code implements congestion control in the same way as TCP in
21 * order to avoid excessive latency in the transport. This is needed
22 * because "buffer bloat" is unfortunately still a very real problem.
23 *
24 * The basic principle is TCP Congestion Control (RFC 5618), with the
25 * addition of using the TCP Vegas algorithm. The reason we use Vegas
26 * is that we run on top of a reliable transport so we need a latency
Pierre Ossmana99d14d2015-12-13 15:43:46 +010027 * based algorithm rather than a loss based one. There is also a lot of
28 * interpolation of values. This is because we have rather horrible
29 * granularity in our measurements.
Pierre Ossmanda8904c2015-12-21 07:59:20 +010030 *
31 * We use a simplistic form of slow start in order to ramp up quickly
32 * from an idle state. We do not have any persistent threshold though
33 * as we have too much noise for it to be reliable.
Pierre Ossmanc09e5582015-12-11 20:23:17 +010034 */
35
Pierre Ossmana99d14d2015-12-13 15:43:46 +010036#include <assert.h>
Pierre Ossmanc09e5582015-12-11 20:23:17 +010037#include <sys/time.h>
38
Pierre Ossman8cf71632016-03-11 09:38:08 +010039#ifdef __linux__
40#include <sys/ioctl.h>
41#include <sys/socket.h>
42#include <netinet/in.h>
43#include <netinet/tcp.h>
44#include <linux/sockios.h>
45#endif
46
Pierre Ossmanc09e5582015-12-11 20:23:17 +010047#include <rfb/Congestion.h>
Pierre Ossmana99d14d2015-12-13 15:43:46 +010048#include <rfb/LogWriter.h>
Pierre Ossmanc09e5582015-12-11 20:23:17 +010049#include <rfb/util.h>
50
51// Debug output on what the congestion control is up to
52#undef CONGESTION_DEBUG
53
Pierre Ossman8cf71632016-03-11 09:38:08 +010054// Dump socket congestion window debug trace to disk
55#undef CONGESTION_TRACE
56
Pierre Ossmanc09e5582015-12-11 20:23:17 +010057using namespace rfb;
58
59// This window should get us going fairly fast on a decent bandwidth network.
60// If it's too high, it will rapidly be reduced and stay low.
61static const unsigned INITIAL_WINDOW = 16384;
62
63// TCP's minimal window is 3*MSS. But since we don't know the MSS, we
64// make a guess at 4 KiB (it's probably a bit higher).
65static const unsigned MINIMUM_WINDOW = 4096;
66
67// The current default maximum window for Linux (4 MiB). Should be a good
68// limit for now...
69static const unsigned MAXIMUM_WINDOW = 4194304;
70
Peter Åstrand (astrand)c05fba72018-05-29 10:04:53 +020071// Compare position even when wrapped around
72static inline bool isAfter(unsigned a, unsigned b) {
Loic Prylli95c46d82019-04-10 14:25:35 -070073 return a != b && a - b <= UINT_MAX / 2;
Peter Åstrand (astrand)c05fba72018-05-29 10:04:53 +020074}
75
Pierre Ossmana99d14d2015-12-13 15:43:46 +010076static LogWriter vlog("Congestion");
Pierre Ossmanc09e5582015-12-11 20:23:17 +010077
78Congestion::Congestion() :
Pierre Ossmana99d14d2015-12-13 15:43:46 +010079 lastPosition(0), extraBuffer(0),
Pierre Ossmanda8904c2015-12-21 07:59:20 +010080 baseRTT(-1), congWindow(INITIAL_WINDOW), inSlowStart(true),
Pierre Ossmana2b80d62018-03-23 09:30:09 +010081 safeBaseRTT(-1), measurements(0), minRTT(-1), minCongestedRTT(-1)
Pierre Ossmanc09e5582015-12-11 20:23:17 +010082{
Pierre Ossmana99d14d2015-12-13 15:43:46 +010083 gettimeofday(&lastUpdate, NULL);
84 gettimeofday(&lastSent, NULL);
85 memset(&lastPong, 0, sizeof(lastPong));
86 gettimeofday(&lastPongArrival, NULL);
87 gettimeofday(&lastAdjustment, NULL);
Pierre Ossmanc09e5582015-12-11 20:23:17 +010088}
89
90Congestion::~Congestion()
91{
92}
93
Pierre Ossmana99d14d2015-12-13 15:43:46 +010094void Congestion::updatePosition(unsigned pos)
95{
96 struct timeval now;
97 unsigned delta, consumed;
Pierre Ossmanc09e5582015-12-11 20:23:17 +010098
Pierre Ossmana99d14d2015-12-13 15:43:46 +010099 gettimeofday(&now, NULL);
100
101 delta = pos - lastPosition;
102 if ((delta > 0) || (extraBuffer > 0))
103 lastSent = now;
104
105 // Idle for too long?
106 // We use a very crude RTO calculation in order to keep things simple
107 // FIXME: should implement RFC 2861
108 if (msBetween(&lastSent, &now) > __rfbmax(baseRTT*2, 100)) {
109
110#ifdef CONGESTION_DEBUG
111 vlog.debug("Connection idle for %d ms, resetting congestion control",
112 msBetween(&lastSent, &now));
113#endif
114
115 // Close congestion window and redo wire latency measurement
116 congWindow = __rfbmin(INITIAL_WINDOW, congWindow);
117 baseRTT = -1;
118 measurements = 0;
119 gettimeofday(&lastAdjustment, NULL);
120 minRTT = minCongestedRTT = -1;
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100121 inSlowStart = true;
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100122 }
123
124 // Commonly we will be in a state of overbuffering. We need to
125 // estimate the extra delay that causes so we can separate it from
126 // the delay caused by an incorrect congestion window.
127 // (we cannot do this until we have a RTT measurement though)
128 if (baseRTT != (unsigned)-1) {
129 extraBuffer += delta;
130 consumed = msBetween(&lastUpdate, &now) * congWindow / baseRTT;
131 if (extraBuffer < consumed)
132 extraBuffer = 0;
133 else
134 extraBuffer -= consumed;
135 }
136
137 lastPosition = pos;
138 lastUpdate = now;
139}
140
141void Congestion::sentPing()
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100142{
143 struct RTTInfo rttInfo;
144
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100145 memset(&rttInfo, 0, sizeof(struct RTTInfo));
146
147 gettimeofday(&rttInfo.tv, NULL);
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100148 rttInfo.pos = lastPosition;
149 rttInfo.extra = getExtraBuffer();
150 rttInfo.congested = isCongested();
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100151
152 pings.push_back(rttInfo);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100153}
154
155void Congestion::gotPong()
156{
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100157 struct timeval now;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100158 struct RTTInfo rttInfo;
159 unsigned rtt, delay;
160
161 if (pings.empty())
162 return;
163
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100164 gettimeofday(&now, NULL);
165
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100166 rttInfo = pings.front();
167 pings.pop_front();
168
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100169 lastPong = rttInfo;
170 lastPongArrival = now;
171
172 rtt = msBetween(&rttInfo.tv, &now);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100173 if (rtt < 1)
174 rtt = 1;
175
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100176 // Try to estimate wire latency by tracking lowest seen latency
177 if (rtt < baseRTT)
Pierre Ossmana2b80d62018-03-23 09:30:09 +0100178 safeBaseRTT = baseRTT = rtt;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100179
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100180 // Pings sent before the last adjustment aren't interesting as they
181 // aren't a measurement of the current congestion window
182 if (isBefore(&rttInfo.tv, &lastAdjustment))
183 return;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100184
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100185 // Estimate added delay because of overtaxed buffers (see above)
186 delay = rttInfo.extra * baseRTT / congWindow;
187 if (delay < rtt)
188 rtt -= delay;
189 else
190 rtt = 1;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100191
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100192 // A latency less than the wire latency means that we've
193 // understimated the congestion window. We can't really determine
194 // how much, so pretend that we got no buffer latency at all.
195 if (rtt < baseRTT)
196 rtt = baseRTT;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100197
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100198 // Record the minimum seen delay (hopefully ignores jitter) and let
199 // the congestion control do its thing.
200 //
201 // Note: We are delay based rather than loss based, which means we
202 // need to look at pongs even if they weren't limited by the
203 // current window ("congested"). Otherwise we will fail to
204 // detect increasing congestion until the application exceeds
205 // the congestion window.
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100206 if (rtt < minRTT)
207 minRTT = rtt;
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100208 if (rttInfo.congested) {
209 if (rtt < minCongestedRTT)
210 minCongestedRTT = rtt;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100211 }
212
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100213 measurements++;
214 updateCongestion();
215}
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100216
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100217bool Congestion::isCongested()
218{
219 if (getInFlight() < congWindow)
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100220 return false;
221
222 return true;
223}
224
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100225int Congestion::getUncongestedETA()
226{
227 unsigned targetAcked;
228
229 const struct RTTInfo* prevPing;
230 unsigned eta, elapsed;
231 unsigned etaNext, delay;
232
233 std::list<struct RTTInfo>::const_iterator iter;
234
235 targetAcked = lastPosition - congWindow;
236
237 // Simple case?
Peter Åstrand (astrand)c05fba72018-05-29 10:04:53 +0200238 if (isAfter(lastPong.pos, targetAcked))
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100239 return 0;
240
241 // No measurements yet?
242 if (baseRTT == (unsigned)-1)
243 return -1;
244
245 prevPing = &lastPong;
246 eta = 0;
247 elapsed = msSince(&lastPongArrival);
248
249 // Walk the ping queue and figure out which one we are waiting for to
250 // get to an uncongested state
251
252 for (iter = pings.begin(); ;++iter) {
253 struct RTTInfo curPing;
254
255 // If we aren't waiting for a pong that will clear the congested
256 // state then we have to estimate the final bit by pretending that
257 // we had a ping just after the last position update.
258 if (iter == pings.end()) {
259 curPing.tv = lastUpdate;
260 curPing.pos = lastPosition;
261 curPing.extra = extraBuffer;
262 } else {
263 curPing = *iter;
264 }
265
266 etaNext = msBetween(&prevPing->tv, &curPing.tv);
267 // Compensate for buffering delays
268 delay = curPing.extra * baseRTT / congWindow;
269 etaNext += delay;
270 delay = prevPing->extra * baseRTT / congWindow;
271 if (delay >= etaNext)
272 etaNext = 0;
273 else
274 etaNext -= delay;
275
276 // Found it?
Peter Åstrand (astrand)c05fba72018-05-29 10:04:53 +0200277 if (isAfter(curPing.pos, targetAcked)) {
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100278 eta += etaNext * (curPing.pos - targetAcked) / (curPing.pos - prevPing->pos);
279 if (elapsed > eta)
280 return 0;
281 else
282 return eta - elapsed;
283 }
284
285 assert(iter != pings.end());
286
287 eta += etaNext;
288 prevPing = &*iter;
289 }
290}
291
Pierre Ossmana2b80d62018-03-23 09:30:09 +0100292size_t Congestion::getBandwidth()
293{
Pierre Ossman4f04ebb2018-09-20 16:35:37 +0200294 size_t bandwidth;
295
Pierre Ossmana2b80d62018-03-23 09:30:09 +0100296 // No measurements yet? Guess RTT of 60 ms
297 if (safeBaseRTT == (unsigned)-1)
Pierre Ossman4f04ebb2018-09-20 16:35:37 +0200298 bandwidth = congWindow * 1000 / 60;
299 else
300 bandwidth = congWindow * 1000 / safeBaseRTT;
Pierre Ossmana2b80d62018-03-23 09:30:09 +0100301
Pierre Ossman4f04ebb2018-09-20 16:35:37 +0200302 // We're still probing so guess actual bandwidth is halfway between
303 // the current guess and the next one (slow start doubles each time)
304 if (inSlowStart)
305 bandwidth = bandwidth + bandwidth / 2;
306
307 return bandwidth;
Pierre Ossmana2b80d62018-03-23 09:30:09 +0100308}
309
Pierre Ossman8cf71632016-03-11 09:38:08 +0100310void Congestion::debugTrace(const char* filename, int fd)
311{
312#ifdef CONGESTION_TRACE
313#ifdef __linux__
314 FILE *f;
315 f = fopen(filename, "ab");
316 if (f != NULL) {
317 struct tcp_info info;
318 int buffered;
319 socklen_t len;
320 len = sizeof(info);
321 if ((getsockopt(fd, IPPROTO_TCP,
322 TCP_INFO, &info, &len) == 0) &&
323 (ioctl(fd, SIOCOUTQ, &buffered) == 0)) {
324 struct timeval now;
325 gettimeofday(&now, NULL);
326 fprintf(f, "%u.%06u,%u,%u,%u,%u\n",
327 (unsigned)now.tv_sec, (unsigned)now.tv_usec,
328 congWindow, info.tcpi_snd_cwnd * info.tcpi_snd_mss,
329 getInFlight(), buffered);
330 }
331 fclose(f);
332 }
333#endif
334#endif
335}
336
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100337unsigned Congestion::getExtraBuffer()
338{
339 unsigned elapsed;
340 unsigned consumed;
341
342 if (baseRTT == (unsigned)-1)
343 return 0;
344
345 elapsed = msSince(&lastUpdate);
346 consumed = elapsed * congWindow / baseRTT;
347
348 if (consumed >= extraBuffer)
349 return 0;
350 else
351 return extraBuffer - consumed;
352}
353
354unsigned Congestion::getInFlight()
355{
356 struct RTTInfo nextPong;
357 unsigned etaNext, delay, elapsed, acked;
358
359 // Simple case?
360 if (lastPosition == lastPong.pos)
361 return 0;
362
363 // No measurements yet?
364 if (baseRTT == (unsigned)-1) {
365 if (!pings.empty())
366 return lastPosition - pings.front().pos;
367 return 0;
368 }
369
370 // If we aren't waiting for any pong then we have to estimate things
371 // by pretending that we had a ping just after the last position
372 // update.
373 if (pings.empty()) {
374 nextPong.tv = lastUpdate;
375 nextPong.pos = lastPosition;
376 nextPong.extra = extraBuffer;
377 } else {
378 nextPong = pings.front();
379 }
380
381 // First we need to estimate how many bytes have made it through
382 // completely. Look at the next ping that should arrive and figure
383 // out how far behind it should be and interpolate the positions.
384
385 etaNext = msBetween(&lastPong.tv, &nextPong.tv);
386 // Compensate for buffering delays
387 delay = nextPong.extra * baseRTT / congWindow;
388 etaNext += delay;
389 delay = lastPong.extra * baseRTT / congWindow;
390 if (delay >= etaNext)
391 etaNext = 0;
392 else
393 etaNext -= delay;
394
395 elapsed = msSince(&lastPongArrival);
396
397 // The pong should be here any second. Be optimistic and assume
398 // we can already use its value.
399 if (etaNext <= elapsed)
400 acked = nextPong.pos;
401 else {
402 acked = lastPong.pos;
403 acked += (nextPong.pos - lastPong.pos) * elapsed / etaNext;
404 }
405
406 return lastPosition - acked;
407}
408
409void Congestion::updateCongestion()
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100410{
411 unsigned diff;
412
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100413 // We want at least three measurements to avoid noise
414 if (measurements < 3)
415 return;
416
417 assert(minRTT >= baseRTT);
418 assert(minCongestedRTT >= baseRTT);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100419
420 // The goal is to have a slightly too large congestion window since
421 // a "perfect" one cannot be distinguished from a too small one. This
422 // translates to a goal of a few extra milliseconds of delay.
423
424 diff = minRTT - baseRTT;
425
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100426 if (diff > __rfbmax(100, baseRTT/2)) {
427 // We have no way of detecting loss, so assume massive latency
428 // spike means packet loss. Adjust the window and go directly
429 // to congestion avoidance.
430#ifdef CONGESTION_DEBUG
431 vlog.debug("Latency spike! Backing off...");
432#endif
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100433 congWindow = congWindow * baseRTT / minRTT;
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100434 inSlowStart = false;
435 }
436
437 if (inSlowStart) {
438 // Slow start. Aggressive growth until we see congestion.
439
440 if (diff > 25) {
441 // If we see an increased latency then we assume we've hit the
442 // limit and it's time to leave slow start and switch to
443 // congestion avoidance
444 congWindow = congWindow * baseRTT / minRTT;
445 inSlowStart = false;
446 } else {
447 // It's not safe to increase unless we actually used the entire
448 // congestion window, hence we look at minCongestedRTT and not
449 // minRTT
450
451 diff = minCongestedRTT - baseRTT;
452 if (diff < 25)
453 congWindow *= 2;
454 }
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100455 } else {
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100456 // Congestion avoidance (VEGAS)
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100457
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100458 if (diff > 50) {
459 // Slightly too fast
460 congWindow -= 4096;
461 } else {
462 // Only the "congested" pongs are checked to see if the
463 // window is too small.
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100464
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100465 diff = minCongestedRTT - baseRTT;
466
467 if (diff < 5) {
468 // Way too slow
469 congWindow += 8192;
470 } else if (diff < 25) {
471 // Too slow
472 congWindow += 4096;
473 }
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100474 }
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100475 }
476
477 if (congWindow < MINIMUM_WINDOW)
478 congWindow = MINIMUM_WINDOW;
479 if (congWindow > MAXIMUM_WINDOW)
480 congWindow = MAXIMUM_WINDOW;
481
482#ifdef CONGESTION_DEBUG
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100483 vlog.debug("RTT: %d/%d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps%s",
484 minRTT, minCongestedRTT, baseRTT, congWindow / 1024,
485 congWindow * 8.0 / baseRTT / 1000.0,
486 inSlowStart ? " (slow start)" : "");
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100487#endif
488
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100489 measurements = 0;
490 gettimeofday(&lastAdjustment, NULL);
491 minRTT = minCongestedRTT = -1;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100492}
493