blob: 4d36d9f183b8447669b174dac49dcce4dd9c07b7 [file] [log] [blame]
Pierre Ossmana2b80d62018-03-23 09:30:09 +01001/* Copyright 2009-2018 Pierre Ossman for Cendio AB
Pierre Ossmanc09e5582015-12-11 20:23:17 +01002 *
3 * This is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This software is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this software; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
16 * USA.
17 */
18
19/*
20 * This code implements congestion control in the same way as TCP in
21 * order to avoid excessive latency in the transport. This is needed
22 * because "buffer bloat" is unfortunately still a very real problem.
23 *
24 * The basic principle is TCP Congestion Control (RFC 5618), with the
25 * addition of using the TCP Vegas algorithm. The reason we use Vegas
26 * is that we run on top of a reliable transport so we need a latency
Pierre Ossmana99d14d2015-12-13 15:43:46 +010027 * based algorithm rather than a loss based one. There is also a lot of
28 * interpolation of values. This is because we have rather horrible
29 * granularity in our measurements.
Pierre Ossmanda8904c2015-12-21 07:59:20 +010030 *
31 * We use a simplistic form of slow start in order to ramp up quickly
32 * from an idle state. We do not have any persistent threshold though
33 * as we have too much noise for it to be reliable.
Pierre Ossmanc09e5582015-12-11 20:23:17 +010034 */
35
Pierre Ossmana99d14d2015-12-13 15:43:46 +010036#include <assert.h>
Pierre Ossmanc09e5582015-12-11 20:23:17 +010037#include <sys/time.h>
38
Pierre Ossman8cf71632016-03-11 09:38:08 +010039#ifdef __linux__
40#include <sys/ioctl.h>
41#include <sys/socket.h>
42#include <netinet/in.h>
43#include <netinet/tcp.h>
44#include <linux/sockios.h>
45#endif
46
Pierre Ossmanc09e5582015-12-11 20:23:17 +010047#include <rfb/Congestion.h>
Pierre Ossmana99d14d2015-12-13 15:43:46 +010048#include <rfb/LogWriter.h>
Pierre Ossmanc09e5582015-12-11 20:23:17 +010049#include <rfb/util.h>
50
51// Debug output on what the congestion control is up to
52#undef CONGESTION_DEBUG
53
Pierre Ossman8cf71632016-03-11 09:38:08 +010054// Dump socket congestion window debug trace to disk
55#undef CONGESTION_TRACE
56
Pierre Ossmanc09e5582015-12-11 20:23:17 +010057using namespace rfb;
58
59// This window should get us going fairly fast on a decent bandwidth network.
60// If it's too high, it will rapidly be reduced and stay low.
61static const unsigned INITIAL_WINDOW = 16384;
62
63// TCP's minimal window is 3*MSS. But since we don't know the MSS, we
64// make a guess at 4 KiB (it's probably a bit higher).
65static const unsigned MINIMUM_WINDOW = 4096;
66
67// The current default maximum window for Linux (4 MiB). Should be a good
68// limit for now...
69static const unsigned MAXIMUM_WINDOW = 4194304;
70
Pierre Ossmana99d14d2015-12-13 15:43:46 +010071static LogWriter vlog("Congestion");
Pierre Ossmanc09e5582015-12-11 20:23:17 +010072
73Congestion::Congestion() :
Pierre Ossmana99d14d2015-12-13 15:43:46 +010074 lastPosition(0), extraBuffer(0),
Pierre Ossmanda8904c2015-12-21 07:59:20 +010075 baseRTT(-1), congWindow(INITIAL_WINDOW), inSlowStart(true),
Pierre Ossmana2b80d62018-03-23 09:30:09 +010076 safeBaseRTT(-1), measurements(0), minRTT(-1), minCongestedRTT(-1)
Pierre Ossmanc09e5582015-12-11 20:23:17 +010077{
Pierre Ossmana99d14d2015-12-13 15:43:46 +010078 gettimeofday(&lastUpdate, NULL);
79 gettimeofday(&lastSent, NULL);
80 memset(&lastPong, 0, sizeof(lastPong));
81 gettimeofday(&lastPongArrival, NULL);
82 gettimeofday(&lastAdjustment, NULL);
Pierre Ossmanc09e5582015-12-11 20:23:17 +010083}
84
85Congestion::~Congestion()
86{
87}
88
Pierre Ossmana99d14d2015-12-13 15:43:46 +010089void Congestion::updatePosition(unsigned pos)
90{
91 struct timeval now;
92 unsigned delta, consumed;
Pierre Ossmanc09e5582015-12-11 20:23:17 +010093
Pierre Ossmana99d14d2015-12-13 15:43:46 +010094 gettimeofday(&now, NULL);
95
96 delta = pos - lastPosition;
97 if ((delta > 0) || (extraBuffer > 0))
98 lastSent = now;
99
100 // Idle for too long?
101 // We use a very crude RTO calculation in order to keep things simple
102 // FIXME: should implement RFC 2861
103 if (msBetween(&lastSent, &now) > __rfbmax(baseRTT*2, 100)) {
104
105#ifdef CONGESTION_DEBUG
106 vlog.debug("Connection idle for %d ms, resetting congestion control",
107 msBetween(&lastSent, &now));
108#endif
109
110 // Close congestion window and redo wire latency measurement
111 congWindow = __rfbmin(INITIAL_WINDOW, congWindow);
112 baseRTT = -1;
113 measurements = 0;
114 gettimeofday(&lastAdjustment, NULL);
115 minRTT = minCongestedRTT = -1;
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100116 inSlowStart = true;
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100117 }
118
119 // Commonly we will be in a state of overbuffering. We need to
120 // estimate the extra delay that causes so we can separate it from
121 // the delay caused by an incorrect congestion window.
122 // (we cannot do this until we have a RTT measurement though)
123 if (baseRTT != (unsigned)-1) {
124 extraBuffer += delta;
125 consumed = msBetween(&lastUpdate, &now) * congWindow / baseRTT;
126 if (extraBuffer < consumed)
127 extraBuffer = 0;
128 else
129 extraBuffer -= consumed;
130 }
131
132 lastPosition = pos;
133 lastUpdate = now;
134}
135
136void Congestion::sentPing()
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100137{
138 struct RTTInfo rttInfo;
139
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100140 memset(&rttInfo, 0, sizeof(struct RTTInfo));
141
142 gettimeofday(&rttInfo.tv, NULL);
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100143 rttInfo.pos = lastPosition;
144 rttInfo.extra = getExtraBuffer();
145 rttInfo.congested = isCongested();
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100146
147 pings.push_back(rttInfo);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100148}
149
150void Congestion::gotPong()
151{
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100152 struct timeval now;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100153 struct RTTInfo rttInfo;
154 unsigned rtt, delay;
155
156 if (pings.empty())
157 return;
158
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100159 gettimeofday(&now, NULL);
160
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100161 rttInfo = pings.front();
162 pings.pop_front();
163
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100164 lastPong = rttInfo;
165 lastPongArrival = now;
166
167 rtt = msBetween(&rttInfo.tv, &now);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100168 if (rtt < 1)
169 rtt = 1;
170
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100171 // Try to estimate wire latency by tracking lowest seen latency
172 if (rtt < baseRTT)
Pierre Ossmana2b80d62018-03-23 09:30:09 +0100173 safeBaseRTT = baseRTT = rtt;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100174
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100175 // Pings sent before the last adjustment aren't interesting as they
176 // aren't a measurement of the current congestion window
177 if (isBefore(&rttInfo.tv, &lastAdjustment))
178 return;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100179
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100180 // Estimate added delay because of overtaxed buffers (see above)
181 delay = rttInfo.extra * baseRTT / congWindow;
182 if (delay < rtt)
183 rtt -= delay;
184 else
185 rtt = 1;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100186
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100187 // A latency less than the wire latency means that we've
188 // understimated the congestion window. We can't really determine
189 // how much, so pretend that we got no buffer latency at all.
190 if (rtt < baseRTT)
191 rtt = baseRTT;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100192
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100193 // Record the minimum seen delay (hopefully ignores jitter) and let
194 // the congestion control do its thing.
195 //
196 // Note: We are delay based rather than loss based, which means we
197 // need to look at pongs even if they weren't limited by the
198 // current window ("congested"). Otherwise we will fail to
199 // detect increasing congestion until the application exceeds
200 // the congestion window.
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100201 if (rtt < minRTT)
202 minRTT = rtt;
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100203 if (rttInfo.congested) {
204 if (rtt < minCongestedRTT)
205 minCongestedRTT = rtt;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100206 }
207
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100208 measurements++;
209 updateCongestion();
210}
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100211
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100212bool Congestion::isCongested()
213{
214 if (getInFlight() < congWindow)
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100215 return false;
216
217 return true;
218}
219
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100220int Congestion::getUncongestedETA()
221{
222 unsigned targetAcked;
223
224 const struct RTTInfo* prevPing;
225 unsigned eta, elapsed;
226 unsigned etaNext, delay;
227
228 std::list<struct RTTInfo>::const_iterator iter;
229
230 targetAcked = lastPosition - congWindow;
231
232 // Simple case?
233 if (lastPong.pos > targetAcked)
234 return 0;
235
236 // No measurements yet?
237 if (baseRTT == (unsigned)-1)
238 return -1;
239
240 prevPing = &lastPong;
241 eta = 0;
242 elapsed = msSince(&lastPongArrival);
243
244 // Walk the ping queue and figure out which one we are waiting for to
245 // get to an uncongested state
246
247 for (iter = pings.begin(); ;++iter) {
248 struct RTTInfo curPing;
249
250 // If we aren't waiting for a pong that will clear the congested
251 // state then we have to estimate the final bit by pretending that
252 // we had a ping just after the last position update.
253 if (iter == pings.end()) {
254 curPing.tv = lastUpdate;
255 curPing.pos = lastPosition;
256 curPing.extra = extraBuffer;
257 } else {
258 curPing = *iter;
259 }
260
261 etaNext = msBetween(&prevPing->tv, &curPing.tv);
262 // Compensate for buffering delays
263 delay = curPing.extra * baseRTT / congWindow;
264 etaNext += delay;
265 delay = prevPing->extra * baseRTT / congWindow;
266 if (delay >= etaNext)
267 etaNext = 0;
268 else
269 etaNext -= delay;
270
271 // Found it?
272 if (curPing.pos > targetAcked) {
273 eta += etaNext * (curPing.pos - targetAcked) / (curPing.pos - prevPing->pos);
274 if (elapsed > eta)
275 return 0;
276 else
277 return eta - elapsed;
278 }
279
280 assert(iter != pings.end());
281
282 eta += etaNext;
283 prevPing = &*iter;
284 }
285}
286
Pierre Ossmana2b80d62018-03-23 09:30:09 +0100287size_t Congestion::getBandwidth()
288{
289 // No measurements yet? Guess RTT of 60 ms
290 if (safeBaseRTT == (unsigned)-1)
291 return congWindow * 1000 / 60;
292
293 return congWindow * 1000 / safeBaseRTT;
294}
295
Pierre Ossman8cf71632016-03-11 09:38:08 +0100296void Congestion::debugTrace(const char* filename, int fd)
297{
298#ifdef CONGESTION_TRACE
299#ifdef __linux__
300 FILE *f;
301 f = fopen(filename, "ab");
302 if (f != NULL) {
303 struct tcp_info info;
304 int buffered;
305 socklen_t len;
306 len = sizeof(info);
307 if ((getsockopt(fd, IPPROTO_TCP,
308 TCP_INFO, &info, &len) == 0) &&
309 (ioctl(fd, SIOCOUTQ, &buffered) == 0)) {
310 struct timeval now;
311 gettimeofday(&now, NULL);
312 fprintf(f, "%u.%06u,%u,%u,%u,%u\n",
313 (unsigned)now.tv_sec, (unsigned)now.tv_usec,
314 congWindow, info.tcpi_snd_cwnd * info.tcpi_snd_mss,
315 getInFlight(), buffered);
316 }
317 fclose(f);
318 }
319#endif
320#endif
321}
322
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100323unsigned Congestion::getExtraBuffer()
324{
325 unsigned elapsed;
326 unsigned consumed;
327
328 if (baseRTT == (unsigned)-1)
329 return 0;
330
331 elapsed = msSince(&lastUpdate);
332 consumed = elapsed * congWindow / baseRTT;
333
334 if (consumed >= extraBuffer)
335 return 0;
336 else
337 return extraBuffer - consumed;
338}
339
340unsigned Congestion::getInFlight()
341{
342 struct RTTInfo nextPong;
343 unsigned etaNext, delay, elapsed, acked;
344
345 // Simple case?
346 if (lastPosition == lastPong.pos)
347 return 0;
348
349 // No measurements yet?
350 if (baseRTT == (unsigned)-1) {
351 if (!pings.empty())
352 return lastPosition - pings.front().pos;
353 return 0;
354 }
355
356 // If we aren't waiting for any pong then we have to estimate things
357 // by pretending that we had a ping just after the last position
358 // update.
359 if (pings.empty()) {
360 nextPong.tv = lastUpdate;
361 nextPong.pos = lastPosition;
362 nextPong.extra = extraBuffer;
363 } else {
364 nextPong = pings.front();
365 }
366
367 // First we need to estimate how many bytes have made it through
368 // completely. Look at the next ping that should arrive and figure
369 // out how far behind it should be and interpolate the positions.
370
371 etaNext = msBetween(&lastPong.tv, &nextPong.tv);
372 // Compensate for buffering delays
373 delay = nextPong.extra * baseRTT / congWindow;
374 etaNext += delay;
375 delay = lastPong.extra * baseRTT / congWindow;
376 if (delay >= etaNext)
377 etaNext = 0;
378 else
379 etaNext -= delay;
380
381 elapsed = msSince(&lastPongArrival);
382
383 // The pong should be here any second. Be optimistic and assume
384 // we can already use its value.
385 if (etaNext <= elapsed)
386 acked = nextPong.pos;
387 else {
388 acked = lastPong.pos;
389 acked += (nextPong.pos - lastPong.pos) * elapsed / etaNext;
390 }
391
392 return lastPosition - acked;
393}
394
395void Congestion::updateCongestion()
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100396{
397 unsigned diff;
398
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100399 // We want at least three measurements to avoid noise
400 if (measurements < 3)
401 return;
402
403 assert(minRTT >= baseRTT);
404 assert(minCongestedRTT >= baseRTT);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100405
406 // The goal is to have a slightly too large congestion window since
407 // a "perfect" one cannot be distinguished from a too small one. This
408 // translates to a goal of a few extra milliseconds of delay.
409
410 diff = minRTT - baseRTT;
411
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100412 if (diff > __rfbmax(100, baseRTT/2)) {
413 // We have no way of detecting loss, so assume massive latency
414 // spike means packet loss. Adjust the window and go directly
415 // to congestion avoidance.
416#ifdef CONGESTION_DEBUG
417 vlog.debug("Latency spike! Backing off...");
418#endif
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100419 congWindow = congWindow * baseRTT / minRTT;
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100420 inSlowStart = false;
421 }
422
423 if (inSlowStart) {
424 // Slow start. Aggressive growth until we see congestion.
425
426 if (diff > 25) {
427 // If we see an increased latency then we assume we've hit the
428 // limit and it's time to leave slow start and switch to
429 // congestion avoidance
430 congWindow = congWindow * baseRTT / minRTT;
431 inSlowStart = false;
432 } else {
433 // It's not safe to increase unless we actually used the entire
434 // congestion window, hence we look at minCongestedRTT and not
435 // minRTT
436
437 diff = minCongestedRTT - baseRTT;
438 if (diff < 25)
439 congWindow *= 2;
440 }
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100441 } else {
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100442 // Congestion avoidance (VEGAS)
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100443
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100444 if (diff > 50) {
445 // Slightly too fast
446 congWindow -= 4096;
447 } else {
448 // Only the "congested" pongs are checked to see if the
449 // window is too small.
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100450
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100451 diff = minCongestedRTT - baseRTT;
452
453 if (diff < 5) {
454 // Way too slow
455 congWindow += 8192;
456 } else if (diff < 25) {
457 // Too slow
458 congWindow += 4096;
459 }
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100460 }
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100461 }
462
463 if (congWindow < MINIMUM_WINDOW)
464 congWindow = MINIMUM_WINDOW;
465 if (congWindow > MAXIMUM_WINDOW)
466 congWindow = MAXIMUM_WINDOW;
467
468#ifdef CONGESTION_DEBUG
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100469 vlog.debug("RTT: %d/%d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps%s",
470 minRTT, minCongestedRTT, baseRTT, congWindow / 1024,
471 congWindow * 8.0 / baseRTT / 1000.0,
472 inSlowStart ? " (slow start)" : "");
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100473#endif
474
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100475 measurements = 0;
476 gettimeofday(&lastAdjustment, NULL);
477 minRTT = minCongestedRTT = -1;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100478}
479