blob: c7d6f710cb9e628a7eed1e98a43ca681ba9157b5 [file] [log] [blame]
Pierre Ossmanc09e5582015-12-11 20:23:17 +01001/* Copyright 2009-2015 Pierre Ossman for Cendio AB
2 *
3 * This is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This software is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this software; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
16 * USA.
17 */
18
19/*
20 * This code implements congestion control in the same way as TCP in
21 * order to avoid excessive latency in the transport. This is needed
22 * because "buffer bloat" is unfortunately still a very real problem.
23 *
24 * The basic principle is TCP Congestion Control (RFC 5618), with the
25 * addition of using the TCP Vegas algorithm. The reason we use Vegas
26 * is that we run on top of a reliable transport so we need a latency
Pierre Ossmana99d14d2015-12-13 15:43:46 +010027 * based algorithm rather than a loss based one. There is also a lot of
28 * interpolation of values. This is because we have rather horrible
29 * granularity in our measurements.
Pierre Ossmanda8904c2015-12-21 07:59:20 +010030 *
31 * We use a simplistic form of slow start in order to ramp up quickly
32 * from an idle state. We do not have any persistent threshold though
33 * as we have too much noise for it to be reliable.
Pierre Ossmanc09e5582015-12-11 20:23:17 +010034 */
35
Pierre Ossmana99d14d2015-12-13 15:43:46 +010036#include <assert.h>
Pierre Ossmanc09e5582015-12-11 20:23:17 +010037#include <sys/time.h>
38
39#include <rfb/Congestion.h>
Pierre Ossmana99d14d2015-12-13 15:43:46 +010040#include <rfb/LogWriter.h>
Pierre Ossmanc09e5582015-12-11 20:23:17 +010041#include <rfb/util.h>
42
43// Debug output on what the congestion control is up to
44#undef CONGESTION_DEBUG
45
46using namespace rfb;
47
48// This window should get us going fairly fast on a decent bandwidth network.
49// If it's too high, it will rapidly be reduced and stay low.
50static const unsigned INITIAL_WINDOW = 16384;
51
52// TCP's minimal window is 3*MSS. But since we don't know the MSS, we
53// make a guess at 4 KiB (it's probably a bit higher).
54static const unsigned MINIMUM_WINDOW = 4096;
55
56// The current default maximum window for Linux (4 MiB). Should be a good
57// limit for now...
58static const unsigned MAXIMUM_WINDOW = 4194304;
59
Pierre Ossmana99d14d2015-12-13 15:43:46 +010060static LogWriter vlog("Congestion");
Pierre Ossmanc09e5582015-12-11 20:23:17 +010061
62Congestion::Congestion() :
Pierre Ossmana99d14d2015-12-13 15:43:46 +010063 lastPosition(0), extraBuffer(0),
Pierre Ossmanda8904c2015-12-21 07:59:20 +010064 baseRTT(-1), congWindow(INITIAL_WINDOW), inSlowStart(true),
Pierre Ossmana99d14d2015-12-13 15:43:46 +010065 measurements(0), minRTT(-1), minCongestedRTT(-1)
Pierre Ossmanc09e5582015-12-11 20:23:17 +010066{
Pierre Ossmana99d14d2015-12-13 15:43:46 +010067 gettimeofday(&lastUpdate, NULL);
68 gettimeofday(&lastSent, NULL);
69 memset(&lastPong, 0, sizeof(lastPong));
70 gettimeofday(&lastPongArrival, NULL);
71 gettimeofday(&lastAdjustment, NULL);
Pierre Ossmanc09e5582015-12-11 20:23:17 +010072}
73
74Congestion::~Congestion()
75{
76}
77
Pierre Ossmana99d14d2015-12-13 15:43:46 +010078void Congestion::updatePosition(unsigned pos)
79{
80 struct timeval now;
81 unsigned delta, consumed;
Pierre Ossmanc09e5582015-12-11 20:23:17 +010082
Pierre Ossmana99d14d2015-12-13 15:43:46 +010083 gettimeofday(&now, NULL);
84
85 delta = pos - lastPosition;
86 if ((delta > 0) || (extraBuffer > 0))
87 lastSent = now;
88
89 // Idle for too long?
90 // We use a very crude RTO calculation in order to keep things simple
91 // FIXME: should implement RFC 2861
92 if (msBetween(&lastSent, &now) > __rfbmax(baseRTT*2, 100)) {
93
94#ifdef CONGESTION_DEBUG
95 vlog.debug("Connection idle for %d ms, resetting congestion control",
96 msBetween(&lastSent, &now));
97#endif
98
99 // Close congestion window and redo wire latency measurement
100 congWindow = __rfbmin(INITIAL_WINDOW, congWindow);
101 baseRTT = -1;
102 measurements = 0;
103 gettimeofday(&lastAdjustment, NULL);
104 minRTT = minCongestedRTT = -1;
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100105 inSlowStart = true;
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100106 }
107
108 // Commonly we will be in a state of overbuffering. We need to
109 // estimate the extra delay that causes so we can separate it from
110 // the delay caused by an incorrect congestion window.
111 // (we cannot do this until we have a RTT measurement though)
112 if (baseRTT != (unsigned)-1) {
113 extraBuffer += delta;
114 consumed = msBetween(&lastUpdate, &now) * congWindow / baseRTT;
115 if (extraBuffer < consumed)
116 extraBuffer = 0;
117 else
118 extraBuffer -= consumed;
119 }
120
121 lastPosition = pos;
122 lastUpdate = now;
123}
124
125void Congestion::sentPing()
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100126{
127 struct RTTInfo rttInfo;
128
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100129 memset(&rttInfo, 0, sizeof(struct RTTInfo));
130
131 gettimeofday(&rttInfo.tv, NULL);
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100132 rttInfo.pos = lastPosition;
133 rttInfo.extra = getExtraBuffer();
134 rttInfo.congested = isCongested();
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100135
136 pings.push_back(rttInfo);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100137}
138
139void Congestion::gotPong()
140{
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100141 struct timeval now;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100142 struct RTTInfo rttInfo;
143 unsigned rtt, delay;
144
145 if (pings.empty())
146 return;
147
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100148 gettimeofday(&now, NULL);
149
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100150 rttInfo = pings.front();
151 pings.pop_front();
152
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100153 lastPong = rttInfo;
154 lastPongArrival = now;
155
156 rtt = msBetween(&rttInfo.tv, &now);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100157 if (rtt < 1)
158 rtt = 1;
159
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100160 // Try to estimate wire latency by tracking lowest seen latency
161 if (rtt < baseRTT)
162 baseRTT = rtt;
163
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100164 // Pings sent before the last adjustment aren't interesting as they
165 // aren't a measurement of the current congestion window
166 if (isBefore(&rttInfo.tv, &lastAdjustment))
167 return;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100168
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100169 // Estimate added delay because of overtaxed buffers (see above)
170 delay = rttInfo.extra * baseRTT / congWindow;
171 if (delay < rtt)
172 rtt -= delay;
173 else
174 rtt = 1;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100175
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100176 // A latency less than the wire latency means that we've
177 // understimated the congestion window. We can't really determine
178 // how much, so pretend that we got no buffer latency at all.
179 if (rtt < baseRTT)
180 rtt = baseRTT;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100181
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100182 // Record the minimum seen delay (hopefully ignores jitter) and let
183 // the congestion control do its thing.
184 //
185 // Note: We are delay based rather than loss based, which means we
186 // need to look at pongs even if they weren't limited by the
187 // current window ("congested"). Otherwise we will fail to
188 // detect increasing congestion until the application exceeds
189 // the congestion window.
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100190 if (rtt < minRTT)
191 minRTT = rtt;
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100192 if (rttInfo.congested) {
193 if (rtt < minCongestedRTT)
194 minCongestedRTT = rtt;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100195 }
196
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100197 measurements++;
198 updateCongestion();
199}
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100200
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100201bool Congestion::isCongested()
202{
203 if (getInFlight() < congWindow)
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100204 return false;
205
206 return true;
207}
208
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100209int Congestion::getUncongestedETA()
210{
211 unsigned targetAcked;
212
213 const struct RTTInfo* prevPing;
214 unsigned eta, elapsed;
215 unsigned etaNext, delay;
216
217 std::list<struct RTTInfo>::const_iterator iter;
218
219 targetAcked = lastPosition - congWindow;
220
221 // Simple case?
222 if (lastPong.pos > targetAcked)
223 return 0;
224
225 // No measurements yet?
226 if (baseRTT == (unsigned)-1)
227 return -1;
228
229 prevPing = &lastPong;
230 eta = 0;
231 elapsed = msSince(&lastPongArrival);
232
233 // Walk the ping queue and figure out which one we are waiting for to
234 // get to an uncongested state
235
236 for (iter = pings.begin(); ;++iter) {
237 struct RTTInfo curPing;
238
239 // If we aren't waiting for a pong that will clear the congested
240 // state then we have to estimate the final bit by pretending that
241 // we had a ping just after the last position update.
242 if (iter == pings.end()) {
243 curPing.tv = lastUpdate;
244 curPing.pos = lastPosition;
245 curPing.extra = extraBuffer;
246 } else {
247 curPing = *iter;
248 }
249
250 etaNext = msBetween(&prevPing->tv, &curPing.tv);
251 // Compensate for buffering delays
252 delay = curPing.extra * baseRTT / congWindow;
253 etaNext += delay;
254 delay = prevPing->extra * baseRTT / congWindow;
255 if (delay >= etaNext)
256 etaNext = 0;
257 else
258 etaNext -= delay;
259
260 // Found it?
261 if (curPing.pos > targetAcked) {
262 eta += etaNext * (curPing.pos - targetAcked) / (curPing.pos - prevPing->pos);
263 if (elapsed > eta)
264 return 0;
265 else
266 return eta - elapsed;
267 }
268
269 assert(iter != pings.end());
270
271 eta += etaNext;
272 prevPing = &*iter;
273 }
274}
275
276unsigned Congestion::getExtraBuffer()
277{
278 unsigned elapsed;
279 unsigned consumed;
280
281 if (baseRTT == (unsigned)-1)
282 return 0;
283
284 elapsed = msSince(&lastUpdate);
285 consumed = elapsed * congWindow / baseRTT;
286
287 if (consumed >= extraBuffer)
288 return 0;
289 else
290 return extraBuffer - consumed;
291}
292
293unsigned Congestion::getInFlight()
294{
295 struct RTTInfo nextPong;
296 unsigned etaNext, delay, elapsed, acked;
297
298 // Simple case?
299 if (lastPosition == lastPong.pos)
300 return 0;
301
302 // No measurements yet?
303 if (baseRTT == (unsigned)-1) {
304 if (!pings.empty())
305 return lastPosition - pings.front().pos;
306 return 0;
307 }
308
309 // If we aren't waiting for any pong then we have to estimate things
310 // by pretending that we had a ping just after the last position
311 // update.
312 if (pings.empty()) {
313 nextPong.tv = lastUpdate;
314 nextPong.pos = lastPosition;
315 nextPong.extra = extraBuffer;
316 } else {
317 nextPong = pings.front();
318 }
319
320 // First we need to estimate how many bytes have made it through
321 // completely. Look at the next ping that should arrive and figure
322 // out how far behind it should be and interpolate the positions.
323
324 etaNext = msBetween(&lastPong.tv, &nextPong.tv);
325 // Compensate for buffering delays
326 delay = nextPong.extra * baseRTT / congWindow;
327 etaNext += delay;
328 delay = lastPong.extra * baseRTT / congWindow;
329 if (delay >= etaNext)
330 etaNext = 0;
331 else
332 etaNext -= delay;
333
334 elapsed = msSince(&lastPongArrival);
335
336 // The pong should be here any second. Be optimistic and assume
337 // we can already use its value.
338 if (etaNext <= elapsed)
339 acked = nextPong.pos;
340 else {
341 acked = lastPong.pos;
342 acked += (nextPong.pos - lastPong.pos) * elapsed / etaNext;
343 }
344
345 return lastPosition - acked;
346}
347
348void Congestion::updateCongestion()
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100349{
350 unsigned diff;
351
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100352 // We want at least three measurements to avoid noise
353 if (measurements < 3)
354 return;
355
356 assert(minRTT >= baseRTT);
357 assert(minCongestedRTT >= baseRTT);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100358
359 // The goal is to have a slightly too large congestion window since
360 // a "perfect" one cannot be distinguished from a too small one. This
361 // translates to a goal of a few extra milliseconds of delay.
362
363 diff = minRTT - baseRTT;
364
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100365 if (diff > __rfbmax(100, baseRTT/2)) {
366 // We have no way of detecting loss, so assume massive latency
367 // spike means packet loss. Adjust the window and go directly
368 // to congestion avoidance.
369#ifdef CONGESTION_DEBUG
370 vlog.debug("Latency spike! Backing off...");
371#endif
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100372 congWindow = congWindow * baseRTT / minRTT;
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100373 inSlowStart = false;
374 }
375
376 if (inSlowStart) {
377 // Slow start. Aggressive growth until we see congestion.
378
379 if (diff > 25) {
380 // If we see an increased latency then we assume we've hit the
381 // limit and it's time to leave slow start and switch to
382 // congestion avoidance
383 congWindow = congWindow * baseRTT / minRTT;
384 inSlowStart = false;
385 } else {
386 // It's not safe to increase unless we actually used the entire
387 // congestion window, hence we look at minCongestedRTT and not
388 // minRTT
389
390 diff = minCongestedRTT - baseRTT;
391 if (diff < 25)
392 congWindow *= 2;
393 }
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100394 } else {
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100395 // Congestion avoidance (VEGAS)
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100396
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100397 if (diff > 50) {
398 // Slightly too fast
399 congWindow -= 4096;
400 } else {
401 // Only the "congested" pongs are checked to see if the
402 // window is too small.
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100403
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100404 diff = minCongestedRTT - baseRTT;
405
406 if (diff < 5) {
407 // Way too slow
408 congWindow += 8192;
409 } else if (diff < 25) {
410 // Too slow
411 congWindow += 4096;
412 }
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100413 }
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100414 }
415
416 if (congWindow < MINIMUM_WINDOW)
417 congWindow = MINIMUM_WINDOW;
418 if (congWindow > MAXIMUM_WINDOW)
419 congWindow = MAXIMUM_WINDOW;
420
421#ifdef CONGESTION_DEBUG
Pierre Ossmanda8904c2015-12-21 07:59:20 +0100422 vlog.debug("RTT: %d/%d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps%s",
423 minRTT, minCongestedRTT, baseRTT, congWindow / 1024,
424 congWindow * 8.0 / baseRTT / 1000.0,
425 inSlowStart ? " (slow start)" : "");
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100426#endif
427
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100428 measurements = 0;
429 gettimeofday(&lastAdjustment, NULL);
430 minRTT = minCongestedRTT = -1;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100431}
432