blob: 94d78e32f26f85a059e35702ff1fefae336c0ca7 [file] [log] [blame]
Pierre Ossmanc09e5582015-12-11 20:23:17 +01001/* Copyright 2009-2015 Pierre Ossman for Cendio AB
2 *
3 * This is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This software is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this software; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
16 * USA.
17 */
18
19/*
20 * This code implements congestion control in the same way as TCP in
21 * order to avoid excessive latency in the transport. This is needed
22 * because "buffer bloat" is unfortunately still a very real problem.
23 *
24 * The basic principle is TCP Congestion Control (RFC 5618), with the
25 * addition of using the TCP Vegas algorithm. The reason we use Vegas
26 * is that we run on top of a reliable transport so we need a latency
Pierre Ossmana99d14d2015-12-13 15:43:46 +010027 * based algorithm rather than a loss based one. There is also a lot of
28 * interpolation of values. This is because we have rather horrible
29 * granularity in our measurements.
Pierre Ossmanc09e5582015-12-11 20:23:17 +010030 */
31
Pierre Ossmana99d14d2015-12-13 15:43:46 +010032#include <assert.h>
Pierre Ossmanc09e5582015-12-11 20:23:17 +010033#include <sys/time.h>
34
35#include <rfb/Congestion.h>
Pierre Ossmana99d14d2015-12-13 15:43:46 +010036#include <rfb/LogWriter.h>
Pierre Ossmanc09e5582015-12-11 20:23:17 +010037#include <rfb/util.h>
38
39// Debug output on what the congestion control is up to
40#undef CONGESTION_DEBUG
41
42using namespace rfb;
43
44// This window should get us going fairly fast on a decent bandwidth network.
45// If it's too high, it will rapidly be reduced and stay low.
46static const unsigned INITIAL_WINDOW = 16384;
47
48// TCP's minimal window is 3*MSS. But since we don't know the MSS, we
49// make a guess at 4 KiB (it's probably a bit higher).
50static const unsigned MINIMUM_WINDOW = 4096;
51
52// The current default maximum window for Linux (4 MiB). Should be a good
53// limit for now...
54static const unsigned MAXIMUM_WINDOW = 4194304;
55
Pierre Ossmana99d14d2015-12-13 15:43:46 +010056static LogWriter vlog("Congestion");
Pierre Ossmanc09e5582015-12-11 20:23:17 +010057
58Congestion::Congestion() :
Pierre Ossmana99d14d2015-12-13 15:43:46 +010059 lastPosition(0), extraBuffer(0),
Pierre Ossmanc09e5582015-12-11 20:23:17 +010060 baseRTT(-1), congWindow(INITIAL_WINDOW),
Pierre Ossmana99d14d2015-12-13 15:43:46 +010061 measurements(0), minRTT(-1), minCongestedRTT(-1)
Pierre Ossmanc09e5582015-12-11 20:23:17 +010062{
Pierre Ossmana99d14d2015-12-13 15:43:46 +010063 gettimeofday(&lastUpdate, NULL);
64 gettimeofday(&lastSent, NULL);
65 memset(&lastPong, 0, sizeof(lastPong));
66 gettimeofday(&lastPongArrival, NULL);
67 gettimeofday(&lastAdjustment, NULL);
Pierre Ossmanc09e5582015-12-11 20:23:17 +010068}
69
70Congestion::~Congestion()
71{
72}
73
Pierre Ossmana99d14d2015-12-13 15:43:46 +010074void Congestion::updatePosition(unsigned pos)
75{
76 struct timeval now;
77 unsigned delta, consumed;
Pierre Ossmanc09e5582015-12-11 20:23:17 +010078
Pierre Ossmana99d14d2015-12-13 15:43:46 +010079 gettimeofday(&now, NULL);
80
81 delta = pos - lastPosition;
82 if ((delta > 0) || (extraBuffer > 0))
83 lastSent = now;
84
85 // Idle for too long?
86 // We use a very crude RTO calculation in order to keep things simple
87 // FIXME: should implement RFC 2861
88 if (msBetween(&lastSent, &now) > __rfbmax(baseRTT*2, 100)) {
89
90#ifdef CONGESTION_DEBUG
91 vlog.debug("Connection idle for %d ms, resetting congestion control",
92 msBetween(&lastSent, &now));
93#endif
94
95 // Close congestion window and redo wire latency measurement
96 congWindow = __rfbmin(INITIAL_WINDOW, congWindow);
97 baseRTT = -1;
98 measurements = 0;
99 gettimeofday(&lastAdjustment, NULL);
100 minRTT = minCongestedRTT = -1;
101 }
102
103 // Commonly we will be in a state of overbuffering. We need to
104 // estimate the extra delay that causes so we can separate it from
105 // the delay caused by an incorrect congestion window.
106 // (we cannot do this until we have a RTT measurement though)
107 if (baseRTT != (unsigned)-1) {
108 extraBuffer += delta;
109 consumed = msBetween(&lastUpdate, &now) * congWindow / baseRTT;
110 if (extraBuffer < consumed)
111 extraBuffer = 0;
112 else
113 extraBuffer -= consumed;
114 }
115
116 lastPosition = pos;
117 lastUpdate = now;
118}
119
120void Congestion::sentPing()
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100121{
122 struct RTTInfo rttInfo;
123
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100124 memset(&rttInfo, 0, sizeof(struct RTTInfo));
125
126 gettimeofday(&rttInfo.tv, NULL);
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100127 rttInfo.pos = lastPosition;
128 rttInfo.extra = getExtraBuffer();
129 rttInfo.congested = isCongested();
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100130
131 pings.push_back(rttInfo);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100132}
133
134void Congestion::gotPong()
135{
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100136 struct timeval now;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100137 struct RTTInfo rttInfo;
138 unsigned rtt, delay;
139
140 if (pings.empty())
141 return;
142
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100143 gettimeofday(&now, NULL);
144
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100145 rttInfo = pings.front();
146 pings.pop_front();
147
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100148 lastPong = rttInfo;
149 lastPongArrival = now;
150
151 rtt = msBetween(&rttInfo.tv, &now);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100152 if (rtt < 1)
153 rtt = 1;
154
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100155 // Try to estimate wire latency by tracking lowest seen latency
156 if (rtt < baseRTT)
157 baseRTT = rtt;
158
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100159 // Pings sent before the last adjustment aren't interesting as they
160 // aren't a measurement of the current congestion window
161 if (isBefore(&rttInfo.tv, &lastAdjustment))
162 return;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100163
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100164 // Estimate added delay because of overtaxed buffers (see above)
165 delay = rttInfo.extra * baseRTT / congWindow;
166 if (delay < rtt)
167 rtt -= delay;
168 else
169 rtt = 1;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100170
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100171 // A latency less than the wire latency means that we've
172 // understimated the congestion window. We can't really determine
173 // how much, so pretend that we got no buffer latency at all.
174 if (rtt < baseRTT)
175 rtt = baseRTT;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100176
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100177 // Record the minimum seen delay (hopefully ignores jitter) and let
178 // the congestion control do its thing.
179 //
180 // Note: We are delay based rather than loss based, which means we
181 // need to look at pongs even if they weren't limited by the
182 // current window ("congested"). Otherwise we will fail to
183 // detect increasing congestion until the application exceeds
184 // the congestion window.
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100185 if (rtt < minRTT)
186 minRTT = rtt;
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100187 if (rttInfo.congested) {
188 if (rtt < minCongestedRTT)
189 minCongestedRTT = rtt;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100190 }
191
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100192 measurements++;
193 updateCongestion();
194}
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100195
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100196bool Congestion::isCongested()
197{
198 if (getInFlight() < congWindow)
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100199 return false;
200
201 return true;
202}
203
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100204int Congestion::getUncongestedETA()
205{
206 unsigned targetAcked;
207
208 const struct RTTInfo* prevPing;
209 unsigned eta, elapsed;
210 unsigned etaNext, delay;
211
212 std::list<struct RTTInfo>::const_iterator iter;
213
214 targetAcked = lastPosition - congWindow;
215
216 // Simple case?
217 if (lastPong.pos > targetAcked)
218 return 0;
219
220 // No measurements yet?
221 if (baseRTT == (unsigned)-1)
222 return -1;
223
224 prevPing = &lastPong;
225 eta = 0;
226 elapsed = msSince(&lastPongArrival);
227
228 // Walk the ping queue and figure out which one we are waiting for to
229 // get to an uncongested state
230
231 for (iter = pings.begin(); ;++iter) {
232 struct RTTInfo curPing;
233
234 // If we aren't waiting for a pong that will clear the congested
235 // state then we have to estimate the final bit by pretending that
236 // we had a ping just after the last position update.
237 if (iter == pings.end()) {
238 curPing.tv = lastUpdate;
239 curPing.pos = lastPosition;
240 curPing.extra = extraBuffer;
241 } else {
242 curPing = *iter;
243 }
244
245 etaNext = msBetween(&prevPing->tv, &curPing.tv);
246 // Compensate for buffering delays
247 delay = curPing.extra * baseRTT / congWindow;
248 etaNext += delay;
249 delay = prevPing->extra * baseRTT / congWindow;
250 if (delay >= etaNext)
251 etaNext = 0;
252 else
253 etaNext -= delay;
254
255 // Found it?
256 if (curPing.pos > targetAcked) {
257 eta += etaNext * (curPing.pos - targetAcked) / (curPing.pos - prevPing->pos);
258 if (elapsed > eta)
259 return 0;
260 else
261 return eta - elapsed;
262 }
263
264 assert(iter != pings.end());
265
266 eta += etaNext;
267 prevPing = &*iter;
268 }
269}
270
271unsigned Congestion::getExtraBuffer()
272{
273 unsigned elapsed;
274 unsigned consumed;
275
276 if (baseRTT == (unsigned)-1)
277 return 0;
278
279 elapsed = msSince(&lastUpdate);
280 consumed = elapsed * congWindow / baseRTT;
281
282 if (consumed >= extraBuffer)
283 return 0;
284 else
285 return extraBuffer - consumed;
286}
287
288unsigned Congestion::getInFlight()
289{
290 struct RTTInfo nextPong;
291 unsigned etaNext, delay, elapsed, acked;
292
293 // Simple case?
294 if (lastPosition == lastPong.pos)
295 return 0;
296
297 // No measurements yet?
298 if (baseRTT == (unsigned)-1) {
299 if (!pings.empty())
300 return lastPosition - pings.front().pos;
301 return 0;
302 }
303
304 // If we aren't waiting for any pong then we have to estimate things
305 // by pretending that we had a ping just after the last position
306 // update.
307 if (pings.empty()) {
308 nextPong.tv = lastUpdate;
309 nextPong.pos = lastPosition;
310 nextPong.extra = extraBuffer;
311 } else {
312 nextPong = pings.front();
313 }
314
315 // First we need to estimate how many bytes have made it through
316 // completely. Look at the next ping that should arrive and figure
317 // out how far behind it should be and interpolate the positions.
318
319 etaNext = msBetween(&lastPong.tv, &nextPong.tv);
320 // Compensate for buffering delays
321 delay = nextPong.extra * baseRTT / congWindow;
322 etaNext += delay;
323 delay = lastPong.extra * baseRTT / congWindow;
324 if (delay >= etaNext)
325 etaNext = 0;
326 else
327 etaNext -= delay;
328
329 elapsed = msSince(&lastPongArrival);
330
331 // The pong should be here any second. Be optimistic and assume
332 // we can already use its value.
333 if (etaNext <= elapsed)
334 acked = nextPong.pos;
335 else {
336 acked = lastPong.pos;
337 acked += (nextPong.pos - lastPong.pos) * elapsed / etaNext;
338 }
339
340 return lastPosition - acked;
341}
342
343void Congestion::updateCongestion()
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100344{
345 unsigned diff;
346
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100347 // We want at least three measurements to avoid noise
348 if (measurements < 3)
349 return;
350
351 assert(minRTT >= baseRTT);
352 assert(minCongestedRTT >= baseRTT);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100353
354 // The goal is to have a slightly too large congestion window since
355 // a "perfect" one cannot be distinguished from a too small one. This
356 // translates to a goal of a few extra milliseconds of delay.
357
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100358 // First we check all pongs to make sure we're not having a too large
359 // congestion window.
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100360 diff = minRTT - baseRTT;
361
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100362 // FIXME: Should we do slow start?
363 if (diff > 100) {
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100364 // Way too fast
365 congWindow = congWindow * baseRTT / minRTT;
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100366 } else if (diff > 50) {
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100367 // Slightly too fast
368 congWindow -= 4096;
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100369 } else {
370 // Secondly only the "congested" pongs are checked to see if the
371 // window is too small.
372
373 diff = minCongestedRTT - baseRTT;
374
375 if (diff < 5) {
376 // Way too slow
377 congWindow += 8192;
378 } else if (diff < 25) {
379 // Too slow
380 congWindow += 4096;
381 }
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100382 }
383
384 if (congWindow < MINIMUM_WINDOW)
385 congWindow = MINIMUM_WINDOW;
386 if (congWindow > MAXIMUM_WINDOW)
387 congWindow = MAXIMUM_WINDOW;
388
389#ifdef CONGESTION_DEBUG
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100390 vlog.debug("RTT: %d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps",
391 minRTT, baseRTT, congWindow / 1024,
392 congWindow * 8.0 / baseRTT / 1000.0);
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100393#endif
394
Pierre Ossmana99d14d2015-12-13 15:43:46 +0100395 measurements = 0;
396 gettimeofday(&lastAdjustment, NULL);
397 minRTT = minCongestedRTT = -1;
Pierre Ossmanc09e5582015-12-11 20:23:17 +0100398}
399