Use UTF-8 in clipboard API
In preparation for better clipboard extensions that can send Unicode
data between the client and server.
diff --git a/common/rfb/CConnection.cxx b/common/rfb/CConnection.cxx
index ce2741e..4e8ea4e 100644
--- a/common/rfb/CConnection.cxx
+++ b/common/rfb/CConnection.cxx
@@ -470,7 +470,7 @@
strFree(serverClipboard);
serverClipboard = NULL;
- serverClipboard = strDup(str);
+ serverClipboard = latin1ToUTF8(str);
handleClipboardAnnounce(true);
}
@@ -516,7 +516,9 @@
void CConnection::sendClipboardData(const char* data)
{
- writer()->writeClientCutText(data);
+ // data is treated as UTF-8 here: it is run through utf8ToLatin1()
+ // and it is the converted copy that is handed to the writer.
+ // NOTE(review): CharArray is presumed to release the converted
+ // buffer when it goes out of scope - confirm.
+ CharArray latin1(utf8ToLatin1(data));
+
+ writer()->writeClientCutText(latin1.buf);
}
void CConnection::refreshFramebuffer()
diff --git a/common/rfb/SConnection.cxx b/common/rfb/SConnection.cxx
index 1cc330d..46f0a85 100644
--- a/common/rfb/SConnection.cxx
+++ b/common/rfb/SConnection.cxx
@@ -306,7 +306,7 @@
strFree(clientClipboard);
clientClipboard = NULL;
- clientClipboard = strDup(str);
+ clientClipboard = latin1ToUTF8(str);
handleClipboardAnnounce(true);
}
@@ -450,7 +450,9 @@
void SConnection::sendClipboardData(const char* data)
{
- writer()->writeServerCutText(data);
+ // data is treated as UTF-8 here: it is run through utf8ToLatin1()
+ // and it is the converted copy that is handed to the writer.
+ // NOTE(review): CharArray is presumed to release the converted
+ // buffer when it goes out of scope - confirm.
+ CharArray latin1(utf8ToLatin1(data));
+
+ writer()->writeServerCutText(latin1.buf);
}
void SConnection::writeFakeColourMap(void)
diff --git a/common/rfb/util.cxx b/common/rfb/util.cxx
index deb68ca..fc4f4ca 100644
--- a/common/rfb/util.cxx
+++ b/common/rfb/util.cxx
@@ -64,6 +64,10 @@
delete [] s;
}
+  // Wide character overload: releases an array obtained from
+  // new wchar_t[]. Passing a null pointer is harmless.
+  void strFree(wchar_t* s)
+  {
+    delete[] s;
+  }
+
bool strSplit(const char* src, const char limiter, char** out1, char** out2, bool fromEnd) {
CharArray out1old, out2old;
@@ -163,6 +167,67 @@
return buffer;
}
+  // Returns a copy of src in which every lone CR and every lone LF has
+  // been expanded to a CR LF pair; existing CR LF pairs are kept as they
+  // are. At most "bytes" bytes of src are examined and a null byte ends
+  // the input early. The copy is null terminated and allocated with
+  // new[].
+  char* convertCRLF(const char* src, size_t bytes)
+  {
+    char* buffer;
+    size_t sz;
+
+    char* out;
+    const char* in;
+    size_t in_len;
+
+    // Always include space for a NULL
+    sz = 1;
+
+    // Compute output size. The remaining length is tested before *in is
+    // read so that nothing past the first "bytes" bytes is touched.
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      sz++;
+
+      if (*in == '\r') {
+        // in+1 is only part of the input if two or more bytes remain
+        if ((in_len < 2) || (*(in+1) != '\n'))
+          sz++;
+      } else if (*in == '\n') {
+        if ((in == src) || (*(in-1) != '\r'))
+          sz++;
+      }
+
+      in++;
+      in_len--;
+    }
+
+    // Alloc
+    buffer = new char[sz];
+    memset(buffer, 0, sz);
+
+    // And convert, using the same tests as the sizing pass above
+    out = buffer;
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      // A lone LF gets a CR in front of it
+      if (*in == '\n') {
+        if ((in == src) || (*(in-1) != '\r'))
+          *out++ = '\r';
+      }
+
+      *out++ = *in;
+
+      // A lone CR gets a LF after it
+      if (*in == '\r') {
+        if ((in_len < 2) || (*(in+1) != '\n'))
+          *out++ = '\n';
+      }
+
+      in++;
+      in_len--;
+    }
+
+    return buffer;
+  }
+
size_t ucs4ToUTF8(unsigned src, char* dst) {
if (src < 0x80) {
*dst++ = src;
@@ -242,6 +307,61 @@
return consumed;
}
+  // Encodes the code point src as UTF-16 in dst, followed by a
+  // terminating null unit, so dst needs room for three units. Returns
+  // the number of units used, not counting the terminator. Surrogate
+  // code points and values above U+10FFFF are replaced by U+FFFD.
+  size_t ucs4ToUTF16(unsigned src, wchar_t* dst) {
+    if ((src < 0xd800) || ((src >= 0xe000) && (src < 0x10000))) {
+      *dst++ = src;
+      *dst++ = L'\0';
+      return 1;
+    } else if ((src >= 0x10000) && (src < 0x110000)) {
+      // A surrogate pair carries the 20 bit offset from U+10000,
+      // ten bits in each unit
+      src -= 0x10000;
+      *dst++ = 0xd800 | ((src >> 10) & 0x03ff);
+      *dst++ = 0xdc00 | (src & 0x03ff);
+      *dst++ = L'\0';
+      return 2;
+    } else {
+      return ucs4ToUTF16(0xfffd, dst);
+    }
+  }
+
+  // Decodes one code point from the UTF-16 sequence at src, looking at
+  // no more than max code units. The code point is stored in *dst and
+  // the number of units consumed is returned, which is 0 only when max
+  // is 0. Invalid or truncated input yields U+FFFD.
+  size_t utf16ToUCS4(const wchar_t* src, size_t max, unsigned* dst) {
+    *dst = 0xfffd;
+
+    if (max == 0)
+      return 0;
+
+    // Anything outside the surrogate range stands for itself
+    if ((*src < 0xd800) || (*src >= 0xe000)) {
+      *dst = *src;
+      return 1;
+    }
+
+    if ((*src & 0xfc00) == 0xdc00) {
+      size_t consumed;
+
+      // Low surrogate without a high one in front of it. Consume the
+      // whole run of stray low surrogates, but nothing else.
+      consumed = 0;
+      while ((max > 0) && ((*src & 0xfc00) == 0xdc00)) {
+        src++;
+        max--;
+        consumed++;
+      }
+
+      return consumed;
+    }
+
+    *dst = *src++;
+    max--;
+
+    // Invalid or truncated sequence?
+    if ((max == 0) || ((*src & 0xfc00) != 0xdc00)) {
+      *dst = 0xfffd;
+      return 1;
+    }
+
+    // The payload of the high surrogate is an offset from U+10000 and
+    // can itself reach bit 16, so it has to be added rather than or:ed
+    *dst = 0x10000 + ((*dst & 0x03ff) << 10);
+    *dst |= *src & 0x3ff;
+
+    return 2;
+  }
+
char* latin1ToUTF8(const char* src, size_t bytes) {
char* buffer;
size_t sz;
@@ -329,6 +449,104 @@
return buffer;
}
+  // Converts UTF-16 to UTF-8. At most "units" code units of src are
+  // read and a null unit ends the input early. The result is a null
+  // terminated string allocated with new[].
+  char* utf16ToUTF8(const wchar_t* src, size_t units)
+  {
+    char* buffer;
+    size_t sz;
+
+    char* out;
+    const wchar_t* in;
+    size_t in_len;
+
+    // Always include space for a NULL
+    sz = 1;
+
+    // Compute output size. The remaining length is tested before *in is
+    // read so that src[units] is never touched.
+    in = src;
+    in_len = units;
+    while ((in_len > 0) && (*in != '\0')) {
+      size_t len;
+      unsigned ucs;
+      char buf[5];
+
+      len = utf16ToUCS4(in, in_len, &ucs);
+      in += len;
+      in_len -= len;
+
+      // Only the length is of interest here, buf is scratch space
+      sz += ucs4ToUTF8(ucs, buf);
+    }
+
+    // Alloc
+    buffer = new char[sz];
+    memset(buffer, 0, sz);
+
+    // And convert
+    out = buffer;
+    in = src;
+    in_len = units;
+    while ((in_len > 0) && (*in != '\0')) {
+      size_t len;
+      unsigned ucs;
+
+      len = utf16ToUCS4(in, in_len, &ucs);
+      in += len;
+      in_len -= len;
+
+      out += ucs4ToUTF8(ucs, out);
+    }
+
+    return buffer;
+  }
+
+  // Converts UTF-8 to UTF-16. At most "bytes" bytes of src are read and
+  // a null byte ends the input early. The result is a null terminated
+  // wide string allocated with new[].
+  wchar_t* utf8ToUTF16(const char* src, size_t bytes)
+  {
+    wchar_t* buffer;
+    size_t sz;
+
+    wchar_t* out;
+    const char* in;
+    size_t in_len;
+
+    // Always include space for a NULL
+    sz = 1;
+
+    // Compute output size. The remaining length is tested before *in is
+    // read so that src[bytes] is never touched.
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      size_t len;
+      unsigned ucs;
+      wchar_t buf[3];
+
+      len = utf8ToUCS4(in, in_len, &ucs);
+      in += len;
+      in_len -= len;
+
+      // Only the length is of interest here, buf is scratch space
+      sz += ucs4ToUTF16(ucs, buf);
+    }
+
+    // Alloc. sz counts wchar_t units while memset() counts bytes, so
+    // scale it or the terminator of an empty result is left unset.
+    buffer = new wchar_t[sz];
+    memset(buffer, 0, sz * sizeof(wchar_t));
+
+    // And convert
+    out = buffer;
+    in = src;
+    in_len = bytes;
+    while ((in_len > 0) && (*in != '\0')) {
+      size_t len;
+      unsigned ucs;
+
+      len = utf8ToUCS4(in, in_len, &ucs);
+      in += len;
+      in_len -= len;
+
+      out += ucs4ToUTF16(ucs, out);
+    }
+
+    return buffer;
+  }
+
unsigned msBetween(const struct timeval *first,
const struct timeval *second)
{
diff --git a/common/rfb/util.h b/common/rfb/util.h
index 7bd5cc0..8503519 100644
--- a/common/rfb/util.h
+++ b/common/rfb/util.h
@@ -68,6 +68,7 @@
char* strDup(const char* s);
void strFree(char* s);
+ void strFree(wchar_t* s);
// Returns true if split successful. Returns false otherwise.
// ALWAYS *copies* first part of string to out1 buffer.
@@ -87,6 +88,7 @@
// Makes sure line endings are in a certain format
char* convertLF(const char* src, size_t bytes = (size_t)-1);
+ char* convertCRLF(const char* src, size_t bytes = (size_t)-1);
// Convertions between various Unicode formats. The returned strings are
// always null terminated and must be freed using strFree().
@@ -94,9 +96,15 @@
size_t ucs4ToUTF8(unsigned src, char* dst);
size_t utf8ToUCS4(const char* src, size_t max, unsigned* dst);
+ size_t ucs4ToUTF16(unsigned src, wchar_t* dst);
+ size_t utf16ToUCS4(const wchar_t* src, size_t max, unsigned* dst);
+
char* latin1ToUTF8(const char* src, size_t bytes = (size_t)-1);
char* utf8ToLatin1(const char* src, size_t bytes = (size_t)-1);
+ char* utf16ToUTF8(const wchar_t* src, size_t units = (size_t)-1);
+ wchar_t* utf8ToUTF16(const char* src, size_t bytes = (size_t)-1);
+
// HELPER functions for timeout handling
// soonestTimeout() is a function to help work out the soonest of several