Use UTF-8 in clipboard API
In preparation for better clipboard extensions that can send Unicode
data between the client and server.
diff --git a/common/rfb/util.cxx b/common/rfb/util.cxx
index deb68ca..fc4f4ca 100644
--- a/common/rfb/util.cxx
+++ b/common/rfb/util.cxx
@@ -64,6 +64,10 @@
delete [] s;
}
+ void strFree(wchar_t* s) {  // wide-character overload: releases the array s
+ delete [] s;  // array form of delete; a null s is a no-op
+ }
+
bool strSplit(const char* src, const char limiter, char** out1, char** out2, bool fromEnd) {
CharArray out1old, out2old;
@@ -163,6 +167,67 @@
return buffer;
}
+ // Returns a new[]-allocated, NUL-terminated copy of src in which every lone
+ // CR or lone LF has been expanded to CR LF. At most `bytes` bytes of src are
+ // read, and conversion stops early at a NUL. The caller owns the result and
+ // releases it with delete [].
+ char* convertCRLF(const char* src, size_t bytes)
+ {
+   char* buffer;
+   char* out;
+   const char* in;
+   size_t in_len;
+   size_t sz;
+
+   // Always include space for a NUL
+   sz = 1;
+
+   // Pass 1: compute the output size. The remaining length is tested
+   // before *in so that src[bytes] is never read.
+   in = src;
+   in_len = bytes;
+   while ((in_len > 0) && (*in != '\0')) {
+     sz++;
+
+     if (*in == '\r') {
+       // A CR in the last readable byte has no LF after it
+       if ((in_len < 2) || (*(in+1) != '\n'))
+         sz++;
+     } else if (*in == '\n') {
+       if ((in == src) || (*(in-1) != '\r'))
+         sz++;
+     }
+
+     in++;
+     in_len--;
+   }
+
+   // Alloc
+   buffer = new char[sz];
+   memset(buffer, 0, sz);
+
+   // Pass 2: convert, making the same decisions as pass 1
+   out = buffer;
+   in = src;
+   in_len = bytes;
+   while ((in_len > 0) && (*in != '\0')) {
+     // Lone LF: emit the missing CR first
+     if ((*in == '\n') && ((in == src) || (*(in-1) != '\r')))
+       *out++ = '\r';
+
+     *out++ = *in;
+
+     // Lone CR: emit the missing LF afterwards
+     if ((*in == '\r') && ((in_len < 2) || (*(in+1) != '\n')))
+       *out++ = '\n';
+
+     in++;
+     in_len--;
+   }
+
+   return buffer;
+ }
+
size_t ucs4ToUTF8(unsigned src, char* dst) {
if (src < 0x80) {
*dst++ = src;
@@ -242,6 +307,61 @@
return consumed;
}
+ // Writes src to dst as NUL-terminated UTF-16 (up to 3 units); returns units used, NUL excluded.
+ size_t ucs4ToUTF16(unsigned src, wchar_t* dst) {
+   size_t len = 0;
+   if ((src >= 0x110000) || ((src >= 0xd800) && (src < 0xe000)))
+     src = 0xfffd;  // out-of-range input and surrogate values are not characters
+   if (src >= 0x10000) {
+     src -= 0x10000;  // a pair carries the offset above the BMP, 10 bits per unit
+     dst[len++] = 0xd800 | ((src >> 10) & 0x03ff);
+     src = 0xdc00 | (src & 0x03ff);  // src now holds the low surrogate
+   }
+   dst[len++] = src;
+   dst[len] = L'\0';
+   return len;
+ }
+
+ // Decodes one code point from up to max UTF-16 units at src into *dst; returns
+ // the units consumed (0 only when max is 0). Malformed input yields U+FFFD.
+ size_t utf16ToUCS4(const wchar_t* src, size_t max, unsigned* dst) {
+   *dst = 0xfffd;
+
+   if (max == 0)
+     return 0;
+
+   // Anything outside the surrogate range stands for itself
+   if ((*src < 0xd800) || (*src >= 0xe000)) {
+     *dst = *src;
+     return 1;
+   }
+
+   if ((*src & 0xfc00) == 0xdc00) {
+     size_t consumed = 0;
+     // Stray low surrogate: skip the whole run of them, but nothing else
+     while ((max > 0) && ((*src & 0xfc00) == 0xdc00)) {
+       src++;
+       max--;
+       consumed++;
+     }
+
+     return consumed;
+   }
+
+   // High surrogate: only valid when a low surrogate follows
+   *dst = *src++;
+   max--;
+   if ((max == 0) || ((*src & 0xfc00) != 0xdc00)) {
+     *dst = 0xfffd;
+     return 1;
+   }
+
+   // Add rather than OR: the pair encodes an offset from 0x10000
+   *dst = 0x10000 + ((*dst & 0x03ff) << 10);
+   *dst |= *src & 0x3ff;
+   return 2;
+ }
+
char* latin1ToUTF8(const char* src, size_t bytes) {
char* buffer;
size_t sz;
@@ -329,6 +449,104 @@
return buffer;
}
+ // Converts up to `units` UTF-16 units from src (stopping early at a NUL) into
+ // a NUL-terminated UTF-8 string allocated with new[]; release with delete [].
+ char* utf16ToUTF8(const wchar_t* src, size_t units)
+ {
+   char* buffer;
+   char* out;
+   const wchar_t* in;
+   size_t in_len;
+   size_t sz;
+
+   // Always include space for a NUL
+   sz = 1;
+
+   // Compute output size. The remaining length is tested before *in so
+   // that src[units] is never read.
+   in = src;
+   in_len = units;
+   while ((in_len > 0) && (*in != '\0')) {
+     size_t len;
+     unsigned ucs;
+     char buf[5];  // scratch; only the count returned by ucs4ToUTF8 is used
+
+     len = utf16ToUCS4(in, in_len, &ucs);
+     in += len;
+     in_len -= len;
+     sz += ucs4ToUTF8(ucs, buf);
+   }
+
+   // Alloc
+   buffer = new char[sz];
+   memset(buffer, 0, sz);
+
+   // And convert
+   out = buffer;
+   in = src;
+   in_len = units;
+   while ((in_len > 0) && (*in != '\0')) {
+     size_t len;
+     unsigned ucs;
+
+     len = utf16ToUCS4(in, in_len, &ucs);
+     in += len;
+     in_len -= len;
+     out += ucs4ToUTF8(ucs, out);
+   }
+
+   return buffer;
+ }
+
+ // Converts up to `bytes` bytes of UTF-8 from src (stopping early at a NUL) into
+ // a NUL-terminated UTF-16 string allocated with new[]; release with strFree().
+ wchar_t* utf8ToUTF16(const char* src, size_t bytes)
+ {
+   wchar_t* buffer;
+   wchar_t* out;
+   const char* in;
+   size_t in_len;
+   size_t sz;
+
+   // Always include space for a NUL
+   sz = 1;
+
+   // Compute output size. The remaining length is tested before *in so
+   // that src[bytes] is never read.
+   in = src;
+   in_len = bytes;
+   while ((in_len > 0) && (*in != '\0')) {
+     size_t len;
+     unsigned ucs;
+     wchar_t buf[3];  // scratch; ucs4ToUTF16 writes up to 2 units plus a NUL
+
+     len = utf8ToUCS4(in, in_len, &ucs);
+     in += len;
+     in_len -= len;
+     sz += ucs4ToUTF16(ucs, buf);
+   }
+
+   // Alloc; memset counts bytes, so scale the element count
+   buffer = new wchar_t[sz];
+   memset(buffer, 0, sz * sizeof(wchar_t));
+
+   // And convert
+   out = buffer;
+   in = src;
+   in_len = bytes;
+   while ((in_len > 0) && (*in != '\0')) {
+     size_t len;
+     unsigned ucs;
+
+     len = utf8ToUCS4(in, in_len, &ucs);
+     in += len;
+     in_len -= len;
+     out += ucs4ToUTF16(ucs, out);
+   }
+
+   return buffer;
+ }
+
unsigned msBetween(const struct timeval *first,
const struct timeval *second)
{