Add UTF-8 to/from ISO 8859-1 conversion routines
We convert between UTF-8 and ISO 8859-1 (Latin-1) in several places,
so create some common routines for this.
diff --git a/common/rfb/util.cxx b/common/rfb/util.cxx
index f43a945..deb68ca 100644
--- a/common/rfb/util.cxx
+++ b/common/rfb/util.cxx
@@ -163,6 +163,172 @@
return buffer;
}
+ // Encodes the Unicode code point src as UTF-8 into dst and appends a
+ // terminating NUL byte.
+ //
+ // dst must have room for at least 5 bytes: up to 4 bytes of UTF-8 plus
+ // the terminator. Code points that have no well-formed UTF-8 encoding
+ // (the UTF-16 surrogate range U+D800..U+DFFF and anything above
+ // U+10FFFF) are encoded as U+FFFD REPLACEMENT CHARACTER instead.
+ //
+ // Returns the number of bytes written, not counting the terminator.
+ size_t ucs4ToUTF8(unsigned src, char* dst) {
+   // Surrogates are reserved for UTF-16 and are ill-formed in UTF-8, and
+   // nothing above U+10FFFF is a valid code point
+   if (((src >= 0xd800) && (src < 0xe000)) || (src >= 0x110000))
+     src = 0xfffd;
+
+   if (src < 0x80) {
+     // One byte: 0xxxxxxx
+     *dst++ = src;
+     *dst++ = '\0';
+     return 1;
+   } else if (src < 0x800) {
+     // Two bytes: 110xxxxx 10xxxxxx
+     *dst++ = 0xc0 | (src >> 6);
+     *dst++ = 0x80 | (src & 0x3f);
+     *dst++ = '\0';
+     return 2;
+   } else if (src < 0x10000) {
+     // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+     *dst++ = 0xe0 | (src >> 12);
+     *dst++ = 0x80 | ((src >> 6) & 0x3f);
+     *dst++ = 0x80 | (src & 0x3f);
+     *dst++ = '\0';
+     return 3;
+   } else {
+     // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+     // (src is at most 0x10ffff here)
+     *dst++ = 0xf0 | (src >> 18);
+     *dst++ = 0x80 | ((src >> 12) & 0x3f);
+     *dst++ = 0x80 | ((src >> 6) & 0x3f);
+     *dst++ = 0x80 | (src & 0x3f);
+     *dst++ = '\0';
+     return 4;
+   }
+ }
+
+ // Decodes a single UTF-8 sequence from the first max bytes of src and
+ // stores the resulting code point in *dst.
+ //
+ // If the sequence is invalid or cut short, *dst is set to U+FFFD
+ // REPLACEMENT CHARACTER. A lead byte followed by something other than
+ // the expected continuation bytes consumes only the bytes that were
+ // accepted, so the offending byte is decoded again on the next call. A
+ // stray continuation byte (or an illegal lead byte) consumes itself and
+ // any continuation bytes directly following it.
+ //
+ // Note that overlong encodings and encoded surrogates are not rejected.
+ //
+ // Returns the number of bytes consumed from src; this is 0 only when
+ // max is 0.
+ size_t utf8ToUCS4(const char* src, size_t max, unsigned* dst) {
+   size_t count, consumed;
+
+   *dst = 0xfffd;
+
+   if (max == 0)
+     return 0;
+
+   consumed = 1;
+
+   // The lead byte tells us how many continuation bytes (count) follow
+   if ((*src & 0x80) == 0) {
+     *dst = *src;
+     count = 0;
+   } else if ((*src & 0xe0) == 0xc0) {
+     *dst = *src & 0x1f;
+     count = 1;
+   } else if ((*src & 0xf0) == 0xe0) {
+     *dst = *src & 0x0f;
+     count = 2;
+   } else if ((*src & 0xf8) == 0xf0) {
+     *dst = *src & 0x07;
+     count = 3;
+   } else {
+     // Invalid sequence, consume all continuation characters
+     src++;
+     max--;
+     while ((max-- > 0) && ((*src++ & 0xc0) == 0x80))
+       consumed++;
+     return consumed;
+   }
+
+   src++;
+   max--;
+
+   while (count--) {
+     // Invalid or truncated sequence?
+     if ((max == 0) || ((*src & 0xc0) != 0x80)) {
+       *dst = 0xfffd;
+       return consumed;
+     }
+
+     *dst <<= 6;
+     *dst |= *src & 0x3f;
+
+     // The continuation byte is part of this sequence, so it must be
+     // counted or the caller will try to decode it a second time
+     consumed++;
+     src++;
+     max--;
+   }
+
+   return consumed;
+ }
+
+ // Converts the ISO 8859-1 (Latin-1) string src to UTF-8. At most bytes
+ // bytes of src are read, and conversion stops early at the first NUL
+ // byte.
+ //
+ // Returns a NUL-terminated buffer allocated with new[]; the caller is
+ // expected to release it with delete[].
+ char* latin1ToUTF8(const char* src, size_t bytes) {
+   char* buffer;
+   size_t sz;
+
+   char* out;
+   const char* in;
+   size_t in_len;
+
+   // Always include space for a NULL
+   sz = 1;
+
+   // Compute output size
+   in = src;
+   in_len = bytes;
+   // The length is checked first so that we never read past the end of
+   // a src that isn't NUL-terminated
+   while ((in_len > 0) && (*in != '\0')) {
+     char buf[5];
+     // Read via unsigned char: plain char may be signed, and a negative
+     // value would turn into a huge, invalid code point
+     sz += ucs4ToUTF8(*(const unsigned char*)in, buf);
+     in++;
+     in_len--;
+   }
+
+   // Alloc
+   buffer = new char[sz];
+   memset(buffer, 0, sz);
+
+   // And convert
+   out = buffer;
+   in = src;
+   in_len = bytes;
+   while ((in_len > 0) && (*in != '\0')) {
+     // ucs4ToUTF8() also writes a NUL after the character; it is either
+     // overwritten by the next character or ends up as the terminator
+     out += ucs4ToUTF8(*(const unsigned char*)in, out);
+     in++;
+     in_len--;
+   }
+
+   return buffer;
+ }
+
+ // Converts the UTF-8 string src to ISO 8859-1 (Latin-1). At most bytes
+ // bytes of src are read, and conversion stops early at the first NUL
+ // byte.
+ //
+ // Every decoded code point produces exactly one output byte. Code
+ // points above U+00FF, which includes the U+FFFD that utf8ToUCS4()
+ // reports for invalid input, are replaced by '?'.
+ //
+ // Returns a NUL-terminated buffer allocated with new[]; the caller is
+ // expected to release it with delete[].
+ char* utf8ToLatin1(const char* src, size_t bytes) {
+   char* buffer;
+   size_t sz;
+
+   char* out;
+   const char* in;
+   size_t in_len;
+
+   // Always include space for a NULL
+   sz = 1;
+
+   // Compute output size
+   in = src;
+   in_len = bytes;
+   // The length is checked first so that we never read past the end of
+   // a src that isn't NUL-terminated
+   while ((in_len > 0) && (*in != '\0')) {
+     size_t len;
+     unsigned ucs;
+
+     // Only the length matters here; the code point itself is unused
+     len = utf8ToUCS4(in, in_len, &ucs);
+     in += len;
+     in_len -= len;
+     sz++;
+   }
+
+   // Alloc
+   buffer = new char[sz];
+   memset(buffer, 0, sz);
+
+   // And convert
+   out = buffer;
+   in = src;
+   in_len = bytes;
+   while ((in_len > 0) && (*in != '\0')) {
+     size_t len;
+     unsigned ucs;
+
+     len = utf8ToUCS4(in, in_len, &ucs);
+     in += len;
+     in_len -= len;
+
+     // Latin-1 only covers U+0000..U+00FF
+     if (ucs > 0xff)
+       *out++ = '?';
+     else
+       *out++ = (unsigned char)ucs;
+   }
+
+   return buffer;
+ }
+
unsigned msBetween(const struct timeval *first,
const struct timeval *second)
{