Optional use of UTF-8 strings in resource bundles. Allows the use of UTF-8 for packing resources instead of the default of UTF-16 for Java. When strings are extracted from the ResStringPool, they are converted to UTF-16 and the result is cached for subsequent calls. When using aapt to package, add in the "-8" switch to pack the resources using UTF-8. This will result in the value, key, and type strings as well as the compiled XML string values taking significantly less space in the final application package in most scenarios. Change-Id: I129483f8b3d3b1c5869dced05cb525e494a6c83a

commit: 9a2d83e698b16ec86ad2751b6e7cf103ad645cce [log] [tgz]
author: Kenny Root <kroot@google.com> Fri Dec 04 09:38:48 2009 -0800
committer: Alex Ray <aray@google.com> Tue Jul 30 13:56:52 2013 -0700
tree: 14c45647a106931bd544d517112a1cd7e7f67754
parent: 09b41cbf9fe74a831c0ad883406e03dfe0568d78 [diff] [blame]
diff --git a/libs/utils/String16.cpp b/libs/utils/String16.cpp
index aef67f2..eab7b2b 100644
--- a/libs/utils/String16.cpp
+++ b/libs/utils/String16.cpp

@@ -172,10 +172,6 @@
            : 0);
 }
 
-// ---------------------------------------------------------------------------
-
-namespace android {
-
 static inline size_t
 utf8_char_len(uint8_t ch)
 {
@@ -215,8 +211,38 @@
     //printf("Char at %p: len=%d, utf-16=%p\n", src, length, (void*)result);
 }
 
+/**
+ * Converts the UTF-8 bytes in [src, src + srcLen) to UTF-16, storing at most
+ * dstLen char16_t units in dst.
+ *
+ * Code points above 0xFFFF are stored as a surrogate pair. Conversion stops
+ * when the input is exhausted, when dst is full, when a surrogate pair would
+ * not fit in the space that is left, or when the last UTF-8 sequence is
+ * longer than the remaining input.
+ *
+ * A terminating 0 is stored only if there is still room for it, so the
+ * output is NOT terminated when exactly dstLen units were produced.
+ *
+ * @param src    start of the UTF-8 input
+ * @param srcLen number of input bytes
+ * @param dst    destination buffer
+ * @param dstLen capacity of dst in char16_t units (not bytes)
+ */
+void
+utf8_to_utf16(const uint8_t *src, size_t srcLen,
+        char16_t* dst, const size_t dstLen)
+{
+    const uint8_t* const end = src + srcLen;
+    const char16_t* const dstEnd = dst + dstLen;
+    while (src < end && dst < dstEnd) {
+        const size_t len = utf8_char_len(*src);
+        if (len > (size_t)(end - src)) {
+            // Truncated trailing sequence: decoding it would read past 'end'.
+            break;
+        }
+        uint32_t codepoint = utf8_to_utf32(src, len);
+
+        // Convert the UTF32 codepoint to one or more UTF16 codepoints
+        if (codepoint <= 0xFFFF) {
+            // Single UTF16 character
+            *dst++ = (char16_t) codepoint;
+        } else {
+            // Multiple UTF16 characters with surrogates. The loop condition
+            // only guarantees room for one unit, so check for the second
+            // one before writing either half.
+            if (dstEnd - dst < 2) {
+                break;
+            }
+            codepoint = codepoint - 0x10000;
+            *dst++ = (char16_t) ((codepoint >> 10) + 0xD800);
+            *dst++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
+        }
+
+        src += len;
+    }
+    if (dst < dstEnd) {
+        *dst = 0;
+    }
+}
+
 // ---------------------------------------------------------------------------
 
+namespace android {
+
 static SharedBuffer* gEmptyStringBuf = NULL;
 static char16_t* gEmptyString = NULL;
 
@@ -260,30 +286,14 @@
         p += utf8len;
     }
     
-    SharedBuffer* buf = SharedBuffer::alloc((chars+1)*sizeof(char16_t));
+    size_t bufSize = (chars+1)*sizeof(char16_t);
+    SharedBuffer* buf = SharedBuffer::alloc(bufSize);
     if (buf) {
         p = in;
         char16_t* str = (char16_t*)buf->data();
-        char16_t* d = str;
-        while (p < end) {
-            size_t len = utf8_char_len(*p);
-            uint32_t codepoint = utf8_to_utf32((const uint8_t*)p, len);
-
-            // Convert the UTF32 codepoint to one or more UTF16 codepoints
-            if (codepoint <= 0xFFFF) {
-                // Single UTF16 character
-                *d++ = (char16_t) codepoint;
-            } else {
-                // Multiple UTF16 characters with surrogates
-                codepoint = codepoint - 0x10000;
-                *d++ = (char16_t) ((codepoint >> 10) + 0xD800);
-                *d++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
-            }
-
-            p += len;
-        }
-        *d = 0;
         
+        utf8_to_utf16((const uint8_t*)p, len, str, bufSize);
+
         //printf("Created UTF-16 string from UTF-8 \"%s\":", in);
         //printHexData(1, str, buf->size(), 16, 1);
         //printf("\n");
commit	9a2d83e698b16ec86ad2751b6e7cf103ad645cce	[log] [tgz]
author	Kenny Root <kroot@google.com>	Fri Dec 04 09:38:48 2009 -0800
committer	Alex Ray <aray@google.com>	Tue Jul 30 13:56:52 2013 -0700
tree	14c45647a106931bd544d517112a1cd7e7f67754
parent	09b41cbf9fe74a831c0ad883406e03dfe0568d78 [diff] [blame]