blob: c50d343a731b4498797569aacb1d298743340f58 [file] [log] [blame]
/*
 * Copyright (C) 2005 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include <utils/String8.h>
18
19#include <utils/Log.h>
20#include <utils/String16.h>
21#include <utils/TextOutput.h>
22#include <utils/threads.h>
23
24#include <private/utils/Static.h>
25
26#include <ctype.h>
27
28namespace android {
29
30// ---------------------------------------------------------------------------
31
// Continuation bytes of a UTF-8 sequence have the form 10xxxxxx: the
// encoder ORs in kByteMark and then ANDs with kByteMask, which keeps the
// mark bit plus the low six payload bits.
static const uint32_t kByteMask = 0xBF;
static const uint32_t kByteMark = 0x80;

// Surrogate code units are not valid UTF-32 characters; these bounds let
// the encoder reject them.
static const uint32_t kUnicodeSurrogateHighStart = 0xD800;
static const uint32_t kUnicodeSurrogateHighEnd   = 0xDBFF;
static const uint32_t kUnicodeSurrogateLowStart  = 0xDC00;
static const uint32_t kUnicodeSurrogateLowEnd    = 0xDFFF;
// The full surrogate range runs from the first high to the last low value.
static const uint32_t kUnicodeSurrogateStart     = kUnicodeSurrogateHighStart;
static const uint32_t kUnicodeSurrogateEnd       = kUnicodeSurrogateLowEnd;

// Bits to OR into the lead byte of a UTF-8 sequence, indexed by the total
// number of bytes in that sequence (indices 0 and 1 contribute nothing).
static const uint32_t kFirstByteMark[] = {
    0x00,   // [0]
    0x00,   // [1]
    0xC0,   // [2]
    0xE0,   // [3]
    0xF0    // [4]
};

// Separator used inside resource paths. Unlike OS_PATH_SEPARATOR it does
// not depend on the host platform.
#define RES_PATH_SEPARATOR '/'
53
54// Return number of utf8 bytes required for the character.
55static size_t utf32_to_utf8_bytes(uint32_t srcChar)
56{
57 size_t bytesToWrite;
58
59 // Figure out how many bytes the result will require.
60 if (srcChar < 0x00000080)
61 {
62 bytesToWrite = 1;
63 }
64 else if (srcChar < 0x00000800)
65 {
66 bytesToWrite = 2;
67 }
68 else if (srcChar < 0x00010000)
69 {
70 if ((srcChar < kUnicodeSurrogateStart)
71 || (srcChar > kUnicodeSurrogateEnd))
72 {
73 bytesToWrite = 3;
74 }
75 else
76 {
77 // Surrogates are invalid UTF-32 characters.
78 return 0;
79 }
80 }
81 // Max code point for Unicode is 0x0010FFFF.
82 else if (srcChar < 0x00110000)
83 {
84 bytesToWrite = 4;
85 }
86 else
87 {
88 // Invalid UTF-32 character.
89 return 0;
90 }
91
92 return bytesToWrite;
93}
94
95// Write out the source character to <dstP>.
96
97static void utf32_to_utf8(uint8_t* dstP, uint32_t srcChar, size_t bytes)
98{
99 dstP += bytes;
100 switch (bytes)
101 { /* note: everything falls through. */
102 case 4: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
103 case 3: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
104 case 2: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
105 case 1: *--dstP = (uint8_t)(srcChar | kFirstByteMark[bytes]);
106 }
107}
108
109// ---------------------------------------------------------------------------
110
111static SharedBuffer* gEmptyStringBuf = NULL;
112static char* gEmptyString = NULL;
113
114extern int gDarwinCantLoadAllObjects;
115int gDarwinIsReallyAnnoying;
116
117static inline char* getEmptyString()
118{
119 gEmptyStringBuf->acquire();
120 return gEmptyString;
121}
122
123void initialize_string8()
124{
125#ifdef LIBUTILS_NATIVE
126 // Bite me, Darwin!
127 gDarwinIsReallyAnnoying = gDarwinCantLoadAllObjects;
128#endif
129
130 SharedBuffer* buf = SharedBuffer::alloc(1);
131 char* str = (char*)buf->data();
132 *str = 0;
133 gEmptyStringBuf = buf;
134 gEmptyString = str;
135}
136
137void terminate_string8()
138{
139 SharedBuffer::bufferFromData(gEmptyString)->release();
140 gEmptyStringBuf = NULL;
141 gEmptyString = NULL;
142}
143
144// ---------------------------------------------------------------------------
145
146static char* allocFromUTF8(const char* in, size_t len)
147{
148 if (len > 0) {
149 SharedBuffer* buf = SharedBuffer::alloc(len+1);
150 LOG_ASSERT(buf, "Unable to allocate shared buffer");
151 if (buf) {
152 char* str = (char*)buf->data();
153 memcpy(str, in, len);
154 str[len] = 0;
155 return str;
156 }
157 return NULL;
158 }
159
160 return getEmptyString();
161}
162
163// Note: not dealing with expanding surrogate pairs.
164static char* allocFromUTF16(const char16_t* in, size_t len)
165{
166 if (len == 0) return getEmptyString();
167
168 size_t bytes = 0;
169 const char16_t* end = in+len;
170 const char16_t* p = in;
171
172 while (p < end) {
173 bytes += utf32_to_utf8_bytes(*p);
174 p++;
175 }
176
177 SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
178 LOG_ASSERT(buf, "Unable to allocate shared buffer");
179 if (buf) {
180 p = in;
181 char* str = (char*)buf->data();
182 char* d = str;
183 while (p < end) {
184 uint32_t c = *p++;
185 size_t len = utf32_to_utf8_bytes(c);
186 utf32_to_utf8((uint8_t*)d, c, len);
187 d += len;
188 }
189 *d = 0;
190
191 return str;
192 }
193
194 return getEmptyString();
195}
196
197// ---------------------------------------------------------------------------
198
199String8::String8()
200 : mString(getEmptyString())
201{
202}
203
204String8::String8(const String8& o)
205 : mString(o.mString)
206{
207 SharedBuffer::bufferFromData(mString)->acquire();
208}
209
210String8::String8(const char* o)
211 : mString(allocFromUTF8(o, strlen(o)))
212{
213 if (mString == NULL) {
214 mString = getEmptyString();
215 }
216}
217
218String8::String8(const char* o, size_t len)
219 : mString(allocFromUTF8(o, len))
220{
221 if (mString == NULL) {
222 mString = getEmptyString();
223 }
224}
225
226String8::String8(const String16& o)
227 : mString(allocFromUTF16(o.string(), o.size()))
228{
229}
230
231String8::String8(const char16_t* o)
232 : mString(allocFromUTF16(o, strlen16(o)))
233{
234}
235
236String8::String8(const char16_t* o, size_t len)
237 : mString(allocFromUTF16(o, len))
238{
239}
240
241String8::~String8()
242{
243 SharedBuffer::bufferFromData(mString)->release();
244}
245
246void String8::setTo(const String8& other)
247{
248 SharedBuffer::bufferFromData(other.mString)->acquire();
249 SharedBuffer::bufferFromData(mString)->release();
250 mString = other.mString;
251}
252
253status_t String8::setTo(const char* other)
254{
255 SharedBuffer::bufferFromData(mString)->release();
256 mString = allocFromUTF8(other, strlen(other));
257 if (mString) return NO_ERROR;
258
259 mString = getEmptyString();
260 return NO_MEMORY;
261}
262
263status_t String8::setTo(const char* other, size_t len)
264{
265 SharedBuffer::bufferFromData(mString)->release();
266 mString = allocFromUTF8(other, len);
267 if (mString) return NO_ERROR;
268
269 mString = getEmptyString();
270 return NO_MEMORY;
271}
272
273status_t String8::setTo(const char16_t* other, size_t len)
274{
275 SharedBuffer::bufferFromData(mString)->release();
276 mString = allocFromUTF16(other, len);
277 if (mString) return NO_ERROR;
278
279 mString = getEmptyString();
280 return NO_MEMORY;
281}
282
283status_t String8::append(const String8& other)
284{
285 const size_t otherLen = other.bytes();
286 if (bytes() == 0) {
287 setTo(other);
288 return NO_ERROR;
289 } else if (otherLen == 0) {
290 return NO_ERROR;
291 }
292
293 return real_append(other.string(), otherLen);
294}
295
296status_t String8::append(const char* other)
297{
298 return append(other, strlen(other));
299}
300
301status_t String8::append(const char* other, size_t otherLen)
302{
303 if (bytes() == 0) {
304 return setTo(other, otherLen);
305 } else if (otherLen == 0) {
306 return NO_ERROR;
307 }
308
309 return real_append(other, otherLen);
310}
311
312status_t String8::real_append(const char* other, size_t otherLen)
313{
314 const size_t myLen = bytes();
315
316 SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
317 ->editResize(myLen+otherLen+1);
318 if (buf) {
319 char* str = (char*)buf->data();
320 mString = str;
321 str += myLen;
322 memcpy(str, other, otherLen);
323 str[otherLen] = '\0';
324 return NO_ERROR;
325 }
326 return NO_MEMORY;
327}
328
329char* String8::lockBuffer(size_t size)
330{
331 SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
332 ->editResize(size+1);
333 if (buf) {
334 char* str = (char*)buf->data();
335 mString = str;
336 return str;
337 }
338 return NULL;
339}
340
341void String8::unlockBuffer()
342{
343 unlockBuffer(strlen(mString));
344}
345
346status_t String8::unlockBuffer(size_t size)
347{
348 if (size != this->size()) {
349 SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
350 ->editResize(size+1);
351 if (buf) {
352 char* str = (char*)buf->data();
353 str[size] = 0;
354 mString = str;
355 return NO_ERROR;
356 }
357 }
358
359 return NO_MEMORY;
360}
361
362ssize_t String8::find(const char* other, size_t start) const
363{
364 size_t len = size();
365 if (start >= len) {
366 return -1;
367 }
368 const char* s = mString+start;
369 const char* p = strstr(s, other);
370 return p ? p-mString : -1;
371}
372
373void String8::toLower()
374{
375 toLower(0, size());
376}
377
378void String8::toLower(size_t start, size_t length)
379{
380 const size_t len = size();
381 if (start >= len) {
382 return;
383 }
384 if (start+length > len) {
385 length = len-start;
386 }
387 char* buf = lockBuffer(len);
388 buf += start;
389 while (length > 0) {
390 *buf = tolower(*buf);
391 buf++;
392 length--;
393 }
394 unlockBuffer(len);
395}
396
397void String8::toUpper()
398{
399 toUpper(0, size());
400}
401
402void String8::toUpper(size_t start, size_t length)
403{
404 const size_t len = size();
405 if (start >= len) {
406 return;
407 }
408 if (start+length > len) {
409 length = len-start;
410 }
411 char* buf = lockBuffer(len);
412 buf += start;
413 while (length > 0) {
414 *buf = toupper(*buf);
415 buf++;
416 length--;
417 }
418 unlockBuffer(len);
419}
420
421TextOutput& operator<<(TextOutput& to, const String8& val)
422{
423 to << val.string();
424 return to;
425}
426
427// ---------------------------------------------------------------------------
428// Path functions
429
430
431void String8::setPathName(const char* name)
432{
433 setPathName(name, strlen(name));
434}
435
436void String8::setPathName(const char* name, size_t len)
437{
438 char* buf = lockBuffer(len);
439
440 memcpy(buf, name, len);
441
442 // remove trailing path separator, if present
443 if (len > 0 && buf[len-1] == OS_PATH_SEPARATOR)
444 len--;
445
446 buf[len] = '\0';
447
448 unlockBuffer(len);
449}
450
451String8 String8::getPathLeaf(void) const
452{
453 const char* cp;
454 const char*const buf = mString;
455
456 cp = strrchr(buf, OS_PATH_SEPARATOR);
457 if (cp == NULL)
458 return String8(*this);
459 else
460 return String8(cp+1);
461}
462
463String8 String8::getPathDir(void) const
464{
465 const char* cp;
466 const char*const str = mString;
467
468 cp = strrchr(str, OS_PATH_SEPARATOR);
469 if (cp == NULL)
470 return String8("");
471 else
472 return String8(str, cp - str);
473}
474
475String8 String8::walkPath(String8* outRemains) const
476{
477 const char* cp;
478 const char*const str = mString;
479 const char* buf = str;
480
481 cp = strchr(buf, OS_PATH_SEPARATOR);
482 if (cp == buf) {
483 // don't include a leading '/'.
484 buf = buf+1;
485 cp = strchr(buf, OS_PATH_SEPARATOR);
486 }
487
488 if (cp == NULL) {
489 String8 res = buf != str ? String8(buf) : *this;
490 if (outRemains) *outRemains = String8("");
491 return res;
492 }
493
494 String8 res(buf, cp-buf);
495 if (outRemains) *outRemains = String8(cp+1);
496 return res;
497}
498
499/*
500 * Helper function for finding the start of an extension in a pathname.
501 *
502 * Returns a pointer inside mString, or NULL if no extension was found.
503 */
504char* String8::find_extension(void) const
505{
506 const char* lastSlash;
507 const char* lastDot;
508 int extLen;
509 const char* const str = mString;
510
511 // only look at the filename
512 lastSlash = strrchr(str, OS_PATH_SEPARATOR);
513 if (lastSlash == NULL)
514 lastSlash = str;
515 else
516 lastSlash++;
517
518 // find the last dot
519 lastDot = strrchr(lastSlash, '.');
520 if (lastDot == NULL)
521 return NULL;
522
523 // looks good, ship it
524 return const_cast<char*>(lastDot);
525}
526
527String8 String8::getPathExtension(void) const
528{
529 char* ext;
530
531 ext = find_extension();
532 if (ext != NULL)
533 return String8(ext);
534 else
535 return String8("");
536}
537
538String8 String8::getBasePath(void) const
539{
540 char* ext;
541 const char* const str = mString;
542
543 ext = find_extension();
544 if (ext == NULL)
545 return String8(*this);
546 else
547 return String8(str, ext - str);
548}
549
550String8& String8::appendPath(const char* name)
551{
552 // TODO: The test below will fail for Win32 paths. Fix later or ignore.
553 if (name[0] != OS_PATH_SEPARATOR) {
554 if (*name == '\0') {
555 // nothing to do
556 return *this;
557 }
558
559 size_t len = length();
560 if (len == 0) {
561 // no existing filename, just use the new one
562 setPathName(name);
563 return *this;
564 }
565
566 // make room for oldPath + '/' + newPath
567 int newlen = strlen(name);
568
569 char* buf = lockBuffer(len+1+newlen);
570
571 // insert a '/' if needed
572 if (buf[len-1] != OS_PATH_SEPARATOR)
573 buf[len++] = OS_PATH_SEPARATOR;
574
575 memcpy(buf+len, name, newlen+1);
576 len += newlen;
577
578 unlockBuffer(len);
579
580 return *this;
581 } else {
582 setPathName(name);
583 return *this;
584 }
585}
586
587String8& String8::convertToResPath()
588{
589#if OS_PATH_SEPARATOR != RES_PATH_SEPARATOR
590 size_t len = length();
591 if (len > 0) {
592 char * buf = lockBuffer(len);
593 for (char * end = buf + len; buf < end; ++buf) {
594 if (*buf == OS_PATH_SEPARATOR)
595 *buf = RES_PATH_SEPARATOR;
596 }
597 unlockBuffer(len);
598 }
599#endif
600 return *this;
601}
602
603
604}; // namespace android