blob: 8436d49bedd10b636abbbc533d538e207d16f3e9 [file] [log] [blame]
Narayan Kamath7462f022013-11-21 13:05:04 +00001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
20#include "ziparchive/zip_archive.h"
21
22#include <zlib.h>
23
24#include <assert.h>
25#include <errno.h>
26#include <limits.h>
27#include <log/log.h>
28#include <fcntl.h>
29#include <stdlib.h>
30#include <string.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000031#include <unistd.h>
Narayan Kamatheaf98852013-12-11 14:51:51 +000032#include <utils/FileMap.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000033
34#include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd
35
// This is for Windows. If we don't open a file in binary mode, weird
// things will happen.
38#ifndef O_BINARY
39#define O_BINARY 0
40#endif
41
42/*
43 * Zip file constants.
44 */
// End Of Central Directory (EOCD) record layout.
static const uint32_t kEOCDSignature = 0x06054b50;
// Size of the fixed part of the EOCD record: the 4 byte signature plus the
// 18 bytes of fields that follow it. The variable-length archive comment is
// not included. Every field offset below must lie inside this length.
static const uint32_t kEOCDLen = 22;
static const uint32_t kEOCDNumEntries = 8;   // offset to #of entries in file
static const uint32_t kEOCDSize = 12;        // size of the central directory
static const uint32_t kEOCDFileOffset = 16;  // offset to central directory

static const uint32_t kMaxCommentLen = 65535;  // longest possible in ushort
// Furthest distance from the end of the file at which an EOCD can start.
static const uint32_t kMaxEOCDSearch = (kMaxCommentLen + kEOCDLen);
53
// Local File Header (LFH) layout. Offsets are relative to the signature.
static const uint32_t kLFHSignature = 0x04034b50;
static const uint32_t kLFHLen       = 30;  // excluding variable-len fields
static const uint32_t kLFHGPBFlags  = 6;   // general purpose bit flags
static const uint32_t kLFHCRC       = 14;  // offset to CRC
static const uint32_t kLFHCompLen   = 18;  // offset to compressed length
static const uint32_t kLFHUncompLen = 22;  // offset to uncompressed length
static const uint32_t kLFHNameLen   = 26;  // offset to filename length
static const uint32_t kLFHExtraLen  = 28;  // offset to extra length

// Central Directory Entry (CDE) layout. Offsets are relative to the signature.
static const uint32_t kCDESignature   = 0x02014b50;
static const uint32_t kCDELen         = 46;  // excluding variable-len fields
static const uint32_t kCDEMethod      = 10;  // offset to compression method
static const uint32_t kCDEModWhen     = 12;  // offset to modification timestamp
static const uint32_t kCDECRC         = 16;  // offset to entry CRC
static const uint32_t kCDECompLen     = 20;  // offset to compressed length
static const uint32_t kCDEUncompLen   = 24;  // offset to uncompressed length
static const uint32_t kCDENameLen     = 28;  // offset to filename length
static const uint32_t kCDEExtraLen    = 30;  // offset to extra length
static const uint32_t kCDECommentLen  = 32;  // offset to comment length
static const uint32_t kCDELocalOffset = 42;  // offset to local hdr

// Data Descriptor (DD) layout. The leading signature is *OPTIONAL*; the
// field offsets below are relative to the first byte after it (if present).
static const uint32_t kDDOptSignature = 0x08074b50;
static const uint32_t kDDSignatureLen = 4;
static const uint32_t kDDLen          = 12;
static const uint32_t kDDMaxLen       = 16;  // 16 bytes with a signature, 12 without
static const uint32_t kDDCrc32        = 0;   // offset to crc32
static const uint32_t kDDCompLen      = 4;   // offset to compressed length
static const uint32_t kDDUncompLen    = 8;   // offset to uncompressed length

// General purpose bit flag that signifies that the entry has a DD.
static const uint32_t kGPBDDFlagMask = 0x0008;

static const uint32_t kMaxErrorLen = 1024;
86
// Human readable descriptions of the error codes defined below. The
// message for error code |e| (a negative value) is stored at index |-e|;
// index 0 is the fallback text for unrecognised codes.
static const char* kErrorMessages[] = {
  /*   0 */ "Unknown return code.",
  /*  -1 */ "Iteration ended",
  /*  -2 */ "Zlib error",
  /*  -3 */ "Invalid file",
  /*  -4 */ "Invalid handle",
  /*  -5 */ "Duplicate entries in archive",
  /*  -6 */ "Empty archive",
  /*  -7 */ "Entry not found",
  /*  -8 */ "Invalid offset",
  /*  -9 */ "Inconsistent information",
  /* -10 */ "Invalid entry name",
  /* -11 */ "I/O Error",
  /* -12 */ "File mapping failed"
};
102
// Error codes are the negative integers strictly between the two bounds
// below.
static const int32_t kErrorMessageUpperBound = 0;

// Returned by Next() once no further entry is available.
static const int32_t kIterationEnd = -1;

// Zlib reported an error while inflating a stream from this file; this
// usually indicates file corruption.
static const int32_t kZlibError = -2;

// The input cannot be processed as a zip archive, usually because it is
// too small, too large or does not have a valid signature.
static const int32_t kInvalidFile = -3;

// An invalid iteration / ziparchive handle was passed in as an input
// argument.
static const int32_t kInvalidHandle = -4;

// The archive contains two (or possibly more) entries with the same name.
static const int32_t kDuplicateEntry = -5;

// The archive contains no entries.
static const int32_t kEmptyArchive = -6;

// The requested entry is not present in the archive.
static const int32_t kEntryNotFound = -7;

// The archive contains an invalid local file header pointer.
static const int32_t kInvalidOffset = -8;

// The archive contains inconsistent entry information: either the central
// directory and the local file header disagree, or the actual uncompressed
// length or crc32 does not match the declared value.
static const int32_t kInconsistentInformation = -9;

// An invalid entry name was encountered.
static const int32_t kInvalidEntryName = -10;

// An I/O related system call (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

// The central directory or the entry contents could not be mmapped.
static const int32_t kMmapFailed = -12;

static const int32_t kErrorMessageLowerBound = -13;

// Debug name given to temporary file mappings.
static const char kTempMappingFileName[] = "zip: ExtractFileToFile";
Narayan Kamath7462f022013-11-21 13:05:04 +0000150
151/*
152 * A Read-only Zip archive.
153 *
154 * We want "open" and "find entry by name" to be fast operations, and
155 * we want to use as little memory as possible. We memory-map the zip
156 * central directory, and load a hash table with pointers to the filenames
157 * (which aren't null-terminated). The other fields are at a fixed offset
158 * from the filename, so we don't need to extract those (but we do need
159 * to byte-read and endian-swap them every time we want them).
160 *
161 * It's possible that somebody has handed us a massive (~1GB) zip archive,
162 * so we can't expect to mmap the entire file.
163 *
164 * To speed comparisons when doing a lookup by name, we could make the mapping
165 * "private" (copy-on-write) and null-terminate the filenames after verifying
166 * the record structure. However, this requires a private mapping of
167 * every page that the Central Directory touches. Easier to tuck a copy
168 * of the string length into the hash table entry.
169 */
170struct ZipArchive {
171 /* open Zip archive */
172 int fd;
173
174 /* mapped central directory area */
175 off64_t directory_offset;
Narayan Kamatheaf98852013-12-11 14:51:51 +0000176 android::FileMap* directory_map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000177
178 /* number of entries in the Zip archive */
179 uint16_t num_entries;
180
181 /*
182 * We know how many entries are in the Zip archive, so we can have a
183 * fixed-size hash table. We define a load factor of 0.75 and overallocat
184 * so the maximum number entries can never be higher than
185 * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
186 */
187 uint32_t hash_table_size;
188 ZipEntryName* hash_table;
189};
190
191// Returns 0 on success and negative values on failure.
Narayan Kamatheaf98852013-12-11 14:51:51 +0000192static android::FileMap* MapFileSegment(const int fd, const off64_t start,
193 const size_t length, const bool read_only,
194 const char* debug_file_name) {
195 android::FileMap* file_map = new android::FileMap;
196 const bool success = file_map->create(debug_file_name, fd, start, length, read_only);
197 if (!success) {
198 file_map->release();
199 return NULL;
Narayan Kamath7462f022013-11-21 13:05:04 +0000200 }
201
Narayan Kamatheaf98852013-12-11 14:51:51 +0000202 return file_map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000203}
204
205static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
206 static const uint32_t kBufSize = 32768;
207 uint8_t buf[kBufSize];
208
209 uint32_t count = 0;
210 uint64_t crc = 0;
Narayan Kamath58aaf462013-12-10 16:47:14 +0000211 while (count < length) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000212 uint32_t remaining = length - count;
213
214 // Safe conversion because kBufSize is narrow enough for a 32 bit signed
215 // value.
216 ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
217 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
218
219 if (actual != get_size) {
220 ALOGW("CopyFileToFile: copy read failed (%d vs %zd)",
221 (int) actual, get_size);
222 return kIoError;
223 }
224
225 memcpy(begin + count, buf, get_size);
226 crc = crc32(crc, buf, get_size);
227 count += get_size;
228 }
229
230 *crc_out = crc;
231
232 return 0;
233}
234
/*
 * Returns the smallest power of 2 that is greater than or equal to |val|.
 *
 * All bits below the highest set bit of (val - 1) are filled in by OR-ing
 * the value with progressively larger right shifts of itself; adding one
 * then yields the power of 2. An input of 0 wraps around and yields 0.
 *
 * Technique from http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  uint32_t smeared = val - 1;
  for (uint32_t shift = 1; shift <= 16; shift <<= 1) {
    smeared |= smeared >> shift;
  }

  return smeared + 1;
}
251
/*
 * Hashes the first |len| bytes of |str| (which need not be null-terminated)
 * using the multiply-by-31 polynomial hash. Unsigned arithmetic wraps on
 * overflow.
 */
static uint32_t ComputeHash(const char* str, uint16_t len) {
  uint32_t hash = 0;

  for (uint16_t i = 0; i < len; ++i) {
    hash = hash * 31 + str[i];
  }

  return hash;
}
261
262/*
263 * Convert a ZipEntry to a hash table index, verifying that it's in a
264 * valid range.
265 */
266static int64_t EntryToIndex(const ZipEntryName* hash_table,
267 const uint32_t hash_table_size,
268 const char* name, uint16_t length) {
269 const uint32_t hash = ComputeHash(name, length);
270
271 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
272 uint32_t ent = hash & (hash_table_size - 1);
273 while (hash_table[ent].name != NULL) {
274 if (hash_table[ent].name_length == length &&
275 memcmp(hash_table[ent].name, name, length) == 0) {
276 return ent;
277 }
278
279 ent = (ent + 1) & (hash_table_size - 1);
280 }
281
282 ALOGV("Zip: Unable to find entry %.*s", name_length, name);
283 return kEntryNotFound;
284}
285
286/*
287 * Add a new entry to the hash table.
288 */
289static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
290 const char* name, uint16_t length) {
291 const uint64_t hash = ComputeHash(name, length);
292 uint32_t ent = hash & (hash_table_size - 1);
293
294 /*
295 * We over-allocated the table, so we're guaranteed to find an empty slot.
296 * Further, we guarantee that the hashtable size is not 0.
297 */
298 while (hash_table[ent].name != NULL) {
299 if (hash_table[ent].name_length == length &&
300 memcmp(hash_table[ent].name, name, length) == 0) {
301 // We've found a duplicate entry. We don't accept it
302 ALOGW("Zip: Found duplicate entry %.*s", length, name);
303 return kDuplicateEntry;
304 }
305 ent = (ent + 1) & (hash_table_size - 1);
306 }
307
308 hash_table[ent].name = name;
309 hash_table[ent].name_length = length;
310 return 0;
311}
312
/*
 * Decodes the 16 bit little-endian value stored at |src|.
 */
static uint16_t get2LE(const uint8_t* src) {
  const uint16_t low = src[0];
  const uint16_t high = src[1];

  return (high << 8) | low;
}
319
/*
 * Decodes the 32 bit little-endian value stored at |src|.
 *
 * Each byte is widened to uint32_t before it is shifted. Without the
 * cast the byte is promoted to (signed) int, and shifting a value of
 * 0x80 or more left by 24 would overflow into the sign bit.
 */
static uint32_t get4LE(const uint8_t* src) {
  uint32_t result;

  result = (uint32_t) src[0];
  result |= (uint32_t) src[1] << 8;
  result |= (uint32_t) src[2] << 16;
  result |= (uint32_t) src[3] << 24;

  return result;
}
333
334static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
335 ZipArchive* archive, off64_t file_length,
336 uint32_t read_amount, uint8_t* scan_buffer) {
337 const off64_t search_start = file_length - read_amount;
338
339 if (lseek64(fd, search_start, SEEK_SET) != search_start) {
340 ALOGW("Zip: seek %lld failed: %s", search_start, strerror(errno));
341 return kIoError;
342 }
343 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scan_buffer, read_amount));
344 if (actual != (ssize_t) read_amount) {
345 ALOGW("Zip: read %zd failed: %s", read_amount, strerror(errno));
346 return kIoError;
347 }
348
349 /*
350 * Scan backward for the EOCD magic. In an archive without a trailing
351 * comment, we'll find it on the first try. (We may want to consider
352 * doing an initial minimal read; if we don't find it, retry with a
353 * second read as above.)
354 */
355 int i;
356 for (i = read_amount - kEOCDLen; i >= 0; i--) {
357 if (scan_buffer[i] == 0x50 && get4LE(&scan_buffer[i]) == kEOCDSignature) {
358 ALOGV("+++ Found EOCD at buf+%d", i);
359 break;
360 }
361 }
362 if (i < 0) {
363 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
364 return kInvalidFile;
365 }
366
367 const off64_t eocd_offset = search_start + i;
368 const uint8_t* eocd_ptr = scan_buffer + i;
369
370 assert(eocd_offset < file_length);
371
372 /*
373 * Grab the CD offset and size, and the number of entries in the
374 * archive. Verify that they look reasonable. Widen dir_size and
375 * dir_offset to the file offset type.
376 */
377 const uint16_t num_entries = get2LE(eocd_ptr + kEOCDNumEntries);
378 const off64_t dir_size = get4LE(eocd_ptr + kEOCDSize);
379 const off64_t dir_offset = get4LE(eocd_ptr + kEOCDFileOffset);
380
381 if (dir_offset + dir_size > eocd_offset) {
382 ALOGW("Zip: bad offsets (dir %lld, size %lld, eocd %lld)",
383 dir_offset, dir_size, eocd_offset);
384 return kInvalidOffset;
385 }
386 if (num_entries == 0) {
387 ALOGW("Zip: empty archive?");
388 return kEmptyArchive;
389 }
390
391 ALOGV("+++ num_entries=%d dir_size=%d dir_offset=%d", num_entries, dir_size,
392 dir_offset);
393
394 /*
395 * It all looks good. Create a mapping for the CD, and set the fields
396 * in archive.
397 */
Narayan Kamatheaf98852013-12-11 14:51:51 +0000398 android::FileMap* map = MapFileSegment(fd, dir_offset, dir_size,
399 true /* read only */, debug_file_name);
400 if (map == NULL) {
401 archive->directory_map = NULL;
402 return kMmapFailed;
Narayan Kamath7462f022013-11-21 13:05:04 +0000403 }
404
Narayan Kamatheaf98852013-12-11 14:51:51 +0000405 archive->directory_map = map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000406 archive->num_entries = num_entries;
407 archive->directory_offset = dir_offset;
408
409 return 0;
410}
411
412/*
413 * Find the zip Central Directory and memory-map it.
414 *
415 * On success, returns 0 after populating fields from the EOCD area:
416 * directory_offset
417 * directory_map
418 * num_entries
419 */
420static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
421 ZipArchive* archive) {
422
423 // Test file length. We use lseek64 to make sure the file
424 // is small enough to be a zip file (Its size must be less than
425 // 0xffffffff bytes).
426 off64_t file_length = lseek64(fd, 0, SEEK_END);
427 if (file_length == -1) {
428 ALOGV("Zip: lseek on fd %d failed", fd);
429 return kInvalidFile;
430 }
431
432 if (file_length > (off64_t) 0xffffffff) {
433 ALOGV("Zip: zip file too long %d", file_length);
434 return kInvalidFile;
435 }
436
437 if (file_length < (int64_t) kEOCDLen) {
438 ALOGV("Zip: length %ld is too small to be zip", file_length);
439 return kInvalidFile;
440 }
441
442 /*
443 * Perform the traditional EOCD snipe hunt.
444 *
445 * We're searching for the End of Central Directory magic number,
446 * which appears at the start of the EOCD block. It's followed by
447 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We
448 * need to read the last part of the file into a buffer, dig through
449 * it to find the magic number, parse some values out, and use those
450 * to determine the extent of the CD.
451 *
452 * We start by pulling in the last part of the file.
453 */
454 uint32_t read_amount = kMaxEOCDSearch;
455 if (file_length < (off64_t) read_amount) {
456 read_amount = file_length;
457 }
458
459 uint8_t* scan_buffer = (uint8_t*) malloc(read_amount);
460 int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
461 file_length, read_amount, scan_buffer);
462
463 free(scan_buffer);
464 return result;
465}
466
467/*
468 * Parses the Zip archive's Central Directory. Allocates and populates the
469 * hash table.
470 *
471 * Returns 0 on success.
472 */
473static int32_t ParseZipArchive(ZipArchive* archive) {
474 int32_t result = -1;
Narayan Kamatheaf98852013-12-11 14:51:51 +0000475 const uint8_t* cd_ptr = (const uint8_t*) archive->directory_map->getDataPtr();
476 size_t cd_length = archive->directory_map->getDataLength();
Narayan Kamath7462f022013-11-21 13:05:04 +0000477 uint16_t num_entries = archive->num_entries;
478
479 /*
480 * Create hash table. We have a minimum 75% load factor, possibly as
481 * low as 50% after we round off to a power of 2. There must be at
482 * least one unused entry to avoid an infinite loop during creation.
483 */
484 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
485 archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size,
486 sizeof(ZipEntryName));
487
488 /*
489 * Walk through the central directory, adding entries to the hash
490 * table and verifying values.
491 */
492 const uint8_t* ptr = cd_ptr;
493 for (uint16_t i = 0; i < num_entries; i++) {
494 if (get4LE(ptr) != kCDESignature) {
495 ALOGW("Zip: missed a central dir sig (at %d)", i);
496 goto bail;
497 }
498
499 if (ptr + kCDELen > cd_ptr + cd_length) {
500 ALOGW("Zip: ran off the end (at %d)", i);
501 goto bail;
502 }
503
504 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
505 if (local_header_offset >= archive->directory_offset) {
506 ALOGW("Zip: bad LFH offset %lld at entry %d", local_header_offset, i);
507 goto bail;
508 }
509
510 const uint16_t file_name_length = get2LE(ptr + kCDENameLen);
511 const uint16_t extra_length = get2LE(ptr + kCDEExtraLen);
512 const uint16_t comment_length = get2LE(ptr + kCDECommentLen);
513
514 /* add the CDE filename to the hash table */
515 const int add_result = AddToHash(archive->hash_table,
516 archive->hash_table_size, (const char*) ptr + kCDELen, file_name_length);
517 if (add_result) {
518 ALOGW("Zip: Error adding entry to hash table %d", add_result);
519 result = add_result;
520 goto bail;
521 }
522
523 ptr += kCDELen + file_name_length + extra_length + comment_length;
524 if ((size_t)(ptr - cd_ptr) > cd_length) {
525 ALOGW("Zip: bad CD advance (%d vs %zd) at entry %d",
526 (int) (ptr - cd_ptr), cd_length, i);
527 goto bail;
528 }
529 }
530 ALOGV("+++ zip good scan %d entries", num_entries);
531
532 result = 0;
533
534bail:
535 return result;
536}
537
538static int32_t OpenArchiveInternal(ZipArchive* archive,
539 const char* debug_file_name) {
540 int32_t result = -1;
541 if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
542 return result;
543 }
544
545 if ((result = ParseZipArchive(archive))) {
546 return result;
547 }
548
549 return 0;
550}
551
552int32_t OpenArchiveFd(int fd, const char* debug_file_name,
553 ZipArchiveHandle* handle) {
554 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
555 memset(archive, 0, sizeof(*archive));
556 *handle = archive;
557
558 archive->fd = fd;
559
560 return OpenArchiveInternal(archive, debug_file_name);
561}
562
563int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
564 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
565 memset(archive, 0, sizeof(*archive));
566 *handle = archive;
567
568 const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
569 if (fd < 0) {
570 ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
571 return kIoError;
572 } else {
573 archive->fd = fd;
574 }
575
576 return OpenArchiveInternal(archive, fileName);
577}
578
579/*
580 * Close a ZipArchive, closing the file and freeing the contents.
581 */
582void CloseArchive(ZipArchiveHandle handle) {
583 ZipArchive* archive = (ZipArchive*) handle;
584 ALOGV("Closing archive %p", archive);
585
586 if (archive->fd >= 0) {
587 close(archive->fd);
588 }
589
Narayan Kamatheaf98852013-12-11 14:51:51 +0000590 if (archive->directory_map != NULL) {
591 archive->directory_map->release();
592 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000593 free(archive->hash_table);
594
595 /* ensure nobody tries to use the ZipArchive after it's closed */
596 archive->directory_offset = -1;
597 archive->fd = -1;
598 archive->num_entries = -1;
599 archive->hash_table_size = -1;
600 archive->hash_table = NULL;
601}
602
603static int32_t UpdateEntryFromDataDescriptor(int fd,
604 ZipEntry *entry) {
605 uint8_t ddBuf[kDDMaxLen];
606 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
607 if (actual != sizeof(ddBuf)) {
608 return kIoError;
609 }
610
611 const uint32_t ddSignature = get4LE(ddBuf);
612 uint16_t ddOffset = 0;
613 if (ddSignature == kDDOptSignature) {
614 ddOffset = 4;
615 }
616
617 entry->crc32 = get4LE(ddBuf + ddOffset + kDDCrc32);
618 entry->compressed_length = get4LE(ddBuf + ddOffset + kDDCompLen);
619 entry->uncompressed_length = get4LE(ddBuf + ddOffset + kDDUncompLen);
620
621 return 0;
622}
623
624// Attempts to read |len| bytes into |buf| at offset |off|.
625//
626// This method uses pread64 on platforms that support it and
627// lseek64 + read on platforms that don't. This implies that
628// callers should not rely on the |fd| offset being incremented
629// as a side effect of this call.
630static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
631 off64_t off) {
632#ifdef HAVE_PREAD
633 return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
634#else
635 // The only supported platform that doesn't support pread at the moment
636 // is Windows. Only recent versions of windows support unix like forks,
637 // and even there the semantics are quite different.
638 if (lseek64(fd, off, SEEK_SET) != off) {
Narayan Kamatheaf98852013-12-11 14:51:51 +0000639 ALOGW("Zip: failed seek to offset %lld", off);
Narayan Kamath7462f022013-11-21 13:05:04 +0000640 return kIoError;
641 }
642
643 return TEMP_FAILURE_RETRY(read(fd, buf, len));
644#endif // HAVE_PREAD
645}
646
647static int32_t FindEntry(const ZipArchive* archive, const int ent,
648 ZipEntry* data) {
649 const uint16_t nameLen = archive->hash_table[ent].name_length;
650 const char* name = archive->hash_table[ent].name;
651
652 // Recover the start of the central directory entry from the filename
653 // pointer. The filename is the first entry past the fixed-size data,
654 // so we can just subtract back from that.
655 const unsigned char* ptr = (const unsigned char*) name;
656 ptr -= kCDELen;
657
658 // This is the base of our mmapped region, we have to sanity check that
659 // the name that's in the hash table is a pointer to a location within
660 // this mapped region.
661 const unsigned char* base_ptr = (const unsigned char*)
Narayan Kamatheaf98852013-12-11 14:51:51 +0000662 archive->directory_map->getDataPtr();
663 if (ptr < base_ptr || ptr > base_ptr + archive->directory_map->getDataLength()) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000664 ALOGW("Zip: Invalid entry pointer");
665 return kInvalidOffset;
666 }
667
668 // The offset of the start of the central directory in the zipfile.
669 // We keep this lying around so that we can sanity check all our lengths
670 // and our per-file structures.
671 const off64_t cd_offset = archive->directory_offset;
672
673 // Fill out the compression method, modification time, crc32
674 // and other interesting attributes from the central directory. These
675 // will later be compared against values from the local file header.
676 data->method = get2LE(ptr + kCDEMethod);
677 data->mod_time = get4LE(ptr + kCDEModWhen);
678 data->crc32 = get4LE(ptr + kCDECRC);
679 data->compressed_length = get4LE(ptr + kCDECompLen);
680 data->uncompressed_length = get4LE(ptr + kCDEUncompLen);
681
682 // Figure out the local header offset from the central directory. The
683 // actual file data will begin after the local header and the name /
684 // extra comments.
685 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
686 if (local_header_offset + (off64_t) kLFHLen >= cd_offset) {
687 ALOGW("Zip: bad local hdr offset in zip");
688 return kInvalidOffset;
689 }
690
691 uint8_t lfh_buf[kLFHLen];
692 ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
693 local_header_offset);
694 if (actual != sizeof(lfh_buf)) {
695 ALOGW("Zip: failed reading lfh name from offset %lld", local_header_offset);
696 return kIoError;
697 }
698
699 if (get4LE(lfh_buf) != kLFHSignature) {
700 ALOGW("Zip: didn't find signature at start of lfh, offset=%lld",
701 local_header_offset);
702 return kInvalidOffset;
703 }
704
705 // Paranoia: Match the values specified in the local file header
706 // to those specified in the central directory.
707 const uint16_t lfhGpbFlags = get2LE(lfh_buf + kLFHGPBFlags);
708 const uint16_t lfhNameLen = get2LE(lfh_buf + kLFHNameLen);
709 const uint16_t lfhExtraLen = get2LE(lfh_buf + kLFHExtraLen);
710
711 if ((lfhGpbFlags & kGPBDDFlagMask) == 0) {
712 const uint32_t lfhCrc = get4LE(lfh_buf + kLFHCRC);
713 const uint32_t lfhCompLen = get4LE(lfh_buf + kLFHCompLen);
714 const uint32_t lfhUncompLen = get4LE(lfh_buf + kLFHUncompLen);
715
716 data->has_data_descriptor = 0;
717 if (data->compressed_length != lfhCompLen || data->uncompressed_length != lfhUncompLen
718 || data->crc32 != lfhCrc) {
719 ALOGW("Zip: size/crc32 mismatch. expected {%d, %d, %x}, was {%d, %d, %x}",
720 data->compressed_length, data->uncompressed_length, data->crc32,
721 lfhCompLen, lfhUncompLen, lfhCrc);
722 return kInconsistentInformation;
723 }
724 } else {
725 data->has_data_descriptor = 1;
726 }
727
728 // Check that the local file header name matches the declared
729 // name in the central directory.
730 if (lfhNameLen == nameLen) {
731 const off64_t name_offset = local_header_offset + kLFHLen;
732 if (name_offset + lfhNameLen >= cd_offset) {
733 ALOGW("Zip: Invalid declared length");
734 return kInvalidOffset;
735 }
736
737 uint8_t* name_buf = (uint8_t*) malloc(nameLen);
738 ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
739 name_offset);
740
741 if (actual != nameLen) {
742 ALOGW("Zip: failed reading lfh name from offset %lld", name_offset);
743 free(name_buf);
744 return kIoError;
745 }
746
747 if (memcmp(name, name_buf, nameLen)) {
748 free(name_buf);
749 return kInconsistentInformation;
750 }
751
752 free(name_buf);
753 } else {
754 ALOGW("Zip: lfh name did not match central directory.");
755 return kInconsistentInformation;
756 }
757
758 const off64_t data_offset = local_header_offset + kLFHLen + lfhNameLen + lfhExtraLen;
759 if (data_offset >= cd_offset) {
760 ALOGW("Zip: bad data offset %lld in zip", (off64_t) data_offset);
761 return kInvalidOffset;
762 }
763
764 if ((off64_t)(data_offset + data->compressed_length) > cd_offset) {
765 ALOGW("Zip: bad compressed length in zip (%lld + %zd > %lld)",
766 data_offset, data->compressed_length, cd_offset);
767 return kInvalidOffset;
768 }
769
770 if (data->method == kCompressStored &&
771 (off64_t)(data_offset + data->uncompressed_length) > cd_offset) {
772 ALOGW("Zip: bad uncompressed length in zip (%lld + %zd > %lld)",
773 data_offset, data->uncompressed_length, cd_offset);
774 return kInvalidOffset;
775 }
776
777 data->offset = data_offset;
778 return 0;
779}
780
781struct IterationHandle {
782 uint32_t position;
783 const char* prefix;
784 uint16_t prefix_len;
785 ZipArchive* archive;
786};
787
788int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) {
789 ZipArchive* archive = (ZipArchive *) handle;
790
791 if (archive == NULL || archive->hash_table == NULL) {
792 ALOGW("Zip: Invalid ZipArchiveHandle");
793 return kInvalidHandle;
794 }
795
796 IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle));
797 cookie->position = 0;
798 cookie->prefix = prefix;
799 cookie->archive = archive;
800 if (prefix != NULL) {
801 cookie->prefix_len = strlen(prefix);
802 }
803
804 *cookie_ptr = cookie ;
805 return 0;
806}
807
808int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName,
809 ZipEntry* data) {
810 const ZipArchive* archive = (ZipArchive*) handle;
811 const int nameLen = strlen(entryName);
812 if (nameLen == 0 || nameLen > 65535) {
813 ALOGW("Zip: Invalid filename %s", entryName);
814 return kInvalidEntryName;
815 }
816
817 const int64_t ent = EntryToIndex(archive->hash_table,
818 archive->hash_table_size, entryName, nameLen);
819
820 if (ent < 0) {
Narayan Kamatha1ff8012013-12-31 10:27:59 +0000821 ALOGV("Zip: Could not find entry %.*s", nameLen, entryName);
Narayan Kamath7462f022013-11-21 13:05:04 +0000822 return ent;
823 }
824
825 return FindEntry(archive, ent, data);
826}
827
828int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
829 IterationHandle* handle = (IterationHandle *) cookie;
830 if (handle == NULL) {
831 return kInvalidHandle;
832 }
833
834 ZipArchive* archive = handle->archive;
835 if (archive == NULL || archive->hash_table == NULL) {
836 ALOGW("Zip: Invalid ZipArchiveHandle");
837 return kInvalidHandle;
838 }
839
840 const uint32_t currentOffset = handle->position;
841 const uint32_t hash_table_length = archive->hash_table_size;
842 const ZipEntryName *hash_table = archive->hash_table;
843
844 for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
845 if (hash_table[i].name != NULL &&
846 (handle->prefix == NULL ||
847 (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
848 handle->position = (i + 1);
849 const int error = FindEntry(archive, i, data);
850 if (!error) {
851 name->name = hash_table[i].name;
852 name->name_length = hash_table[i].name_length;
853 }
854
855 return error;
856 }
857 }
858
859 handle->position = 0;
860 return kIterationEnd;
861}
862
863static int32_t InflateToFile(int fd, const ZipEntry* entry,
864 uint8_t* begin, uint32_t length,
865 uint64_t* crc_out) {
866 int32_t result = -1;
867 const uint32_t kBufSize = 32768;
868 uint8_t read_buf[kBufSize];
869 uint8_t write_buf[kBufSize];
870 z_stream zstream;
871 int zerr;
872
873 /*
874 * Initialize the zlib stream struct.
875 */
876 memset(&zstream, 0, sizeof(zstream));
877 zstream.zalloc = Z_NULL;
878 zstream.zfree = Z_NULL;
879 zstream.opaque = Z_NULL;
880 zstream.next_in = NULL;
881 zstream.avail_in = 0;
882 zstream.next_out = (Bytef*) write_buf;
883 zstream.avail_out = kBufSize;
884 zstream.data_type = Z_UNKNOWN;
885
886 /*
887 * Use the undocumented "negative window bits" feature to tell zlib
888 * that there's no zlib header waiting for it.
889 */
890 zerr = inflateInit2(&zstream, -MAX_WBITS);
891 if (zerr != Z_OK) {
892 if (zerr == Z_VERSION_ERROR) {
893 ALOGE("Installed zlib is not compatible with linked version (%s)",
894 ZLIB_VERSION);
895 } else {
896 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
897 }
898
899 return kZlibError;
900 }
901
902 const uint32_t uncompressed_length = entry->uncompressed_length;
903
904 uint32_t compressed_length = entry->compressed_length;
905 uint32_t write_count = 0;
906 do {
907 /* read as much as we can */
908 if (zstream.avail_in == 0) {
909 const ssize_t getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
910 const ssize_t actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize));
911 if (actual != getSize) {
912 ALOGW("Zip: inflate read failed (%d vs %zd)", actual, getSize);
913 result = kIoError;
914 goto z_bail;
915 }
916
917 compressed_length -= getSize;
918
919 zstream.next_in = read_buf;
920 zstream.avail_in = getSize;
921 }
922
923 /* uncompress the data */
924 zerr = inflate(&zstream, Z_NO_FLUSH);
925 if (zerr != Z_OK && zerr != Z_STREAM_END) {
926 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
927 zerr, zstream.next_in, zstream.avail_in,
928 zstream.next_out, zstream.avail_out);
929 result = kZlibError;
930 goto z_bail;
931 }
932
933 /* write when we're full or when we're done */
934 if (zstream.avail_out == 0 ||
935 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
936 const size_t write_size = zstream.next_out - write_buf;
937 // The file might have declared a bogus length.
938 if (write_size + write_count > length) {
939 goto z_bail;
940 }
941 memcpy(begin + write_count, write_buf, write_size);
942 write_count += write_size;
943
944 zstream.next_out = write_buf;
945 zstream.avail_out = kBufSize;
946 }
947 } while (zerr == Z_OK);
948
949 assert(zerr == Z_STREAM_END); /* other errors should've been caught */
950
951 // stream.adler holds the crc32 value for such streams.
952 *crc_out = zstream.adler;
953
954 if (zstream.total_out != uncompressed_length || compressed_length != 0) {
955 ALOGW("Zip: size mismatch on inflated file (%ld vs %zd)",
956 zstream.total_out, uncompressed_length);
957 result = kInconsistentInformation;
958 goto z_bail;
959 }
960
961 result = 0;
962
963z_bail:
964 inflateEnd(&zstream); /* free up any allocated structures */
965
966 return result;
967}
968
969int32_t ExtractToMemory(ZipArchiveHandle handle,
970 ZipEntry* entry, uint8_t* begin, uint32_t size) {
971 ZipArchive* archive = (ZipArchive*) handle;
972 const uint16_t method = entry->method;
973 off64_t data_offset = entry->offset;
974
975 if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
976 ALOGW("Zip: lseek to data at %lld failed", (off64_t) data_offset);
977 return kIoError;
978 }
979
980 // this should default to kUnknownCompressionMethod.
981 int32_t return_value = -1;
982 uint64_t crc = 0;
983 if (method == kCompressStored) {
984 return_value = CopyFileToFile(archive->fd, begin, size, &crc);
985 } else if (method == kCompressDeflated) {
986 return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
987 }
988
989 if (!return_value && entry->has_data_descriptor) {
990 return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
991 if (return_value) {
992 return return_value;
993 }
994 }
995
996 // TODO: Fix this check by passing the right flags to inflate2 so that
997 // it calculates the CRC for us.
998 if (entry->crc32 != crc && false) {
999 ALOGW("Zip: crc mismatch: expected %u, was %llu", entry->crc32, crc);
1000 return kInconsistentInformation;
1001 }
1002
1003 return return_value;
1004}
1005
1006int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1007 ZipEntry* entry, int fd) {
1008 const int32_t declared_length = entry->uncompressed_length;
1009
Narayan Kamath00a258c2013-12-13 16:06:19 +00001010 const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
1011 if (current_offset == -1) {
1012 ALOGW("Zip: unable to seek to current location on fd %d: %s", fd,
1013 strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +00001014 return kIoError;
1015 }
1016
Narayan Kamath00a258c2013-12-13 16:06:19 +00001017 int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
1018 if (result == -1) {
1019 ALOGW("Zip: unable to truncate file to %lld: %s", declared_length + current_offset,
1020 strerror(errno));
1021 return kIoError;
1022 }
1023
1024 android::FileMap* map = MapFileSegment(fd, current_offset, declared_length,
Narayan Kamatheaf98852013-12-11 14:51:51 +00001025 false, kTempMappingFileName);
1026 if (map == NULL) {
1027 return kMmapFailed;
Narayan Kamath7462f022013-11-21 13:05:04 +00001028 }
1029
Narayan Kamatheaf98852013-12-11 14:51:51 +00001030 const int32_t error = ExtractToMemory(handle, entry,
1031 reinterpret_cast<uint8_t*>(map->getDataPtr()),
1032 map->getDataLength());
1033 map->release();
Narayan Kamath7462f022013-11-21 13:05:04 +00001034 return error;
1035}
1036
1037const char* ErrorCodeString(int32_t error_code) {
1038 if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1039 return kErrorMessages[error_code * -1];
1040 }
1041
1042 return kErrorMessages[0];
1043}
1044
1045int GetFileDescriptor(const ZipArchiveHandle handle) {
1046 return ((ZipArchive*) handle)->fd;
1047}
1048