blob: b63ac794d361bb2ba8ffec733f00259657e3b4af [file] [log] [blame]
Narayan Kamath7462f022013-11-21 13:05:04 +00001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
20#include "ziparchive/zip_archive.h"
21
22#include <zlib.h>
23
24#include <assert.h>
25#include <errno.h>
26#include <limits.h>
27#include <log/log.h>
28#include <fcntl.h>
29#include <stdlib.h>
30#include <string.h>
31#include <sys/mman.h>
32#include <unistd.h>
33
34#include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd
35
// Not every platform defines O_BINARY. Where it is missing, define it as a
// no-op flag so that open() calls can always ask for binary mode; this is
// for Windows, where weird things happen to files not opened in binary mode.
#if !defined(O_BINARY)
#define O_BINARY 0
#endif
41
/*
 * Zip file constants.
 *
 * Unless noted otherwise, the "offset" constants are byte offsets from the
 * start of the record they belong to.
 */
static const uint32_t kEOCDSignature = 0x06054b50;
// Size of the fixed part of the End Of Central Directory record: the 4 byte
// signature followed by 18 bytes of fields (the variable-length archive
// comment is excluded). The backwards EOCD scan starts this many bytes before
// the end of the file, so the value must cover every fixed field that is read
// afterwards (the last one is the 4 byte field at kEOCDFileOffset).
static const uint32_t kEOCDLen = 22;
static const uint32_t kEOCDNumEntries = 8; // offset to #of entries in file
static const uint32_t kEOCDSize = 12; // size of the central directory
static const uint32_t kEOCDFileOffset = 16; // offset to central directory

static const uint32_t kMaxCommentLen = 65535; // longest possible in ushort
static const uint32_t kMaxEOCDSearch = (kMaxCommentLen + kEOCDLen);

static const uint32_t kLFHSignature = 0x04034b50;
static const uint32_t kLFHLen = 30; // excluding variable-len fields
static const uint32_t kLFHGPBFlags = 6; // general purpose bit flags
static const uint32_t kLFHCRC = 14; // offset to CRC
static const uint32_t kLFHCompLen = 18; // offset to compressed length
static const uint32_t kLFHUncompLen = 22; // offset to uncompressed length
static const uint32_t kLFHNameLen = 26; // offset to filename length
static const uint32_t kLFHExtraLen = 28; // offset to extra length

static const uint32_t kCDESignature = 0x02014b50;
static const uint32_t kCDELen = 46; // excluding variable-len fields
static const uint32_t kCDEMethod = 10; // offset to compression method
static const uint32_t kCDEModWhen = 12; // offset to modification timestamp
static const uint32_t kCDECRC = 16; // offset to entry CRC
static const uint32_t kCDECompLen = 20; // offset to compressed length
static const uint32_t kCDEUncompLen = 24; // offset to uncompressed length
static const uint32_t kCDENameLen = 28; // offset to filename length
static const uint32_t kCDEExtraLen = 30; // offset to extra length
static const uint32_t kCDECommentLen = 32; // offset to comment length
static const uint32_t kCDELocalOffset = 42; // offset to local hdr

static const uint32_t kDDOptSignature = 0x08074b50; // *OPTIONAL* data descriptor signature
static const uint32_t kDDSignatureLen = 4;
static const uint32_t kDDLen = 12;
static const uint32_t kDDMaxLen = 16; // max of 16 bytes with a signature, 12 bytes without
static const uint32_t kDDCrc32 = 0; // offset to crc32
static const uint32_t kDDCompLen = 4; // offset to compressed length
static const uint32_t kDDUncompLen = 8; // offset to uncompressed length

static const uint32_t kGPBDDFlagMask = 0x0008; // mask value that signifies that the entry has a DD

static const uint32_t kMaxErrorLen = 1024;
86
// Human readable descriptions of the error codes declared below. The text for
// error code |e| is stored at index |-e|; index 0 is the fallback used for
// codes outside the known range.
static const char* kErrorMessages[] = {
  "Unknown return code.",          //   0 (fallback)
  "Iteration ended",               //  -1 kIterationEnd
  "Zlib error",                    //  -2 kZlibError
  "Invalid file",                  //  -3 kInvalidFile
  "Invalid handle",                //  -4 kInvalidHandle
  "Duplicate entries in archive",  //  -5 kDuplicateEntry
  "Empty archive",                 //  -6 kEmptyArchive
  "Entry not found",               //  -7 kEntryNotFound
  "Invalid offset",                //  -8 kInvalidOffset
  "Inconsistent information",      //  -9 kInconsistentInformation
  "Invalid entry name",            // -10 kInvalidEntryName
  "I/O Error",                     // -11 kIoError
};

// Every known error code lies strictly between kErrorMessageLowerBound
// (declared after the codes) and this upper bound.
static const int32_t kErrorMessageUpperBound = 0;

// Returned by Next() once the iteration has no further entries to report.
static const int32_t kIterationEnd = -1;

// Inflating a stream from this file produced a Zlib error. This usually
// points at a corrupt file.
static const int32_t kZlibError = -2;

// The input cannot be handled as a zip archive, typically because it is too
// small, too large, or lacks a valid signature.
static const int32_t kInvalidFile = -3;

// The caller supplied an invalid iteration or ziparchive handle.
static const int32_t kInvalidHandle = -4;

// Two (or more) entries in the archive share the same name.
static const int32_t kDuplicateEntry = -5;

// The archive has no entries at all.
static const int32_t kEmptyArchive = -6;

// The requested entry does not exist in the archive.
static const int32_t kEntryNotFound = -7;

// The archive holds an invalid pointer to a local file header.
static const int32_t kInvalidOffset = -8;

// The archive's entry information contradicts itself: either the central
// directory and the local file header disagree, or the actual uncompressed
// length or crc32 differ from the declared values.
static const int32_t kInconsistentInformation = -9;

// An entry name that is not valid was encountered.
static const int32_t kInvalidEntryName = -10;

// A system call performing I/O (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

// One below the smallest valid error code.
static const int32_t kErrorMessageLowerBound = -12;
Narayan Kamath7462f022013-11-21 13:05:04 +0000144
145
// Page size used to align file offsets before mapping. Derived from
// PAGE_SHIFT when the platform headers define it, otherwise assumed to be
// 4096 bytes.
#if defined(PAGE_SHIFT)
#define SYSTEM_PAGE_SIZE (1 << PAGE_SHIFT)
#else
#define SYSTEM_PAGE_SIZE 4096
#endif
151
// Describes one mapped file segment. The caller-visible data window
// (addr/length) may start part-way into the page-aligned region that was
// actually mapped (base_address/base_length); the latter pair is what gets
// passed to munmap().
struct MemMapping {
  uint8_t* addr;          // first byte of the requested data
  size_t length;          // number of requested data bytes

  uint8_t* base_address;  // page-aligned start of the underlying mapping
  size_t base_length;     // size of the underlying mapping
};
159
160/*
161 * A Read-only Zip archive.
162 *
163 * We want "open" and "find entry by name" to be fast operations, and
164 * we want to use as little memory as possible. We memory-map the zip
165 * central directory, and load a hash table with pointers to the filenames
166 * (which aren't null-terminated). The other fields are at a fixed offset
167 * from the filename, so we don't need to extract those (but we do need
168 * to byte-read and endian-swap them every time we want them).
169 *
170 * It's possible that somebody has handed us a massive (~1GB) zip archive,
171 * so we can't expect to mmap the entire file.
172 *
173 * To speed comparisons when doing a lookup by name, we could make the mapping
174 * "private" (copy-on-write) and null-terminate the filenames after verifying
175 * the record structure. However, this requires a private mapping of
176 * every page that the Central Directory touches. Easier to tuck a copy
177 * of the string length into the hash table entry.
178 */
179struct ZipArchive {
180 /* open Zip archive */
181 int fd;
182
183 /* mapped central directory area */
184 off64_t directory_offset;
185 MemMapping directory_map;
186
187 /* number of entries in the Zip archive */
188 uint16_t num_entries;
189
190 /*
191 * We know how many entries are in the Zip archive, so we can have a
192 * fixed-size hash table. We define a load factor of 0.75 and overallocat
193 * so the maximum number entries can never be higher than
194 * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
195 */
196 uint32_t hash_table_size;
197 ZipEntryName* hash_table;
198};
199
200// Returns 0 on success and negative values on failure.
201static int32_t MapFileSegment(const int fd, const off64_t start, const size_t length,
202 const int prot, const int flags, MemMapping *mapping) {
203 /* adjust to be page-aligned */
204 const int adjust = start % SYSTEM_PAGE_SIZE;
205 const off64_t actual_start = start - adjust;
206 const off64_t actual_length = length + adjust;
207
208 void* map_addr = mmap(NULL, actual_length, prot, flags, fd, actual_start);
209 if (map_addr == MAP_FAILED) {
210 ALOGW("mmap(%llx, R, FILE|SHARED, %d, %llx) failed: %s",
211 actual_length, fd, actual_start, strerror(errno));
212 return kIoError;
213 }
214
215 mapping->base_address = (uint8_t*) map_addr;
216 mapping->base_length = actual_length;
217 mapping->addr = (uint8_t*) map_addr + adjust;
218 mapping->length = length;
219
220 ALOGV("mmap seg (st=%d ln=%d): b=%p bl=%d ad=%p ln=%d",
221 start, length, mapping->base_address, mapping->base_length,
222 mapping->addr, mapping->length);
223
224 return 0;
225}
226
227static void ReleaseMappedSegment(MemMapping* map) {
228 if (map->base_address == 0 || map->base_length == 0) {
229 return;
230 }
231
232 if (munmap(map->base_address, map->base_length) < 0) {
233 ALOGW("munmap(%p, %d) failed: %s",
234 map->base_address, map->base_length, strerror(errno));
235 } else {
236 ALOGV("munmap(%p, %d) succeeded", map->base_address, map->base_length);
237 }
238}
239
240static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
241 static const uint32_t kBufSize = 32768;
242 uint8_t buf[kBufSize];
243
244 uint32_t count = 0;
245 uint64_t crc = 0;
246 while (count <= length) {
247 uint32_t remaining = length - count;
248
249 // Safe conversion because kBufSize is narrow enough for a 32 bit signed
250 // value.
251 ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
252 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
253
254 if (actual != get_size) {
255 ALOGW("CopyFileToFile: copy read failed (%d vs %zd)",
256 (int) actual, get_size);
257 return kIoError;
258 }
259
260 memcpy(begin + count, buf, get_size);
261 crc = crc32(crc, buf, get_size);
262 count += get_size;
263 }
264
265 *crc_out = crc;
266
267 return 0;
268}
269
/*
 * Returns the smallest power of 2 that is greater than or equal to |val|.
 * Values that are already a power of 2 are returned unchanged. An input of 0,
 * or one above 2^31, wraps around and yields 0.
 *
 * Technique from http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  // Smear the highest set bit of (val - 1) into every lower position, then
  // add one to carry into the next power of 2.
  uint32_t smeared = val - 1;
  for (unsigned int shift = 1; shift < 32; shift <<= 1) {
    smeared |= smeared >> shift;
  }

  return smeared + 1;
}
286
// Hashes the first |len| characters of |str| (which need not be
// null-terminated) with the polynomial hash = hash * 31 + c, starting from 0.
// Arithmetic wraps modulo 2^32.
static uint32_t ComputeHash(const char* str, uint16_t len) {
  uint32_t accumulator = 0;

  for (uint16_t index = 0; index < len; ++index) {
    accumulator = accumulator * 31 + str[index];
  }

  return accumulator;
}
296
297/*
298 * Convert a ZipEntry to a hash table index, verifying that it's in a
299 * valid range.
300 */
301static int64_t EntryToIndex(const ZipEntryName* hash_table,
302 const uint32_t hash_table_size,
303 const char* name, uint16_t length) {
304 const uint32_t hash = ComputeHash(name, length);
305
306 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
307 uint32_t ent = hash & (hash_table_size - 1);
308 while (hash_table[ent].name != NULL) {
309 if (hash_table[ent].name_length == length &&
310 memcmp(hash_table[ent].name, name, length) == 0) {
311 return ent;
312 }
313
314 ent = (ent + 1) & (hash_table_size - 1);
315 }
316
317 ALOGV("Zip: Unable to find entry %.*s", name_length, name);
318 return kEntryNotFound;
319}
320
321/*
322 * Add a new entry to the hash table.
323 */
324static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
325 const char* name, uint16_t length) {
326 const uint64_t hash = ComputeHash(name, length);
327 uint32_t ent = hash & (hash_table_size - 1);
328
329 /*
330 * We over-allocated the table, so we're guaranteed to find an empty slot.
331 * Further, we guarantee that the hashtable size is not 0.
332 */
333 while (hash_table[ent].name != NULL) {
334 if (hash_table[ent].name_length == length &&
335 memcmp(hash_table[ent].name, name, length) == 0) {
336 // We've found a duplicate entry. We don't accept it
337 ALOGW("Zip: Found duplicate entry %.*s", length, name);
338 return kDuplicateEntry;
339 }
340 ent = (ent + 1) & (hash_table_size - 1);
341 }
342
343 hash_table[ent].name = name;
344 hash_table[ent].name_length = length;
345 return 0;
346}
347
/*
 * Decodes the 16-bit little-endian value stored in the two bytes at |src|.
 */
static uint16_t get2LE(const uint8_t* src) {
  const uint8_t low_byte = src[0];
  const uint8_t high_byte = src[1];
  return (high_byte << 8) | low_byte;
}
354
/*
 * Decodes the 32-bit little-endian value stored in the four bytes at |src|.
 *
 * Each byte is widened to uint32_t before it is shifted. Shifting the
 * int-promoted byte directly would overflow a signed int whenever the most
 * significant byte is 0x80 or larger, which is undefined behaviour.
 */
static uint32_t get4LE(const uint8_t* src) {
  return ((uint32_t) src[0]) |
         ((uint32_t) src[1] << 8) |
         ((uint32_t) src[2] << 16) |
         ((uint32_t) src[3] << 24);
}
368
369static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
370 ZipArchive* archive, off64_t file_length,
371 uint32_t read_amount, uint8_t* scan_buffer) {
372 const off64_t search_start = file_length - read_amount;
373
374 if (lseek64(fd, search_start, SEEK_SET) != search_start) {
375 ALOGW("Zip: seek %lld failed: %s", search_start, strerror(errno));
376 return kIoError;
377 }
378 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scan_buffer, read_amount));
379 if (actual != (ssize_t) read_amount) {
380 ALOGW("Zip: read %zd failed: %s", read_amount, strerror(errno));
381 return kIoError;
382 }
383
384 /*
385 * Scan backward for the EOCD magic. In an archive without a trailing
386 * comment, we'll find it on the first try. (We may want to consider
387 * doing an initial minimal read; if we don't find it, retry with a
388 * second read as above.)
389 */
390 int i;
391 for (i = read_amount - kEOCDLen; i >= 0; i--) {
392 if (scan_buffer[i] == 0x50 && get4LE(&scan_buffer[i]) == kEOCDSignature) {
393 ALOGV("+++ Found EOCD at buf+%d", i);
394 break;
395 }
396 }
397 if (i < 0) {
398 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
399 return kInvalidFile;
400 }
401
402 const off64_t eocd_offset = search_start + i;
403 const uint8_t* eocd_ptr = scan_buffer + i;
404
405 assert(eocd_offset < file_length);
406
407 /*
408 * Grab the CD offset and size, and the number of entries in the
409 * archive. Verify that they look reasonable. Widen dir_size and
410 * dir_offset to the file offset type.
411 */
412 const uint16_t num_entries = get2LE(eocd_ptr + kEOCDNumEntries);
413 const off64_t dir_size = get4LE(eocd_ptr + kEOCDSize);
414 const off64_t dir_offset = get4LE(eocd_ptr + kEOCDFileOffset);
415
416 if (dir_offset + dir_size > eocd_offset) {
417 ALOGW("Zip: bad offsets (dir %lld, size %lld, eocd %lld)",
418 dir_offset, dir_size, eocd_offset);
419 return kInvalidOffset;
420 }
421 if (num_entries == 0) {
422 ALOGW("Zip: empty archive?");
423 return kEmptyArchive;
424 }
425
426 ALOGV("+++ num_entries=%d dir_size=%d dir_offset=%d", num_entries, dir_size,
427 dir_offset);
428
429 /*
430 * It all looks good. Create a mapping for the CD, and set the fields
431 * in archive.
432 */
433 const int32_t result = MapFileSegment(fd, dir_offset, dir_size,
434 PROT_READ, MAP_FILE | MAP_SHARED,
435 &(archive->directory_map));
436 if (result) {
437 return result;
438 }
439
440 archive->num_entries = num_entries;
441 archive->directory_offset = dir_offset;
442
443 return 0;
444}
445
446/*
447 * Find the zip Central Directory and memory-map it.
448 *
449 * On success, returns 0 after populating fields from the EOCD area:
450 * directory_offset
451 * directory_map
452 * num_entries
453 */
454static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
455 ZipArchive* archive) {
456
457 // Test file length. We use lseek64 to make sure the file
458 // is small enough to be a zip file (Its size must be less than
459 // 0xffffffff bytes).
460 off64_t file_length = lseek64(fd, 0, SEEK_END);
461 if (file_length == -1) {
462 ALOGV("Zip: lseek on fd %d failed", fd);
463 return kInvalidFile;
464 }
465
466 if (file_length > (off64_t) 0xffffffff) {
467 ALOGV("Zip: zip file too long %d", file_length);
468 return kInvalidFile;
469 }
470
471 if (file_length < (int64_t) kEOCDLen) {
472 ALOGV("Zip: length %ld is too small to be zip", file_length);
473 return kInvalidFile;
474 }
475
476 /*
477 * Perform the traditional EOCD snipe hunt.
478 *
479 * We're searching for the End of Central Directory magic number,
480 * which appears at the start of the EOCD block. It's followed by
481 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We
482 * need to read the last part of the file into a buffer, dig through
483 * it to find the magic number, parse some values out, and use those
484 * to determine the extent of the CD.
485 *
486 * We start by pulling in the last part of the file.
487 */
488 uint32_t read_amount = kMaxEOCDSearch;
489 if (file_length < (off64_t) read_amount) {
490 read_amount = file_length;
491 }
492
493 uint8_t* scan_buffer = (uint8_t*) malloc(read_amount);
494 int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
495 file_length, read_amount, scan_buffer);
496
497 free(scan_buffer);
498 return result;
499}
500
501/*
502 * Parses the Zip archive's Central Directory. Allocates and populates the
503 * hash table.
504 *
505 * Returns 0 on success.
506 */
507static int32_t ParseZipArchive(ZipArchive* archive) {
508 int32_t result = -1;
509 const uint8_t* cd_ptr = (const uint8_t*) archive->directory_map.addr;
510 size_t cd_length = archive->directory_map.length;
511 uint16_t num_entries = archive->num_entries;
512
513 /*
514 * Create hash table. We have a minimum 75% load factor, possibly as
515 * low as 50% after we round off to a power of 2. There must be at
516 * least one unused entry to avoid an infinite loop during creation.
517 */
518 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
519 archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size,
520 sizeof(ZipEntryName));
521
522 /*
523 * Walk through the central directory, adding entries to the hash
524 * table and verifying values.
525 */
526 const uint8_t* ptr = cd_ptr;
527 for (uint16_t i = 0; i < num_entries; i++) {
528 if (get4LE(ptr) != kCDESignature) {
529 ALOGW("Zip: missed a central dir sig (at %d)", i);
530 goto bail;
531 }
532
533 if (ptr + kCDELen > cd_ptr + cd_length) {
534 ALOGW("Zip: ran off the end (at %d)", i);
535 goto bail;
536 }
537
538 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
539 if (local_header_offset >= archive->directory_offset) {
540 ALOGW("Zip: bad LFH offset %lld at entry %d", local_header_offset, i);
541 goto bail;
542 }
543
544 const uint16_t file_name_length = get2LE(ptr + kCDENameLen);
545 const uint16_t extra_length = get2LE(ptr + kCDEExtraLen);
546 const uint16_t comment_length = get2LE(ptr + kCDECommentLen);
547
548 /* add the CDE filename to the hash table */
549 const int add_result = AddToHash(archive->hash_table,
550 archive->hash_table_size, (const char*) ptr + kCDELen, file_name_length);
551 if (add_result) {
552 ALOGW("Zip: Error adding entry to hash table %d", add_result);
553 result = add_result;
554 goto bail;
555 }
556
557 ptr += kCDELen + file_name_length + extra_length + comment_length;
558 if ((size_t)(ptr - cd_ptr) > cd_length) {
559 ALOGW("Zip: bad CD advance (%d vs %zd) at entry %d",
560 (int) (ptr - cd_ptr), cd_length, i);
561 goto bail;
562 }
563 }
564 ALOGV("+++ zip good scan %d entries", num_entries);
565
566 result = 0;
567
568bail:
569 return result;
570}
571
572static int32_t OpenArchiveInternal(ZipArchive* archive,
573 const char* debug_file_name) {
574 int32_t result = -1;
575 if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
576 return result;
577 }
578
579 if ((result = ParseZipArchive(archive))) {
580 return result;
581 }
582
583 return 0;
584}
585
586int32_t OpenArchiveFd(int fd, const char* debug_file_name,
587 ZipArchiveHandle* handle) {
588 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
589 memset(archive, 0, sizeof(*archive));
590 *handle = archive;
591
592 archive->fd = fd;
593
594 return OpenArchiveInternal(archive, debug_file_name);
595}
596
597int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
598 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
599 memset(archive, 0, sizeof(*archive));
600 *handle = archive;
601
602 const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
603 if (fd < 0) {
604 ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
605 return kIoError;
606 } else {
607 archive->fd = fd;
608 }
609
610 return OpenArchiveInternal(archive, fileName);
611}
612
613/*
614 * Close a ZipArchive, closing the file and freeing the contents.
615 */
616void CloseArchive(ZipArchiveHandle handle) {
617 ZipArchive* archive = (ZipArchive*) handle;
618 ALOGV("Closing archive %p", archive);
619
620 if (archive->fd >= 0) {
621 close(archive->fd);
622 }
623
624 ReleaseMappedSegment(&archive->directory_map);
625 free(archive->hash_table);
626
627 /* ensure nobody tries to use the ZipArchive after it's closed */
628 archive->directory_offset = -1;
629 archive->fd = -1;
630 archive->num_entries = -1;
631 archive->hash_table_size = -1;
632 archive->hash_table = NULL;
633}
634
635static int32_t UpdateEntryFromDataDescriptor(int fd,
636 ZipEntry *entry) {
637 uint8_t ddBuf[kDDMaxLen];
638 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
639 if (actual != sizeof(ddBuf)) {
640 return kIoError;
641 }
642
643 const uint32_t ddSignature = get4LE(ddBuf);
644 uint16_t ddOffset = 0;
645 if (ddSignature == kDDOptSignature) {
646 ddOffset = 4;
647 }
648
649 entry->crc32 = get4LE(ddBuf + ddOffset + kDDCrc32);
650 entry->compressed_length = get4LE(ddBuf + ddOffset + kDDCompLen);
651 entry->uncompressed_length = get4LE(ddBuf + ddOffset + kDDUncompLen);
652
653 return 0;
654}
655
656// Attempts to read |len| bytes into |buf| at offset |off|.
657//
658// This method uses pread64 on platforms that support it and
659// lseek64 + read on platforms that don't. This implies that
660// callers should not rely on the |fd| offset being incremented
661// as a side effect of this call.
662static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
663 off64_t off) {
664#ifdef HAVE_PREAD
665 return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
666#else
667 // The only supported platform that doesn't support pread at the moment
668 // is Windows. Only recent versions of windows support unix like forks,
669 // and even there the semantics are quite different.
670 if (lseek64(fd, off, SEEK_SET) != off) {
671 ALOGW("Zip: failed seek to offset %lld", name_offset);
672 return kIoError;
673 }
674
675 return TEMP_FAILURE_RETRY(read(fd, buf, len));
676#endif // HAVE_PREAD
677}
678
679static int32_t FindEntry(const ZipArchive* archive, const int ent,
680 ZipEntry* data) {
681 const uint16_t nameLen = archive->hash_table[ent].name_length;
682 const char* name = archive->hash_table[ent].name;
683
684 // Recover the start of the central directory entry from the filename
685 // pointer. The filename is the first entry past the fixed-size data,
686 // so we can just subtract back from that.
687 const unsigned char* ptr = (const unsigned char*) name;
688 ptr -= kCDELen;
689
690 // This is the base of our mmapped region, we have to sanity check that
691 // the name that's in the hash table is a pointer to a location within
692 // this mapped region.
693 const unsigned char* base_ptr = (const unsigned char*)
694 archive->directory_map.addr;
695 if (ptr < base_ptr || ptr > base_ptr + archive->directory_map.length) {
696 ALOGW("Zip: Invalid entry pointer");
697 return kInvalidOffset;
698 }
699
700 // The offset of the start of the central directory in the zipfile.
701 // We keep this lying around so that we can sanity check all our lengths
702 // and our per-file structures.
703 const off64_t cd_offset = archive->directory_offset;
704
705 // Fill out the compression method, modification time, crc32
706 // and other interesting attributes from the central directory. These
707 // will later be compared against values from the local file header.
708 data->method = get2LE(ptr + kCDEMethod);
709 data->mod_time = get4LE(ptr + kCDEModWhen);
710 data->crc32 = get4LE(ptr + kCDECRC);
711 data->compressed_length = get4LE(ptr + kCDECompLen);
712 data->uncompressed_length = get4LE(ptr + kCDEUncompLen);
713
714 // Figure out the local header offset from the central directory. The
715 // actual file data will begin after the local header and the name /
716 // extra comments.
717 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
718 if (local_header_offset + (off64_t) kLFHLen >= cd_offset) {
719 ALOGW("Zip: bad local hdr offset in zip");
720 return kInvalidOffset;
721 }
722
723 uint8_t lfh_buf[kLFHLen];
724 ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
725 local_header_offset);
726 if (actual != sizeof(lfh_buf)) {
727 ALOGW("Zip: failed reading lfh name from offset %lld", local_header_offset);
728 return kIoError;
729 }
730
731 if (get4LE(lfh_buf) != kLFHSignature) {
732 ALOGW("Zip: didn't find signature at start of lfh, offset=%lld",
733 local_header_offset);
734 return kInvalidOffset;
735 }
736
737 // Paranoia: Match the values specified in the local file header
738 // to those specified in the central directory.
739 const uint16_t lfhGpbFlags = get2LE(lfh_buf + kLFHGPBFlags);
740 const uint16_t lfhNameLen = get2LE(lfh_buf + kLFHNameLen);
741 const uint16_t lfhExtraLen = get2LE(lfh_buf + kLFHExtraLen);
742
743 if ((lfhGpbFlags & kGPBDDFlagMask) == 0) {
744 const uint32_t lfhCrc = get4LE(lfh_buf + kLFHCRC);
745 const uint32_t lfhCompLen = get4LE(lfh_buf + kLFHCompLen);
746 const uint32_t lfhUncompLen = get4LE(lfh_buf + kLFHUncompLen);
747
748 data->has_data_descriptor = 0;
749 if (data->compressed_length != lfhCompLen || data->uncompressed_length != lfhUncompLen
750 || data->crc32 != lfhCrc) {
751 ALOGW("Zip: size/crc32 mismatch. expected {%d, %d, %x}, was {%d, %d, %x}",
752 data->compressed_length, data->uncompressed_length, data->crc32,
753 lfhCompLen, lfhUncompLen, lfhCrc);
754 return kInconsistentInformation;
755 }
756 } else {
757 data->has_data_descriptor = 1;
758 }
759
760 // Check that the local file header name matches the declared
761 // name in the central directory.
762 if (lfhNameLen == nameLen) {
763 const off64_t name_offset = local_header_offset + kLFHLen;
764 if (name_offset + lfhNameLen >= cd_offset) {
765 ALOGW("Zip: Invalid declared length");
766 return kInvalidOffset;
767 }
768
769 uint8_t* name_buf = (uint8_t*) malloc(nameLen);
770 ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
771 name_offset);
772
773 if (actual != nameLen) {
774 ALOGW("Zip: failed reading lfh name from offset %lld", name_offset);
775 free(name_buf);
776 return kIoError;
777 }
778
779 if (memcmp(name, name_buf, nameLen)) {
780 free(name_buf);
781 return kInconsistentInformation;
782 }
783
784 free(name_buf);
785 } else {
786 ALOGW("Zip: lfh name did not match central directory.");
787 return kInconsistentInformation;
788 }
789
790 const off64_t data_offset = local_header_offset + kLFHLen + lfhNameLen + lfhExtraLen;
791 if (data_offset >= cd_offset) {
792 ALOGW("Zip: bad data offset %lld in zip", (off64_t) data_offset);
793 return kInvalidOffset;
794 }
795
796 if ((off64_t)(data_offset + data->compressed_length) > cd_offset) {
797 ALOGW("Zip: bad compressed length in zip (%lld + %zd > %lld)",
798 data_offset, data->compressed_length, cd_offset);
799 return kInvalidOffset;
800 }
801
802 if (data->method == kCompressStored &&
803 (off64_t)(data_offset + data->uncompressed_length) > cd_offset) {
804 ALOGW("Zip: bad uncompressed length in zip (%lld + %zd > %lld)",
805 data_offset, data->uncompressed_length, cd_offset);
806 return kInvalidOffset;
807 }
808
809 data->offset = data_offset;
810 return 0;
811}
812
813struct IterationHandle {
814 uint32_t position;
815 const char* prefix;
816 uint16_t prefix_len;
817 ZipArchive* archive;
818};
819
820int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) {
821 ZipArchive* archive = (ZipArchive *) handle;
822
823 if (archive == NULL || archive->hash_table == NULL) {
824 ALOGW("Zip: Invalid ZipArchiveHandle");
825 return kInvalidHandle;
826 }
827
828 IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle));
829 cookie->position = 0;
830 cookie->prefix = prefix;
831 cookie->archive = archive;
832 if (prefix != NULL) {
833 cookie->prefix_len = strlen(prefix);
834 }
835
836 *cookie_ptr = cookie ;
837 return 0;
838}
839
840int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName,
841 ZipEntry* data) {
842 const ZipArchive* archive = (ZipArchive*) handle;
843 const int nameLen = strlen(entryName);
844 if (nameLen == 0 || nameLen > 65535) {
845 ALOGW("Zip: Invalid filename %s", entryName);
846 return kInvalidEntryName;
847 }
848
849 const int64_t ent = EntryToIndex(archive->hash_table,
850 archive->hash_table_size, entryName, nameLen);
851
852 if (ent < 0) {
853 ALOGW("Zip: Could not find entry %.*s", nameLen, entryName);
854 return ent;
855 }
856
857 return FindEntry(archive, ent, data);
858}
859
860int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
861 IterationHandle* handle = (IterationHandle *) cookie;
862 if (handle == NULL) {
863 return kInvalidHandle;
864 }
865
866 ZipArchive* archive = handle->archive;
867 if (archive == NULL || archive->hash_table == NULL) {
868 ALOGW("Zip: Invalid ZipArchiveHandle");
869 return kInvalidHandle;
870 }
871
872 const uint32_t currentOffset = handle->position;
873 const uint32_t hash_table_length = archive->hash_table_size;
874 const ZipEntryName *hash_table = archive->hash_table;
875
876 for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
877 if (hash_table[i].name != NULL &&
878 (handle->prefix == NULL ||
879 (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
880 handle->position = (i + 1);
881 const int error = FindEntry(archive, i, data);
882 if (!error) {
883 name->name = hash_table[i].name;
884 name->name_length = hash_table[i].name_length;
885 }
886
887 return error;
888 }
889 }
890
891 handle->position = 0;
892 return kIterationEnd;
893}
894
895static int32_t InflateToFile(int fd, const ZipEntry* entry,
896 uint8_t* begin, uint32_t length,
897 uint64_t* crc_out) {
898 int32_t result = -1;
899 const uint32_t kBufSize = 32768;
900 uint8_t read_buf[kBufSize];
901 uint8_t write_buf[kBufSize];
902 z_stream zstream;
903 int zerr;
904
905 /*
906 * Initialize the zlib stream struct.
907 */
908 memset(&zstream, 0, sizeof(zstream));
909 zstream.zalloc = Z_NULL;
910 zstream.zfree = Z_NULL;
911 zstream.opaque = Z_NULL;
912 zstream.next_in = NULL;
913 zstream.avail_in = 0;
914 zstream.next_out = (Bytef*) write_buf;
915 zstream.avail_out = kBufSize;
916 zstream.data_type = Z_UNKNOWN;
917
918 /*
919 * Use the undocumented "negative window bits" feature to tell zlib
920 * that there's no zlib header waiting for it.
921 */
922 zerr = inflateInit2(&zstream, -MAX_WBITS);
923 if (zerr != Z_OK) {
924 if (zerr == Z_VERSION_ERROR) {
925 ALOGE("Installed zlib is not compatible with linked version (%s)",
926 ZLIB_VERSION);
927 } else {
928 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
929 }
930
931 return kZlibError;
932 }
933
934 const uint32_t uncompressed_length = entry->uncompressed_length;
935
936 uint32_t compressed_length = entry->compressed_length;
937 uint32_t write_count = 0;
938 do {
939 /* read as much as we can */
940 if (zstream.avail_in == 0) {
941 const ssize_t getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
942 const ssize_t actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize));
943 if (actual != getSize) {
944 ALOGW("Zip: inflate read failed (%d vs %zd)", actual, getSize);
945 result = kIoError;
946 goto z_bail;
947 }
948
949 compressed_length -= getSize;
950
951 zstream.next_in = read_buf;
952 zstream.avail_in = getSize;
953 }
954
955 /* uncompress the data */
956 zerr = inflate(&zstream, Z_NO_FLUSH);
957 if (zerr != Z_OK && zerr != Z_STREAM_END) {
958 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
959 zerr, zstream.next_in, zstream.avail_in,
960 zstream.next_out, zstream.avail_out);
961 result = kZlibError;
962 goto z_bail;
963 }
964
965 /* write when we're full or when we're done */
966 if (zstream.avail_out == 0 ||
967 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
968 const size_t write_size = zstream.next_out - write_buf;
969 // The file might have declared a bogus length.
970 if (write_size + write_count > length) {
971 goto z_bail;
972 }
973 memcpy(begin + write_count, write_buf, write_size);
974 write_count += write_size;
975
976 zstream.next_out = write_buf;
977 zstream.avail_out = kBufSize;
978 }
979 } while (zerr == Z_OK);
980
981 assert(zerr == Z_STREAM_END); /* other errors should've been caught */
982
983 // stream.adler holds the crc32 value for such streams.
984 *crc_out = zstream.adler;
985
986 if (zstream.total_out != uncompressed_length || compressed_length != 0) {
987 ALOGW("Zip: size mismatch on inflated file (%ld vs %zd)",
988 zstream.total_out, uncompressed_length);
989 result = kInconsistentInformation;
990 goto z_bail;
991 }
992
993 result = 0;
994
995z_bail:
996 inflateEnd(&zstream); /* free up any allocated structures */
997
998 return result;
999}
1000
1001int32_t ExtractToMemory(ZipArchiveHandle handle,
1002 ZipEntry* entry, uint8_t* begin, uint32_t size) {
1003 ZipArchive* archive = (ZipArchive*) handle;
1004 const uint16_t method = entry->method;
1005 off64_t data_offset = entry->offset;
1006
1007 if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
1008 ALOGW("Zip: lseek to data at %lld failed", (off64_t) data_offset);
1009 return kIoError;
1010 }
1011
1012 // this should default to kUnknownCompressionMethod.
1013 int32_t return_value = -1;
1014 uint64_t crc = 0;
1015 if (method == kCompressStored) {
1016 return_value = CopyFileToFile(archive->fd, begin, size, &crc);
1017 } else if (method == kCompressDeflated) {
1018 return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
1019 }
1020
1021 if (!return_value && entry->has_data_descriptor) {
1022 return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
1023 if (return_value) {
1024 return return_value;
1025 }
1026 }
1027
1028 // TODO: Fix this check by passing the right flags to inflate2 so that
1029 // it calculates the CRC for us.
1030 if (entry->crc32 != crc && false) {
1031 ALOGW("Zip: crc mismatch: expected %u, was %llu", entry->crc32, crc);
1032 return kInconsistentInformation;
1033 }
1034
1035 return return_value;
1036}
1037
1038int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1039 ZipEntry* entry, int fd) {
1040 const int32_t declared_length = entry->uncompressed_length;
1041
1042 int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length));
1043 if (result == -1) {
1044 ALOGW("Zip: unable to truncate file to %ud", declared_length);
1045 return kIoError;
1046 }
1047
1048 MemMapping mapping;
1049 int32_t error = MapFileSegment(fd, 0, declared_length,
1050 PROT_READ | PROT_WRITE,
1051 MAP_FILE | MAP_SHARED,
1052 &mapping);
1053 if (error) {
1054 return error;
1055 }
1056
1057 error = ExtractToMemory(handle, entry, mapping.addr,
1058 mapping.length);
1059 ReleaseMappedSegment(&mapping);
1060 return error;
1061}
1062
1063const char* ErrorCodeString(int32_t error_code) {
1064 if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1065 return kErrorMessages[error_code * -1];
1066 }
1067
1068 return kErrorMessages[0];
1069}
1070
1071int GetFileDescriptor(const ZipArchiveHandle handle) {
1072 return ((ZipArchive*) handle)->fd;
1073}
1074