libsparse: add sparse_file read and convert tools to use it

Abstract the logic from simg2img into libsparse, and add logic
for reading a regular image into libsparse.  simg2img then
becomes a simple wrapper around libsparse.

img2simg was not actually making the file sparse, it was using
sparse files to create multiple files that could be pieced back
together.  Replace it with a simple wrapper around libsparse.
Its functionality will be replaced by an simg2simg that can
resparse a file into smaller chunks.

Change-Id: I266f70e1c750454183ce46c71a7bb66bbb033a26
diff --git a/libsparse/Android.mk b/libsparse/Android.mk
index d3eeae6..e83ee1c 100644
--- a/libsparse/Android.mk
+++ b/libsparse/Android.mk
@@ -7,7 +7,8 @@
         output_file.c \
         sparse.c \
         sparse_crc32.c \
-        sparse_err.c
+        sparse_err.c \
+        sparse_read.c
 
 include $(CLEAR_VARS)
 
@@ -48,6 +49,7 @@
 	sparse_crc32.c
 LOCAL_MODULE := simg2img
 LOCAL_MODULE_TAGS := debug
+LOCAL_STATIC_LIBRARIES := libsparse libz
 
 include $(BUILD_HOST_EXECUTABLE)
 
@@ -57,6 +59,7 @@
 	sparse_crc32.c
 LOCAL_MODULE := simg2img
 LOCAL_MODULE_TAGS := optional
+LOCAL_STATIC_LIBRARIES := libsparse libz
 
 include $(BUILD_EXECUTABLE)
 
@@ -65,6 +68,7 @@
 LOCAL_SRC_FILES := img2simg.c
 LOCAL_MODULE := img2simg
 LOCAL_MODULE_TAGS := debug
+LOCAL_STATIC_LIBRARIES := libsparse libz
 
 include $(BUILD_HOST_EXECUTABLE)
 
@@ -73,6 +77,7 @@
 LOCAL_SRC_FILES := img2simg.c
 LOCAL_MODULE := img2simg
 LOCAL_MODULE_TAGS := optional
+LOCAL_STATIC_LIBRARIES := libsparse libz
 
 include $(BUILD_EXECUTABLE)
 
diff --git a/libsparse/img2simg.c b/libsparse/img2simg.c
index a1594df..6b1caa5 100644
--- a/libsparse/img2simg.c
+++ b/libsparse/img2simg.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2012 The Android Open Source Project
+ * Copyright (C) 2012 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,314 +14,103 @@
  * limitations under the License.
  */
 
-#define DEFAULT_BLOCK_SIZE	"4K"
-#define DEFAULT_CHUNK_SIZE	"64M"
-#define DEFAULT_SUFFIX		"%03d"
+#define _FILE_OFFSET_BITS 64
+#define _LARGEFILE64_SOURCE 1
 
-#include "sparse_format.h"
-#if 0 /* endian.h is not on all platforms */
-# include <endian.h>
-#else
-  /* For now, just assume we're going to run on little-endian. */
-# define my_htole32(h) (h)
-# define my_htole16(h) (h)
-#endif
-#include <errno.h>
 #include <fcntl.h>
-#include <limits.h>
-#include <stdarg.h>
-#include <stddef.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <unistd.h>
 
-#define COPY_BUF_SIZE (1024*1024)
-static char *copy_buf;
+#include <sparse/sparse.h>
 
-static const char *progname(const char *argv0)
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+#if defined(__APPLE__) && defined(__MACH__)
+#define lseek64 lseek
+#define off64_t off_t
+#endif
+
+void usage()
 {
-    const char *prog_name;
-    if ((prog_name = strrchr(argv0, '/')))
-	return(prog_name + 1);	/* Advance beyond '/'. */
-    return(argv0);		/* No '/' in argv0, use it as is. */
-}
-
-static void error_exit(const char *fmt, ...)
-{
-    va_list ap;
-    va_start(ap, fmt);
-    vfprintf(stderr, fmt, ap);
-    fputc('\n', stderr);
-    va_end(ap);
-
-    exit(EXIT_FAILURE);
-}
-
-static void usage(const char *argv0, const char *error_fmt, ...)
-{
-    fprintf(stderr,
-	    "Usage: %s [OPTIONS] <raw_image_file>\n",
-	    progname(argv0));
-    fprintf(stderr, "The <raw_image_file> will be split into as many sparse\n");
-    fprintf(stderr, "files as needed.  Each sparse file will contain a single\n");
-    fprintf(stderr, "DONT CARE chunk to offset to the correct block and then\n");
-    fprintf(stderr, "a single RAW chunk containing a portion of the data from\n");
-    fprintf(stderr, "the raw image file.  The sparse files will be named by\n");
-    fprintf(stderr, "appending a number to the name of the raw image file.\n");
-    fprintf(stderr, "\n");
-    fprintf(stderr, "OPTIONS (Defaults are enclosed by square brackets):\n");
-    fprintf(stderr, "  -s SUFFIX      Format appended number with SUFFIX [%s]\n",
-	    DEFAULT_SUFFIX);
-    fprintf(stderr, "  -B SIZE        Use a block size of SIZE [%s]\n",
-	    DEFAULT_BLOCK_SIZE);
-    fprintf(stderr, "  -C SIZE        Use a chunk size of SIZE [%s]\n",
-	    DEFAULT_CHUNK_SIZE);
-    fprintf(stderr, "SIZE is a decimal integer that may optionally be\n");
-    fprintf(stderr, "followed by a suffix that specifies a multiplier for\n");
-    fprintf(stderr, "the integer:\n");
-    fprintf(stderr, "       c         1 byte (the default when omitted)\n");
-    fprintf(stderr, "       w         2 bytes\n");
-    fprintf(stderr, "       b         512 bytes\n");
-    fprintf(stderr, "       kB        1000 bytes\n");
-    fprintf(stderr, "       K         1024 bytes\n");
-    fprintf(stderr, "       MB        1000*1000 bytes\n");
-    fprintf(stderr, "       M         1024*1024 bytes\n");
-    fprintf(stderr, "       GB        1000*1000*1000 bytes\n");
-    fprintf(stderr, "       G         1024*1024*1024 bytes\n");
-
-    if (error_fmt && *error_fmt)
-    {
-	fprintf(stderr, "\n");
-	va_list ap;
-	va_start(ap, error_fmt);
-	vfprintf(stderr, error_fmt, ap);
-	va_end(ap);
-	fprintf(stderr, "\n");
-    }
-
-    exit(EXIT_FAILURE);
-}
-
-static void cpy_file(int out_fd, char *out_path, int in_fd, char *in_path,
-		     size_t len)
-{
-    ssize_t s, cpy_len = COPY_BUF_SIZE;
-
-    while (len) {
-	if (len < COPY_BUF_SIZE)
-	    cpy_len = len;
-
-	s = read(in_fd, copy_buf, cpy_len);
-	if (s < 0)
-	    error_exit("\"%s\": %s", in_path, strerror(errno));
-	if (!s)
-	    error_exit("\"%s\": Unexpected EOF", in_path);
-
-	cpy_len = s;
-
-	s = write(out_fd, copy_buf, cpy_len);
-	if (s < 0)
-	    error_exit("\"%s\": %s", out_path, strerror(errno));
-	if (s != cpy_len)
-	    error_exit("\"%s\": Short data write (%lu)", out_path,
-		       (unsigned long)s);
-
-	len -= cpy_len;
-    }
-}
-
-static int parse_size(const char *size_str, size_t *size)
-{
-    static const size_t MAX_SIZE_T = ~(size_t)0;
-    size_t mult;
-    unsigned long long int value;
-    const char *end;
-    errno = 0;
-    value = strtoull(size_str, (char **)&end, 10);
-    if (errno != 0 || end == size_str || value > MAX_SIZE_T)
-	return -1;
-    if (*end == '\0') {
-	*size = value;
-	return 0;
-    }
-    if (!strcmp(end, "c"))
-	mult = 1;
-    else if (!strcmp(end, "w"))
-	mult = 2;
-    else if (!strcmp(end, "b"))
-	mult = 512;
-    else if (!strcmp(end, "kB"))
-	mult = 1000;
-    else if (!strcmp(end, "K"))
-	mult = 1024;
-    else if (!strcmp(end, "MB"))
-	mult = (size_t)1000*1000;
-    else if (!strcmp(end, "M"))
-	mult = (size_t)1024*1024;
-    else if (!strcmp(end, "GB"))
-	mult = (size_t)1000*1000*1000;
-    else if (!strcmp(end, "G"))
-	mult = (size_t)1024*1024*1024;
-    else
-	return -1;
-
-    if (value > MAX_SIZE_T / mult)
-	return -1;
-    *size = value * mult;
-    return 0;
+    fprintf(stderr, "Usage: img2simg <raw_image_file> <sparse_image_file> [<block_size>]\n");
 }
 
 int main(int argc, char *argv[])
 {
-    char *suffix = DEFAULT_SUFFIX;
-    char *block_size_str = DEFAULT_BLOCK_SIZE;
-    char *chunk_size_str = DEFAULT_CHUNK_SIZE;
-    size_t block_size, chunk_size, blocks_per_chunk, to_write;
-    char *in_path, *out_path, *out_fmt;
-    int in_fd, out_fd;
-    struct stat in_st;
-    off_t left_to_write;
-    struct {
-	sparse_header_t sparse_hdr;
-	chunk_header_t dont_care_hdr;
-	chunk_header_t raw_hdr;
-    } file_hdr;
-    unsigned int file_count;
-    ssize_t s;
-    int i;
+	int in;
+	int out;
+	unsigned int i;
+	int ret;
+	struct sparse_file *s;
+	unsigned int block_size = 4096;
+	off64_t len;
 
-    /* Parse the command line. */
-    while ((i = getopt(argc, argv, "s:B:C:")) != -1)
-    {
-	switch (i) {
-	case 's':
-	    suffix = optarg;
-	    break;
-	case 'B':
-	    block_size_str = optarg;
-	    break;
-	case 'C':
-	    chunk_size_str = optarg;
-	    break;
-	default:
-	    usage(argv[0], NULL);
-	    break;
-	}
-    }
-
-    if (parse_size(block_size_str, &block_size))
-	usage(argv[0], "Can not parse \"%s\" as a block size.",
-	      block_size_str);
-    if (block_size % 4096)
-	usage(argv[0], "Block size is not a multiple of 4096.");
-
-    if (parse_size(chunk_size_str, &chunk_size))
-	usage(argv[0], "Can not parse \"%s\" as a chunk size.",
-	      chunk_size_str);
-    if (chunk_size % block_size)
-	usage(argv[0], "Chunk size is not a multiple of the block size.");
-    blocks_per_chunk = chunk_size / block_size;
-
-    if ((argc - optind) != 1)
-	usage(argv[0], "Missing or extra arguments.");
-    in_path = argv[optind];
-
-    /* Open the input file and validate it. */
-    if ((in_fd = open(in_path, O_RDONLY)) < 0)
-	error_exit("open \"%s\": %s", in_path, strerror(errno));
-    if (fstat(in_fd, &in_st))
-	error_exit("fstat \"%s\": %s", in_path, strerror(errno));
-    left_to_write = in_st.st_size;
-    if (left_to_write % block_size)
-	error_exit(
-	    "\"%s\" size (%llu) is not a multiple of the block size (%llu).\n",
-	    in_path,
-	    (unsigned long long)left_to_write, (unsigned long long)block_size);
-
-    /* Get a buffer for copying the chunks. */
-    if ((copy_buf = malloc(COPY_BUF_SIZE)) == 0)
-	error_exit("malloc copy buffer: %s", strerror(errno));
-
-    /* Get a buffer for a sprintf format to form output paths. */
-    if ((out_fmt = malloc(sizeof("%s") + strlen(suffix))) == 0)
-	error_exit("malloc format buffer: %s", strerror(errno));
-    out_fmt[0] = '%';
-    out_fmt[1] = 's';
-    strcpy(out_fmt + 2, suffix);
-
-    /* Get a buffer for an output path. */
-    i = snprintf(copy_buf, COPY_BUF_SIZE, out_fmt, in_path, UINT_MAX);
-    if (i >= COPY_BUF_SIZE)
-	error_exit("Ridulously long suffix: %s", suffix);
-    if ((out_path = malloc(i + 1)) == 0)
-	error_exit("malloc output path buffer: %s", strerror(errno));
-
-    /*
-     * Each file gets a sparse_header, a Don't Care chunk to offset to
-     * where the data belongs and then a Raw chunk with the actual data.
-     */
-    memset((void *)&file_hdr.sparse_hdr, 0, sizeof(file_hdr.sparse_hdr));
-    file_hdr.sparse_hdr.magic = my_htole32(SPARSE_HEADER_MAGIC);
-    file_hdr.sparse_hdr.major_version = my_htole16(1);
-    file_hdr.sparse_hdr.minor_version = my_htole16(0);
-    file_hdr.sparse_hdr.file_hdr_sz = my_htole16(sizeof(sparse_header_t));
-    file_hdr.sparse_hdr.chunk_hdr_sz = my_htole16(sizeof(chunk_header_t));
-    file_hdr.sparse_hdr.blk_sz = my_htole32(block_size);
-    /* The total_blks will be set in the file loop below. */
-    file_hdr.sparse_hdr.total_chunks = my_htole32(2);
-    file_hdr.sparse_hdr.image_checksum = my_htole32(0); /* Typically unused. */
-
-    memset((void *)&file_hdr.dont_care_hdr, 0, sizeof(file_hdr.dont_care_hdr));
-    file_hdr.dont_care_hdr.chunk_type = my_htole16(CHUNK_TYPE_DONT_CARE);
-    /* The Don't Care's chunk_sz will be set in the file loop below. */
-    file_hdr.dont_care_hdr.total_sz = my_htole32(sizeof(chunk_header_t));
-
-    memset((void *)&file_hdr.raw_hdr, 0, sizeof(file_hdr.raw_hdr));
-    file_hdr.raw_hdr.chunk_type = my_htole16(CHUNK_TYPE_RAW);
-    file_hdr.raw_hdr.chunk_sz = my_htole32(blocks_per_chunk);
-    file_hdr.raw_hdr.total_sz = my_htole32(chunk_size + sizeof(chunk_header_t));
-
-    /* Loop through writing chunk_size to each of the output files. */
-    to_write = chunk_size;
-    for (file_count = 1; left_to_write ; file_count++) {
-	/* Fix up the headers on the last block. */
-	if (left_to_write < (off_t)chunk_size) {
-	    to_write = left_to_write;
-	    file_hdr.raw_hdr.chunk_sz = my_htole32(left_to_write / block_size);
-	    file_hdr.raw_hdr.total_sz = my_htole32(left_to_write
-						+ sizeof(chunk_header_t));
+	if (argc < 3 || argc > 4) {
+		usage();
+		exit(-1);
 	}
 
-	/* Form the pathname for this output file and open it. */
-	sprintf(out_path, out_fmt, in_path, file_count);
-	if ((out_fd = creat(out_path, 0666)) < 0)
-	    error_exit("\"%s\": %s", out_path, strerror(errno));
+	if (argc == 4) {
+		block_size = atoi(argv[3]);
+	}
 
-	/* Update and write the headers to this output file. */
-	s = (file_count-1) * blocks_per_chunk;
-	file_hdr.dont_care_hdr.chunk_sz = my_htole32(s);
-	file_hdr.sparse_hdr.total_blks = my_htole32(s
-						+ (to_write / block_size));
-	s = write(out_fd, (void *)&file_hdr, sizeof(file_hdr));
-	if (s < 0)
-	    error_exit("\"%s\": %s", out_path, strerror(errno));
-	if (s != sizeof(file_hdr))
-	    error_exit("\"%s\": Short write (%lu)", out_path, (unsigned long)s);
+	if (block_size < 1024 || block_size % 4 != 0) {
+		usage();
+		exit(-1);
+	}
 
-	/* Copy this chunk from the input file to the output file. */
-	cpy_file(out_fd, out_path, in_fd, in_path, to_write);
+	if (strcmp(argv[1], "-") == 0) {
+		in = STDIN_FILENO;
+	} else {
+		in = open(argv[1], O_RDONLY | O_BINARY);
+		if (in < 0) {
+			fprintf(stderr, "Cannot open input file %s\n", argv[1]);
+			exit(-1);
+		}
+	}
 
-	/* Close this output file and update the amount left to write. */
-	if (close(out_fd))
-	    error_exit("close \"%s\": %s", out_path, strerror(errno));
-	left_to_write -= to_write;
-    }
+	if (strcmp(argv[2], "-") == 0) {
+		out = STDOUT_FILENO;
+	} else {
+		out = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0664);
+		if (out < 0) {
+			fprintf(stderr, "Cannot open output file %s\n", argv[2]);
+			exit(-1);
+		}
+	}
 
-    if (close(in_fd))
-	error_exit("close \"%s\": %s", in_path, strerror(errno));
+	len = lseek64(in, 0, SEEK_END);
+	lseek64(in, 0, SEEK_SET);
 
-    exit(EXIT_SUCCESS);
+	s = sparse_file_new(block_size, len);
+	if (!s) {
+		fprintf(stderr, "Failed to create sparse file\n");
+		exit(-1);
+	}
+
+	sparse_file_verbose(s);
+	ret = sparse_file_read(s, in, false, false);
+	if (ret) {
+		fprintf(stderr, "Failed to read file\n");
+		exit(-1);
+	}
+
+	ret = sparse_file_write(s, out, false, true, false);
+	if (ret) {
+		fprintf(stderr, "Failed to write sparse file\n");
+		exit(-1);
+	}
+
+	close(in);
+	close(out);
+
+	exit(0);
 }
diff --git a/libsparse/include/sparse/sparse.h b/libsparse/include/sparse/sparse.h
index 09a5137..ae54955 100644
--- a/libsparse/include/sparse/sparse.h
+++ b/libsparse/include/sparse/sparse.h
@@ -158,6 +158,55 @@
 		bool crc);
 
 /**
+ * sparse_file_read - read a file into a sparse file cookie
+ *
+ * @s - sparse file cookie
+ * @fd - file descriptor to read from
+ * @sparse - read a file in the Android sparse file format
+ * @crc - verify the crc of a file in the Android sparse file format
+ *
+ * Reads a file into a sparse file cookie.  If sparse is true, the file is
+ * assumed to be in the Android sparse file format.  If sparse is false, the
+ * file will be sparsed by looking for block aligned chunks of all zeros or
+ * another 32 bit value.  If crc is true, the crc of the sparse file will be
+ * verified.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int sparse_file_read(struct sparse_file *s, int fd, bool sparse, bool crc);
+
+/**
+ * sparse_file_import - import an existing sparse file
+ *
+ * @fd - file descriptor to read from
+ * @verbose - print verbose errors while reading the sparse file
+ * @crc - verify the crc of a file in the Android sparse file format
+ *
+ * Reads an existing sparse file into a sparse file cookie, recreating the same
+ * sparse cookie that was used to write it.  If verbose is true, prints verbose
+ * errors when the sparse file is formatted incorrectly.
+ *
+ * Returns a new sparse file cookie on success, NULL on error.
+ */
+struct sparse_file *sparse_file_import(int fd, bool verbose, bool crc);
+
+/**
+ * sparse_file_import_auto - import an existing sparse or normal file
+ *
+ * @fd - file descriptor to read from
+ * @crc - verify the crc of a file in the Android sparse file format
+ *
+ * Reads an existing sparse or normal file into a sparse file cookie.
+ * Attempts to determine if the file is sparse or not by looking for the sparse
+ * file magic number in the first 4 bytes.  If the file is not sparse, the file
+ * will be sparsed by looking for block aligned chunks of all zeros or another
+ * 32 bit value.  If crc is true, the crc of the sparse file will be verified.
+ *
+ * Returns a new sparse file cookie on success, NULL on error.
+ */
+struct sparse_file *sparse_file_import_auto(int fd, bool crc);
+
+/**
  * sparse_file_verbose - set a sparse file cookie to print verbose errors
  *
  * @s - sparse file cookie
diff --git a/libsparse/simg2img.c b/libsparse/simg2img.c
index 486b805..ab35583 100644
--- a/libsparse/simg2img.c
+++ b/libsparse/simg2img.c
@@ -14,194 +14,36 @@
  * limitations under the License.
  */
 
-#define _FILE_OFFSET_BITS 64
-#define _LARGEFILE64_SOURCE 1
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "sparse_defs.h"
-#include "sparse_format.h"
-#include "sparse_crc32.h"
+#include <sparse/sparse.h>
 
 #include <fcntl.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/types.h>
-#include <sys/mman.h>
 #include <unistd.h>
 
-#define COPY_BUF_SIZE (1024*1024)
-u8 *copybuf;
-
-/* This will be malloc'ed with the size of blk_sz from the sparse file header */
-u8* zerobuf;
-
-#define SPARSE_HEADER_MAJOR_VER 1
-#define SPARSE_HEADER_LEN       (sizeof(sparse_header_t))
-#define CHUNK_HEADER_LEN (sizeof(chunk_header_t))
-
 void usage()
 {
   fprintf(stderr, "Usage: simg2img <sparse_image_file> <raw_image_file>\n");
 }
 
-static int read_all(int fd, void *buf, size_t len)
-{
-	size_t total = 0;
-	int ret;
-	char *ptr = buf;
-
-	while (total < len) {
-		ret = read(fd, ptr, len - total);
-
-		if (ret < 0)
-			return ret;
-
-		if (ret == 0)
-			return total;
-
-		ptr += ret;
-		total += ret;
-	}
-
-	return total;
-}
-
-static int write_all(int fd, void *buf, size_t len)
-{
-	size_t total = 0;
-	int ret;
-	char *ptr = buf;
-
-	while (total < len) {
-		ret = write(fd, ptr, len - total);
-
-		if (ret < 0)
-			return ret;
-
-		if (ret == 0)
-			return total;
-
-		ptr += ret;
-		total += ret;
-	}
-
-	return total;
-}
-
-int process_raw_chunk(int in, int out, u32 blocks, u32 blk_sz, u32 *crc32)
-{
-	u64 len = (u64)blocks * blk_sz;
-	int ret;
-	int chunk;
-
-	while (len) {
-		chunk = (len > COPY_BUF_SIZE) ? COPY_BUF_SIZE : len;
-		ret = read_all(in, copybuf, chunk);
-		if (ret != chunk) {
-			fprintf(stderr, "read returned an error copying a raw chunk: %d %d\n",
-					ret, chunk);
-			exit(-1);
-		}
-		*crc32 = sparse_crc32(*crc32, copybuf, chunk);
-		ret = write_all(out, copybuf, chunk);
-		if (ret != chunk) {
-			fprintf(stderr, "write returned an error copying a raw chunk\n");
-			exit(-1);
-		}
-		len -= chunk;
-	}
-
-	return blocks;
-}
-
-
-int process_fill_chunk(int in, int out, u32 blocks, u32 blk_sz, u32 *crc32)
-{
-	u64 len = (u64)blocks * blk_sz;
-	int ret;
-	int chunk;
-	u32 fill_val;
-	u32 *fillbuf;
-	unsigned int i;
-
-	/* Fill copy_buf with the fill value */
-	ret = read_all(in, &fill_val, sizeof(fill_val));
-	fillbuf = (u32 *)copybuf;
-	for (i = 0; i < (COPY_BUF_SIZE / sizeof(fill_val)); i++) {
-		fillbuf[i] = fill_val;
-	}
-
-	while (len) {
-		chunk = (len > COPY_BUF_SIZE) ? COPY_BUF_SIZE : len;
-		*crc32 = sparse_crc32(*crc32, copybuf, chunk);
-		ret = write_all(out, copybuf, chunk);
-		if (ret != chunk) {
-			fprintf(stderr, "write returned an error copying a raw chunk\n");
-			exit(-1);
-		}
-		len -= chunk;
-	}
-
-	return blocks;
-}
-
-int process_skip_chunk(int out, u32 blocks, u32 blk_sz, u32 *crc32)
-{
-	/* len needs to be 64 bits, as the sparse file specifies the skip amount
-	 * as a 32 bit value of blocks.
-	 */
-	u64 len = (u64)blocks * blk_sz;
-
-	lseek64(out, len, SEEK_CUR);
-
-	return blocks;
-}
-
-int process_crc32_chunk(int in, u32 crc32)
-{
-	u32 file_crc32;
-	int ret;
-
-	ret = read_all(in, &file_crc32, 4);
-	if (ret != 4) {
-		fprintf(stderr, "read returned an error copying a crc32 chunk\n");
-		exit(-1);
-	}
-
-	if (file_crc32 != crc32) {
-		fprintf(stderr, "computed crc32 of 0x%8.8x, expected 0x%8.8x\n",
-			 crc32, file_crc32);
-		exit(-1);
-	}
-
-	return 0;
-}
-
 int main(int argc, char *argv[])
 {
 	int in;
 	int out;
 	unsigned int i;
-	sparse_header_t sparse_header;
-	chunk_header_t chunk_header;
-	u32 crc32 = 0;
-	u32 total_blocks = 0;
 	int ret;
+	struct sparse_file *s;
 
 	if (argc != 3) {
 		usage();
 		exit(-1);
 	}
 
-	if ( (copybuf = malloc(COPY_BUF_SIZE)) == 0) {
-		fprintf(stderr, "Cannot malloc copy buf\n");
-		exit(-1);
-	}
-
 	if (strcmp(argv[1], "-") == 0) {
 		in = STDIN_FILENO;
 	} else {
@@ -220,102 +62,16 @@
 		}
 	}
 
-	ret = read_all(in, &sparse_header, sizeof(sparse_header));
-	if (ret != sizeof(sparse_header)) {
-		fprintf(stderr, "Error reading sparse file header\n");
+	s = sparse_file_import(in, true, false);
+	if (!s) {
+		fprintf(stderr, "Failed to read sparse file\n");
 		exit(-1);
 	}
-
-	if (sparse_header.magic != SPARSE_HEADER_MAGIC) {
-		fprintf(stderr, "Bad magic\n");
-		exit(-1);
-	}
-
-	if (sparse_header.major_version != SPARSE_HEADER_MAJOR_VER) {
-		fprintf(stderr, "Unknown major version number\n");
-		exit(-1);
-	}
-
-	if (sparse_header.file_hdr_sz > SPARSE_HEADER_LEN) {
-		/* Skip the remaining bytes in a header that is longer than
-		 * we expected.
-		 */
-		lseek64(in, sparse_header.file_hdr_sz - SPARSE_HEADER_LEN, SEEK_CUR);
-	}
-
-	if ( (zerobuf = malloc(sparse_header.blk_sz)) == 0) {
-		fprintf(stderr, "Cannot malloc zero buf\n");
-		exit(-1);
-	}
-
-	for (i=0; i<sparse_header.total_chunks; i++) {
-		ret = read_all(in, &chunk_header, sizeof(chunk_header));
-		if (ret != sizeof(chunk_header)) {
-			fprintf(stderr, "Error reading chunk header\n");
-			exit(-1);
-		}
-
-		if (sparse_header.chunk_hdr_sz > CHUNK_HEADER_LEN) {
-			/* Skip the remaining bytes in a header that is longer than
-			 * we expected.
-			 */
-			lseek64(in, sparse_header.chunk_hdr_sz - CHUNK_HEADER_LEN, SEEK_CUR);
-		}
-
-		switch (chunk_header.chunk_type) {
-		    case CHUNK_TYPE_RAW:
-			if (chunk_header.total_sz != (sparse_header.chunk_hdr_sz +
-				 (chunk_header.chunk_sz * sparse_header.blk_sz)) ) {
-				fprintf(stderr, "Bogus chunk size for chunk %d, type Raw\n", i);
-				exit(-1);
-			}
-			total_blocks += process_raw_chunk(in, out,
-					 chunk_header.chunk_sz, sparse_header.blk_sz, &crc32);
-			break;
-		    case CHUNK_TYPE_FILL:
-			if (chunk_header.total_sz != (sparse_header.chunk_hdr_sz + sizeof(u32)) ) {
-				fprintf(stderr, "Bogus chunk size for chunk %d, type Fill\n", i);
-				exit(-1);
-			}
-			total_blocks += process_fill_chunk(in, out,
-					 chunk_header.chunk_sz, sparse_header.blk_sz, &crc32);
-			break;
-		    case CHUNK_TYPE_DONT_CARE:
-			if (chunk_header.total_sz != sparse_header.chunk_hdr_sz) {
-				fprintf(stderr, "Bogus chunk size for chunk %d, type Dont Care\n", i);
-				exit(-1);
-			}
-			total_blocks += process_skip_chunk(out,
-					 chunk_header.chunk_sz, sparse_header.blk_sz, &crc32);
-			break;
-		    case CHUNK_TYPE_CRC32:
-			process_crc32_chunk(in, crc32);
-			break;
-		    default:
-			fprintf(stderr, "Unknown chunk type 0x%4.4x\n", chunk_header.chunk_type);
-		}
-
-	}
-
-	/* If the last chunk was a skip, then the code just did a seek, but
-	 * no write, and the file won't actually be the correct size.  This
-	 * will make the file the correct size.  Make sure the offset is
-	 * computed in 64 bits, and the function called can handle 64 bits.
-	 */
-	if (ftruncate64(out, (u64)total_blocks * sparse_header.blk_sz)) {
-		fprintf(stderr, "Error calling ftruncate() to set the image size\n");
-		exit(-1);
-	}
+	ret = sparse_file_write(s, out, false, false, false);
 
 	close(in);
 	close(out);
 
-	if (sparse_header.total_blks != total_blocks) {
-		fprintf(stderr, "Wrote %d blocks, expected to write %d blocks\n",
-			 total_blocks, sparse_header.total_blks);
-		exit(-1);
-	}
-
 	exit(0);
 }
 
diff --git a/libsparse/sparse_read.c b/libsparse/sparse_read.c
new file mode 100644
index 0000000..704bcfa
--- /dev/null
+++ b/libsparse/sparse_read.c
@@ -0,0 +1,509 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#define _FILE_OFFSET_BITS 64
+#define _LARGEFILE64_SOURCE 1
+
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sparse/sparse.h>
+
+#include "sparse_crc32.h"
+#include "sparse_file.h"
+#include "sparse_format.h"
+
+#if defined(__APPLE__) && defined(__MACH__)
+#define lseek64 lseek
+#define off64_t off_t
+#endif
+
+#define SPARSE_HEADER_MAJOR_VER 1
+#define SPARSE_HEADER_LEN       (sizeof(sparse_header_t))
+#define CHUNK_HEADER_LEN (sizeof(chunk_header_t))
+
+#define COPY_BUF_SIZE (1024U*1024U)
+static char *copybuf;
+
+#define min(a, b) \
+	({ typeof(a) _a = (a); typeof(b) _b = (b); (_a < _b) ? _a : _b; })
+
+/* If verbose, print a message for err plus an optional printf-style location. */
+static void verbose_error(bool verbose, int err, const char *fmt, ...)
+{
+	char *s = "";
+	char *at = "";
+	if (fmt) {
+		va_list argp;
+		int size;
+
+		va_start(argp, fmt);
+		size = vsnprintf(NULL, 0, fmt, argp); /* length without the NUL */
+		va_end(argp);
+
+		if (size < 0) {
+			return;
+		}
+
+		at = malloc(size + 1);
+		if (at == NULL) {
+			return;
+		}
+
+		va_start(argp, fmt);
+		vsnprintf(at, size + 1, fmt, argp); /* bound counts the NUL too */
+		va_end(argp);
+		s = " at ";
+	}
+	if (verbose) {
+#ifndef USE_MINGW
+		if (err == -EOVERFLOW) {
+			sparse_print_verbose("EOF while reading file%s%s\n", s, at);
+		} else
+#endif
+		if (err == -EINVAL) {
+			sparse_print_verbose("Invalid sparse file format%s%s\n", s, at);
+		} else if (err == -ENOMEM) {
+			sparse_print_verbose("Failed allocation while reading file%s%s\n",
+					s, at);
+		} else {
+			sparse_print_verbose("Unknown error %d%s%s\n", err, s, at);
+		}
+	}
+	if (fmt) {
+		free(at);
+	}
+}
+
+/* Add a RAW chunk to s via sparse_file_add_fd, then move fd past its data. */
+static int process_raw_chunk(struct sparse_file *s, unsigned int chunk_size,
+		int fd, int64_t offset, unsigned int blocks, unsigned int block,
+		uint32_t *crc32)
+{
+	unsigned int remaining = blocks * s->block_size;
+	int step;
+	int err;
+
+	/* The payload must be exactly `blocks` whole blocks. */
+	if (chunk_size % s->block_size != 0 ||
+			chunk_size / s->block_size != blocks) {
+		return -EINVAL;
+	}
+
+	err = sparse_file_add_fd(s, fd, offset, remaining, block);
+	if (err < 0) {
+		return err;
+	}
+
+	if (!crc32) {
+		/* No checksum requested: the data is skipped, not read. */
+		lseek64(fd, remaining, SEEK_CUR);
+		return 0;
+	}
+
+	/* Checksum requested: stream the payload through copybuf. */
+	for (; remaining; remaining -= step) {
+		step = min(remaining, COPY_BUF_SIZE);
+		err = read_all(fd, copybuf, step);
+		if (err < 0) {
+			return err;
+		}
+		*crc32 = sparse_crc32(*crc32, copybuf, step);
+	}
+	return 0;
+}
+
+/* Add a FILL chunk to s: read its 32-bit value and call sparse_file_add_fill. */
+static int process_fill_chunk(struct sparse_file *s, unsigned int chunk_size,
+		int fd, unsigned int blocks, unsigned int block, uint32_t *crc32)
+{
+	int64_t remaining = (int64_t)blocks * s->block_size;
+	uint32_t *words = (uint32_t *)copybuf;
+	uint32_t fill_val;
+	unsigned int w;
+	int step;
+	int err;
+
+	/* A fill chunk's payload is exactly one 32-bit value. */
+	if (chunk_size != sizeof(fill_val)) {
+		return -EINVAL;
+	}
+
+	err = read_all(fd, &fill_val, sizeof(fill_val));
+	if (err < 0) {
+		return err;
+	}
+
+	err = sparse_file_add_fill(s, fill_val, remaining, block);
+	if (err < 0) {
+		return err;
+	}
+
+	if (!crc32) {
+		return 0;
+	}
+
+	/* Checksum the expanded data: tile copybuf with the value, then hash. */
+	for (w = 0; w < (COPY_BUF_SIZE / sizeof(fill_val)); w++) {
+		words[w] = fill_val;
+	}
+	for (; remaining; remaining -= step) {
+		step = min(remaining, COPY_BUF_SIZE);
+		*crc32 = sparse_crc32(*crc32, copybuf, step);
+	}
+	return 0;
+}
+
+/*
+ * Handle a DONT_CARE chunk: nothing is added to s; when crc32 is non-NULL the
+ * skipped range is checksummed as all-zero bytes.  fd and block are unused.
+ */
+static int process_skip_chunk(struct sparse_file *s, unsigned int chunk_size,
+		int fd, unsigned int blocks, unsigned int block, uint32_t *crc32)
+{
+	int chunk;
+	int64_t len = (int64_t)blocks * s->block_size;
+
+	/* A skip chunk carries no payload after its header. */
+	if (chunk_size != 0) {
+		return -EINVAL;
+	}
+
+	if (crc32) {
+		memset(copybuf, 0, COPY_BUF_SIZE);
+		while (len) {
+			chunk = min(len, COPY_BUF_SIZE);
+			*crc32 = sparse_crc32(*crc32, copybuf, chunk);
+			len -= chunk;
+		}
+	}
+
+	return 0;
+}
+
+/* Read the checksum stored in a CRC32 chunk and compare it against crc32;
+ * returns 0 on a match, -EINVAL on a bad size or mismatch, else read_all's error.
+ */
+static int process_crc32_chunk(int fd, unsigned int chunk_size, uint32_t crc32)
+{
+	uint32_t stored;
+	int err;
+
+	/* The payload is exactly one 32-bit checksum. */
+	if (chunk_size != sizeof(stored)) {
+		return -EINVAL;
+	}
+
+	err = read_all(fd, &stored, sizeof(stored));
+	if (err < 0) {
+		return err;
+	}
+
+	return (stored != crc32) ? -EINVAL : 0;
+}
+
+/* Dispatch one chunk by type; returns its block count (0 for CRC32/unknown) or <0. */
+static int process_chunk(struct sparse_file *s, int fd, off64_t offset,
+		unsigned int chunk_hdr_sz, chunk_header_t *chunk_header,
+		unsigned int cur_block, uint32_t *crc_ptr)
+{
+	unsigned int chunk_data_size = chunk_header->total_sz - chunk_hdr_sz;
+	int ret;
+
+	switch (chunk_header->chunk_type) {
+		case CHUNK_TYPE_RAW:
+			ret = process_raw_chunk(s, chunk_data_size, fd, offset,
+					chunk_header->chunk_sz, cur_block, crc_ptr);
+			if (ret < 0) {
+				verbose_error(s->verbose, ret, "data block at %lld", (long long)offset);
+				return ret;
+			}
+			return chunk_header->chunk_sz;
+		case CHUNK_TYPE_FILL:
+			ret = process_fill_chunk(s, chunk_data_size, fd,
+					chunk_header->chunk_sz, cur_block, crc_ptr);
+			if (ret < 0) {
+				verbose_error(s->verbose, ret, "fill block at %lld", (long long)offset);
+				return ret;
+			}
+			return chunk_header->chunk_sz;
+		case CHUNK_TYPE_DONT_CARE:
+			ret = process_skip_chunk(s, chunk_data_size, fd,
+					chunk_header->chunk_sz, cur_block, crc_ptr);
+			if (ret < 0) {
+				verbose_error(s->verbose, ret, "skip block at %lld", (long long)offset);
+				return ret;
+			}
+			return chunk_header->chunk_sz;
+		case CHUNK_TYPE_CRC32:
+			if (crc_ptr) {
+				ret = process_crc32_chunk(fd, chunk_data_size, *crc_ptr);
+			} else {	/* not verifying: read and discard the stored CRC */
+				ret = (chunk_data_size != sizeof(uint32_t)) ? -EINVAL :
+					read_all(fd, copybuf, sizeof(uint32_t));
+			}
+			if (ret < 0) {
+				verbose_error(s->verbose, ret, "crc block at %lld", (long long)offset);
+				return ret;
+			}
+			return 0;
+		default:
+			verbose_error(s->verbose, -EINVAL, "unknown block %04X at %lld",
+					chunk_header->chunk_type, (long long)offset);
+	}
+	return 0;
+}
+
+/*
+ * Parse an Android sparse image from fd into s.
+ *
+ * @s - sparse file cookie that receives the chunks
+ * @fd - file descriptor positioned at the sparse file header
+ * @crc - when true, chunk handlers are given a CRC32 accumulator
+ *
+ * Validates the file header, hands every chunk to process_chunk and checks
+ * that the chunks cover exactly the block count the header declares.
+ * Returns 0 on success or a negative error value.
+ */
+static int sparse_file_read_sparse(struct sparse_file *s, int fd, bool crc)
+{
+	sparse_header_t sparse_header;
+	chunk_header_t chunk_header;
+	uint32_t crc32 = 0;
+	uint32_t *crc_ptr = crc ? &crc32 : 0;
+	unsigned int cur_block = 0;
+	unsigned int idx;
+	off64_t offset;
+	int ret;
+
+	/* The scratch buffer is allocated once and reused by later calls. */
+	if (!copybuf) {
+		copybuf = malloc(COPY_BUF_SIZE);
+		if (!copybuf) {
+			return -ENOMEM;
+		}
+	}
+
+	ret = read_all(fd, &sparse_header, sizeof(sparse_header));
+	if (ret < 0) {
+		return ret;
+	}
+
+	/* Reject a bad magic, an unknown major version or undersized headers. */
+	if (sparse_header.magic != SPARSE_HEADER_MAGIC ||
+			sparse_header.major_version != SPARSE_HEADER_MAJOR_VER ||
+			sparse_header.file_hdr_sz < SPARSE_HEADER_LEN ||
+			sparse_header.chunk_hdr_sz < sizeof(chunk_header)) {
+		return -EINVAL;
+	}
+
+	if (sparse_header.file_hdr_sz > SPARSE_HEADER_LEN) {
+		/* Skip the remaining bytes in a header that is longer than
+		 * we expected.
+		 */
+		lseek64(fd, sparse_header.file_hdr_sz - SPARSE_HEADER_LEN, SEEK_CUR);
+	}
+
+	for (idx = 0; idx < sparse_header.total_chunks; idx++) {
+		ret = read_all(fd, &chunk_header, sizeof(chunk_header));
+		if (ret < 0) {
+			return ret;
+		}
+
+		if (sparse_header.chunk_hdr_sz > CHUNK_HEADER_LEN) {
+			/* Same for a chunk header that is longer than ours. */
+			lseek64(fd, sparse_header.chunk_hdr_sz - CHUNK_HEADER_LEN, SEEK_CUR);
+		}
+
+		/* fd now sits at the chunk's payload; that offset goes to the handler. */
+		offset = lseek64(fd, 0, SEEK_CUR);
+
+		ret = process_chunk(s, fd, offset, sparse_header.chunk_hdr_sz, &chunk_header,
+				cur_block, crc_ptr);
+		if (ret < 0) {
+			return ret;
+		}
+
+		/* A non-negative result is the number of blocks the chunk covered. */
+		cur_block += ret;
+	}
+
+	/* The chunks must add up to the block count the header declared. */
+	if (sparse_header.total_blks != cur_block) {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Read a raw image from fd into s block by block: a full block of one repeated
+ * 32-bit word is added as a fill, anything else with sparse_file_add_fd. */
+static int sparse_file_read_normal(struct sparse_file *s, int fd)
+{
+	int ret = 0;
+	uint32_t *buf = malloc(s->block_size);
+	unsigned int block = 0;
+	int64_t remain = s->len;
+	int64_t offset = 0;
+	unsigned int to_read;
+	unsigned int i;
+	bool sparse_block;
+
+	if (!buf) {
+		return -ENOMEM;
+	}
+
+	while (remain > 0) {
+		to_read = min(remain, s->block_size);
+		ret = read_all(fd, buf, to_read);
+		if (ret < 0) {
+			error("failed to read sparse file");
+			break;
+		}
+
+		/* Only a full block can be a fill; a short tail is kept as data. */
+		sparse_block = (to_read == s->block_size);
+		for (i = 1; sparse_block && i < s->block_size / sizeof(uint32_t); i++) {
+			sparse_block = (buf[0] == buf[i]);
+		}
+
+		if (sparse_block) {
+			/* TODO: add flag to use skip instead of fill for buf[0] == 0 */
+			ret = sparse_file_add_fill(s, buf[0], to_read, block);
+		} else {
+			ret = sparse_file_add_fd(s, fd, offset, to_read, block);
+		}
+		if (ret < 0) {
+			break;
+		}
+
+		remain -= to_read;
+		offset += to_read;
+		block++;
+	}
+
+	/* buf was only scratch for the scan; release it on every exit path. */
+	free(buf);
+	return (ret < 0) ? ret : 0;
+}
+
+/* Public entry point; see sparse/sparse.h.  CRC checking needs sparse input. */
+int sparse_file_read(struct sparse_file *s, int fd, bool sparse, bool crc)
+{
+	if (!sparse) {
+		/* A raw image carries no checksum to verify. */
+		if (crc) {
+			return -EINVAL;
+		}
+		return sparse_file_read_normal(s, fd);
+	}
+	return sparse_file_read_sparse(s, fd, crc);
+}
+
+/* Documented in sparse/sparse.h; returns NULL on any failure. */
+struct sparse_file *sparse_file_import(int fd, bool verbose, bool crc)
+{
+	sparse_header_t hdr;
+	struct sparse_file *s;
+	int64_t image_len;
+	int err;
+
+	err = read_all(fd, &hdr, sizeof(hdr));
+	if (err < 0) {
+		verbose_error(verbose, err, "header");
+		return NULL;
+	}
+
+	if (hdr.magic != SPARSE_HEADER_MAGIC) {
+		verbose_error(verbose, -EINVAL, "header magic");
+		return NULL;
+	}
+
+	if (hdr.major_version != SPARSE_HEADER_MAJOR_VER) {
+		verbose_error(verbose, -EINVAL, "header major version");
+		return NULL;
+	}
+
+	/* Undersized headers are rejected without a diagnostic. */
+	if (hdr.file_hdr_sz < SPARSE_HEADER_LEN ||
+			hdr.chunk_hdr_sz < sizeof(chunk_header_t)) {
+		return NULL;
+	}
+
+	image_len = (int64_t)hdr.total_blks * hdr.blk_sz;
+	s = sparse_file_new(hdr.blk_sz, image_len);
+	if (!s) {
+		verbose_error(verbose, -EINVAL, NULL);
+		return NULL;
+	}
+
+	/* Seek back to offset 0 so sparse_file_read sees the header again. */
+	err = lseek64(fd, 0, SEEK_SET);
+	if (err < 0) {
+		verbose_error(verbose, err, "seeking");
+		goto fail;
+	}
+
+	s->verbose = verbose;
+	err = sparse_file_read(s, fd, true, crc);
+	if (err < 0) {
+		goto fail;
+	}
+	return s;
+
+fail:
+	sparse_file_destroy(s);
+	return NULL;
+}
+
+/* Documented in sparse/sparse.h.  Tries a sparse import first and falls back
+ * to reading fd as a raw image. */
+struct sparse_file *sparse_file_import_auto(int fd, bool crc)
+{
+	struct sparse_file *s = sparse_file_import(fd, true, crc);
+	int64_t raw_len;
+
+	if (s) {
+		return s;
+	}
+
+	/* Import failed: measure the input and rewind to read it as raw data. */
+	raw_len = lseek64(fd, 0, SEEK_END);
+	if (raw_len < 0) {
+		return NULL;
+	}
+	lseek64(fd, 0, SEEK_SET);
+
+	/* The raw path passes a fixed block size of 4096 to sparse_file_new. */
+	s = sparse_file_new(4096, raw_len);
+	if (!s) {
+		return NULL;
+	}
+
+	if (sparse_file_read_normal(s, fd) < 0) {
+		sparse_file_destroy(s);
+		return NULL;
+	}
+
+	return s;
+}