libsparse: add function to resparse a file and a utility to use it

Add sparse_file_repsarse, which splits chunks in an existing sparse
file such that the maximum size of a chunk, plus a header and footer,
is smaller than the given size.  This will allow multiple smaller
sparse files to result in the same data as a large sparse file.

Change-Id: I177abdb958a23d5afd394ff265c5b0c6a3ff22fa
diff --git a/libsparse/Android.mk b/libsparse/Android.mk
index e83ee1c..69b52c3 100644
--- a/libsparse/Android.mk
+++ b/libsparse/Android.mk
@@ -83,6 +83,15 @@
 
 include $(CLEAR_VARS)
 
+LOCAL_SRC_FILES := simg2simg.c
+LOCAL_MODULE := simg2simg
+LOCAL_MODULE_TAGS := debug
+LOCAL_STATIC_LIBRARIES := libsparse libz
+
+include $(BUILD_HOST_EXECUTABLE)
+
+include $(CLEAR_VARS)
+
 LOCAL_MODULE := simg_dump.py
 LOCAL_MODULE_TAGS := debug
 LOCAL_SRC_FILES := simg_dump.py
diff --git a/libsparse/backed_block.c b/libsparse/backed_block.c
index 629fc28..dfb217b 100644
--- a/libsparse/backed_block.c
+++ b/libsparse/backed_block.c
@@ -141,6 +141,52 @@
 	free(bbl);
 }
 
+void backed_block_list_move(struct backed_block_list *from,
+		struct backed_block_list *to, struct backed_block *start,
+		struct backed_block *end)
+{
+	struct backed_block *bb;
+
+	if (start == NULL) {
+		start = from->data_blocks;
+	}
+
+	if (!end) {
+		for (end = start; end && end->next; end = end->next)
+			;
+	}
+
+	if (start == NULL || end == NULL) {
+		return;
+	}
+
+	from->last_used = NULL;
+	to->last_used = NULL;
+	if (from->data_blocks == start) {
+		from->data_blocks = end->next;
+	} else {
+		for (bb = from->data_blocks; bb; bb = bb->next) {
+			if (bb->next == start) {
+				bb->next = end->next;
+				break;
+			}
+		}
+	}
+
+	if (!to->data_blocks) {
+		to->data_blocks = start;
+		end->next = NULL;
+	} else {
+		for (bb = to->data_blocks; bb; bb = bb->next) {
+			if (!bb->next || bb->next->block > start->block) {
+				end->next = bb->next;
+				bb->next = start;
+				break;
+			}
+		}
+	}
+}
+
 /* may free b */
 static int merge_bb(struct backed_block_list *bbl,
 		struct backed_block *a, struct backed_block *b)
@@ -311,3 +357,44 @@
 
 	return queue_bb(bbl, bb);
 }
+
+int backed_block_split(struct backed_block_list *bbl, struct backed_block *bb,
+		unsigned int max_len)
+{
+	struct backed_block *new_bb;
+
+	max_len = ALIGN_DOWN(max_len, bbl->block_size);
+
+	if (bb->len <= max_len) {
+		return 0;
+	}
+
+	new_bb = malloc(sizeof(struct backed_block));
+	if (bb == NULL) {
+		return -ENOMEM;
+	}
+
+	*new_bb = *bb;
+
+	new_bb->len = bb->len - max_len;
+	new_bb->block = bb->block + max_len / bbl->block_size;
+	new_bb->next = bb->next;
+	bb->next = new_bb;
+	bb->len = max_len;
+
+	switch (bb->type) {
+	case BACKED_BLOCK_DATA:
+		new_bb->data.data = (char *)bb->data.data + max_len;
+		break;
+	case BACKED_BLOCK_FILE:
+		new_bb->file.offset += max_len;
+		break;
+	case BACKED_BLOCK_FD:
+		new_bb->fd.offset += max_len;
+		break;
+	case BACKED_BLOCK_FILL:
+		break;
+	}
+
+	return 0;
+}
diff --git a/libsparse/backed_block.h b/libsparse/backed_block.h
index 6926917..1a159be 100644
--- a/libsparse/backed_block.h
+++ b/libsparse/backed_block.h
@@ -48,6 +48,8 @@
 int64_t backed_block_file_offset(struct backed_block *bb);
 uint32_t backed_block_fill_val(struct backed_block *bb);
 enum backed_block_type backed_block_type(struct backed_block *bb);
+int backed_block_split(struct backed_block_list *bbl, struct backed_block *bb,
+		unsigned int max_len);
 
 struct backed_block *backed_block_iter_new(struct backed_block_list *bbl);
 struct backed_block *backed_block_iter_next(struct backed_block *bb);
@@ -55,4 +57,8 @@
 struct backed_block_list *backed_block_list_new(unsigned int block_size);
 void backed_block_list_destroy(struct backed_block_list *bbl);
 
+void backed_block_list_move(struct backed_block_list *from,
+		struct backed_block_list *to, struct backed_block *start,
+		struct backed_block *end);
+
 #endif
diff --git a/libsparse/include/sparse/sparse.h b/libsparse/include/sparse/sparse.h
index b215227..fe003f6 100644
--- a/libsparse/include/sparse/sparse.h
+++ b/libsparse/include/sparse/sparse.h
@@ -227,6 +227,20 @@
  */
 struct sparse_file *sparse_file_import_auto(int fd, bool crc);
 
+/** sparse_file_resparse - rechunk an existing sparse file into smaller files
+ *
+ * @in_s - sparse file cookie of the existing sparse file
+ * @max_len - maximum file size
+ * @out_s - array of sparse file cookies
+ * @out_s_count - size of out_s array
+ *
+ * Splits chunks of an existing sparse file into smaller sparse files such that
+ * each sparse file is less than max_len.  Returns the number of sparse_files
+ * that would have been written to out_s if out_s were big enough.
+ */
+int sparse_file_resparse(struct sparse_file *in_s, unsigned int max_len,
+		struct sparse_file **out_s, int out_s_count);
+
 /**
  * sparse_file_verbose - set a sparse file cookie to print verbose errors
  *
diff --git a/libsparse/simg2img.c b/libsparse/simg2img.c
index ab35583..95e9b5b 100644
--- a/libsparse/simg2img.c
+++ b/libsparse/simg2img.c
@@ -26,50 +26,62 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
 void usage()
 {
-  fprintf(stderr, "Usage: simg2img <sparse_image_file> <raw_image_file>\n");
+  fprintf(stderr, "Usage: simg2img <sparse_image_files> <raw_image_file>\n");
 }
 
 int main(int argc, char *argv[])
 {
 	int in;
 	int out;
-	unsigned int i;
+	int i;
 	int ret;
 	struct sparse_file *s;
 
-	if (argc != 3) {
+	if (argc < 3) {
 		usage();
 		exit(-1);
 	}
 
-	if (strcmp(argv[1], "-") == 0) {
-		in = STDIN_FILENO;
-	} else {
-		if ((in = open(argv[1], O_RDONLY)) == 0) {
-			fprintf(stderr, "Cannot open input file %s\n", argv[1]);
-			exit(-1);
-		}
-	}
-
-	if (strcmp(argv[2], "-") == 0) {
-		out = STDOUT_FILENO;
-	} else {
-		if ((out = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0666)) == 0) {
-			fprintf(stderr, "Cannot open output file %s\n", argv[2]);
-			exit(-1);
-		}
-	}
-
-	s = sparse_file_import(in, true, false);
-	if (!s) {
-		fprintf(stderr, "Failed to read sparse file\n");
+	out = open(argv[argc - 1], O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0664);
+	if (out < 0) {
+		fprintf(stderr, "Cannot open output file %s\n", argv[argc - 1]);
 		exit(-1);
 	}
-	ret = sparse_file_write(s, out, false, false, false);
 
-	close(in);
+	for (i = 1; i < argc - 1; i++) {
+		if (strcmp(argv[i], "-") == 0) {
+			in = STDIN_FILENO;
+		} else {
+			in = open(argv[i], O_RDONLY | O_BINARY);
+			if (in < 0) {
+				fprintf(stderr, "Cannot open input file %s\n", argv[i]);
+				exit(-1);
+			}
+		}
+
+		s = sparse_file_import(in, true, false);
+		if (!s) {
+			fprintf(stderr, "Failed to read sparse file\n");
+			exit(-1);
+		}
+
+		lseek(out, SEEK_SET, 0);
+
+		ret = sparse_file_write(s, out, false, false, false);
+		if (ret < 0) {
+			fprintf(stderr, "Cannot write output file\n");
+			exit(-1);
+		}
+		sparse_file_destroy(s);
+		close(in);
+	}
+
 	close(out);
 
 	exit(0);
diff --git a/libsparse/simg2simg.c b/libsparse/simg2simg.c
new file mode 100644
index 0000000..5f9ccf6
--- /dev/null
+++ b/libsparse/simg2simg.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _FILE_OFFSET_BITS 64
+#define _LARGEFILE64_SOURCE 1
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <sparse/sparse.h>
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+void usage()
+{
+  fprintf(stderr, "Usage: simg2simg <sparse image file> <sparse_image_file> <max_size>\n");
+}
+
+int main(int argc, char *argv[])
+{
+	int in;
+	int out;
+	int i;
+	int ret;
+	struct sparse_file *s;
+	int64_t max_size;
+	struct sparse_file **out_s;
+	int files;
+	char filename[4096];
+
+	if (argc != 4) {
+		usage();
+		exit(-1);
+	}
+
+	max_size = atoll(argv[3]);
+
+	in = open(argv[1], O_RDONLY | O_BINARY);
+	if (in < 0) {
+		fprintf(stderr, "Cannot open input file %s\n", argv[1]);
+		exit(-1);
+	}
+
+	s = sparse_file_import(in, true, false);
+	if (!s) {
+		fprintf(stderr, "Failed to import sparse file\n");
+		exit(-1);
+	}
+
+	files = sparse_file_resparse(s, max_size, NULL, 0);
+	if (files < 0) {
+		fprintf(stderr, "Failed to resparse\n");
+		exit(-1);
+	}
+
+	out_s = calloc(sizeof(struct sparse_file *), files);
+	if (!out_s) {
+		fprintf(stderr, "Failed to allocate sparse file array\n");
+		exit(-1);
+	}
+
+	files = sparse_file_resparse(s, max_size, out_s, files);
+	if (files < 0) {
+		fprintf(stderr, "Failed to resparse\n");
+		exit(-1);
+	}
+
+	for (i = 0; i < files; i++) {
+		ret = snprintf(filename, sizeof(filename), "%s.%d", argv[2], i);
+		if (ret >= (int)sizeof(filename)) {
+			fprintf(stderr, "Filename too long\n");
+			exit(-1);
+		}
+
+		out = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0664);
+		if (out < 0) {
+			fprintf(stderr, "Cannot open output file %s\n", argv[2]);
+			exit(-1);
+		}
+
+		ret = sparse_file_write(out_s[i], out, false, true, false);
+		if (ret) {
+			fprintf(stderr, "Failed to write sparse file\n");
+			exit(-1);
+		}
+		close(out);
+	}
+
+	close(in);
+
+	exit(0);
+}
diff --git a/libsparse/sparse.c b/libsparse/sparse.c
index c560ec9..77f02fc 100644
--- a/libsparse/sparse.c
+++ b/libsparse/sparse.c
@@ -24,6 +24,7 @@
 #include "output_file.h"
 #include "backed_block.h"
 #include "sparse_defs.h"
+#include "sparse_format.h"
 
 struct sparse_file *sparse_file_new(unsigned int block_size, int64_t len)
 {
@@ -188,6 +189,104 @@
 	return ret;
 }
 
+static int out_counter_write(void *priv, const void *data, int len)
+{
+	int64_t *count = priv;
+	*count += len;
+	return 0;
+}
+
+static struct backed_block *move_chunks_up_to_len(struct sparse_file *from,
+		struct sparse_file *to, unsigned int len)
+{
+	int64_t count = 0;
+	struct output_file *out_counter;
+	struct backed_block *last_bb = NULL;
+	struct backed_block *bb;
+	struct backed_block *start;
+	int64_t file_len = 0;
+
+	/*
+	 * overhead is sparse file header, initial skip chunk, split chunk, end
+	 * skip chunk, and crc chunk.
+	 */
+	int overhead = sizeof(sparse_header_t) + 4 * sizeof(chunk_header_t) +
+			sizeof(uint32_t);
+	len -= overhead;
+
+	start = backed_block_iter_new(from->backed_block_list);
+	out_counter = open_output_callback(out_counter_write, &count,
+			to->block_size, to->len, false, true, 0, false);
+	if (!out_counter) {
+		return NULL;
+	}
+
+	for (bb = start; bb; bb = backed_block_iter_next(bb)) {
+		count = 0;
+		/* will call out_counter_write to update count */
+		sparse_file_write_block(out_counter, bb);
+		if (file_len + count > len) {
+			/*
+			 * If the remaining available size is more than 1/8th of the
+			 * requested size, split the chunk.  Results in sparse files that
+			 * are at least 7/8ths of the requested size
+			 */
+			if (!last_bb || (len - file_len > (len / 8))) {
+				backed_block_split(from->backed_block_list, bb, len - file_len);
+				last_bb = bb;
+			}
+			goto out;
+		}
+		file_len += count;
+		last_bb = bb;
+	}
+
+out:
+	backed_block_list_move(from->backed_block_list,
+		to->backed_block_list, start, last_bb);
+
+	close_output_file(out_counter);
+
+	return bb;
+}
+
+int sparse_file_resparse(struct sparse_file *in_s, unsigned int max_len,
+		struct sparse_file **out_s, int out_s_count)
+{
+	struct backed_block *bb;
+	unsigned int overhead;
+	struct sparse_file *s;
+	struct sparse_file *tmp;
+	int c = 0;
+
+	tmp = sparse_file_new(in_s->block_size, in_s->len);
+	if (!tmp) {
+		return -ENOMEM;
+	}
+
+	do {
+		s = sparse_file_new(in_s->block_size, in_s->len);
+
+		bb = move_chunks_up_to_len(in_s, s, max_len);
+
+		if (c < out_s_count) {
+			out_s[c] = s;
+		} else {
+			backed_block_list_move(s->backed_block_list, tmp->backed_block_list,
+					NULL, NULL);
+			sparse_file_destroy(s);
+		}
+		c++;
+	} while (bb);
+
+	backed_block_list_move(tmp->backed_block_list, in_s->backed_block_list,
+			NULL, NULL);
+
+	sparse_file_destroy(tmp);
+
+	return c;
+}
+
 void sparse_file_verbose(struct sparse_file *s)
 {
 	s->verbose = true;
diff --git a/libsparse/sparse_defs.h b/libsparse/sparse_defs.h
index 9f32d59..b99cfd5 100644
--- a/libsparse/sparse_defs.h
+++ b/libsparse/sparse_defs.h
@@ -41,6 +41,7 @@
 
 #define DIV_ROUND_UP(x, y) (((x) + (y) - 1)/(y))
 #define ALIGN(x, y) ((y) * DIV_ROUND_UP((x), (y)))
+#define ALIGN_DOWN(x, y) ((y) * ((x) / (y)))
 
 #define error(fmt, args...) do { fprintf(stderr, "error: %s: " fmt "\n", __func__, ## args); } while (0)
 #define error_errno(s, args...) error(s ": %s", ##args, strerror(errno))