blob: 86b765820fff4ffd049b471dba74d6d5641e5db4 [file] [log] [blame]
Jeff Sharkey3e8b1582012-07-13 16:37:13 -07001/* $NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $ */
2/* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */
3/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
4
5/*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#if HAVE_NBTOOL_CONFIG_H
34#include "nbtool_config.h"
35#endif
36
37#include <sys/cdefs.h>
38__RCSID("$NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $");
39
40#include <sys/param.h>
41#include <sys/types.h>
42#include <sys/stat.h>
43
44#ifndef ANDROID
45#include <bzlib.h>
46#endif
47#include <err.h>
48#include <errno.h>
49#include <fcntl.h>
50#include <stddef.h>
51#include <stdlib.h>
52#include <string.h>
53#include <unistd.h>
54#include <wchar.h>
55#include <wctype.h>
56#ifndef ANDROID
57#include <zlib.h>
58#endif
59
60#include "grep.h"
61
62#define MAXBUFSIZ (32 * 1024)
63#define LNBUFBUMP 80
64
65#ifndef ANDROID
66static gzFile gzbufdesc;
67static BZFILE* bzbufdesc;
68#endif
69
70static unsigned char buffer[MAXBUFSIZ];
71static unsigned char *bufpos;
72static size_t bufrem;
73
74static unsigned char *lnbuf;
75static size_t lnbuflen;
76
77static inline int
78grep_refill(struct file *f)
79{
80 ssize_t nr;
81 int bzerr;
82
83 bufpos = buffer;
84 bufrem = 0;
85
86#ifndef ANDROID
87 if (filebehave == FILE_GZIP)
88 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
89 else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
90 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
91 switch (bzerr) {
92 case BZ_OK:
93 case BZ_STREAM_END:
94 /* No problem, nr will be okay */
95 break;
96 case BZ_DATA_ERROR_MAGIC:
97 /*
98 * As opposed to gzread(), which simply returns the
99 * plain file data, if it is not in the correct
100 * compressed format, BZ2_bzRead() instead aborts.
101 *
102 * So, just restart at the beginning of the file again,
103 * and use plain reads from now on.
104 */
105 BZ2_bzReadClose(&bzerr, bzbufdesc);
106 bzbufdesc = NULL;
107 if (lseek(f->fd, 0, SEEK_SET) == -1)
108 return (-1);
109 nr = read(f->fd, buffer, MAXBUFSIZ);
110 break;
111 default:
112 /* Make sure we exit with an error */
113 nr = -1;
114 }
115 } else
116#endif
117 nr = read(f->fd, buffer, MAXBUFSIZ);
118
119 if (nr < 0)
120 return (-1);
121
122 bufrem = nr;
123 return (0);
124}
125
126static inline int
127grep_lnbufgrow(size_t newlen)
128{
129
130 if (lnbuflen < newlen) {
131 lnbuf = grep_realloc(lnbuf, newlen);
132 lnbuflen = newlen;
133 }
134
135 return (0);
136}
137
138char *
139grep_fgetln(struct file *f, size_t *lenp)
140{
141 unsigned char *p;
142 char *ret;
143 size_t len;
144 size_t off;
145 ptrdiff_t diff;
146
147 /* Fill the buffer, if necessary */
148 if (bufrem == 0 && grep_refill(f) != 0)
149 goto error;
150
151 if (bufrem == 0) {
152 /* Return zero length to indicate EOF */
153 *lenp = 0;
154 return ((char *)bufpos);
155 }
156
157 /* Look for a newline in the remaining part of the buffer */
158 if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
159 ++p; /* advance over newline */
160 ret = (char *)bufpos;
161 len = p - bufpos;
162 bufrem -= len;
163 bufpos = p;
164 *lenp = len;
165 return (ret);
166 }
167
168 /* We have to copy the current buffered data to the line buffer */
169 for (len = bufrem, off = 0; ; len += bufrem) {
170 /* Make sure there is room for more data */
171 if (grep_lnbufgrow(len + LNBUFBUMP))
172 goto error;
173 memcpy(lnbuf + off, bufpos, len - off);
174 off = len;
175 if (grep_refill(f) != 0)
176 goto error;
177 if (bufrem == 0)
178 /* EOF: return partial line */
179 break;
180 if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
181 continue;
182 /* got it: finish up the line (like code above) */
183 ++p;
184 diff = p - bufpos;
185 len += diff;
186 if (grep_lnbufgrow(len))
187 goto error;
188 memcpy(lnbuf + off, bufpos, diff);
189 bufrem -= diff;
190 bufpos = p;
191 break;
192 }
193 *lenp = len;
194 return ((char *)lnbuf);
195
196error:
197 *lenp = 0;
198 return (NULL);
199}
200
201static inline struct file *
202grep_file_init(struct file *f)
203{
204
205#ifndef ANDROID
206 if (filebehave == FILE_GZIP &&
207 (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
208 goto error;
209
210 if (filebehave == FILE_BZIP &&
211 (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
212 goto error;
213#endif
214
215 /* Fill read buffer, also catches errors early */
216 if (grep_refill(f) != 0)
217 goto error;
218
219 /* Check for binary stuff, if necessary */
220 if (!nulldataflag && binbehave != BINFILE_TEXT &&
221 memchr(bufpos, '\0', bufrem) != NULL)
222 f->binary = true;
223
224 return (f);
225error:
226 close(f->fd);
227 free(f);
228 return (NULL);
229}
230
231/*
232 * Opens a file for processing.
233 */
234struct file *
235grep_open(const char *path)
236{
237 struct file *f;
238
239 f = grep_malloc(sizeof *f);
240 memset(f, 0, sizeof *f);
241 if (path == NULL) {
242 /* Processing stdin implies --line-buffered. */
243 lbflag = true;
244 f->fd = STDIN_FILENO;
245 } else if ((f->fd = open(path, O_RDONLY)) == -1) {
246 free(f);
247 return (NULL);
248 }
249
250 return (grep_file_init(f));
251}
252
253/*
254 * Closes a file.
255 */
256void
257grep_close(struct file *f)
258{
259
260 close(f->fd);
261
262 /* Reset read buffer and line buffer */
263 bufpos = buffer;
264 bufrem = 0;
265
266 free(lnbuf);
267 lnbuf = NULL;
268 lnbuflen = 0;
269}