blob: b5bb2ef808254c855c71bde52c4f9260b11efb26 [file] [log] [blame]
Jeff Sharkey3e8b1582012-07-13 16:37:13 -07001/* $NetBSD: grep.c,v 1.11 2012/05/06 22:27:00 joerg Exp $ */
2/* $FreeBSD: head/usr.bin/grep/grep.c 211519 2010-08-19 22:55:17Z delphij $ */
3/* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
4
5/*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#if HAVE_NBTOOL_CONFIG_H
33#include "nbtool_config.h"
34#endif
35
36#include <sys/cdefs.h>
37__RCSID("$NetBSD: grep.c,v 1.11 2012/05/06 22:27:00 joerg Exp $");
38
39#include <sys/stat.h>
40#include <sys/types.h>
41
42#include <ctype.h>
43#include <err.h>
44#include <errno.h>
45#include <getopt.h>
46#include <limits.h>
47#include <libgen.h>
48#include <locale.h>
49#include <stdbool.h>
50#include <stdio.h>
51#include <stdlib.h>
52#include <string.h>
53#include <unistd.h>
54
55#include "grep.h"
56
57#ifndef WITHOUT_NLS
58#include <nl_types.h>
59nl_catd catalog;
60#endif
61
/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.  The array index is the message number; entry 0 is an
 * empty placeholder and entries 4 through 7 together form the usage text.
 */
const char *errstr[] = {
	[0] = "",
	[1] = "(standard input)",
	[2] = "cannot read bzip2 compressed file",
	[3] = "unknown %s option",
	[4] = "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A num] [-B num] [-C[num]]\n",
	[5] = "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
	[6] = "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
	[7] = "\t[pattern] [file ...]\n",
	[8] = "Binary file %s matches\n",
	[9] = "%s (BSD grep) %s\n",
};
78
79/* Flags passed to regcomp() and regexec() */
80int cflags = 0;
81int eflags = REG_STARTEND;
82
83/* Searching patterns */
84unsigned int patterns, pattern_sz;
85char **pattern;
86regex_t *r_pattern;
87fastgrep_t *fg_pattern;
88
89/* Filename exclusion/inclusion patterns */
90unsigned int fpatterns, fpattern_sz;
91unsigned int dpatterns, dpattern_sz;
92struct epat *dpattern, *fpattern;
93
94/* For regex errors */
95char re_error[RE_ERROR_BUF + 1];
96
97/* Command-line flags */
98unsigned long long Aflag; /* -A x: print x lines trailing each match */
99unsigned long long Bflag; /* -B x: print x lines leading each match */
100bool Hflag; /* -H: always print file name */
101bool Lflag; /* -L: only show names of files with no matches */
102bool bflag; /* -b: show block numbers for each match */
103bool cflag; /* -c: only show a count of matching lines */
104bool hflag; /* -h: don't print filename headers */
105bool iflag; /* -i: ignore case */
106bool lflag; /* -l: only show names of files with matches */
107bool mflag; /* -m x: stop reading the files after x matches */
108unsigned long long mcount; /* count for -m */
109bool nflag; /* -n: show line numbers in front of matching lines */
110bool oflag; /* -o: print only matching part */
111bool qflag; /* -q: quiet mode (don't output anything) */
112bool sflag; /* -s: silent mode (ignore errors) */
113bool vflag; /* -v: only show non-matching lines */
114bool wflag; /* -w: pattern must start and end on word boundaries */
115bool xflag; /* -x: pattern must match entire line */
116bool lbflag; /* --line-buffered */
117bool nullflag; /* --null */
118bool nulldataflag; /* --null-data */
119unsigned char line_sep = '\n'; /* 0 for --null-data */
120char *label; /* --label */
121const char *color; /* --color */
122int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
123int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
124int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
125int devbehave = DEV_READ; /* -D: handling of devices */
126int dirbehave = DIR_READ; /* -dRr: handling of directories */
127int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
128
129bool dexclude, dinclude; /* --exclude-dir and --include-dir */
130bool fexclude, finclude; /* --exclude and --include */
131
/*
 * getopt_long() return values for options that only exist in long form.
 * Numbering starts just above CHAR_MAX so that no value can collide with
 * a short option character.
 */
enum {
	BIN_OPT = CHAR_MAX + 1,		/* --binary-files */
	COLOR_OPT,			/* --color, --colour */
	DECOMPRESS_OPT,			/* --decompress */
	HELP_OPT,			/* --help */
	MMAP_OPT,			/* --mmap */
	LINEBUF_OPT,			/* --line-buffered */
	LABEL_OPT,			/* --label */
	R_EXCLUDE_OPT,			/* --exclude */
	R_INCLUDE_OPT,			/* --include */
	R_DEXCLUDE_OPT,			/* --exclude-dir */
	R_DINCLUDE_OPT			/* --include-dir */
};
145
146static inline const char *init_color(const char *);
147
148/* Housekeeping */
149int tail; /* lines left to print */
150bool notfound; /* file not found */
151
152extern char *__progname;
153
154/*
155 * Prints usage information and returns 2.
156 */
157__dead static void
158usage(void)
159{
160 fprintf(stderr, getstr(4), __progname);
161 fprintf(stderr, "%s", getstr(5));
162 fprintf(stderr, "%s", getstr(5));
163 fprintf(stderr, "%s", getstr(6));
164 fprintf(stderr, "%s", getstr(7));
165 exit(2);
166}
167
168static const char optstr[] =
169 "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxyz";
170
171struct option long_options[] =
172{
173 {"binary-files", required_argument, NULL, BIN_OPT},
174 {"decompress", no_argument, NULL, DECOMPRESS_OPT},
175 {"help", no_argument, NULL, HELP_OPT},
176 {"mmap", no_argument, NULL, MMAP_OPT},
177 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
178 {"label", required_argument, NULL, LABEL_OPT},
179 {"color", optional_argument, NULL, COLOR_OPT},
180 {"colour", optional_argument, NULL, COLOR_OPT},
181 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
182 {"include", required_argument, NULL, R_INCLUDE_OPT},
183 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
184 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
185 {"after-context", required_argument, NULL, 'A'},
186 {"text", no_argument, NULL, 'a'},
187 {"before-context", required_argument, NULL, 'B'},
188 {"byte-offset", no_argument, NULL, 'b'},
189 {"context", optional_argument, NULL, 'C'},
190 {"count", no_argument, NULL, 'c'},
191 {"devices", required_argument, NULL, 'D'},
192 {"directories", required_argument, NULL, 'd'},
193 {"extended-regexp", no_argument, NULL, 'E'},
194 {"regexp", required_argument, NULL, 'e'},
195 {"fixed-strings", no_argument, NULL, 'F'},
196 {"file", required_argument, NULL, 'f'},
197 {"basic-regexp", no_argument, NULL, 'G'},
198 {"no-filename", no_argument, NULL, 'h'},
199 {"with-filename", no_argument, NULL, 'H'},
200 {"ignore-case", no_argument, NULL, 'i'},
201 {"bz2decompress", no_argument, NULL, 'J'},
202 {"files-with-matches", no_argument, NULL, 'l'},
203 {"files-without-match", no_argument, NULL, 'L'},
204 {"max-count", required_argument, NULL, 'm'},
205 {"line-number", no_argument, NULL, 'n'},
206 {"only-matching", no_argument, NULL, 'o'},
207 {"quiet", no_argument, NULL, 'q'},
208 {"silent", no_argument, NULL, 'q'},
209 {"recursive", no_argument, NULL, 'r'},
210 {"no-messages", no_argument, NULL, 's'},
211 {"binary", no_argument, NULL, 'U'},
212 {"unix-byte-offsets", no_argument, NULL, 'u'},
213 {"invert-match", no_argument, NULL, 'v'},
214 {"version", no_argument, NULL, 'V'},
215 {"word-regexp", no_argument, NULL, 'w'},
216 {"line-regexp", no_argument, NULL, 'x'},
217 {"null", no_argument, NULL, 'Z'},
218 {"null-data", no_argument, NULL, 'z'},
219 {NULL, no_argument, NULL, 0}
220};
221
222/*
223 * Adds a searching pattern to the internal array.
224 */
225static void
226add_pattern(char *pat, size_t len)
227{
228
229 /* TODO: Check for empty patterns and shortcut */
230
231 /* Increase size if necessary */
232 if (patterns == pattern_sz) {
233 pattern_sz *= 2;
234 pattern = grep_realloc(pattern, ++pattern_sz *
235 sizeof(*pattern));
236 }
237 if (len > 0 && pat[len - 1] == '\n')
238 --len;
239 /* pat may not be NUL-terminated */
240 pattern[patterns] = grep_malloc(len + 1);
241 memcpy(pattern[patterns], pat, len);
242 pattern[patterns][len] = '\0';
243 ++patterns;
244}
245
246/*
247 * Adds a file include/exclude pattern to the internal array.
248 */
249static void
250add_fpattern(const char *pat, int mode)
251{
252
253 /* Increase size if necessary */
254 if (fpatterns == fpattern_sz) {
255 fpattern_sz *= 2;
256 fpattern = grep_realloc(fpattern, ++fpattern_sz *
257 sizeof(struct epat));
258 }
259 fpattern[fpatterns].pat = grep_strdup(pat);
260 fpattern[fpatterns].mode = mode;
261 ++fpatterns;
262}
263
264/*
265 * Adds a directory include/exclude pattern to the internal array.
266 */
267static void
268add_dpattern(const char *pat, int mode)
269{
270
271 /* Increase size if necessary */
272 if (dpatterns == dpattern_sz) {
273 dpattern_sz *= 2;
274 dpattern = grep_realloc(dpattern, ++dpattern_sz *
275 sizeof(struct epat));
276 }
277 dpattern[dpatterns].pat = grep_strdup(pat);
278 dpattern[dpatterns].mode = mode;
279 ++dpatterns;
280}
281
/*
 * Reads searching patterns from the file fn, one per line, and adds each
 * of them with add_pattern().  A line consisting only of a newline adds
 * an empty pattern.
 *
 * Exits with status 2 if the file cannot be opened or if a read error is
 * flagged on the stream.
 */
static void
read_patterns(const char *fn)
{
	FILE *f;
	char *line;
	size_t len;

	if ((f = fopen(fn, "r")) == NULL)
		err(2, "%s", fn);
	line = NULL;
	len = 0;
#ifndef ANDROID
	{
		ssize_t rlen;

		while ((rlen = getline(&line, &len, f)) != -1)
			add_pattern(line, *line == '\n' ? 0 : (size_t)rlen);
	}
#else
	{
		/*
		 * getline() is not used in ANDROID builds (presumably because
		 * the target libc did not provide it -- confirm).  Rather than
		 * silently reading no patterns at all, assemble each line by
		 * hand in a growing buffer and hand it to add_pattern() just
		 * as the getline() loop does.
		 */
		size_t used = 0;
		int ch;

		while ((ch = getc(f)) != EOF) {
			if (used == len) {
				char *grown;

				len = (len == 0) ? 128 : len * 2;
				if ((grown = realloc(line, len)) == NULL)
					err(2, "realloc");
				line = grown;
			}
			line[used++] = (char)ch;
			if (ch == '\n') {
				add_pattern(line, *line == '\n' ? 0 : used);
				used = 0;
			}
		}
		/* Last line of the file had no terminating newline */
		if (used > 0 && !ferror(f))
			add_pattern(line, used);
	}
#endif
	/* Report a read error before free() gets a chance to touch errno */
	if (ferror(f))
		err(2, "%s", fn);
	free(line);
	fclose(f);
}
306
/*
 * Selects the colour string to use for highlighting.
 *
 * Returns the value of the GREP_COLOR environment variable when it is
 * set to a non-empty string, otherwise the caller supplied default d.
 * An empty GREP_COLOR is treated like an unset one so that it cannot
 * replace the default with an empty colour specification.
 */
static inline const char *
init_color(const char *d)
{
	const char *env;

	env = getenv("GREP_COLOR");
	if (env == NULL || *env == '\0')
		return (d);
	return (env);
}
315
316int
317grep_main(int argc, char *argv[])
318{
319 char **aargv, **eargv, *eopts;
320 char *ep;
321 unsigned long long l;
322 unsigned int aargc, eargc, i, j;
323 int c, lastc, needpattern, newarg, prevoptind;
324
325 setlocale(LC_ALL, "");
326
327#ifndef WITHOUT_NLS
328 catalog = catopen("grep", NL_CAT_LOCALE);
329#endif
330
331 /* Check what is the program name of the binary. In this
332 way we can have all the funcionalities in one binary
333 without the need of scripting and using ugly hacks. */
334 switch (__progname[0]) {
335 case 'e':
336 grepbehave = GREP_EXTENDED;
337 break;
338 case 'f':
339 grepbehave = GREP_FIXED;
340 break;
341 case 'g':
342 grepbehave = GREP_BASIC;
343 break;
344 case 'z':
345 filebehave = FILE_GZIP;
346 switch(__progname[1]) {
347 case 'e':
348 grepbehave = GREP_EXTENDED;
349 break;
350 case 'f':
351 grepbehave = GREP_FIXED;
352 break;
353 case 'g':
354 grepbehave = GREP_BASIC;
355 break;
356 }
357 break;
358 }
359
360 lastc = '\0';
361 newarg = 1;
362 prevoptind = 1;
363 needpattern = 1;
364
365 eopts = getenv("GREP_OPTIONS");
366
367 /* support for extra arguments in GREP_OPTIONS */
368 eargc = 0;
369 if (eopts != NULL) {
370 char *str;
371
372 /* make an estimation of how many extra arguments we have */
373 for (j = 0; j < strlen(eopts); j++)
374 if (eopts[j] == ' ')
375 eargc++;
376
377 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
378
379 eargc = 0;
380 /* parse extra arguments */
381 while ((str = strsep(&eopts, " ")) != NULL)
382 eargv[eargc++] = grep_strdup(str);
383
384 aargv = (char **)grep_calloc(eargc + argc + 1,
385 sizeof(char *));
386
387 aargv[0] = argv[0];
388 for (i = 0; i < eargc; i++)
389 aargv[i + 1] = eargv[i];
390 for (j = 1; j < (unsigned int)argc; j++, i++)
391 aargv[i + 1] = argv[j];
392
393 aargc = eargc + argc;
394 } else {
395 aargv = argv;
396 aargc = argc;
397 }
398
399 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
400 -1)) {
401 switch (c) {
402 case '0': case '1': case '2': case '3': case '4':
403 case '5': case '6': case '7': case '8': case '9':
404 if (newarg || !isdigit(lastc))
405 Aflag = 0;
406 else if (Aflag > LLONG_MAX / 10) {
407 errno = ERANGE;
408 err(2, NULL);
409 }
410 Aflag = Bflag = (Aflag * 10) + (c - '0');
411 break;
412 case 'C':
413 if (optarg == NULL) {
414 Aflag = Bflag = 2;
415 break;
416 }
417 /* FALLTHROUGH */
418 case 'A':
419 /* FALLTHROUGH */
420 case 'B':
421 errno = 0;
422 l = strtoull(optarg, &ep, 10);
423 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
424 ((errno == EINVAL) && (l == 0)))
425 err(2, NULL);
426 else if (ep[0] != '\0') {
427 errno = EINVAL;
428 err(2, NULL);
429 }
430 if (c == 'A')
431 Aflag = l;
432 else if (c == 'B')
433 Bflag = l;
434 else
435 Aflag = Bflag = l;
436 break;
437 case 'a':
438 binbehave = BINFILE_TEXT;
439 break;
440 case 'b':
441 bflag = true;
442 break;
443 case 'c':
444 cflag = true;
445 break;
446 case 'D':
447 if (strcasecmp(optarg, "skip") == 0)
448 devbehave = DEV_SKIP;
449 else if (strcasecmp(optarg, "read") == 0)
450 devbehave = DEV_READ;
451 else
452 errx(2, getstr(3), "--devices");
453 break;
454 case 'd':
455 if (strcasecmp("recurse", optarg) == 0) {
456 Hflag = true;
457 dirbehave = DIR_RECURSE;
458 } else if (strcasecmp("skip", optarg) == 0)
459 dirbehave = DIR_SKIP;
460 else if (strcasecmp("read", optarg) == 0)
461 dirbehave = DIR_READ;
462 else
463 errx(2, getstr(3), "--directories");
464 break;
465 case 'E':
466 grepbehave = GREP_EXTENDED;
467 break;
468 case 'e':
469 add_pattern(optarg, strlen(optarg));
470 needpattern = 0;
471 break;
472 case 'F':
473 grepbehave = GREP_FIXED;
474 break;
475 case 'f':
476 read_patterns(optarg);
477 needpattern = 0;
478 break;
479 case 'G':
480 grepbehave = GREP_BASIC;
481 break;
482 case 'H':
483 Hflag = true;
484 break;
485 case 'h':
486 Hflag = false;
487 hflag = true;
488 break;
489 case 'I':
490 binbehave = BINFILE_SKIP;
491 break;
492 case 'i':
493 case 'y':
494 iflag = true;
495 cflags |= REG_ICASE;
496 break;
497 case 'J':
498 filebehave = FILE_BZIP;
499 break;
500 case 'L':
501 lflag = false;
502 Lflag = true;
503 break;
504 case 'l':
505 Lflag = false;
506 lflag = true;
507 break;
508 case 'm':
509 mflag = true;
510 errno = 0;
511 mcount = strtoull(optarg, &ep, 10);
512 if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
513 ((errno == EINVAL) && (mcount == 0)))
514 err(2, NULL);
515 else if (ep[0] != '\0') {
516 errno = EINVAL;
517 err(2, NULL);
518 }
519 break;
520 case 'n':
521 nflag = true;
522 break;
523 case 'O':
524 linkbehave = LINK_EXPLICIT;
525 break;
526 case 'o':
527 oflag = true;
528 break;
529 case 'p':
530 linkbehave = LINK_SKIP;
531 break;
532 case 'q':
533 qflag = true;
534 break;
535 case 'S':
536 linkbehave = LINK_READ;
537 break;
538 case 'R':
539 case 'r':
540 dirbehave = DIR_RECURSE;
541 Hflag = true;
542 break;
543 case 's':
544 sflag = true;
545 break;
546 case 'U':
547 binbehave = BINFILE_BIN;
548 break;
549 case 'u':
550 case MMAP_OPT:
551 /* noop, compatibility */
552 break;
553 case 'V':
554 printf(getstr(9), __progname, VERSION);
555 exit(0);
556 case 'v':
557 vflag = true;
558 break;
559 case 'w':
560 wflag = true;
561 break;
562 case 'x':
563 xflag = true;
564 break;
565 case 'Z':
566 nullflag = true;
567 break;
568 case 'z':
569 nulldataflag = true;
570 line_sep = '\0';
571 break;
572 case BIN_OPT:
573 if (strcasecmp("binary", optarg) == 0)
574 binbehave = BINFILE_BIN;
575 else if (strcasecmp("without-match", optarg) == 0)
576 binbehave = BINFILE_SKIP;
577 else if (strcasecmp("text", optarg) == 0)
578 binbehave = BINFILE_TEXT;
579 else
580 errx(2, getstr(3), "--binary-files");
581 break;
582 case COLOR_OPT:
583 color = NULL;
584 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
585 strcasecmp("tty", optarg) == 0 ||
586 strcasecmp("if-tty", optarg) == 0) {
587 char *term;
588
589 term = getenv("TERM");
590 if (isatty(STDOUT_FILENO) && term != NULL &&
591 strcasecmp(term, "dumb") != 0)
592 color = init_color("01;31");
593 } else if (strcasecmp("always", optarg) == 0 ||
594 strcasecmp("yes", optarg) == 0 ||
595 strcasecmp("force", optarg) == 0) {
596 color = init_color("01;31");
597 } else if (strcasecmp("never", optarg) != 0 &&
598 strcasecmp("none", optarg) != 0 &&
599 strcasecmp("no", optarg) != 0)
600 errx(2, getstr(3), "--color");
601 break;
602 case DECOMPRESS_OPT:
603 filebehave = FILE_GZIP;
604 break;
605 case LABEL_OPT:
606 label = optarg;
607 break;
608 case LINEBUF_OPT:
609 lbflag = true;
610 break;
611 case R_INCLUDE_OPT:
612 finclude = true;
613 add_fpattern(optarg, INCL_PAT);
614 break;
615 case R_EXCLUDE_OPT:
616 fexclude = true;
617 add_fpattern(optarg, EXCL_PAT);
618 break;
619 case R_DINCLUDE_OPT:
620 dinclude = true;
621 add_dpattern(optarg, INCL_PAT);
622 break;
623 case R_DEXCLUDE_OPT:
624 dexclude = true;
625 add_dpattern(optarg, EXCL_PAT);
626 break;
627 case HELP_OPT:
628 default:
629 usage();
630 }
631 lastc = c;
632 newarg = optind != prevoptind;
633 prevoptind = optind;
634 }
635 aargc -= optind;
636 aargv += optind;
637
638 /* Fail if we don't have any pattern */
639 if (aargc == 0 && needpattern)
640 usage();
641
642 /* Process patterns from command line */
643 if (aargc != 0 && needpattern) {
644 add_pattern(*aargv, strlen(*aargv));
645 --aargc;
646 ++aargv;
647 }
648
649 switch (grepbehave) {
650 case GREP_FIXED:
651 case GREP_BASIC:
652 break;
653 case GREP_EXTENDED:
654 cflags |= REG_EXTENDED;
655 break;
656 default:
657 /* NOTREACHED */
658 usage();
659 }
660
661 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
662 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
663/*
664 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
665 * Optimizations should be done there.
666 */
667 /* Check if cheating is allowed (always is for fgrep). */
668 if (grepbehave == GREP_FIXED) {
669 for (i = 0; i < patterns; ++i)
670 fgrepcomp(&fg_pattern[i], pattern[i]);
671 } else {
672 for (i = 0; i < patterns; ++i) {
673 if (fastcomp(&fg_pattern[i], pattern[i])) {
674 /* Fall back to full regex library */
675 c = regcomp(&r_pattern[i], pattern[i], cflags);
676 if (c != 0) {
677 regerror(c, &r_pattern[i], re_error,
678 RE_ERROR_BUF);
679 errx(2, "%s", re_error);
680 }
681 }
682 }
683 }
684
685 if (lbflag)
686 setlinebuf(stdout);
687
688 if ((aargc == 0 || aargc == 1) && !Hflag)
689 hflag = true;
690
691 if (aargc == 0)
692 exit(!procfile("-"));
693
694 if (dirbehave == DIR_RECURSE)
695 c = grep_tree(aargv);
696 else
697 for (c = 0; aargc--; ++aargv) {
698 if ((finclude || fexclude) && !file_matching(*aargv))
699 continue;
700 c+= procfile(*aargv);
701 }
702
703#ifndef WITHOUT_NLS
704 catclose(catalog);
705#endif
706
707 /* Find out the correct return value according to the
708 results and the command line option. */
709 exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));
710}