#!/usr/local/bin/perl -w my $VERSION = '1.00'; my $Usage = "Usage: peg [|-help] []\n"; my ($Dirs_specified, $Eval, $Implicit_C, $No_slurp); my (@Files, %Options, @Perlexpr, @Warnings) = (); my ($After, $Before) = (2, 2); my $Perlexpr = ''; $SIG{'__WARN__'} = sub { push @Warnings, @_; }; process_ARGV(); check_Perlexpr(); $g ||= $s; find_files() if ($d || $r); build_Eval(); show_debug() if $D; reset 'a-z'; eval $Eval; die "\npeg: run time eval error:\n", @Warnings, $@, "\n...when eval'ing:\n$Eval\n...with input:\n$_\n"; sub process_ARGV { my $options = 1; my $context = 'C'; if ($_ = $ENV{'PEG_OPTIONS'}) { unshift @ARGV, (/^-/ ? $_ : "-$_"); } while (defined ($_ = shift @ARGV)) { if ($f) { open(F, "<$_") || die "peg: can't open $_: $!\n"; while () { chomp; push @Perlexpr, $_ unless /^$/; } close F; $f = 0; } elsif ($options && s/^-(?=.)//) { /^help$/ && help(); while (s/^(.)//) { my $opt = $1; if ($opt =~ /^[abcdfghilnoqrstvwxyABCDEFGHLNOPQSTXYZ]$/) { ${$opt} = $Options{$opt} = 1; $context = $opt if ($opt =~ /^[ABC]$/); } elsif ($opt =~ /^\d$/) { while (s/^(\d)//) { $opt = (10 * $opt) + $1; } $After = $opt if ($context ne 'B'); $Before = $opt if ($context ne 'A'); $Implicit_C = 1; } elsif ($opt eq '-') { $options = 0; } elsif ($opt eq 'V') { die "peg v$VERSION (Dec 1999)\n"; } else { die "peg: illegal option -- $opt\n$Usage"; } } } elsif (!@Perlexpr || (($o || $O) && $options)) { push @Perlexpr, $_; } else { push @Files, $_; } } die $Usage unless @Perlexpr; } # process_ARGV sub check_Perlexpr { my $regexp = $G || $Q || $i || $w || $x; foreach (@Perlexpr) { ($Q && !$E) || ($No_slurp ||= /[\^\$]/); next if ($E || !($regexp || /^\w+$/)); $Q ? ($_ = quotemeta($_)) : (s/\//\\\//g); $_ = '\b' . $_ . '\b' if ($w && !$x); $_ = '^' . $_ . '$' if $x; $_ = '/' . $_ . '/'; $_ .= 'i' if $i; } if ($O) { $Perlexpr .= join(",\n\t", map({"(\$Match$_ ||= (" . $Perlexpr[$_] . "))"} (0..$#Perlexpr)), ('(' . join(' && ', map {"\$Match$_"} (0 .. $#Perlexpr)) . ')')); } else { $Perlexpr = join("\n\t|| ", map {"($_)"} @Perlexpr); } $Perlexpr = 'not (' . $Perlexpr . ')' if $v; local ($a, $b, $c, $d, $f, $g, $h, $i, $l, $n, $o, $q, $r, $s, $t, $v, $w, $x, $y, $A, $B, $C, $D, $E, $F, $G, $H, $L, $N, $O, $P, $Q, $S, $T, $X, $Y, $Z); eval "\$_ = ''; if ($Perlexpr) {}"; die "peg: error in Perl expression: $Perlexpr\n", @Warnings, $@ if $@; } # check_Perlexpr sub find_files { if ($d && @Files) { my ($start_dir, $dir, @dirs, @files); foreach (@Files) { (-d $_) ? push @dirs, $_ : push @files, $_; } if ($Dirs_specified = @dirs) { @Files = @files; require Cwd; $start_dir = Cwd::cwd() || die "peg: cannot determine current directory\n"; foreach $dir (@dirs) { chdir($dir) || (($s || print STDERR "peg: can't chdir to $dir: $!\n"), next); find($dir); chdir($start_dir) || die "peg: can't chdir back to starting directory $start_dir: $!\n"; } } } find('.') if $r; if (!@Files && ($r || ($d && $Dirs_specified)) && !$X) { print STDERR "peg: no files found\n" if !$s; exit(1); } } # find_files sub find { my $cwd = shift; my (@f, $f, $ff); opendir(DIR, '.') || (($g || print STDERR "peg: can't opendir $cwd: $!\n"), return); @f = readdir DIR; closedir DIR; foreach $f (@f) { next if ($f eq '.' || $f eq '..'); $ff = "$cwd/$f"; lstat $f; if (-d _) { chdir($f) || (($g || print STDERR "peg: can't chdir to $ff: $!\n"), next); find($ff); chdir('..') || die "peg: can't chdir back to .. from $ff: $!\n"; } else { push @Files, $ff; } } } # find sub help { system("perldoc peg") && die "\npeg: perldoc: $?\n"; exit; } # help sub show_debug { print "peg: Warnings =>\n", @Warnings, "\n" if @Warnings; print "peg: Options => ", sort(keys %Options), (($_ = $ENV{'PEG_OPTIONS'}) ? " (PEG_OPTIONS = $_)" : ''), "\n\n"; print "peg: Files =>\n", (map {"\t$_\n"} @Files), "\n"; print "peg: Perl code =>\n$Eval\n"; exit; } # show_debug sub build_Eval { my ($context, $gap, $nonmatch_print, $output, $print, $reset, @my_vars); if ($O) { $l = 1; $A = $B = $C = $Implicit_C = $c = $L = $q = $Z = 0; } $No_slurp = @Files = ('-') if (!@Files && !$X); $No_slurp ||= $x; $C = 1 if ($Implicit_C && !($A || $B)); $A = $B = 1 if $C; $context = $A || $B || $C; $c = $l = $L = $q = $S = $Z = 0 if $context; $h = 1 if (@Files <= 1 && !(($d && $Dirs_specified) || $r || $X)); $reset = 1 if (((@Files > 1) || $X) && $Perlexpr =~ /[\$\@\%][a-z]/ && !$x); $h = 0 if $H; if ($c || $l || $L || $O || $q || $Z) { $a = 1; $b = $N = $S = $T = 0; } $y = 1 if (($l || $L || $q) && !$No_slurp); $N = 0 if $T; $F = 0 if ($F && $Perlexpr !~ /\bF\b/); $P = 0 if ($P && $Perlexpr !~ /\bP\b/); $a = 1 if $S; if ($c) { $L = $q = $Z = 0; } elsif ($L) { $q = $Z = 0; } elsif ($l) { $q = $Z = 0; $output = '"$File\n"'; $t = 1; } elsif ($Z) { $q = 0; } elsif ($q) {} else { $output = ''; $output = "\$Offset:" if $b; $output = "\$.:$output" if $n; $output = "\$File:$output" if !$h; $output = "\"$output\$_\"" if $output; } if (defined $output) { $print = 'print' . ($output ? " $output" : '') . ';'; $print .= ' last;' if ($t && !$context); } if ($context) { $output ||= '$_'; $gap = ($A ? $After : 0) + ($B ? $Before : 0); ($nonmatch_print = $print) =~ s/:/-/g; $output =~ s/:/-/g; $Perlexpr = "\$First_match && ($Perlexpr)" if $t; } @my_vars = (($context ? '$After' : ()), ($B ? '@Before' : ()), ($a ? () : '$Binary_file'), ($c ? '$Count' : ()), ($F ? '@F' : ()), ($context ? '$First_match' : ()), '$File', ($L ? '$Found' : ()), ($b ? '$Length' : ()), ($O ? (map {"\$Match$_"} (0..$#Perlexpr)): ()), ($context ? '$Matched' : ()), ($b ? '$Offset' : ()), ($P ? ('$P', '@P') : ()), ($Z ? '$Z' : ())); $Eval = ''; $Eval .= "while () { chomp; push \@Files, \$_; }\n" . "\@Files || (" . ($s ? '' : '(print STDERR "peg: no files found\n"), ') . "exit(1));\n" if $X; $Eval .= "\$| = 1;\n" if !$q; $Eval .= ($y ? "undef \$/;\n" : ($Y ? "\$/ = '';\n" : '')); $Eval .= "my \$Exit_code = 1;\n" if !$q; $Eval .= 'my (' . join(", ", @my_vars) . ");\n"; $Eval .= "foreach \$File (\@Files) {\n"; $Eval .= " open(FILE, \"<\$File\")"; $Eval .= $s ? " || next;\n" : "\n || ((print STDERR \"peg: can't open \$File: \$!\\n\"), next);\n"; $Eval .= " \$After = $After;\n" if $A; $Eval .= " \@Before = ();\n" if $B; $Eval .= " \$Binary_file = -B FILE;\n" if !$a; $Eval .= " \$Count = 0;\n" if $c; $Eval .= " \$Found = 0;\n" if $L; $Eval .= " \$Offset = 0;\n" if $b; $Eval .= " \$First_match = 1;\n" if $context; $Eval .= ' ' . join(" = ", map {"\$Match$_"} (0..$#Perlexpr)) . " = 0;\n" if $O; $Eval .= " \@P = ();\n" if $P; $Eval .= " \$Z = '';\n" if $Z; $Eval .= " while () {\n"; $Eval .= " \$P = \$_;\n" if $P; $Eval .= " \$Length = length;\n" if $b; $Eval .= " \@F = split;\n" if $F; $Eval .= " shift \@Before if (\@Before > $Before);\n" if $B; $Eval .= " study;\n" if (@Perlexpr > 5); $Eval .= " if ($Perlexpr) {\n" if !$S; $Eval .= ' ' . ($q ? 'exit(0)' : '$Exit_code = 0') . ";\n"; $Eval .= ' $Binary_file && ((print "Binary file $File matches\n"), last);' . "\n" if !$a; $Eval .= " $Perlexpr;\n" if $S; $Eval .= " chomp; \$_ .= \"\\n\";\n" if $N; $Eval .= " chomp; \$_ .= ' ';\n" if $T; $Eval .= " ++\$Count;\n" if $c; $Eval .= " \$Found = 1;\n last;\n" if $L; $Eval .= " print \"--\\n\" if (\$Matched++ && (\$First_match || (\$After > $gap)));\n" if $context; $Eval .= " print \@Before;\n" if $B; $Eval .= " $print\n" if $print; $Eval .= " \$After = 0;\n" if $A; $Eval .= " \@Before = ();\n" if $B; $Eval .= " \$First_match = 0;\n" if $context; $Eval .= " }\n" if !$S; $Eval .= " elsif (++\$After <= $After) {\n $nonmatch_print\n }\n" if $A; $Eval .= " else {\n" if ($B || ($context && $t)); $Eval .= " ++\$After;\n" if (!$A && $B); $Eval .= " push \@Before, $output;\n" if $B; $Eval .= " last if !\$First_match;\n" if ($context && $t); $Eval .= " }\n" if ($B || ($context && $t)); $Eval .= " \$Offset += \$Length;\n" if $b; $Eval .= " push \@P, \$P;\n" if $P; $Eval .= " }\n"; $Eval .= ' print "\n";' . "\n" if $T; $Eval .= ' print "' . ($h ? '' : '$File:') . '$Count\n";' . "\n" if $c; $Eval .= " chomp \$Z;\n print \"" . ($h ? '' : '$File:') . "\$Z\\n\";\n" if $Z; $Eval .= ' if (!$Found) { print "$File\n"; }' . "\n" if $L; $Eval .= " reset 'a-z';\n" if $reset; $Eval .= " close FILE;\n}\n"; $Eval .= 'exit(' . ($q ? '1' : '$Exit_code') . ");\n"; } # build_Eval __END__ =head1 NAME peg - Perl expression grep =head1 SYNOPSIS peg [|-help] [] =head1 DESCRIPTION Peg is a grep(1) clone. It uses a Perl expressions to match lines from a list of input files, or standard input if none specified. Internally, peg eval's code that resembles the following pseudo-Perl: foreach $File ( ) { open(FILE, "<$File"); while () { if ( ) { print; } } } Thus, each input line is available as the Perl variable C<$_>, and this will be printed if is true. In particular, to match lines according to a Perl regular expression pattern, it is necessary to place it within the pattern matching operator, which defaults to searching C<$_>. eg% peg '/\bneedle\b/i' haystack Note that can be any Perl expression, and is not limited just to regular expressions. =head1 OPTIONS The options include equivalents to most of those of standard grep(1), including the GNU extensions. They can be grouped anywhere in the argument list (except after '--'), and can also be set via the environment variable "PEG_OPTIONS". If less than two files specified, then B<-h> is assumed. Selection and interpretation of : =over 4 =item B<-E> Overrides B<-G> & B<-Q>. Assume is a Perl expression (this is the default behavior). =item B<-G> Assume is a Perl regular expression pattern to be matched. This option is implicit if any of B<-i>, B<-w>, B<-x> are used, or if matches /^\w+$/ (ie. is entirely alphanumeric). Thus, "peg foo bar" is equivalent to "peg '/foo/' bar". =item B<-Q> Overrides B<-G>. Assume is a fixed literal string to be matched. Thus, C<"peg -Q 'fo+' bar"> is equivalent to "peg '/fo\+/' bar". =item B<-f> The following argument is a file containing further 's. (Note, this is the only option that takes an argument). Lines will be adjudged to match if they match any of the 's. =item B<-o> Arguments following the B<-o> option up until '--' are interpreted as further 's. Lines will be adjudged to match if they match any of the 's. For example, C<"peg -o foo bar baz -- file"> is equivalent to C<"peg '/foo/ or /bar/ or /baz/' file">. =item B<-O> This option is similar to B<-ol>, except each must match independently. As with B<-o>, arguments following the B<-O> option up until '--' are interpreted as further 's. =item B<-i> Enables B<-G>. Ignore case distinctions. =item B<-v> Negates the sense of . =item B<-w> Enables B<-G>. Force to match only whole words. =item B<-x> Enables B<-G>. Force to match only whole lines. =back File selection: =over 4 =item B<-d> Any directories listed in the argument list will be searched recursively for files to work upon. =item B<-r> Work upon all files in and beneath the current directory. =item B<-X> Interpret STDIN as a stream of filenames to process. It provides a builtin B facility. (See example 6). =back Basic output control: =over 4 =item B<-a> Do not suppress binary output. The default behavior for when a match occurs on a binary file is to print "Binary file matches". =item B<-A> B<-B> B<-C> B<-NUM> These options specify that matching lines should be shown with lines of surrounding I. B<-A> shows lines of trailing (I) context; B<-B> shows lines of leading (I) context; B<-C> shows both leading and trailing context. B<-NUM> sets the number of lines of context for the most recently specified context option (the default is 2) or assumes B<-C> if none specified. That is, B<-B1A3> specifies one line of leading context and three lines of trailing context. =item B<-b> Print the byte offset within the input file. =item B<-c> Print only a count of the input lines that match . =item B<-h> Suppress filenames being printed when searching multiple files. =item B<-H> Print the filename for each match. =item B<-l> Print only the names of files which match at least once. =item B<-L> Print only the names of files which don't match anywhere. =item B<-n> Print the input line number. =item B<-t> Print only the first match in any one file. =back Peg specials: =over 4 =item B<-D> Prints out the internal Perl code that would otherwise be eval'ed. =item B<-F> Provide an array @F which is the result of a split applied to the input line. =item B<-N> Ensure each printed line ends in a newline. This is only necessary if leaves C<$_> without a trailing newline. (See example 2). =item B<-P> Provide an array @P of the input up until that point. $P[-1] is the previous line. This provides a mechanism to allow matches to be made over consecutive lines. (See example 4). =item B<-S> Always print the input line. This enables stream editing with s///. =item B<-T> Print each file's output on one single line, with each line separated by a single whitespace. =item B<-y> Treat each file as a single line. =item B<-Y> Treat paragraphs (text delimited by blank lines) as single lines. =item B<-Z> Print the value of $Z at EOF. (See example 5). =back Miscellaneous: =over 4 =item B<-g> Suppress the error messages about unreadable directories outputted when either B<-d> or B<-r> is used. =item B<-q> Write nothing to STDOUT. Exit 0 if a match is found, else exit 1. =item B<-s> Suppress all error messages about unreadable files and directories. =item B<-V> Display peg's version number and exit. =item B<--> Explicitly end options. Allows filenames beginning with a -. Also used by the B<-o> and B<-O> options to determine which arguments are 's and which are files. =back =head1 EXAMPLES 1. Search recursively for all VHDL constant declarations: % peg -r '/^\s*constant\s.*:=/i' 2. Find the instance names of CTS buffers in a verilog netlist: % peg -N '/^\s*CTS\w*\s+(\w+)\s*\(/ and $_ = $1' foo.v 3. Extract the entity declaration section from a VHDL file: % peg 's/\s*--.*$//, /^\s*entity\b/i .. /^\s*end\b/i' bar.vhd 4. Search for the sequence A,B,C split over 3 consecutive lines: % peg -PB2n '$P[-2]=~/A/ and $P[-1]=~/B/ and /C/' bam 5. Sum up the entries in the last column of a file: % peg -ZF '$Z += $F[-1]' report.txt 6. Search for "main" in C files below the current directory. % find . -name "*.c" | peg -Xw main =head1 ENVIRONMENT The environment variable PEG_OPTIONS can be used to set options. =head1 EXIT STATUS The following exit values are returned: 0 one or more matches were found 1 no matches were found >1 peg did not complete normally =head1 SCRIPT CATEGORIES Search =head1 README This script is yet another Perl grep(1). =head1 SEE ALSO L, L, L. =head1 AUTHOR Alex Davies =head1 COPYRIGHT Copyright (c) 1999 Alex Davies. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut