#!/usr/local/bin/perl
# GPL(C) Mohsin Ahmed, http://www.cs.albany.edu/~mosh
# super grep: find PATTERN in FILES, much more than egrep.
#
# Overview of the script:
#   1. Parse leading -options from @ARGV (they set plain package globals).
#   2. Take PATTERN from an option (-pat:, -eval, ready-made patterns) or
#      from the next command-line argument.
#   3. -eval mode : print every line for which the perl expression is true.
#      -fast mode : bare "print if m/pat/" loop with a file:line prefix.
#      otherwise  : full-featured loop (counting, -absent, binary dumps,
#                   match highlighting, quickfix output, per-file totals).
#
# The script intentionally runs without "use strict": all state lives in
# package globals that are set by the option loop and read further down.

# Non-empty when running under MS-Windows (selects the blob character).
$windows = $ENV{ 'windir' } || $ENV{'WINDIR'};

$USAGE='
USAGE: grep OPTIONS PATTERN FILES
SYNOPSIS: find PATTERN in FILES, much more than egrep.
AUTHOR: GPL(C) Mohsin Ahmed, http://www.cs.albany.edu/~mosh
OPTIONS:
  -findfirst   find only the first occurrence in each file.
  -count       count number of lines that match, shows only the last match.
  -color       Use colors on xterm.
  -count15     Shows only if matches >= 15.
  -absent      find files where pattern is absent, ie. count==0.
  -para        para mode (instead of line).
  -file        Slurp whole file as single line.
  -text        Search only text files.
  -binary      Treat input files as binary.
               Input binary pattern with -pat (see below)
  -bindump:02x Output format for control chars in hex,
               use 03o for octal, 03d for decimal.
  -pat:PAT     PAT with control chars to grep,
               Eg. -pat:\\C-a\\001 (two C-a, \\threedecimaldigits not octal).
  -skip:PAT    grep only lines not matching PAT,
               ie. grep -v PAT file | grep ...
  -quote       quote all meta-chars, so treat the PATTERN literally.
  -word        treat pattern as a separate word, PAT => \bPAT\b.
  -show        Show the match clearly.
  -Show        Show only the match, not the whole line.
  -quickfix    show filename:line for each match, for vim -q .. quickfix mode.
  -fast        Same as print -ne "print if m/pat/ FILES"
  -terse       Do not show filename:line too many times.
  -eg          Examples, other ready made patterns, advanced options.
  -case        case sensitive, auto if PATTERN has UPPER case letters.
               Use -case to make a lower case PATTERN case-sensitive.
  -i           ignore case, default.
  -v           skip matching lines.
  -V           verbose, default is quiet, (-V -V to see all files).
  -? -h        Help
NOTES:
  -i -v are for compatibility with grep.
  Long lines are wrapped, eg. in dumping binary files.
  PAT is Perl5 Regexp are better than gnu grep -iE,
';

$EXAMPLES = '
ADVANCED:
  -eval"f"     print line if eval( f ); where f is ANY perl expression.
               Stacking: -eval"f1" -eval"f2" == -eval"(f1 && (f2))"
               slow, since eval is done for each line.
READY MADE PATTERNS:
  -repeated    Locate repeated repeated words.
  -ana         A/An vowel/consonants mismatch, Eg. a apple, an rose.
  -functions   C functions().
  -hexwords    Hex chars [abcdef] only., eg deadbeef baddad.
  -8.3         Finds invalid dos filenames, eg. dir/s/b | grep -8.3
  -sptab       Finds trailing space/tabs, for makefiles.
EXAMPLES:
  grep WORD *.c        - Search for WORD in all c files.
  grep -count WORD *   - find how many times WORD appears in files.
  grep -absent WORD *  - find files where WORD doesnot appear.
  grep -eval"m/mosh/io || length()>80" *.c # find mosh or long lines.
  grep -eval"m/mosh/" -eval"-s($ARGV)>5000" *.bat # find mosh in big files.
BINARY DATA GREP:
  mgrep pattern can contain control chars in decimal \\ddd (aka vim)
  and \\C-a .. \\C-z (aka emacs).
  -bindump:03d for vim style decimal dump, which you can paste back in search pat.
  > vim +/C-vddd binfile , do _ga_ to see decimal value of char under cursor.
  > emacs binfile C-sC-qddd, do "C-x=" to see all about char under cursor.
  > xxd binfile (-b for binary, -g for xxd help).
';

$block = 'line';    # unit of matching shown in the banner: line, para or file.

# No binary outputs except TAB (\ci) and LF (\cj) and blob (\333),
# -color uses ESC, so skip \033 below
# see ./ascii -table 0 40
# $controlchars = '[\000-\ch\ck-\032\034-\c_\c?-\377]';
$controlchars = '[\000-\ch\ck-\c_\c?-\377]';

$skip     = 0;          # -v: select the lines that do NOT match.
$quickfix = 1;          # file:line: prefix on every hit (default on).
$verbose  = 0;
$nocase   = '(?i)';     # Default is nocase.

# ---------------------------------------------------------------------
# Option parsing.  Options are matched by prefix, so the ORDER of the
# branches matters (e.g. -bindump: must be tested before -binary).
# "--" ends the options; anything not starting with "-" ends them too.
# ---------------------------------------------------------------------
while( $_ = $ARGV[0], /^-/ ){
    shift;
    if( m/^--$/ ){
        last;
    }elsif( m/^-[?h]$/ ){           # help
        print $USAGE;
        exit;
    }elsif( m/^-eg/ ){              # examples
        print $EXAMPLES;
        exit;
    }elsif( m/^-skip:(.+)$/ ){      # grep -skip:xyz files
        $notpat = $1;
        $mode .= " -skip lines=~m/$notpat/";
    }elsif( m/^-pat:(.+)$/ ){       # grep -pat:xyz files
        $pat = $1;
        $unwrapinput++;
        $mode .= " -matching/$pat/";
    }elsif( m/^-color/ ){           # color
        $mode .= " $_";
        $color++;
    }elsif( m/^-case/ ){            # case sensitive.
        $mode .= " $_";
        $nocase = '';
    }elsif( m/^-i/ ){               # nocase.
        $mode .= " -icase";
        $nocase = '(?i)';
    }elsif( m/^-functions/ ){       # Function pattern
        $mode .= " $_";
        $pat = '^\w\w\w.*\(';
    }elsif( m/^-findfirst/ ){       # findfirst
        $mode .= " $_";
        $findfirst++;
    }elsif( m/^-fast/ ){            # fast.
        $mode .= " $_";
        $fast = 1;
    }elsif( m/^-terse/ ){           # terse.
        $mode .= " $_";
        $quickfix = 0;
    }elsif( m/^-quickfix/ ){        # quickfix.
        $mode .= " $_";
        $quickfix++;
    }elsif( m/^-count(\d*)/ ){      # count_matches.
        $mode .= " $_";
        $count_matches = $1 || 1;   # -countN: report only if >= N hits.
    }elsif( m/^-absent/ ){          # absent
        $mode .= " $_";
        $count_matches++;           # suppresses per-line output.
        $absent++;
    }elsif( m/^-para/ ){            # para mode.
        $mode .= " $_";
        $block = 'para';
        $/ = "";
        # NOTE: the historical "$* = 0" is gone; $* is a compile-time
        # fatal error since Perl 5.30 and 0 was its default anyway.
    }elsif( m/^-text/ ){            # text mode.
        $mode .= " $_";
        $text = 1;
    }elsif( m/^-bindump:(.+)/ ){    # bindump mode.
        $mode .= " $_";
        $bindump = $1;              # printf conversion, e.g. 02x / 03o / 03d.
    }elsif( m/^-binary/ ){          # binary slurp mode.
        $mode .= " $_";
        $binary = 1;                # also updated later.
        undef( $/ );
    }elsif( m/^-file/ ){            # slurp mode.
        $mode .= " $_";
        $block = 'file';
        undef( $/ );
    }elsif( m/^-quote/ ){           # quote(pattern)
        $mode .= " $_";
        $quote++;
    }elsif( m/^-show/ ){            # show
        $mode .= " $_";
        $show_blob++;
    }elsif( m/^-Show/ ){            # Show
        $mode .= " $_";
        $show_only_match++;
    }elsif( m/^-v/ ){               # skip
        $skip=1;
        $mode .= ' -skip';
    }elsif( m/^-V/ ){               # verbose.
        $verbose++;
        $mode .= " -verbose=$verbose";
    }elsif( m/^-word/ ){            # word delimited.
        $mode .= " $_";
        $word_delimit++;
    }elsif( m/^-eval(.+)/ ){        # eval func, can stack them.
        $func++;
        if( $pat ){
            $pat = "$pat && ($1)";
        }else{
            $pat = $1;
        }
    }elsif( m/^-hexwords/ ){        # Words from hex digits.
        $mode .= " $_";
        $pat = '\b[a-fA-F]{3,}\b';
    }elsif( m/^-sptab/ ){           # space tab problems
        $mode .= " $_";
        $pat = ' +\t|[ \t]+$';
        $show_blob++;
    }elsif( m/^-repeated/ ){        # repeated repeated words
        $mode .= " $_";
        $pat = "\\b(\\w+)\\s+\\1\\b";
    }elsif( m/^-ana/ ){             # an grammar error.
        $mode .= " -ana";
        $pat = "(\\ba\\W+[aeiou])|" .
               "(\\ban\\W+[b-df-hj-np-tv-z])" ;
    }elsif( m/^-8\.3/ ){
        $mode .= " -8.3";
        $pat = "\\w{9,}\\.|\\.\\w{4,}|[^/\\\\]{13,}\\n";
    }else{
        die "Invalid option: $_, see -? and -eg for help.\n";
    }
}

# Markers wrapped around each match by -show: $fancy before, $fance after.
if( $show_blob ){
    $fancy = '@';                   # the blob character.
    $fancy = "\333" if $windows;
    $fance = $fancy;
}

# With -color on an xterm the markers become ANSI colour escapes.
# They are only ever emitted by the -show substitution further down.
if( $color and ($ENV{ "TERM" } =~ /xterm/i ) ){
    $fancy = "\e[31m\e[47m";        # fRed bGrey (for spaces also).
    $fance = "\e[39m\e[49m";        # Normal.
}

# PATTERN comes from an option, else from the next argument.
$pat = shift unless $pat;
die "Need pat, see -? for help\n" unless $pat;

if( $unwrapinput ){ # from rgrep unwrapinput()
    # Convert for \C-a..\C-z \000..\177.
    $pat =~ s|\\(\d\d\d)|pack("c",$1)|eg;                       # \ddd is DECIMAL.
    $pat =~ s|\\C-([a-z])|pack("c",(1+ord($1)-ord('a')))|eg;    # \C-a => chr(1) ..
    $pat =~ s|\\r|\r|g;
    $pat =~ s|\\n|\n|g;
    $pat =~ s|\\j|\j|g;
}

# ---------------------------------------------------------------------
# First mode -eval: $pat is a perl expression evaluated for every line.
# ---------------------------------------------------------------------
if( $func ){
    print STDERR "$0 -eval( $pat )\n";
    while( <> ){
        printf("%s:%-4d: %s",$ARGV,$.,$_) if eval( $pat );
        close(ARGV) if eof;         # restart $. for the next file.
    }
    die "\n";
}

# ---------------------------------------------------------------------
# Second mode: regexp matching.
# Case-insensitivity is dropped automatically when the pattern (ignoring
# backslash escapes) contains an upper case letter, or no lower case
# letter at all.
# ---------------------------------------------------------------------
if( $nocase ){
    my $tpat = $pat;
    $tpat =~ s/\\.//g;              # Otherwise /xyz\S/i becomes /xyz\S/
    $nocase = '' if $tpat =~ m/[A-Z]/;
    $nocase = '' if $tpat !~ m/[a-z]/;
}

$pat = quotemeta( $pat )      if $quote;
$pat = $nocase . $pat         if $nocase;
$pat = '\b' . $pat . '\b'     if $word_delimit;

# Banner on STDERR; the file list (or a newline) is appended below.
print STDERR "mgrep$mode $block=~m/$pat/go ";

# Now figure out what files to grep or stdin
if( @ARGV ){
    my @nonfiles = grep( ! -f, @ARGV );
    my @nontexts;
    @ARGV = grep( -f, @ARGV );
    if( $text > 0 ){
        @nontexts = grep( ! -T, @ARGV );
        @ARGV     = grep( -T, @ARGV );
    }
    $numfiles = @ARGV;
    print STDERR "'$ARGV[0]'";
    print STDERR " .. '$ARGV[-1]' ($numfiles files)" if @ARGV > 1;
    print STDERR "\n";
    print STDERR "Skipping non texts: @nontexts.\n" if @nontexts;
    print STDERR "Skipping non files: @nonfiles.\n" if @nonfiles;
    print STDERR "FILES: @ARGV\n\n" if $verbose > 1;
    die "No files left to grep.\n" unless @ARGV;
}else{
    print STDERR "\n";
}

# ---------------------------------------------------------------------
# -fast: minimal loop, none of the counting / dumping features.
# ---------------------------------------------------------------------
if( $fast ){
    while(<>){
        if( m/$pat/o ){
            print $ARGV,':',$.,':', $_,"\n";
        }
        # Close at end of each file so $. restarts at 1 for the next
        # one, the same way the -eval loop above does.
        close(ARGV) if eof;
    }
    die "\n";
}

# ---------------------------------------------------------------------
# Full-featured loop.
#   $found          hits in the current file (reset at each eof).
#   $num_matches    hits over all files.
#   $selected_files files that qualified (had hits, or none with -absent).
#   %count_matches  per-file hit count, filled in for -count reports.
# ---------------------------------------------------------------------
$num_matches    = 0;
$selected_files = 0;
my %count_matches;

LINE:
while(<>){
    if( m/$pat/o xor $skip ){

        # -skip:PAT drops lines matching PAT as if they were never in the
        # input.  Do NOT "next" out of the loop here: the eof bookkeeping
        # below must still run when the dropped line is the last one of
        # a file.  The test sits in its own block so a successful match
        # does not replace the $& left by the m/$pat/ above.
        my $excluded = 0;
        if( $notpat ){
            $excluded = 1 if m/$notpat/;
        }

        if( ! $excluded ){
            $found++;

            if( ! $count_matches ){     # per-line output (not -count/-absent).
                if( $show_only_match ){
                    $_ = "$&\n";
                }else{
                    # More than 3 control chars: dump the line escaped.
                    my $numcontrols = tr/\000-\ch\ck-\c_\c?-\377//;
                    if( $numcontrols > 3 ){
                        s,\\,\\\\,g;    # quote backslashes.
                        if( $bindump ){
                            $numcontrols =
                                s/($controlchars)/sprintf("\\\%$bindump",ord($1))/ego;
                        }else{
                            $numcontrols =
                                s/($controlchars)/sprintf("\\\%03o",ord($1))/ego;
                        }
                        $binary++;      # enables line wrapping below.
                        s/\t/\\t/go;
                        $_ = "[$bindump dump]\n =" . $_ ;   # indent by two spaces.
                        $_ .= "\n" if ! m/\n\Z/;            # final \n is needed.
                    }elsif( $show_blob ){
                        s/$pat/${fancy}$&${fance}/go;
                    }
                }

                if( $binary && length($_)>60 ){
                    # wrap long lines.
                    $_ =~ s/(.{70,75}?\w\b)/$1\\\n =/og if length($_) >75;
                }

                if( $ARGV eq '-' ){             # reading stdin: no prefix.
                    print $_;
                }elsif( $quickfix > 0 ){
                    # format vim -q (quickfix mode).
                    printf("%s:%d: %s",$ARGV,$.,$_);
                }elsif( $numfiles == 1 ){
                    printf("%4d: %s",$.,$_);
                }else{
                    # -terse with several files: one header per file.
                    # (%quickfix is a hash, unrelated to scalar $quickfix.)
                    print('-' x 10,'File: ',$ARGV,':',$.,"\n")
                        unless $quickfix{ $ARGV }++;
                    print $_;
                }
            }

            $num_matches++;
            close(ARGV) if $findfirst;  # makes the eof test below true.
        }
    }

    # Per-file report and reset.
    if( eof ){
        if( $absent ){
            if( ! $found ){
                print "absent in $ARGV\n";
            }
        }elsif( defined($count_matches) && $found >= $count_matches ){
            printf("%14s: total %3d matches (%4d lines) '%s'\n",
                   $ARGV, $found, $., $& );
            $count_matches{ $ARGV } = $found;
        }elsif( $verbose ){
            printf("%14s: total %2d matches from %4d lines.\n",
                   $ARGV, $found, $. );
        }
        $selected_files++ if $found xor $absent;
        $found = 0;
        close(ARGV);                    # restart $. for the next file.
    }
}

# Verbose summary: per-file counts in ascending order, then totals.
if( $verbose && $num_matches > 0 ){
    if( $count_matches ){
        foreach $file (sort { $count_matches{$a} <=> $count_matches{$b} }
                            (keys %count_matches ) ){
            # The pattern is passed as an argument, never as part of the
            # format, so a '%' inside it is printed literally.
            printf "%4d = grep /%s/ %s\n", $count_matches{$file}, $pat, $file;
        }
    }
    print STDERR "Matches=$num_matches, ",
                 "Files=$selected_files/$numfiles, ",
                 "Pattern=\"$pat\"\n";
}

# EOF