#!/usr/local/bin/perl # bibexec.pl # v1.0 Sept 1 1995 - handles SpatBib format files # v1.1 Sept 15 1995 - recursively search files in TODO directory # v1.2 Oct 1 1995 - added AND/OR search for bibtex and text entries, # also MAX entries, Keyword info # v1.3 Nov 3 1995 - added keyword equivalence feedback # # Copyright amit@cs.albany.edu (Amit Mukerjee) 1995 # # Using ReadParse from cgi-lib.pl - Copyright 1993 Steven E. Brenner # and many parts from bibsearch.pl (c) 1994 Andy Wood (amw@cs.bham.ac.uk) # http://www.cs.bham.ac.uk/~amw/agents/bibtex/search.html # # Main mods by AM: # Multiple file formats handled # Added AND/OR concatenation, Case # Output filtering options # Modified bibtex Search function for AND/OR and Case Handling # # This script will respond to a call from a HTML form with the following # fields: # # 'header' - url filename of the header template file. # 'footer' - url filename of the footer template file. # 'term' - the search term(s). # 'andor' - one of 'and', or 'or'. # 'field' - the field(s) to search in. # 'Output' - one of 'all', 'Nocomments', 'Nokeys', or 'Quiet'. # 'MaxCount' - Maximum number of entries to be printed # 'mode' - one of 'substr', 'word', or 'regexp'. # 'filters'- ignore case? # 'files' - list of url filenames to be searched seperated by `\0' # # It copies the specified header template file to stdout. This file should # contain valid html and can be used to put a title on the search results. # Any instances of "$term" will be replaced with the search term text, and # any instances of "$mode" with the search mode. # # Then it searches the list of files, using the search term, in the manner # specified by the search mode. The list of files can be a bib # database as per the document ~amit/bib/format.info, or in BibTeX format, # or an HTML or plaintext file, which is searched without fields. # Any entries in the database that match the search will be outputted with # a HTML title. 
#
# When it has scanned all the files it will then copy the specified footer
# template to stdout, again replacing "$term" and "$mode".
#
# A url filename is of the form "~user/path/file" or "ftp:/path/file" and
# these only work on the local filesystem - see GetPath for more details.

#$DEBUG=1;
#$SrchDEBUG=1;

# ****
#
# Constants
#
# ****

# For use in GetPath
# USERHTMLDIR - name of directory for user supplied pages
# LOCALFTPPATH - local path for public ftp site

$EOL_REPL="\0\0";                 # Used to fuse multiple lines together
$USERHTMLDIR = "/public_html/";
$LOCALFTPPATH = "/scratch/ftp";
#$MAXENTRIES = 200;
$MAXKEYS = 45;

# ****
#
# ReadParse
#
# Reads in GET or POST data, converts it to unescaped text, and puts
# one key=value in each member of the list "@in"
# Also creates key/value pairs in %in, using '\0' to separate multiple
# selections
#
# If a variable-glob parameter (e.g., *cgi_input) is passed to ReadParse,
# information is stored there, rather than in $in, @in, and %in.
#
# Only well-formed %XX escapes (two hex digits) are decoded; anything else
# is left as typed.  Always returns 1.
#
# From cgi-lib.pl - Copyright 1993/1994 Steven E. Brenner
#                   http://www.bio.cam.ac.uk/web
#
# ****

sub ReadParse
  {
  local (*in) = @_ if @_;
  local ($i, $key, $val);

  # Read in text
  if ($ENV{'REQUEST_METHOD'} eq "GET")
    {
    $in = $ENV{'QUERY_STRING'};
    }
  elsif ($ENV{'REQUEST_METHOD'} eq "POST")
    {
    read(STDIN,$in,$ENV{'CONTENT_LENGTH'});
    }

  @in = split(/&/,$in);

  foreach $i (0 .. $#in)
    {
    # Convert plus's to spaces
    $in[$i] =~ s/\+/ /g;

    # Split into key and value.
    ($key, $val) = split(/=/,$in[$i],2);     # splits on the first =.

    # Convert %XX from hex numbers to alphanumeric.  Requiring two hex
    # digits stops a malformed escape such as "%zz" from decoding to "\0",
    # which is also the multiple-value separator used below.  "C" is the
    # unsigned form, so bytes above 0x7f do not wrap.
    $key =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack("C",hex($1))/ge;
    $val =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack("C",hex($1))/ge;

    # Associate key and value
    $in{$key} .= "\0" if (defined($in{$key})); # \0 is the multiple separator
    $in{$key} .= $val;
    }

  return 1; # just for fun
  }

# ****
#
# GetPath
#
# Converts its argument from a partial path ("~amw/file" or "ftp:/path/file")
# into its full equivalent ("/home/pg/amw/public_html/file" or
# "/scratch/ftp/path/file") ensuring that we don't inadvertently allow
# external users full access to the file system.  We also remove any
# instances of ".." in the path.
#
# Returns "" when the argument is in neither form, when the user named after
# "~" does not exist, or when the resulting path contains "|" or a NUL.
#
# ****

sub GetPath
  {
  local( $filename ) = $_[ 0 ];

  if ( $filename =~ /^~([^\/]*)\/(.*)/ )
    {
    # Capture both parts from one successful match, so that a "~user" with
    # no slash can never pick up a stale $2 from some earlier match.
    local( $name, $file ) = ( $1, $2 );
    local( @entry ) = getpwnam( $name );

    # Unknown user: refuse, rather than building "/public_html/...".
    return "" unless @entry;

    # Element 7 of the getpwnam list is the home directory.  Counting back
    # from the end is not portable because the list length differs between
    # platforms (some append an expiry field after the shell).
    $filename = $entry[ 7 ].$USERHTMLDIR.$file;
    }
  elsif ( $filename =~ /^ftp:(.*)/ )
    {
    local( $rest ) = $1;
    # "ftp:name" used to become "/scratch/ftpname", i.e. a sibling of the
    # ftp tree; force the separator so the result stays underneath it.
    $rest = "/".$rest if ( $rest ne "" && $rest !~ /^\// );
    $filename = $LOCALFTPPATH.$rest;
    }
  else
    {
    $filename = "";
    }

  $filename =~ s/\.\.//g;                # Make sure we don't allow any ..'ing

  # The result is handed to two-argument open() elsewhere in this file, where
  # a "|" turns the "filename" into a command to run.  NUL is refused too.
  return "" if ( $filename =~ /[|\0]/ );

  return $filename;
  }

# ****
#
# PrintHeader
#
# Prints the line that tells WWW that we're an HTML document (honest!)
#
# ****

sub PrintHeader
  {
  print "Content-type: text/html\n\n";
  }

# ****
#
# Search
#
# See if entry ($_) matches required search term.  The arguments are
#    1: Search term(s), separated by whitespace ($in{ 'term' }),
#    2: Search Mode ($in{ 'mode' }) - 'word' adds \b anchors round each term,
#    3: Field(s) to look in ($in{ 'field' }); "" or "all" means the whole
#       entry.
# The concatenation variable $OR is passed globally - AM
# The regexp modifiers in $ignore are also read globally, and the expression
# that gets evaluated is left in the global $SrchMode.
#
# Each term is used as a regular expression.  Returns 1 if the entry matches
# (any term when $OR is set, otherwise every term) and 0 if not; no terms at
# all counts as no match.
#
# ****

sub Search
  {
  local( $found ) = 0;
  local( $searchin );
  local ($term, $mode, $field) = @_;

  # split(' ') skips leading blanks and treats a run of blanks as a single
  # separator.  Splitting on / / produced empty terms, and an empty pattern
  # matches everything - so with $OR set, "foo  bar" matched every entry.
  local (@SrchTrms) = split(' ',$term);

  # NOTE(review): the match is done through a string eval, and $ignore is
  # pasted into that string as code.  Keep only genuine modifier letters so
  # nothing else can ever reach the eval.
  local( $flags ) = $ignore;
  $flags =~ tr/imsx//cd;

  # $key stays literal here (single quotes); it is interpolated as a pattern
  # only when the eval runs, never as code.
  if ($mode eq 'word')
    {
    $SrchMode = '$searchin =~ /\b' . '($key)' . '\b/' . $flags ;
    }
  else
    {
    $SrchMode = '$searchin =~ /' . '($key)' . "/" . $flags ;
    }

  if ( $field ne "" && $field ne "all" )
    {
    local( @fields ) = split( ' ', $field );
    local( @values );
    foreach $fieldname ( @fields )
      {
      push( @values, &GetField( $fieldname, $_ ) );
      }
    # Keep the fields apart, so that neither a match nor a \b word boundary
    # can straddle the end of one field and the start of the next.
    $searchin = join( "\n", @values );
    }
  else
    {
    $searchin = $_;
    }

  # A term that is not a valid regular expression makes the eval fail, which
  # is treated the same as "did not match".
  foreach $key ( @SrchTrms )
    {
    if (eval $SrchMode)
      {
      $found = 1;
      last if ($OR);
      }
    else
      {
      $found = 0;
      last if (!$OR);
      }
    }

  return $found;
  }

# ****
#
# MarkupEntry
#
# Create a line of HTML for each entry in the file.  This pulls the
# title and the author (or editor) from the BibTeX entry in $_, and
# prints a HTML heading, followed by an availability list of urls if
# there are any, followed by the full entry in
# <PRE> formatted form.
#
# ****

sub MarkupEntry
  {
  local( $junk, $bibtex ) = split( '@', $_, 2 );
  $bibtex = '@'.$bibtex;
  local( $key, $rest ) = split( ',', $bibtex, 2 );
  $key =~ s/^@.*[{(]\s*(.*)\s*$/$1/;                     # Retrieve the key

  local( $author, $title );
  $title = &GetField( "title", $bibtex );

  if ( /author\s*=/i )
    {
    $author = &GetField( "author", $bibtex );
    }
  elsif ( /editor\s*=/i )
    {
    $author = &GetField( "editor", $bibtex );
    if ( $author =~ /\band\b/ )
      {
      $author .= " (Eds)";
      }
    else
      {
      $author .= " (Ed)";
      }
    }
  else
    {
    $author = "";
    }
               
  print "\n", $title, ", ", $author, "\n";

  if ( /url\s*=/i )
    {
    print "
Available as ", &MarkupURL( &GetField( "url", $bibtex ) ), ".\n"; } s/
//i;
  s/<\/PRE>//i;
  print "
\n", $bibtex, "
\n"; } # **** # # MarkupURL # # Takes a list of URLs seperated by commas and expands them into a html list # that you can click on. # # **** sub MarkupURL { local( $url, $format, $html ) = ""; local( @urls ) = split( ',', $_[0] ); foreach $url ( 0..$#urls ) { $html .= "\nand" if ( $url == $#urls && $#urls >= 1 ); $html .= "," if ( $url >= 1 && $url < $#urls ); $format = ""; if ( $urls[ $url ] =~ /\.Z/ || $urls[ $url ] =~ /\.gz/ ) { $format = "compressed "; } if ( $urls[ $url ] =~ /\.ps/i ) { $format .= "postscript"; } elsif ( $urls[ $url ] =~ /\.txt/i ) { $format .= "text"; } elsif ( $urls[ $url ] =~ /\.gif/i ) { $format .= "gif"; } elsif ( $urls[ $url ] =~ /\.html/i ) { $format .= "hypertext"; } else { $format = $urls[ $url ]; $format =~ s/.*\/([^\/]*)$/$1/; } $urls[ $url ] =~ s/^\s+//; $html .= "\n
".$format.""; } return $html; } # **** # # GetField # # Gets the field specified in the first argument and strips it of quotes and/or # squiggly brackets, removes excess spaces and returns it. # # **** sub GetField { local( $field, $contents ) = @_; # Arguments: field name, bibtex entry $contents =~ s/\n/ /g; # Remove all \n's if ( $contents =~ /.*\b($field)\s*=\s*"([^"]*)"\s*,/i ) { $contents = $2; } elsif ( $contents =~ /.*\b($field)\s*=\s*{(.*)}\s*,/i ) { $contents = $2; # Contains remaining fields too $contents =~ s/}\s*,.*//g; # So remove everything after }, } elsif ( $contents =~ /.*\b($field)\s*=\s*(\d*)\s*,/i ) { $contents = $2; } else { $contents = ""; } $contents =~ s/"|{|}//g; # Remove ""`s and {}'s $contents =~ s/\s+/ /g; # Make lots of spaces into 1. return $contents; } # **** # # ProcessFile # # Process the file (filename specified by the first argument). This involves # opening it, printing the last part of the filename, stripping off the html # header (if it is not a .bib file), printing all the entries that match # the search term (or "none found" if there aren't any) and closing the # file again. # # **** sub ProcessFile { local( $file, $field, $mode, $term ) = @_; $filename = &GetPath( $_[0] ); if ($field eq "key" || $field eq "comments" || $field eq "source") { $field = "all"; } local( $nfound ) = 0; open( FILE, $filename ) || print "

Couldn't Open Input file - ".$filename."


\n"; print "

".substr( $filename, rindex( $filename, '/' ) + 1 )."

\n"; if ( $filename =~ /\.bib$/ ) { $stage='body'; $/ = ""; } else { $stage='header'; } if ($DEBUG) { print "

BIBTEX search for TERM: $term, MODE: $mode FIELD: $field

\n"; } while ( ) { if ( $stage eq 'body' ) { if ( /@.*{/ && &Search( $term , $mode , $field ) ){ # If bibtex entry and matches search. &MarkupEntry(); &MaxReached if ($Maxcount -$bibcount < $nfound++); } if ( //i ) # If end of entries { $stage='footer'; $/ = "\n"; } if ( ( $stage eq 'header' ) && //i ) { # If end of header $stage='body'; $/ = ""; } } } $bibcount += $nfound; print "

", $nfound," Matching Entries found. (Total: ",$bibcount, ")

\n"; close( FILE ); print "
\n"; } # **** # # PrintTemplate # # Copy a Template file substituting variables where necessary. # # **** sub PrintTemplate { local( $filename ) = &GetPath( $_[0] ); open( TEMPLATE, $filename ) || print "

Incorrect Template File!


\n"; while (