#!/usr/bin/perl
# SYNOPSIS: Hypertize text url list to html.
# AUTHOR: Mohsin_Ahmed http://www.cs.albany.edu/~mosh
# DOC: ~/init/index.txt is ascii files of urls.
#   o SECTION:           becomes bold faced SECTION title.
#   o TEXT: URL COMMENT  becomes a table of |TEXT|URL|COMMENT|
#   o URL =PTR COMMENT   also add PTR as quickref at top.
#   o Backslash newline to break up long urls.
#   o Blank lines ignored
#   o # lines are comments, ignored.
#   o ========= becomes <hr> or '-----' in the table.
#
# NOTE(review): this file was restored from a copy in which all line breaks
# had been collapsed and the HTML tags inside string literals were missing.
# The statement layout below follows the surviving code; the exact HTML
# markup (tag names/attributes) is a reconstruction guided by the remaining
# comments and by the "section_"/"top" anchors the code relies on.
# TODO: confirm the markup against a known-good copy.

use strict;
use warnings;

my $USAGE = '
USAGE: url2html urlfile.txt ... prints html file on stdout
       url2html index.txt > index.htm
       url2html start.txt > start.htm
       Redirection does not work with in command.com/bat, use cmd95.exe
OPTIONS:
       -h      Help
       -plain  No tables in html file.
';

# ---------------------------------------------------------------
# Command line options.  Leading '-' arguments are consumed until
# '--' or the first non-option argument.
my $do_plain = 0;    # -plain: emit simple hyperlinked lines, no tables.
my $verbose  = 0;    # -v is accepted; nothing below consults it.

while ( @ARGV && $ARGV[0] =~ m/^-/ ) {
    my $opt = shift @ARGV;
    if    ( $opt =~ m/^--$/ )    { last; }
    elsif ( $opt =~ m/^-plain/ ) { $do_plain++; }
    elsif ( $opt =~ m/^-[h?]/ )  { die $USAGE; }
    elsif ( $opt =~ m/^-v/ )     { $verbose++; }
    else                         { die "Unknown option '$opt'\n"; }
}
die $USAGE . "Need an input file.\n" unless @ARGV;

# ---------------------------------------------------------------
# State built up while reading the input.
my $blankcol   = "";
my $url_count  = 0;
my $nntp_count = 0;    # reported at the end; never incremented in this file.
my $lines_read = 0;    # last value of $. seen inside the read loop.
my $tabled     = 0;    # true once the first <table> has been opened.
my @htmlfile;          # The whole html file is first built then printed.
my %quicker;           # urls suffixed with =PTR are added to quickref.
my @section;           # one index link per 'Section:' line.

INLINE: while (<>) {

    # Join '\' continued lines: spaces before the '\' are preserved,
    # leading spaces on the continuation line are removed.
    while ( s/\\\s*$// && !eof() ) {
        my $nextline = <>;
        $nextline =~ s/^\s*//;
        $_ .= $nextline;
    }
    $lines_read = $.;

    next INLINE if m/^#/;       # whole-line comments are dropped.
    s/\s\#\s.*\n/\n/;           # eg. ftp:xyz # comment passwd is xy

    if ($do_plain) {
        # Every line, including the last one of the input, is converted;
        # the summary is reported after the loop instead of dying at eof.
        my $html = hypertize_plain($_);
        print $html, "<br>\n" unless $html =~ m/^\s*$/;    # skip blank lines.
        next INLINE;
    }

    push @htmlfile, "<table>\n" unless $tabled++;    # start the first table.

    # A long run of '-' or '=' becomes a horizontal rule row.
    if ( m/^[-=]{30,}$/ ) {
        push @htmlfile, "<tr><td><hr></td>", "<td><hr></td><td><hr></td></tr>", "\n";
        next INLINE;
    }

    next INLINE if m/^\s*$/;    # ignore blank lines.

    # 'TITLE URL =PTR COMMENT' becomes one table row.
    if ( my ( $title, $url, $proto, $isquick, $ptquick, $comment ) = m{
            ^\s* (.*)                                       # title
            ( (file|https?|ftp|mailto|news|telnet) : \S+ )  # url, with its protocol
            \s*
            ( = (\S*) )?                                    # optional =PTR quickref
            (.*) $                                          # trailing comment
        }x )
    {
        my $showurl = $url;

        # Netscape cannot handle very large tables, so close the current
        # table with a ruled row and open a fresh one every 100 urls.
        $url_count++;
        if ( ( $url_count % 100 ) == 99 ) {
            push @htmlfile, "<tr><td>", '_' x 15, "</td><td>", '_' x 50,
                "</td></tr></table>\n";     # end of table.
            push @htmlfile, "<table>\n";    # start a new table.
        }

        # pic: file:///e:/pic => e:/pic, remove extra slashes.
        # NOTE(review): the 'file:' scheme is removed from the link target
        # itself as well as from the displayed text -- confirm intended.
        if ( $url =~ s/file://i ) {
            $showurl =~ s,file:[/\\]*,,i;
        }

        $comment =~ s/:?\s*$//;
        $comment = "($comment)" if length $comment;

        push @htmlfile, "<tr>",
            "<td>$title</td>\n",
            "<td><a href=\"$url\">$showurl</a></td>\n",
            "<td>$comment</td></tr>\n";

        $quicker{$ptquick} = $url if $isquick;
        next INLINE;
    }

    # 'Section:' is bold faced, anchored, and listed in the section index.
    if ( m/^(.*):\s*$/ ) {
        my $sectionname = $1;
        push @section, "<a href=\"#section_$sectionname\">$sectionname</a>|\n";
        push @htmlfile, "<tr><td>\n\n",
            "<a name=\"section_$sectionname\"><b> $sectionname </b></a>\n",
            "<a href=\"#top\">:</a>\n",    # colon => top
            "</td></tr>\n";
        next INLINE;
    }

    # 'Text: Info'.
    if ( m/^(.*):\s*(.+)$/ ) {
        push @htmlfile, "<tr><td>$1: $2</td></tr>\n";
        next INLINE;
    }

    # A single word on the line.
    if ( m/^(\S*)$/ ) {
        push @htmlfile, "<tr><td>$1</td> $blankcol </tr>\n";
        next INLINE;
    }

    # Anything else: free text.
    chomp;
    push @htmlfile, "<tr><td>$_</td> $blankcol </tr>\n";
}

if ($do_plain) {
    warn "Done plain $lines_read lines to urls.\n";
    exit 0;
}

push @htmlfile, "</table>\n" if $tabled;    # end of table.
push @htmlfile, timestamp(), "\n";          # timestamp at end of file.

# ===================================================
# start printing.

print "<a name=\"top\"> </a>\n";

if (%quicker) {
    print "Quickref:\n";
    for my $key ( sort { lc($a) cmp lc($b) } keys %quicker ) {
        print "<a href=\"$quicker{$key}\"> $key </a>| \n";
    }
}

print_section_index(@section) if @section;

print @htmlfile;
print "<a href=\"#top\">top</a>\n";

warn "Processed $url_count urls, and matched $nntp_count newsgroups.\n";

# end of main ============================================

# hypertize_plain($line): return $line with 'title: URL' and bare URLs
# (file, http, https, ftp, news) wrapped in <a href> links, each on a line
# of its own.  Used only in -plain mode.
sub hypertize_plain {
    my ($line) = @_;

    # Hypertize File URLs: 'title: file:...' first, then bare ' file:...'.
    $line =~ s!(\w+):\s*((file):[\\/]*(\S+))!\n\t<a href="$2">$1</a>\n!gi;
    $line =~ s!\s((file):[\\/]*(\S+))!\n\t<a href="$1">$3</a>\n!gi;

    # Hypertize 'title: URL', then bare ' URL'.  Links made by an earlier
    # substitution are preceded by '"', so they are not matched again.
    $line =~ s!(\w+):\s*((https?|ftp|news):[/\\]*(\S+))!\n\t<a href="$2">$1</a>\n!gi;
    $line =~ s!(\s|^)((https?|ftp|news):[/\\]*(\S+))!\n\t<a href="$2">$4</a>\n!gi;

    return $line;
}

# print_section_index(@entries): print the 'Sections:' index line.
# Highlights each section alphabet, also printing letters that have no
# section, eg: (9)95/NT(A)ADSL(B)Bombay..(Y)(Z)Zztops
# Entries are ordered case-insensitively and initials are compared in upper
# case, so mixed-case section names cannot derail the letter ranges.
sub print_section_index {
    my @entries = @_;

    print "\n<hr>\n Sections:";

    my $prev = '';    # initial of the previous entry, upper-cased.
    for my $entry ( sort { lc($a) cmp lc($b) } @entries ) {
        my ($first) = $entry =~ m/section_(.)/;
        $first = defined $first ? uc($first) : '';

        if ( $first ne $prev ) {    # Whenever first char changes, bold it.
            if ( $prev =~ m/\A[A-Y]\z/ && $first =~ m/\A[A-Z]\z/ ) {
                # Print missing alphabets also, eg: (X)Xray(Y)(Z)Zztops.
                my $from = chr( ord($prev) + 1 );
                print "<b>($_)</b>" for $from .. $first;
            }
            else {                  # Not a alphabet, or nothing to fill in.
                print "<b>($first)</b>";
            }
        }
        $prev = $first;
        print $entry;
    }

    # The index is printed inline rather than as a table, since Netscape
    # cannot handle very large tables.
    print "\n<hr>\n";
    return;
}

# timestamp(): return an 'Updated on DATE by USER@HOST.' line built from
# the environment; USER falls back to 'root' and HOST to 'localhost'.
sub timestamp {
    my $user   = $ENV{'LOGNAME'} || $ENV{'USERNAME'} || 'root';
    my $domain = $ENV{'DOMAIN'};
    my $host   = $ENV{'HOSTNAME'} || $ENV{'COMPUTERNAME'} || $ENV{'HOST'}
        || 'localhost';
    $host .= '.' . $domain if $domain;
    my $today = localtime();
    return "Updated on $today by $user\@$host.\n";
}

# EOF