#!/usr/bin/perl
# SYNOPSIS: Hypertize text url list to html.
# AUTHOR: Mohsin_Ahmed http://www.cs.albany.edu/~mosh
# DOC: ~/init/index.txt is ascii files of urls.
# o SECTION: becomes bold faced SECTION title.
# o TEXT: URL COMMENT becomes a table of |TEXT|URL|COMMENT|
# o URL =PTR COMMENT also add PTR as quickref at top.
# o Backslash newline to break up long urls.
# o Blank lines ignored
# o # lines are comments, ignored.
# o A line of 30 or more '=' or '-' characters becomes a separator row in the table.
# Help text.  It is emitted through die for -h / -? and, followed by an
# explanation, when no input file is left on the command line.
$USAGE = '
USAGE: url2html urlfile.txt ... prints html file on stdout
url2html index.txt > index.htm
url2html start.txt > start.htm
Redirection does not work with in command.com/bat, use cmd95.exe
OPTIONS:
-h Help
-plain No tables in html file.
';

# Consume leading switches from @ARGV; everything that remains is read by <>.
# Switches are recognised by prefix:
#   --       stop option processing
#   -plain   set $do_plain (plain output instead of tables)
#   -h, -?   die with the usage text
#   -v       set $verbose (accepted; nothing visible in this script reads it)
# Any other argument that starts with '-' and has at least one more character
# is rejected.  A lone "-" is NOT a switch: it stays in @ARGV, where <> takes
# it to mean standard input.
while( $_ = $ARGV[0], defined($_) && m/^-./ ){
    shift;
    if(    m/^--$/    ){ last; }
    elsif( m/^-plain/ ){ $do_plain++; }
    elsif( m/^-[h?]/  ){ die $USAGE; }
    elsif( m/^-v/     ){ $verbose++; }
    else               { die "Unknown option '$_'\n"; }
}

# At least one input name is required.
die $USAGE . "Need an input file.\n" unless @ARGV;
# ---------------------------------------------------------------------------
# Input pass: read every input line and queue the page body in @htmlfile.
#
# NOTE(review): the output literals below contain only separators and newlines
# even where the comments speak of tables, bold titles and clickable urls.
# They look as if their HTML tags were lost at some point -- confirm against a
# pristine copy before relying on the generated page.
# ---------------------------------------------------------------------------
my $blankcol = " | ";           # placeholder used for the empty column of a row.
my( $url_count, $nntp_count );  # counters for the summary written at the end.
my @htmlfile;                   # The whole html file is first built then printed.
my %quicker;                    # urls prefixed with = are added to quickref.
my @section;                    # one index entry per 'Section:' line.
my $tabled;                     # set once the first table-start string is queued.

INLINE: while(<>){

    # Join "\"-continued lines.  Spaces before the backslash are preserved,
    # leading spaces on the continuation line are removed.
    while( s,\\\s*$,, && ! eof() ){
        my $nextline = <>;
        $nextline =~ s/^\s*//;
        $_ .= $nextline;
    }

    # Lines starting with '#' are comments of the input format only.
    if( m/^#/ ){
        # Plain mode always finishes through its "Done" exit, also when the
        # very last input line is a comment; otherwise the run would fall
        # into the table output that follows this loop.
        die "Done plain $. lines to urls.\n" if $do_plain && eof();
        next INLINE;
    }

    # Strip a trailing ' # remark', eg. ftp:xyz # comment passwd is xy
    s/\s\#\s.*\n/\n/;

    if( $do_plain ){
        # Plain mode: rewrite the line and print it immediately.

        # Hypertize File URLs.
        s!(\w+):\s*((file):[\\/]*(\S+))!\n\t$1\n\n!gix;
        s!\s((file):[\\/]*(\S+))!\n\t$3\n\n!gix;

        # Hypertize 'title: URL'.
        s!(\w+):\s*((http|ftp|news):[/\\]*(\S+))!\n\t$1\n\n!gix;

        # ' URL'
        s!(\s|^)((http|ftp|news):[/\\]*(\S+))!\n\t$4\n\n!gix;

        # Blank lines are skipped, everything else is printed.
        print $_, "\n\n" unless m/^\s*$/;

        # eof() with empty parentheses is true once the last input file is
        # exhausted.  It is tested only after the line has been handled, so
        # the final line is converted too.  The run ends here through die.
        # (An entirely empty input never enters the loop and so never
        # reaches this exit.)
        die "Done plain $. lines to urls.\n" if eof();
        next INLINE;
    }

    # Table mode from here on.
    if( ! $tabled++ ){
        push @htmlfile, "\n";                   # start a new table.
    }

    if( m/^[-=]{30,}$/ ){                       # "-----------" are tablified.
        push @htmlfile, "| ", '_'x15, " | ",
                        "", '_'x50, " | ", "\n\n";

    }elsif( m/^\s*$/ ){                         # ignore blank lines.
        # nothing to queue

    }elsif( m/^\s*(.*)                                   # title          (group 1)
              ((file|http|ftp|mailto|news|telnet): \S+)\s*  # proto: url   (groups 2, 3)
              (=(\S*))?                                  # quickref tag   (groups 4, 5)
              (.*)$                                      # comment        (group 6)
             /x
    ){
        # The comments inside the pattern deliberately avoid '$' and the
        # delimiter character: Perl interpolates variables and looks for the
        # closing delimiter even inside an x-mode comment.
        my $title   = $1;                       # || "NO-TITLE";
        my $url     = $2;
        my $isquick = $4;                       # '=NAME' when a quickref tag is present
        my $ptquick = $5;                       # NAME of the quickref
        my $comment = $6;
        my $showurl = $url;

        # Netscape cannot handle very large tables, so the table is closed
        # and reopened periodically.
        $url_count++;
        if( ($url_count % 100) == 99 ){
            push @htmlfile, "\n\n";             # end of table.
            push @htmlfile, "\n";               # start a new table.
        }

        if( $url =~ s/file://i ){
            # pic: file:///e:/pic => e:/pic, remove extra slashes
            $showurl =~ s,file:[/\\]*,,;
        }

        # vim: http://www.vim.org => make title 'vim' clickable.
        # news: http://www.news.com/abc => news/abc
        # (disabled) $showurl =~ s@(http|ftp)://(www\.)?(\w+)\.(org|com)\b$@$3@,gi;

        # Drop a trailing colon and whitespace, then parenthesise what is left.
        $comment =~ s/:?\s*$//;
        $comment = "($comment)" if $comment;

        push @htmlfile, "",
                        "| $title | \n",
                        " $showurl $comment | \n",
                        "\n\n";

        if( $isquick ){
            # Remember the url under its quickref name for the index at the top.
            $quicker{$ptquick} = $url;
        }

    }elsif( m/^(.*):\s*$/ ){                    # 'Section:' is bold faced.
        my $sectionname = $1;
        push @section, "$sectionname|\n";
        push @htmlfile, "\n\n",
                        " | ",
                        " $sectionname \n",
                        ":\n",                  # colon => top
                        " | |\n\n";

    }elsif( m/^(.*):\s*(.+)$/ ){                # 'Text: Info'.
        push @htmlfile, "| $1: | $2 |\n\n";

    }elsif( m/^(\S*)$/ ){                       # word at line beginning
        push @htmlfile, " | $1 | $blankcol\n\n";

    }else{                                      # words inside line.
        chomp;
        push @htmlfile, " $blankcol | $_ |\n\n";
    }
}
# ---------------------------------------------------------------------------
# Output pass: finish the queued body, then print the quickref index, the
# section index, the body itself and a closing line on stdout.  A one-line
# summary goes to stderr.
#
# NOTE(review): as in the input pass, the literals here hold no markup; the
# HTML tags were presumably lost from them -- confirm against a pristine copy.
# ---------------------------------------------------------------------------
push @htmlfile, "\n\n";                 # end of table.
push @htmlfile, timestamp(), "\n";      # timestamp at end of file.

# ===================================================
# start printing.
print " \n";

# Quickref index: the names collected from '=NAME' tags, sorted without
# regard to case.
if( %quicker ){
    print "Quickref:\n";
    foreach my $key ( sort {lc($a) cmp lc($b)} keys %quicker ) {
        print " $key | \n";
    }
}

# Section index.
if( @section ){
    print "\n\n\n Sections:";

    # Highlight each section alphabet (9)95/NT(A)ADSL(B)Bombay..(Y)(Z)Zztops
    # $charone is the key character of the previous entry, $chartwo that of
    # the current one.
    # NOTE(review): the key is the character following 'section_' in the
    # entry, but the entries queued by the input loop consist of the section
    # name and '|' only, so this match normally fails and no '(X)' markers
    # are printed.  Presumably the entries once carried a 'section_<name>'
    # anchor -- verify.
    my( $charone, $chartwo );
    foreach (sort @section) {
        $chartwo = ( $_ =~ m/section_(.)/)[0];
        unless( $charone eq $chartwo ){
            # Whenever first char changes, mark it.
            if( $charone =~ m/[a-z]/i ){
                # Print missing alphabets also, eg: (X)Xray(Y)(Z)Zztops.
                $charone++;
                foreach my $charthree ($charone .. $chartwo ){
                    print "($charthree)";
                }
            }else{
                # Not a alphabet.
                print "($chartwo)";
            }
        }
        $charone = $chartwo;
        print;                          # the index entry itself ($_)
    }
    print "\n\n\n";

    # Netscape cannot handle very large tables, so break it up
    # into smaller tables?  Disabled sketch; every line is kept commented
    # out so that none of it is parsed as code.
    #   print "\n";
    #   while( @section ){
    #       print "\t\n";
    #       foreach $i (1..8){
    #           print "\t\t| ", shift(@section), " | \n";
    #       }
    #       print "\t\n\n";
    #   }
    #   print "\n\n\n"; # end of table.
}

print @htmlfile;
print "top\n";

# Summary on stderr.  A counter that was never incremented is reported as 0
# instead of an empty field.
warn sprintf( "Processed %d urls, and matched %d newsgroups.\n",
              $url_count || 0, $nntp_count || 0 );
# end of main ============================================
# timestamp()
#   Build the "Updated on <date> by <user>@<host>." footer line.
#   Takes no arguments.
#   user: $ENV{LOGNAME}, else $ENV{USERNAME}, else 'root'.
#   host: $ENV{HOSTNAME}, else $ENV{COMPUTERNAME}, else $ENV{HOST}; when none
#         of them is set, the name reported by Sys::Hostname, and 'localhost'
#         if that cannot be determined either.  ".$ENV{DOMAIN}" is appended
#         when DOMAIN is set.
#   Returns one newline-terminated string.
sub timestamp {
    my $user   = $ENV{'LOGNAME'} || $ENV{'USERNAME'} || 'root';
    my $domain = $ENV{'DOMAIN'};
    my $host   = $ENV{'HOSTNAME'} || $ENV{'COMPUTERNAME'} || $ENV{'HOST'};

    unless( $host ){
        # Without a fallback the line would end in "user@." -- ask the
        # system instead.  hostname() croaks when it finds no name, hence
        # the eval.
        local $@;
        require Sys::Hostname;
        $host = eval { Sys::Hostname::hostname() } || 'localhost';
    }
    $host .= '.' . $domain if $domain;

    my $today = localtime();            # scalar context: readable date string
    return "Updated on $today by $user\@$host.\n";
}
# EOF