#!/share/nfs/users1/umr-tge/zoonek/gnu/Linux/bin/perl -w
# xml2html.pl
# Version 0.02
# (c) 2001 Vincent Zoonekynd <zoonek@math.jussieu.fr>
# Distributed under the GPL
use strict;
# State shared by every package in this file:
#   @toc    - table-of-contents entries, filled by TOCHandler and read back
#             by MyHandler when it meets a <toc> element;
#   $result - the HTML document being generated.
our @toc    = ();
our $result = '';
# TOCHandler: SAX handler used for a first pass over the document.  It
# collects the text of every <h1> element and appends one
# [section_number, title] pair per heading to @main::toc.
#
# The enclosing bare block keeps the state variables below private to this
# package, so they cannot clash with same-named variables declared by the
# packages that follow in this file (MyHandler has its own $section_number).
{
    package TOCHandler;
    use strict;
    use constant TRUE => (0==0);
    use constant FALSE => (0==1);

    my $remember = FALSE;     # true only while we are inside an <h1>
    my $string = "";          # text of the heading currently being read
    my $section_number = 0;   # number of headings seen so far

    # Constructor: the handler keeps no per-object state.
    sub new { my $type = shift; return bless {}, $type; }

    # SAX callback for character data.  Text is only accumulated while an
    # <h1> is open; everything else in the document is ignored instead of
    # being piled up and thrown away at the next heading.
    sub characters {
        my ($self, $chars) = @_;
        return unless $remember;
        $string .= $chars->{Data};
    }

    # SAX callback for an opening tag: an <h1> starts a fresh title.
    sub start_element {
        my ($self, $el) = @_;
        if ($el->{Name} eq "h1") {
            $remember = TRUE;
            $string = "";
        }
    }

    # SAX callback for a closing tag: the end of an <h1> records the title
    # under the next section number.
    sub end_element {
        my ($self, $el) = @_;
        if ($el->{Name} eq "h1") {
            $remember = FALSE;
            $section_number++;
            push @main::toc, [$section_number, $string];
            $string = "";
        }
    }
}
######################################################################
package MyHandler;
use strict;

## Constants
use constant TRUE  => (0 == 0);
use constant FALSE => (0 == 1);

# Constructor: returns an empty blessed hash; all state lives in the
# package variables below.
sub new {
    my ($class) = @_;
    return bless({}, $class);
}

## Global variables

# Output redirection: while $save_text is true, generated text is appended
# to $saved_text instead of the final document.
our $save_text = FALSE;
our $saved_text;

# Counters: nesting depth of the implicit paragraph wrapper, and number of
# the section currently being written.
our ($inside_p, $section_number) = (0, 0);

# Charset announced in the generated page (other values that have been
# considered: "UTF-8", "ISO-2022-JP").
our $charset = "iso-8859-1";

# Page colours.
our ($bgcolor, $text)        = ('#FFFFFF', '#000000');
our ($alink, $link, $vlink)  = ('#FFFFFF', '#6D8ADA', '#415383');
our ($title_bgcolor, $title_fgcolor)     = ('#ffdb43', $text);
our ($section_bgcolor, $section_fgcolor) = ('#6D8ADA', '#FFFFFF');
our ($code_bgcolor, $code_fgcolor)       = ('#FFFFAA', $text);
our $tailer_fgcolor = '#c8c8c8';

# Author details printed at the bottom of the page.
our $author = "Vincent Zoonekynd";
our $web    = "http://www.math.jussieu.fr/~zoonek/";
our $mail   = 'zoonek@math.jussieu.fr';

# Values read from the document while it is being processed.
our ($title, $imagetitle, $date, $keywords);
## Output (or capture) of text
# Append the given string to the capture buffer $saved_text while
# $save_text is true, and to the generated document $result otherwise.
sub affiche {
    my ($chunk) = @_;
    my $sink = $save_text ? \$saved_text : \$result;
    $$sink .= $chunk;
}
# Write the given message into the output as an HTML comment.
sub debug {
    my ($message) = @_;
    affiche("<!-- $message -->");
}
# SAX callback for character data: escape the text for HTML, turn bare
# http/ftp/file URLs into links, and send the result to affiche.
sub characters {
    my ($self, $chars) = @_;
    my $escaped = $chars->{Data};
    # "&" has to be handled first, otherwise the "&" of the "&lt;" produced
    # by the second substitution would be escaped again.
    $escaped =~ s/&/&amp;/g;
    $escaped =~ s/</&lt;/g;
    $escaped =~ s#((http|ftp|file)://[^\s\">]+)#<A HREF="$1">$1</A>#g;
    affiche($escaped);
}
# Open the layout table that wraps a block of content.  The outermost
# wrapper is a centered table 95% wide; a wrapper opened inside another one
# is a plain table 100% wide.  $inside_p counts the wrappers currently open.
sub start_p {
    debug("implicit paragraph start");
    my $opening = ( $inside_p == 0 )
        ? '<center><table width="95%"><tr><td>'
        : '<table width="100%"><tr><td>';
    affiche($opening);
    # print STDERR "<P> $inside_p\n";
    $inside_p++;
}
# Close the layout table opened by the matching start_p call and decrement
# the nesting counter $inside_p.
sub end_p {
    debug("implicit paragraph end");
    $inside_p--;
    my $closing = '</td></tr></table>';
    # Only the outermost wrapper was opened with <center> (start_p emits it
    # when the depth is 0), so only that one may close it; nested wrappers
    # would otherwise produce an unbalanced </center>.
    $closing .= '</center>' if $inside_p == 0;
    affiche($closing . "\n");
    # print STDERR "<P> $inside_p\n";
}
# Escape a value for use in HTML text or inside a double-quoted attribute.
# An undefined value (e.g. a missing attribute) becomes the empty string.
sub _escape_html {
    my ($value) = @_;
    return '' unless defined $value;
    $value =~ s/&/&amp;/g;     # must come first
    $value =~ s/</&lt;/g;
    $value =~ s/"/&quot;/g;
    return $value;
}

# SAX callback for an opening tag: emit the HTML that starts the rendering
# of the element named $el->{Name}.  Elements not listed here (including
# <web> and <head>, which need nothing on opening) produce no output.
sub start_element {
    my ($self, $el) = @_;
    my $name = $el->{Name};
    my $attr = $el->{Attributes} || {};

    if ($name eq "title" || $name eq "date" || $name eq "keywords") {
        # The content of these elements is captured, not printed;
        # end_element stores it in the matching variable.
        $save_text = TRUE;
        $saved_text = "";
    } elsif ($name eq "imagetitle") {
        $imagetitle = $attr->{src};
    } elsif ($name eq "h1") {
        $section_number++;
        affiche("\n<!-- Section $section_number -->\n");
        affiche("<p></p><table width=\"100%\" cellpadding=\"2\" cellspacing=\"3\" border=\"0\">\n");
        affiche("<tr><td bgcolor=\"$section_bgcolor\"><font color=\"$section_fgcolor\" face=\"Arial,Helvetica\"><A name=\"$section_number\"></A>");
        # affiche "$section_number. ";
    } elsif ($name eq "a") {
        affiche('<a href="' . _escape_html($attr->{href}) . '">');
    } elsif ($name eq "p") {
        debug("paragraph");
        start_p();
    } elsif ($name eq "table") {
        debug("table");
        start_p();
        # An outer one-cell table in the text colour shows through the
        # 1-pixel cell spacing of the inner table and so draws the borders.
        affiche("<table cellpadding=0 cellspacing=0 border=0>\n");
        affiche("<tr><td bgcolor=\"$text\"><table cellpadding=3 cellspacing=1 border=0>");
        debug("table body");
    } elsif ($name eq "tr") {
        affiche('<tr>');
    } elsif ($name eq "td") {
        affiche("<td bgcolor=\"$bgcolor\">");
    } elsif ($name eq "ul") {
        debug("unnumbered list");
        start_p();
        affiche('<ul>');
    } elsif ($name eq "li") {
        affiche('<li>');
    } elsif ($name eq "img") {
        debug("image");
        start_p();
        # The src value is written verbatim: the image-size pass at the end
        # of this file reads it back from the HTML and uses it as a file name.
        my $src = defined $attr->{src} ? $attr->{src} : '';
        affiche("<center><IMG SRC=\"$src\" ALT=\"" . _escape_html($attr->{alt}) . "\"></center>");
        end_p();
    } elsif ($name eq "code") {
        debug("code");
        start_p();
        affiche("<table width=\"100%\"><tr><td bgcolor=\"$code_bgcolor\"><font color=\"$code_fgcolor\"><pre>");
    } elsif ($name eq "em") {
        affiche('<em>');
    } elsif ($name eq "tt") {
        affiche('<tt>');
    } elsif ($name eq "toc") {
        # One link per entry of @toc; the titles are raw text collected by
        # the first pass, so they are escaped here.
        start_p();
        affiche('<center>');
        foreach my $entry (@toc) {
            my ($number, $heading) = @$entry;
            affiche('<A HREF="#' . $number . '">' . _escape_html($heading) . '</A><br>');
        }
        affiche('</center>');
        end_p();
    }
}
# SAX callback for a closing tag: emit the HTML that finishes the rendering
# of the element named $el->{Name}.  Notable cases:
#   </head>  writes the whole HTML prologue (doctype, <head>, <body>, title
#            banner), using the values captured from <title>, <keywords>
#            and <imagetitle>;
#   </web>   writes the page footer and closes the document;
#   </title>, </date>, </keywords>  stop the capture started by
#            start_element and store the captured text.
sub end_element {
    my ($self, $el) = @_;
    my $name = $el->{Name};

    if ($name eq "web") {
        start_p();
        affiche("<p align =\"RIGHT\">");
        affiche("<font color=\"$tailer_fgcolor\">");
        affiche("<a href=\"$web\" style=\"text-decoration: none\">$author</a><br>\n");
        # The angle brackets around the address are written as entities;
        # literal ones would turn the address into an unknown (invisible) tag.
        affiche("<a href=\"mailto:$mail\" style=\"text-decoration: none\">&lt;${mail}&gt;</a><br>\n");
        affiche("$date<br>\n") if $date;
        affiche("latest modification on " . `date`);
        affiche("</font></p>\n");
        end_p();
        affiche("</body></html>");
    } elsif ($name eq "head") {
        # A document without <title> gets an empty title rather than an
        # interpolated undef.
        my $page_title = defined $title ? $title : '';
        affiche("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"\n");
        affiche("\"http://www.w3.org/TR/html4/loose.dtd\">\n");
        affiche("<!-- This is a generated file -->\n");
        affiche("<html>\n");
        affiche(" <head>\n");
        affiche(" <title>$page_title</title>\n");
        affiche(" <meta http-equiv=\"Content-Style-Type\" content=\"text/css\">");
        affiche(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=$charset\">\n");
        affiche(" <meta NAME=\"keywords\" CONTENT=\"$keywords\">") if $keywords;
        affiche(" </head>\n\n");
        affiche(" <body bgcolor=\"$bgcolor\" text=\"$text\" link=\"$link\" alink=\"$alink\" vlink=\"$vlink\">\n");
        affiche("\n<!-- title -->\n");
        affiche("<center>\n");
        if (defined $imagetitle) {
            affiche("<img src=\"$imagetitle\" alt=\"$page_title\">\n");
        } else {
            affiche(" <table cellpadding=\"10\">\n");
            affiche(" <tr><td bgcolor=\"$title_bgcolor\">");
            affiche("<font color=\"$title_fgcolor\" face=\"Arial,Helvetica\">$page_title</font></td></tr>\n");
            affiche(" </table>\n");
        }
        affiche("</center><p></p>");
    } elsif ($name eq "title") {
        $save_text = FALSE;
        $title = $saved_text;
    } elsif ($name eq "date") {
        $save_text = FALSE;
        $date = $saved_text;
    } elsif ($name eq "keywords") {
        $save_text = FALSE;
        $keywords = $saved_text;
    } elsif ($name eq "h1") {
        affiche("</font></td></tr></table>\n");
    } elsif ($name eq "a") {
        affiche("</a>");
    } elsif ($name eq "p") {
        end_p();
        debug("paragraph end");
    } elsif ($name eq "table") {
        debug("table body end");
        affiche('</table></td></tr></table>');
        end_p();
        debug("table end");
    } elsif ($name eq "tr") {
        affiche('</tr>');
    } elsif ($name eq "td") {
        affiche('</td>');
    } elsif ($name eq "ul") {
        affiche('</ul>');
        end_p();
        debug("unnumbered list end");
    } elsif ($name eq "li") {
        affiche('</li>');
    } elsif ($name eq "code") {
        affiche('</pre></font></td></tr></table>');
        end_p();
        debug("code end");
    } elsif ($name eq "em") {
        affiche('</em>');
    } elsif ($name eq "tt") {
        affiche('</tt>');
    }
    # <imagetitle> and <img> were fully handled by start_element.
}
######################################################################
package main;
use strict;
use XML::Parser::PerlSAX;

# Read the whole XML document (the files named on the command line, or
# standard input) into memory: it is parsed twice below.
our $xml = join('',<>);

# First pass: collect the section titles into @toc.
my $toc_handler = TOCHandler->new;
my $toc_parser = XML::Parser::PerlSAX->new( Handler => $toc_handler );
$toc_parser->parse( Source => { String => $xml } );

# Second pass: generate the HTML into $result.
my $my_handler = MyHandler->new;
my $parser = XML::Parser::PerlSAX->new( Handler => $my_handler );
$parser->parse( Source => { String => $xml } );
######################################################################
## Encoding fix-up: write the generated HTML to tmp.html, let the external
## `recode` tool convert it from UTF-8 to Latin-1 into tmp2.html, and read
## the converted text back into $result.  Both files are left in place.
{
    my ($utf8_file, $latin1_file) = ('tmp.html', 'tmp2.html');

    open(my $out, '>', $utf8_file)
        or die "Cannot open $utf8_file for writing: $!";
    print {$out} $result
        or die "Cannot write to $utf8_file: $!";
    close($out)
        or die "Cannot close $utf8_file: $!";

    # system() returns the wait status, i.e. 0 on success, so the result has
    # to be compared with 0 explicitly.  $! is only meaningful when the
    # command could not be started at all ($? == -1).
    system("recode UTF-8..latin1 <$utf8_file >$latin1_file") == 0
        or die "Problem with recode: "
             . ( $? == -1 ? $! : "exit status " . ( $? >> 8 ) );

    open(my $in, '<', $latin1_file)
        or die "Cannot open $latin1_file for reading: $!";
    $result = join('', <$in>);
    close($in);

    # unlink "tmp.html";
    # unlink "tmp2.html";
}
######################################################################
## Add the image sizes: every SRC="..." attribute found in the generated
## HTML gets WIDTH and HEIGHT attributes in front of it.  The size is the
## first "<digits>x<digits>" found in the output of the external command
## `convert -verbose FILE /dev/null`; 320x256 is used when none is found.
{
    my $rebuilt = "";
    # Each iteration removes everything up to and including the next
    # SRC="..." from $result and appends the rewritten text to $rebuilt.
    while ( $result =~ s/^(.*?)SRC\=\"([^"]*)\"//si ) {
        my ($before, $file) = ($1, $2);
        print STDERR "Looking for the size of $file\n";

        # List-form pipe open: the file name is handed to convert as a
        # single argument and never goes through a shell.
        my $report = "";
        if ( open(my $size_fh, '-|', 'convert', '-verbose', $file, '/dev/null') ) {
            $report = join('', <$size_fh>);
            close($size_fh);
        } else {
            warn "Cannot run `convert -verbose $file /dev/null': $!";
        }

        my ($width, $height) = (320, 256);
        if ( $report =~ m/([0-9]+)x([0-9]+)/ ) {
            ($width, $height) = ($1, $2);
        }
        print STDERR " width: $width height: $height\n";
        $rebuilt .= "$before WIDTH=$width HEIGHT=$height SRC=\"$file\" ";
    }
    # Whatever follows the last SRC attribute is kept unchanged.
    $result = $rebuilt . $result;
}
######################################################################
## Try to remove the whitespace that precedes </pre>
$result =~ s|\s+</pre>|</pre>|gi;
######################################################################
## Write the finished document to standard output
print $result;