# Philippe Verdret 1998-1999
use strict;
package RTF::HTML::Converter;
use RTF::Control;
@RTF::HTML::Converter::ISA = qw(RTF::Control);
use constant TRACE => 0;
use constant LIST_TRACE => 0;
use constant SHOW_STYLE_NOT_PROCESSED => 1;
use constant SHOW_STYLE => 0; # insert style name in the output
use constant SHOW_RTF_LINE_NUMBER => 0;
# Symbol exported by the RTF::Ouptut module:
# %info: informations of the {\info ...}
# %par_props: paragraph properties
# $style: name of the current style or pseudo-style
# $event: start and end on the 'document' event
# $text: text associated to the current style
# %symbol: symbol translations
# %do_on_control: routines associated to RTF controls
# %do_on_event: routines associated to events
# output(): a stack oriented output routine (don't use print())
my $START_NEW_PARA = 1; # some actions to do at the beginning of a new para
###########################################################################
my $N = "\n"; # Pretty-printing
# some output parameters
my $TITLE_FLAG = 0;
my $LANG = 'en';
my $TABLE_BORDER = 1;
my $CURRENT_LI = 0; # current list indent
my @LIST_STACK = (); # stack of opened lists
my %LI_LEVEL = (); # li -> list level
my %PAR_ALIGN = qw(
qc CENTER
ql LEFT
qr RIGHT
qj LEFT
);
# here put your style mappings
my %STYLES = ('Normal' => 'p',
'Abstract' => 'Blockquote',
'PACSCode' => 'Code',
#'AuthGrp' => '',
'Section' => 'H1',
'heading 1' => 'H1',
'heading 2' => 'H2',
'heading 3' => 'H3',
'heading 4' => 'H4',
'heading 5' => 'H5',
'heading 6' => 'H6',
'Code' => 'pre',
'par' => 'p', # default value
);
# list names -> level
my %UL_STYLES = ('toc 1' => 1,
'toc 2' => 2,
'toc 3' => 3,
'toc 4' => 4,
'toc 5' => 5,
);
# not used
my %UL_TYPES = qw(b7 disk
X square
Y circle
);
my %OL_STYLES = (
);
# not used
my %OL_TYPES = (
'pncard' => '1', # Cardinal numbering: One, Two, Three
'pndec' => '1', # Decimal numbering: 1, 2, 3
'pnucltr' => 'A', # Uppercase alphabetic numbering
'pnlcltr' => 'a', # lowercase alphabetic numbering
'pnucrm' => 'I', # Uppercase roman numbering
'pnlcrm' => 'i', # Lowercase roman numbering
);
my $in_Field = -1; # nested links are illegal, not used
my $in_Bookmark = -1; # nested links are illegal, not used
%do_on_event =
(
'document' => sub { # Special action
if ($event eq 'start') {
output qq@$N$N
$N@;
} else {
my $author = $info{author};
my $creatim = $info{creatim};
my $revtim = $info{revtim};
my $tag;
while (@LIST_STACK) {
$tag = pop @LIST_STACK;
output "$tag>" . $N;
}
$style = 'p';
if ($LANG eq 'fr') {
output "<$style>Auteur : $author$style>\n" if $author;
output "<$style>Date de création : $creatim$style>\n" if $creatim;
output "<$style>Date de modification : $revtim$style>\n" if $revtim;
} else { # Default
output "<$style>Author : $author$style>\n" if $author;
output "<$style>Creation date: $creatim$style>\n" if $creatim;
output "<$style>Modification date: $revtim$style>\n" if $revtim;
}
output "\n\n";
}
},
# Table processing
'table' => sub { # end of table
if ($event eq 'end') {
#print STDERR "end of table\n";
$TABLE_BORDER ? output "$N"
:
output "$N";
} else {
#print STDERR "start of table\n";
my $end;
while (@LIST_STACK) {
$end .= '' . pop(@LIST_STACK) . '>' . $N;
}
output ($end);
}
},
'row' => sub { # end of row
#my $char_props = $_[SELF]->force_char_props('end');
#output "$N$text$char_props
$N";
if ($event eq 'end') {
output "$N$N$text$N
$N";
} else {
# not defined
}
},
'cell' => sub { # end of cell
if ($event eq 'end') {
my $char_props = $_[SELF]->force_char_props('end');
my $end;
while (@LIST_STACK) {
$end .= '' . pop(@LIST_STACK) . '>' . $N;
}
output "$text$char_props$end | $N";
} else {
# not defined
}
},
# PARAGRAPH STYLES
#'Normal' => sub {}, # create one entry per style name???
'par' => sub { # Default rule: if no entry for a paragraph style
# Paragraph styles
#print STDERR "$style\n" if LIST_TRACE;
return output($text) unless $text =~ /\S/;
my ($tag_start, $tag_end, $before) = ('','','');
if (defined(my $level = $UL_STYLES{$style})) { # registered list styles
if ($level > @LIST_STACK) {
my $tag;
push @LIST_STACK, $tag = 'UL';
if (SHOW_STYLE) {
$before = "<$tag>[$style]" . $N;
} else {
$before = "<$tag>" . $N;
}
$tag_start = $tag_end = 'LI';
} else {
$level = @LIST_STACK - $level;
while ($level-- > 0) {
$before .= '' . pop(@LIST_STACK) . '>'. $N;
}
$tag_start = $tag_end = 'LI';
}
} else {
}
if ($tag_start eq '') { # end of list
while (@LIST_STACK) {
$before .= '' . pop(@LIST_STACK) . '>' . $N;
}
$tag_start = $tag_end = $STYLES{$style} || do {
if (SHOW_STYLE_NOT_PROCESSED) {
use vars qw/%style_not_processed/;
# todo: add count
unless (exists $style_not_processed{$style}) {
print STDERR "style not defined '$style'\n" if SHOW_STYLE_NOT_PROCESSED;
$style_not_processed{$style} = '';
}
}
$STYLES{'par'};
};
foreach (qw(qj qc ql qr)) { # for some html elements...
if ($par_props{$_}) {
$tag_start .= " ALIGN=$PAR_ALIGN{$_}";
}
}
}
$_[SELF]->trace("$tag_start-$tag_end: $text") if TRACE;
my $char_props = $_[SELF]->force_char_props('end');
if (SHOW_RTF_LINE_NUMBER) {
output "$N$before<$tag_start>[$.]$text$char_props$tag_end>$N";
} else {
output "$N$before<$tag_start>$text$char_props$tag_end>$N";
}
$START_NEW_PARA = 1;
},
# Hypertextuel links
# 'bookmark' => sub {
# $_[SELF]->trace("bookmark $event $text") if TRACE;
# if ($event eq 'end') {
# return if $in_Bookmark--;
# output("");
# } else {
# return if ++$in_Bookmark;
# output("");
# }
# },
# 'field' => sub {
# my $id = $_[0];
# $_[SELF]->trace("field $event $text") if TRACE;
# if ($event eq 'end') {
# return if $in_Field--;
# output("$text");
# } else {
# return if ++$in_Field;
# output(""); # doesn't work!
# }
# },
# CHAR properties
'b' => sub {
$style = 'b';
if ($event eq 'end') {
output "$style>";
} else {
output "<$style>";
}
},
'i' => sub {
$style = 'i';
if ($event eq 'end') {
output "$style>";
} else {
output "<$style>";
}
},
'ul' => sub {
$style = 'u';
if ($event eq 'end') {
output "$style>";
} else {
output "<$style>";
}
},
'sub' => sub {
$style = 'sub';
if ($event eq 'end') {
output "$style>";
} else {
output "<$style>";
}
},
'super' => sub {
$style = 'sup';
if ($event eq 'end') {
output "$style>";
} else {
output "<$style>";
}
},
'strike' => sub {
$style = 'strike';
if ($event eq 'end') {
output "$style>";
} else {
output "<$style>";
}
},
);
###############################################################################
# Could be used in a next release
# manage a minimal context for the tag generation
# gen_tags(EVENT, TAG_NAME, [ATTLIST])
# EVENT: open|close
# return: a tag start|end
my %cant_nest = map { $_ => 1 } qw(a);
use constant GEN_TAGS_WARNS => 1;
my @element_stack = ();
my %open_element = ();
sub gen_tags { # manage a minimal context for tag outputs
die "bad argument number" unless (@_ >= 2);
my ($eve, $tag, $att) = @_;
my $result = '';
if ($eve eq 'open') {
push @element_stack, $tag; # add a new node
if ($open_element{$tag}++ and defined $cant_nest{$tag}) {
#print STDERR "skip open $tag\n";
$result = '';
} else {
$result = '<'. $tag . '>' . $N;
}
} else { # close
unless (@element_stack) {
warn "no element to close on the '$tag' tag\n" if GEN_TAGS_WARNS;
return $result;
}
my $opened_elt;
while (1) {
$opened_elt = pop @element_stack;
if (--$open_element{$tag} >= 1 and defined $cant_nest{$tag}) {
#print STDERR "skip close $opened_elt\n";
} else {
$result .= '' . $opened_elt . '>' . $N;
}
last if $tag eq $opened_elt;
unless (@element_stack) {
warn "element stack is empty on $tag close\n" if GEN_TAGS_WARNS;
return $result;
}
}
}
$result;
}
###############################################################################
# If you have an &; in your RTF document and if
# is a character entity, you'll see "&;" in the RTF document
# and the corresponding glyphe in the HTML document
# How to give a new definition to a control registered in %do_on_control:
# - method redefinition (could be the purist's solution)
# - $Control::do_on_control{control_word} = sub {};
# - when %do_on_control is exported write:
$do_on_control{'ansi'} = # callback redefinition
sub {
# RTF: \'
# HTML: ;
my $charset = $_[CONTROL];
my $charset_file = $_[SELF]->application_dir() . "/$charset";
open CHAR_MAP, "$charset_file"
or die "unable to open the '$charset_file': $!";
my %charset = ( # general rule
map({ sprintf("%02x", $_) => "$_;" } (0..255)),
# and some specific defs
map({ s/^\s+//; split /\s+/ } ())
);
*char = sub {
my $char_props;
if ($START_NEW_PARA) {
$char_props = $_[SELF]->force_char_props('start');
$START_NEW_PARA = 0;
} else {
$char_props = $_[SELF]->process_char_props();
}
output $char_props . $charset{$_[1]}
}
};
# symbol processing
# RTF: \~
# named chars
# RTF: \ldblquote, \rdblquote
$symbol{'~'} = ' ';
$symbol{'tab'} = ' '; #' ';
$symbol{'ldblquote'} = '«';
$symbol{'rdblquote'} = '»';
$symbol{'line'} = '
';
sub symbol {
my $char_props;
if ($START_NEW_PARA) {
$char_props = $_[SELF]->force_char_props('start');
$START_NEW_PARA = 0;
} else {
$char_props = $_[SELF]->process_char_props();
}
if (defined(my $sym = $symbol{$_[1]})) {
output $char_props . $sym;
} else {
output $char_props . $_[1]; # as it
}
}
# Text
# certainly do the same thing with the char() method
sub text { # parser callback redefinition
my $text = $_[1];
my $char_props = '';
if ($START_NEW_PARA) {
$char_props = $_[SELF]->force_char_props('start');
$START_NEW_PARA = 0;
} else {
$char_props = $_[SELF]->process_char_props();
}
$text =~ s/&/&/g;
$text =~ s/</g;
$text =~ s/>/>/g;
if (defined $char_props) {
output("$char_props$text");
} else {
output("$text");
}
}
1;
__END__