# Philippe Verdret 1998-1999 use strict; package RTF::HTML::Converter; use RTF::Control; @RTF::HTML::Converter::ISA = qw(RTF::Control); use constant TRACE => 0; use constant LIST_TRACE => 0; use constant SHOW_STYLE_NOT_PROCESSED => 1; use constant SHOW_STYLE => 0; # insert style name in the output use constant SHOW_RTF_LINE_NUMBER => 0; # Symbol exported by the RTF::Ouptut module: # %info: informations of the {\info ...} # %par_props: paragraph properties # $style: name of the current style or pseudo-style # $event: start and end on the 'document' event # $text: text associated to the current style # %symbol: symbol translations # %do_on_control: routines associated to RTF controls # %do_on_event: routines associated to events # output(): a stack oriented output routine (don't use print()) my $START_NEW_PARA = 1; # some actions to do at the beginning of a new para ########################################################################### my $N = "\n"; # Pretty-printing # some output parameters my $TITLE_FLAG = 0; my $LANG = 'en'; my $TABLE_BORDER = 1; my $CURRENT_LI = 0; # current list indent my @LIST_STACK = (); # stack of opened lists my %LI_LEVEL = (); # li -> list level my %PAR_ALIGN = qw( qc CENTER ql LEFT qr RIGHT qj LEFT ); # here put your style mappings my %STYLES = ('Normal' => 'p', 'Abstract' => 'Blockquote', 'PACSCode' => 'Code', #'AuthGrp' => '', 'Section' => 'H1', 'heading 1' => 'H1', 'heading 2' => 'H2', 'heading 3' => 'H3', 'heading 4' => 'H4', 'heading 5' => 'H5', 'heading 6' => 'H6', 'Code' => 'pre', 'par' => 'p', # default value ); # list names -> level my %UL_STYLES = ('toc 1' => 1, 'toc 2' => 2, 'toc 3' => 3, 'toc 4' => 4, 'toc 5' => 5, ); # not used my %UL_TYPES = qw(b7 disk X square Y circle ); my %OL_STYLES = ( ); # not used my %OL_TYPES = ( 'pncard' => '1', # Cardinal numbering: One, Two, Three 'pndec' => '1', # Decimal numbering: 1, 2, 3 'pnucltr' => 'A', # Uppercase alphabetic numbering 'pnlcltr' => 'a', # lowercase alphabetic numbering 'pnucrm' => 'I', # Uppercase roman numbering 'pnlcrm' => 'i', # Lowercase roman numbering ); my $in_Field = -1; # nested links are illegal, not used my $in_Bookmark = -1; # nested links are illegal, not used %do_on_event = ( 'document' => sub { # Special action if ($event eq 'start') { output qq@$N$N$N@; } else { my $author = $info{author}; my $creatim = $info{creatim}; my $revtim = $info{revtim}; my $tag; while (@LIST_STACK) { $tag = pop @LIST_STACK; output "" . $N; } $style = 'p'; if ($LANG eq 'fr') { output "<$style>Auteur : $author\n" if $author; output "<$style>Date de création : $creatim\n" if $creatim; output "<$style>Date de modification : $revtim\n" if $revtim; } else { # Default output "<$style>Author : $author\n" if $author; output "<$style>Creation date: $creatim\n" if $creatim; output "<$style>Modification date: $revtim\n" if $revtim; } output "\n\n"; } }, # Table processing 'table' => sub { # end of table if ($event eq 'end') { #print STDERR "end of table\n"; $TABLE_BORDER ? output "$N$text
$N" : output "$N$text
$N"; } else { #print STDERR "start of table\n"; my $end; while (@LIST_STACK) { $end .= '' . $N; } output ($end); } }, 'row' => sub { # end of row #my $char_props = $_[SELF]->force_char_props('end'); #output "$N$text$char_props$N"; if ($event eq 'end') { output "$N$N$text$N$N"; } else { # not defined } }, 'cell' => sub { # end of cell if ($event eq 'end') { my $char_props = $_[SELF]->force_char_props('end'); my $end; while (@LIST_STACK) { $end .= '' . $N; } output "$text$char_props$end$N"; } else { # not defined } }, # PARAGRAPH STYLES #'Normal' => sub {}, # create one entry per style name??? 'par' => sub { # Default rule: if no entry for a paragraph style # Paragraph styles #print STDERR "$style\n" if LIST_TRACE; return output($text) unless $text =~ /\S/; my ($tag_start, $tag_end, $before) = ('','',''); if (defined(my $level = $UL_STYLES{$style})) { # registered list styles if ($level > @LIST_STACK) { my $tag; push @LIST_STACK, $tag = 'UL'; if (SHOW_STYLE) { $before = "<$tag>[$style]" . $N; } else { $before = "<$tag>" . $N; } $tag_start = $tag_end = 'LI'; } else { $level = @LIST_STACK - $level; while ($level-- > 0) { $before .= ''. $N; } $tag_start = $tag_end = 'LI'; } } else { } if ($tag_start eq '') { # end of list while (@LIST_STACK) { $before .= '' . $N; } $tag_start = $tag_end = $STYLES{$style} || do { if (SHOW_STYLE_NOT_PROCESSED) { use vars qw/%style_not_processed/; # todo: add count unless (exists $style_not_processed{$style}) { print STDERR "style not defined '$style'\n" if SHOW_STYLE_NOT_PROCESSED; $style_not_processed{$style} = ''; } } $STYLES{'par'}; }; foreach (qw(qj qc ql qr)) { # for some html elements... if ($par_props{$_}) { $tag_start .= " ALIGN=$PAR_ALIGN{$_}"; } } } $_[SELF]->trace("$tag_start-$tag_end: $text") if TRACE; my $char_props = $_[SELF]->force_char_props('end'); if (SHOW_RTF_LINE_NUMBER) { output "$N$before<$tag_start>[$.]$text$char_props$N"; } else { output "$N$before<$tag_start>$text$char_props$N"; } $START_NEW_PARA = 1; }, # Hypertextuel links # 'bookmark' => sub { # $_[SELF]->trace("bookmark $event $text") if TRACE; # if ($event eq 'end') { # return if $in_Bookmark--; # output(""); # } else { # return if ++$in_Bookmark; # output(""); # } # }, # 'field' => sub { # my $id = $_[0]; # $_[SELF]->trace("field $event $text") if TRACE; # if ($event eq 'end') { # return if $in_Field--; # output("$text"); # } else { # return if ++$in_Field; # output(""); # doesn't work! # } # }, # CHAR properties 'b' => sub { $style = 'b'; if ($event eq 'end') { output ""; } else { output "<$style>"; } }, 'i' => sub { $style = 'i'; if ($event eq 'end') { output ""; } else { output "<$style>"; } }, 'ul' => sub { $style = 'u'; if ($event eq 'end') { output ""; } else { output "<$style>"; } }, 'sub' => sub { $style = 'sub'; if ($event eq 'end') { output ""; } else { output "<$style>"; } }, 'super' => sub { $style = 'sup'; if ($event eq 'end') { output ""; } else { output "<$style>"; } }, 'strike' => sub { $style = 'strike'; if ($event eq 'end') { output ""; } else { output "<$style>"; } }, ); ############################################################################### # Could be used in a next release # manage a minimal context for the tag generation # gen_tags(EVENT, TAG_NAME, [ATTLIST]) # EVENT: open|close # return: a tag start|end my %cant_nest = map { $_ => 1 } qw(a); use constant GEN_TAGS_WARNS => 1; my @element_stack = (); my %open_element = (); sub gen_tags { # manage a minimal context for tag outputs die "bad argument number" unless (@_ >= 2); my ($eve, $tag, $att) = @_; my $result = ''; if ($eve eq 'open') { push @element_stack, $tag; # add a new node if ($open_element{$tag}++ and defined $cant_nest{$tag}) { #print STDERR "skip open $tag\n"; $result = ''; } else { $result = '<'. $tag . '>' . $N; } } else { # close unless (@element_stack) { warn "no element to close on the '$tag' tag\n" if GEN_TAGS_WARNS; return $result; } my $opened_elt; while (1) { $opened_elt = pop @element_stack; if (--$open_element{$tag} >= 1 and defined $cant_nest{$tag}) { #print STDERR "skip close $opened_elt\n"; } else { $result .= '' . $N; } last if $tag eq $opened_elt; unless (@element_stack) { warn "element stack is empty on $tag close\n" if GEN_TAGS_WARNS; return $result; } } } $result; } ############################################################################### # If you have an &; in your RTF document and if # is a character entity, you'll see "&;" in the RTF document # and the corresponding glyphe in the HTML document # How to give a new definition to a control registered in %do_on_control: # - method redefinition (could be the purist's solution) # - $Control::do_on_control{control_word} = sub {}; # - when %do_on_control is exported write: $do_on_control{'ansi'} = # callback redefinition sub { # RTF: \' # HTML: &#; my $charset = $_[CONTROL]; my $charset_file = $_[SELF]->application_dir() . "/$charset"; open CHAR_MAP, "$charset_file" or die "unable to open the '$charset_file': $!"; my %charset = ( # general rule map({ sprintf("%02x", $_) => "&#$_;" } (0..255)), # and some specific defs map({ s/^\s+//; split /\s+/ } ()) ); *char = sub { my $char_props; if ($START_NEW_PARA) { $char_props = $_[SELF]->force_char_props('start'); $START_NEW_PARA = 0; } else { $char_props = $_[SELF]->process_char_props(); } output $char_props . $charset{$_[1]} } }; # symbol processing # RTF: \~ # named chars # RTF: \ldblquote, \rdblquote $symbol{'~'} = ' '; $symbol{'tab'} = ' '; #'        '; $symbol{'ldblquote'} = '«'; $symbol{'rdblquote'} = '»'; $symbol{'line'} = '
'; sub symbol { my $char_props; if ($START_NEW_PARA) { $char_props = $_[SELF]->force_char_props('start'); $START_NEW_PARA = 0; } else { $char_props = $_[SELF]->process_char_props(); } if (defined(my $sym = $symbol{$_[1]})) { output $char_props . $sym; } else { output $char_props . $_[1]; # as it } } # Text # certainly do the same thing with the char() method sub text { # parser callback redefinition my $text = $_[1]; my $char_props = ''; if ($START_NEW_PARA) { $char_props = $_[SELF]->force_char_props('start'); $START_NEW_PARA = 0; } else { $char_props = $_[SELF]->process_char_props(); } $text =~ s/&/&/g; $text =~ s//>/g; if (defined $char_props) { output("$char_props$text"); } else { output("$text"); } } 1; __END__