#!/usr/bin/perl

# arbeit.pl - print-preprocessor for my Doktorarbeit
# Copyright(c) 2002 Dr. Georg Fischer <punctum@punctum.com>
# 24-Jun-2002, Georg Fischer
#--------------------------------------------------------------
# 'arbeit' was a PL/1 program for IBM OS/370 in 1980, 
# which I wrote to transform the input file(s) into a series of
# lines for a chain printer, with formfeed codes 
# 1   new page
# ' ' normal new line
# +   overprint this line onto the previous
# By the latter feature, several mathematical characters
# are simulated by use of the "backspace" character '%'.
# The following combinations are used: 
# C%-         element of
# -%,C%-        not element of
# c         contained in or equal to
# c=%|        contained in but unequal to
# 0%/         empty set
# -%,         not
# V%-         for all
# ]%-         exists 
# =%|         not equal

# As usual at that time, column 1 of each  input line
# was used to denote a special function. The following codes
# are interpreted by 'arbeit':
# ' '  normal line, maybe with embedded backspacing
# 1    new page
# 2    overprint this line onto the previous one (for underlining)
# 3    set a parameter value
# 4    print this line over itself (for bold text)
# 5    ?
# 6    start hidden text
# 7    end   hidden text
# 
# The goal of this perl script and the corresponding makefile
# is to recreate the pages in HTML as close to the original
# paper document as possible, without modifying the
# original files in any way. 
# 
# These files had been transferred from the IBM/370 to
# a Siemens BS2000 system, and from there to a Siemens Amboss
# desktop system, where they were written on 8" floppy disks.
# About 10 years later these floppies were read in an
# 8" NEC drive connected to a i386 PC. Now you can read them
# on the Internet.
#-----------------------------------------------------------------
use strict;
my $minline = 15; # don't start a new page up to that many lines
my $maxline = 61; # length of page
my $void    = '^^^^';
my $old_refid = '0';
my %overprint = (
  'C%-' => '&isin;',
  '0%/' => '&#216', # '&empty;',
  '-%,' => '&not;',
  'V%-' => '&forall;',
  ']%-' => '&exist;',
  '=%|' => '&ne;',
  'A%"' => '&Auml;',
  'O%"' => '&Ouml;',
  'U%"' => '&Uuml;',
  'a%"' => '&auml;',
  'o%"' => '&ouml;',
  'u%"' => '&uuml;',
  '=%#' => '&ne;',
  '#%_' => '|?_;',    # ??? end of axiom
  'v%#' => '&darr;',
  '#%v' => '&darr;',
  '#%A' => '&uarr;',
  'A%#' => '&uarr;',
  );
  
# testing
foreach my $key (keys %overprint)
{
  # print "$key->$overprint{$key}\n";
}

open  (INDX, '>>index.htm');
open  (TOC , '>>toc.htm');
open  (REF , '>>ref.htm');

print <<"GFis" if (0);
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//DE">
<html>
<head>
<title>Dissertation</title>
<link rel=stylesheet type="text/css" href="diss.css">
<!--gf>HEAD</gf-->
<!-- Author: Dr. Georg Fischer, punctum\@punctum.com -->
<meta name="copyright" content="This document copyright &#169; 1980 by Dr. Georg Fischer.">
</head>

<body>
<tt><pre>
GFis
& header;

my $refid; # current section number 
my $nlin = '' ; # current, new line just read in
my $ntag = ' '; # new tag, column 1 of $nlin
my $olin = $void ; # previous line, the one to be output
my $hidden = 0; # whether in hidden text line sequence
my $bold   = 0; # whether to print line in bold font
my $lineno = 0; # line number on page
my $rawline= 0; # line number in input file
my $page   = 0; # current page number
my $newpage = 1; # assume an new page was already started
if (open PAGE, "<page.tmp")
{
  $page = <PAGE>;
  $page =~ m/(\-?\d+)/;
  $page = $1;
  close PAGE;
}

while (<>)
{
  $rawline ++;
  s/\r//g;
  $_ = ' ' . $_ if m/^\n$/; # put a tag before empty lines
  m/^(.)(.*)\n/; # split into tag and rest of line
  $ntag = $1; 
  $nlin = $2;
  if    ($ntag eq ' ')
  { # normal line
    & output;
  } # normal line
  elsif ($ntag eq '1')
  { # start new page
    & output;
    if ($lineno >= $minline || ($nlin =~ m[\w] )) # && $lineno <= 2))
    { # filled enough, or beginning of a new file
      & eject;
      $rawline = 0;
      $lineno  = 0;
    }
    else
    { # "1", but this page is not filled enough
      my $nsp = 3;
      print "\n" x $nsp;
      $rawline += $nsp;
      $lineno  += $nsp;
    }
    $olin = ''; # $void;
  } # eject
  elsif ($ntag eq '2')
  { # print over previous line, i.e. underlining
    & underline;
  } # underline
  elsif ($ntag eq '3')
  { # parameter line
    $olin = $void;
  } # parameter
  elsif ($ntag eq '4')
  { # double print = bold
    & anchor; # possibly must enter it in index
    & output;
    $bold = 1;
  } # bold
  #     ($ntag eq '5')
  # undefined function code
  elsif ($ntag eq '6')
  { # start hidden text
    # & output;
    $hidden = 1;
  } # start hidden
  elsif ($ntag eq '7')
  { # end   hidden text
     $olin = '';
     $hidden = 0;
  } # end   hidden
  else
  { # unknown function code
    & output;
    # ???
  } # unknown
} # while <>
& output;
& footer;

open  PAGE, ">page.tmp";
print PAGE $page, "\n";
close PAGE;
close INDX;
# & eject;
print TOC <<"GFis";

GFis
close TOC;
close REF;

print <<"GFis" if (0); # print HTML trailer
</pre>
</tt>
<!-- -------------------------------------- -->
</body>
</html>
GFis

sub output
  {
    if ($bold) 
    {
      $olin = "<strong>$olin</strong>";
      $bold = 0 if $nlin !~  m/^\-\-\-\-\-/; 
        # big title lines are followed by a hyphen underscoring line,
        # which is too short if not also boldened in HTML
    }
    if (! $hidden)
    { 
      & expand;
      if ($olin =~ m/([\w\_]+)\: +proc[\(\;]/)
      { # reference to algorithm
        my $algor = $1;
        print REF sprintf ("%-20s <a href=\"\diss.html#%s\">%s</a>\n", 
          $algor, $refid, $refid);
      } # reference
      if ($olin =~ m/^    *[A-Z0-9\_]+   +(\d[\d\.]*)(\, \d[\d\.]*)? *$/)
      { # reference(s) to algorithm(s)
	# print STDERR "check /$olin/\n";
        while ($olin =~ m/ (\d[\d\.]*)/)
        { # link to sections
          my $sect = $1;
          # print STDERR "change $olin, sect=/$sect/\n";
          my $algor = "<a href=\"\#$sect\">$sect</a>";
          $olin =~ s/ \d[\d\.]*/ $algor/;
        } # while links to sections
      } # reference

      $olin =~ s/\<([^\w\/])/\&lt;$1/g;
      $olin =~ s/([^\w\/\"])\>/$1\&gt;/g;
      if ($olin ne $void)
      {
      	$newpage = 0;
        print # sprintf ("%02d ", $lineno), 
          $olin, "\n";
        $lineno ++;
      }
      if ($lineno >= $maxline)
      {
        & eject;
        $rawline = 0;
      }
      
    } 
    $olin = $nlin;
  } # output

sub anchor
  {
    if (($nlin =~ m/^ *([\d\.]+) +(.*)$/) ||
        ($old_refid =~ m/^([06])/)
       )
    {
      $nlin =~ m/^ *([\w\.]+) *(.*)$/;
      $refid = $1;
      my $rest = $2;
      # $refid =~ s/\.//g; # remove all dots
      print "<a name=\"$refid\">\n";
      print TOC "<a href=\"#$refid\">" 
      . sprintf ("%-64s %3d\n\n", $refid . "</a>  " . $rest, $page);
      print INDX $nlin, "\n";
      $old_refid = $refid if $refid =~m/^\d/;
    }
  } # anchor

sub header 
  {
    print <<"GFis" if ! $hidden; # column 85--------------------V
                                                                $page
                          
GFis
  } # header

sub footer  
  {
    $page ++;
    print <<"GFis" if ! $hidden;


GFis
  } # footer

sub eject 
  {
    if ($newpage == 0)
    {
      & footer;
      print <<"GFis" if ! $hidden;
<hr /><!-- ================================= -->
GFis
      $lineno = 0;
      & header;
    }
    $newpage = 1;
  }

sub expand
  {
    $olin =~ s/% //g; # was indicator for algorithm entry
    $olin =~ s/\%\|/%#/g; # problems with s and |
    $olin =~ s/\|\%/#%/g; # problems with s and |
    while ($olin =~ m/(.\%.)/)
    { # resolve overprints
      my $key = $1;
      # print $olin, "\n--------------------------\n";
      if (defined ($overprint{$key}))
      { # known overprint
        $olin =~ s/$key/$overprint{$key}/;
      }
      else
      { # unknown overprint, mask it
        my $new = $key;
        $new =~ s/\%/\?/;
        $olin =~ s/$key/$new/g; 
      } 
    } # while resolving overprints
  } # expand

sub underline
  { # takes $nlin, and tries to "overprint" it onto $olin
    # only underlining by underscores, and
    # replacement of spaces are currently supported
    my $ul = 0; # no underlining so far
    my $ulin = ' ' . $nlin; # because of loop end, and $uind + 1
    my $uind; # loop variable in $ulin
    my $oind; # loop variable in $olin
    $olin .= ' '; # to allow for insertion of </u> at the end

    return if length ($olin) + 1 < length ($nlin);
    for ($uind = length ($ulin) - 1; $uind >= 0; $uind --)
    { # work from the end of $nlin to the beginning
      my $nch = substr ($ulin, $uind, 1);
      my $oind = $uind + (substr ($olin, 0, $uind) =~ tr/%/%/) * 2;
      if ($nch eq '_')
      {
        if ($ul == 0)
        {
          $ul = 1;
          substr ($olin, $oind) = '</u>' . substr ($olin, $oind);
        }
      } # underscore
      else
      { # space or ordinary character
        if ($ul != 0)
        {
          $ul = 0;
          substr ($olin, $oind) = '<u>'  . substr ($olin, $oind);
        }
      } # ordinary
    } # for $uind
  } # underline
