#!/usr/bin/perl

# stripmime - get rid of MIME crap in email
#
# by Adam Glass <adam@clarity.net>
# modified by Dave Sherohman <dave@sherohman.org>

############################################################################

$version = "0.8.3a-dps"; # version string

$headers = 1;       # default to echo message headers
$cleanheaders = 0;  # strip non-essential headers in "forward" message sections
$echomime = 0;      # default to not echo mime section info
$sep = "";          # section separator
$needsep = 0;       # flag for section change
$echobody = 0;      # default to not echoing message body
$sectionmark = 0;   # 0=MIME headers, 1=message's headers, 2+ otherwise
$sectiontype = 0;   # section ID (0=message headers,1=body/nonmime,2=body/mime)
$mimesection = "";  # used to record MIME type of current section
$sectionname = "";  # Used to record name of current section
$printremoved = 0;  # print notice when attachments removed
$printnamed = 0;    # Print notice when named attachments removed
#$pr_template = "#### MIME type \"\{\}\" removed by stripmime ####";
$pr_template = "There is an attachment named SECTION.";

#$DEBUG = 1;        # uncomment for diagnostic info

###########################################################################
                            
# (If you're customizing the code and adding/removing MIME types to include
# by default, this would be a good place to do it.  %mimes is a Perl
# associative array (hash) of MIME types.  If the value of a key is:
#  1: It is passed through to STDOUT (if it appears in the input stream)
#  3: It is passed to STDOUT but each line is prepended with "> "
# Any other value is ignored -- not passed to STDOUT.  (Or, in the
# parlance of the command line arguments, excluded.)

$mimes{"text/plain"} = 1;               # default MIME type to pass
$mimes{"message/delivery-status"} = 1;  # this is probably useful stuff
$mimes{"message/rfc822"} = 3;           # forwarded messages should get passed
$mimes{"text/rfc822-headers"} = 3;      # webtv's (and others'?) MTA uses this

############################################################################


parse_command_line();

if ($DEBUG) { show_options(); }

while ($intxt = ($savethatinput ? $savethatinput : <STDIN>)) {

  $savethatinput = "";

  if ($sep && ($intxt eq "--$sep--\n")) {         # found final MIME boundary
    process_final_boundary();
  }

  elsif ($sep && ($intxt eq "--$sep\n")) {        # found MIME boundary
    process_mime_boundary();
  }

  elsif ($sectiontype == 0) {                     # deal with message headers
    process_message_headers();
  }

  elsif ($sectiontype == 1) {                     # deal with non-MIME body
    process_nonmime_body();
  }

  elsif (($sectiontype == 2) &&                   # deal with MIME in body
        (($mimes{$mimesection} == 1) ||           #  ("1" = include it
         ($mimes{$mimesection} == 3))) {          #   "3" = "forward" it)
    process_mime_section();
  }
}

# (print a terminal separator if need be)
if ($sep) { print "--$sep--\n" if ($echomime && $needsep); }

exit 0;



#
# wait, you mean it's not that simple?
#
#
# oh...
############################################################################

sub parse_command_line {
  foreach $arg (@ARGV) {
    if (substr($arg,0,1) eq "-") {                    # starts with a -?
     dovers() if ($arg =~ /^(-)?-version/i);          # version text
     dohelp() if ($arg =~ /^(-)?-help/i);             # help/usage message
     $sectiontype = 0;                                # reset for later use
     for ($i=1;$i<length($arg);$i++) {                # for each character
       $flag = substr($arg,$i,1);
       if ($flag eq "h") { $headers = 0; }            # don't echo msg hdrs
       elsif ($flag eq "m") { $echomime = 1; }        # echo MIME info/seps
       elsif ($flag eq "c") { $cleanheaders = 1; }    # (see line 17 above)
       elsif ($flag eq "b") { $echobody = 1; }        # print nonmime body
       elsif ($flag eq "r") { $printremoved = 1; }    # print warning removed
       elsif ($flag eq "R") { $printnamed = 1; }      # warn for named only
       elsif ($flag eq "i") { $sectiontype = 1; }     # include following types
       elsif ($flag eq "e") { $sectiontype = 2; }     # exclude following types
       elsif ($flag eq "f") { $sectiontype = 3; }     # fwd (>) following types
       else {                                         # not a valid flag
         print "invalid option: ".substr($arg,$i,1)."\n";
         dohelp();                                    # do help/usage message
       }
     }
    }
    elsif ($sectiontype != 0) {                       # expecting MIME types
      $arg =~ tr/ //d;
      @mimelist = split(",",$arg);                    # temporary array
      if (@mimelist == 0) { dohelp(); }               # none? do help/usage msg
      foreach $mime (@mimelist) {                     # for each one,
        if ($mime !~ /\w\/\w/) { dohelp(); } else {   # valid-looking?
          $mimes{lc($mime)} = $sectiontype;           # set action to take
        }
      }
      $sectiontype = 0;                               # reset for next argument
    }
  }

  # If -i/-e/-f was last argument, we didn't get MIME types we were expecting.

  if ($sectiontype != 0) { dohelp(); }                  # do help/usage msg

  if ($echomime) { $cleanheaders = 0; }                 # -m overrides -c
}


sub show_options {
  print "$0 $version -- DEBUG mode ON -- options:\n";
  print ($headers ? "  WILL" : "  Will NOT");
   print " pass-through message's headers.\n";
  print ($cleanheaders ? "  WILL" : "  Will NOT");
   print " clean forwarded-sections' headers.\n";
  print ($echomime ? "  WILL" : "  Will NOT");
   print " leave message looking like a MIME message.\n";
  print ($echobody ? "  WILL" : "  Will NOT");
   print " pass-through non-MIME part of message body.\n";
  print ($printremoved ? "  WILL" : "  Will NOT");
   print " insert warning when any MIME sections are removed.\n";
  print ($printnamed ? "  WILL" : "  Will NOT");
   print " insert warning when named MIME sections are removed.\n";
}


sub clean_text {                                       # clean input lines
 local($cleanme) = @_;

 chomp $cleanme;                                       # remove trailing \n
 chop $cleanme if (substr($cleanme,-1,1) eq "\r");     # and \r, as needed
 if ($DEBUG) { print "Translated cr/nl into nl...\n"; }
 # do i want to do anything else here??
 return $cleanme;                                      # and send it on back
}


sub process_final_boundary {
  $needsep = 1;                                 # print final boudary (if -m)
  $sectiontype = 1;                             # just in case there's more
  if (@seplist>1) {
    pop @seplist;
    $sep = $seplist[@seplist-1]; 
    if ($DEBUG) { print ">>> DROPPED BACK TO \"$sep\"\n"; }
  }
}


sub process_mime_boundary {
  $needsep = 1;                                 # need a separator for this
  $printedremoved = 0;                          # reset flag marking printed

  if ($DEBUG) { print ">>> HIT SEPARATOR\n"; }

  # in case you care... ("token documentation")
  # $sectionid is the action to take with this MIME section
  #  (1=echo,3=forward,anything else=ignore)
  # $sectionmark is the location within a section (0=in MIME headers,
  #  >0=in MIME body.  For MIME sections which are themselves messages
  #  (as in the case of message/rfc822-style forwarded messages, $sectionmark
  #  will be 1 in the forwarded message's headers, 2+ afterwards)

  # MIME boundaries mean we're moving onto a new section, so check
  # Content-Type field to identify section and set sectiontype as needed

  $done = 0;  $spool = "";  $hiteof = 0;  $mimesection = "";
  $sectionname = "";
  while (!$done && !$hiteof) {
    $hiteof = 1 unless ($intxt = <STDIN>);    # note down EOF if it happened
    $spool .= $intxt;                         # save mime headers
    $hassemi = index($intxt,";");
    if ($hassemi != -1) { $intxt = substr($intxt,0,$hassemi); }
    if (!$mimesection) {
      if ($intxt =~ /^Content-Type: (.*);?/i) {  # does it look right?
        $mimesection = lc($1);                # save for later
        chop $mimesection if (substr($mimesection,-1,1) eq ";");
      }
    } elsif (!$sectionname) {
      if ($intxt =~ /name=\"([^\"]+)\"/i) {
        $sectionname = $1;
      }
    }
    if ($intxt eq "\n") {
      $mimesection="text/plain" if (!$mimesection);  # default per RFC 2045
      $done = 1;                              # end of headers - exit loop
    }
  }
  die "Hit EOF scanning MIME headers" if ($hiteof);

  # we should have a Content-Type by this point -- is it multipart?
  # if so, record the boundary value on the stack
  if (index($mimesection,"multipart/") == 0) {
    if ($spool =~ /boundary="?(.*)"?;?\s*/im) {   # should find it in there
      $sep = $1; $sep =~ tr/"//d;                 # spooled MIME headers
      push @seplist,$sep;                         # add to boundary stack
    }
    if ($DEBUG) { print ">>> NEW BOUNDARY \"$sep\"\n"; }
  }	
  if ($DEBUG) { print ">>> SWITCHING TO: \"$mimesection\" ($mimes{$mimesection})\n"; }

  # assuming we got a good content-type header...

  $sectiontype = 2;                          # set sectiontype = MIME
  $sectionmark = 1;                          # set section location to body
  if (($mimes{$mimesection} == 1) || ($mimes{$mimesection} == 3)) {
    if ($needsep && $echomime) {             # and we need to print a
      print "--@seplist[0]\n" unless ($mimes{$mimesection} == 3);
               # MIME separator line, do so
      $needsep = 0;                          # and reset need flag
    }
    if ($echomime && ($mimes{$mimesection} != 3)) {
      @spool_lines = split("\n",$spool);     # convert to array
      foreach $spooled_line (@spool_lines) { # for each line spooled...
        print "$spooled_line\n";             #  definitely print the line
      }
    }
    print "\n" unless ($mimes{$mimesection} != 3);
  } else {                                   # not something we should print
    if (($printremoved || ($printnamed && $sectionname))
         && !$printedremoved) {              # look closely, they're different
      $printedremoved = 1;
      $removetext = $pr_template;
      $removetext =~ s/\{\}/$mimesection/ig;
      $removetext =~ s/SECTION/$sectionname/ig;
      print "$removetext\n";
    }
  }
}


sub process_message_headers {
  if ($intxt =~ /^Content-Type:/i) {       # if it's a MIME content-type def
    # this next line is wrong -- we don't necessarily want to echo
    # the input MIME type.  but what to use instead?  multipart/something?
    # text/plain?  ... fix this at some point.
    print "$intxt" if ($echomime && $headers);       # print it if we should
    if (!$sep) {                                     # don't already have one?
      if ($intxt =~ /boundary="?(.*)"?;?\s*/i) {     # find the boundary
	$sep = $1;
        $sep = substr($sep,1) if (substr($sep,0,1) eq "\"");
        $sep = substr($sep,0,index($sep,"\"")) if (index($sep,"\"") != -1);
        $sep =~ tr/"//d;
      } else {                                       # it's not here
        $spool = "";                                 # start spooling
        $done = 0;                                   # reset flag
        while (!$done) {
          if ($intxt = <STDIN>) {
            die "Premature end of MIME header" if ($intxt eq "\n");
            if ($intxt !~ /^\s+/) {     # ok, fine ... no boundary recorded!
	      $savethatinput = $intxt;
              return;
            }
            if ($intxt =~ /boundary="?(.*)"?;?\s*/i) { # found a boundary?
              $sep = $1;                               # yep - save it
              $sep = substr($sep,1) if (substr($sep,0,1) eq "\"");
              $sep = substr($sep,0,index($sep,"\"")) if (index($sep,"\"") != -1);
	      $sep =~ tr/"//d;
              $done = 1;                               # now we're done, so
              print "$spool$intxt" if ($echomime && $headers);
            } else {
              $spool .= "$intxt";                      # spool it up for later
            }
          } else {
            die "Premature end of input while reading headers";
          }
        }
      } 
    } else {      # this would be a second Content-Type -- not kosher
      die "Saw two Content-Type headers in input!";
    }
    chop $sep if (substr($sep,-1,1) eq ";");         # chop trailing ;
    if ($DEBUG) { print ">>> BOUNDARY IS \"$sep\"\n"; }
    push @seplist,$sep;                      # add to boundary stack
    $done = 0;
    while (!$done) {
      if ($intxt = <STDIN>) {
        if ($intxt eq "\n") {
          $sectiontype = 1;
          $sectionmark = 0;
          print $intxt unless (!$headers);
          $done = 1;
        }
        elsif ($intxt =~ /^\S+/) {
	  $savethatinput = $intxt;
          $done = 1;
        } else { print $intxt unless (!$echomime || !$headers); }
      } else { die "End of input in middle of headers!"; }
    }
  } else {
    if ($intxt eq "\n") {                    # watch for end of headers
      $sectiontype = 1;                      # state: now in message BODY
      $sectionmark = 0;                      # and at the start, no less!
      print $intxt unless (!$headers);       # no extra \n if !printing hdrs
    }
    elsif ($headers) {                       # should we print ANY headers?
      if (($intxt !~ /^Content-/i) &&        # is this a *non* MIME header?
          ($intxt !~ /^Mime-/i)) {
        print $intxt;                        # then automatically print it
      } else {                               # but if it IS a MIME header,
        print $intxt if ($echomime);         #  only print it if we should
      }
    }
  }
}


sub process_nonmime_body {
  print "$intxt" if ($echobody || !$sep);
}


sub process_mime_section {
  $sectionid = $mimes{$mimesection};
  if ($sectionid == 3) {              # FORWARD section (echo with >)
    if ($sectionmark == 0) {          #  in section's MIME headers
      print "> $intxt" if ($echomime);#    were we told to print MIME hdrs?
    }
    elsif ($sectionmark == 1) {       #  in forwarded message's headers
      if ($cleanheaders) {            #    cleaning them?
        if (($intxt =~ /^(From|Date|Subject|To|Cc): /i) ||
            ($intxt eq "\n")) {       #     must preserve head/body sep (\n\n)
          print "> $intxt";           #     yes + good header = print
        }
      } else {
        print "> $intxt";             #     not cleaning, so always print
      }
    } else {                          #  in the forwarded message's body
      print "> $intxt";               #   so always print it
    }
  }
  elsif ($sectionid == 1) {           # INCLUDE section (print it verbatim)
    if ($sectionmark == 0) {          #    in section's MIME headers
      print "$intxt" if ($echomime);  #      were we told to print MIME hdrs?
    }
    elsif ($sectionmark > 0) {        #    not in MIME headers
      print "$intxt";                 #      so always print it
    }
  }

  $sectionmark += 1 if ($intxt eq "\n");   # past MIME or message headers?
}

############################################################################


sub dohelp() {
  print <<ZZEOF
usage: $0 [--help] [--version] [-bmrhc] [-ief <MIMEtypes>]
 Expects an email message on STDIN and echoes the email to STDOUT except
 for MIME attachments.  Options:
   -i <MIMEtypes> -- comma-separated MIME types to include in output
   -e <MIMEtypes> -- comma-separated MIME types to exclude from output
   -f <MIMEtypes> -- comma-separated MIME types to "forward" (prepend "> ")
   -b  -- echo the non-MIME part of the message body (default ignores it)
   -m  -- echo MIME separators and message/section header info (default won't)
   -r  -- print a warning when $0 removes MIME from the message
   -R  -- print a warning when $0 removes named MIME sections
   -h  -- doesn't echo the message headers (default will)
   -c  -- only echo basic headers in forward (-f) MIME types (overridden by -m)
 Or:
   --help    -- print this help message
   --version -- print version info and exits
 For -i, -e and -f options, command line arguments are parsed left-to-right,
 and later directives override earlier ones.
 Unless you explicitly exclude (-e) them, text/plain and
 message/delivery-status will be included (-i), and message/rfc822
 and text/rfc822-headers will be forwarded (-f) by default.
ZZEOF
;
  exit 1;
}

sub dovers() {
  print "stripmime version $version by Adam Glass <adam\@clarity.net>\n";
  print "  This program is distributed with NO WARRANTY WHATSOEVER under ";
  print "the terms\n  of the GNU Public License (GPL), details about which ";
  print "can be found at:\n  ";
  print "    http://www.gnu.org/copyleft/gpl.txt\n";
  exit 2;
}
