#!/usr/bin/perl # stripmime - get rid of MIME crap in email # # by Adam Glass # modified by Dave Sherohman ############################################################################ $version = "0.8.3a-dps"; # version string $headers = 1; # default to echo message headers $cleanheaders = 0; # strip non-essential headers in "forward" message sections $echomime = 0; # default to not echo mime section info $sep = ""; # section separator $needsep = 0; # flag for section change $echobody = 0; # default to not echoing message body $sectionmark = 0; # 0=MIME headers, 1=message's headers, 2+ otherwise $sectiontype = 0; # section ID (0=message headers,1=body/nonmime,2=body/mime) $mimesection = ""; # used to record MIME type of current section $sectionname = ""; # Used to record name of current section $printremoved = 0; # print notice when attachments removed $printnamed = 0; # Print notice when named attachments removed #$pr_template = "#### MIME type \"\{\}\" removed by stripmime ####"; $pr_template = "There is an attachment named SECTION."; #$DEBUG = 1; # uncomment for diagnostic info ########################################################################### # (If you're customizing the code and adding/removing MIME types to include # by default, this would be a good place to do it. %mimes is a Perl # associative array (hash) of MIME types. If the value of a key is: # 1: It is passed through to STDOUT (if it appears in the input stream) # 3: It is passed to STDOUT but each line is prepended with "> " # Any other value is ignored -- not passed to STDOUT. (Or, in the # parlance of the command line arguments, excluded.) $mimes{"text/plain"} = 1; # default MIME type to pass $mimes{"message/delivery-status"} = 1; # this is probably useful stuff $mimes{"message/rfc822"} = 3; # forwarded messages should get passed $mimes{"text/rfc822-headers"} = 3; # webtv's (and others'?) MTA uses this ############################################################################ parse_command_line(); if ($DEBUG) { show_options(); } while ($intxt = ($savethatinput ? $savethatinput : )) { $savethatinput = ""; if ($sep && ($intxt eq "--$sep--\n")) { # found final MIME boundary process_final_boundary(); } elsif ($sep && ($intxt eq "--$sep\n")) { # found MIME boundary process_mime_boundary(); } elsif ($sectiontype == 0) { # deal with message headers process_message_headers(); } elsif ($sectiontype == 1) { # deal with non-MIME body process_nonmime_body(); } elsif (($sectiontype == 2) && # deal with MIME in body (($mimes{$mimesection} == 1) || # ("1" = include it ($mimes{$mimesection} == 3))) { # "3" = "forward" it) process_mime_section(); } } # (print a terminal separator if need be) if ($sep) { print "--$sep--\n" if ($echomime && $needsep); } exit 0; # # wait, you mean it's not that simple? # # # oh... ############################################################################ sub parse_command_line { foreach $arg (@ARGV) { if (substr($arg,0,1) eq "-") { # starts with a -? dovers() if ($arg =~ /^(-)?-version/i); # version text dohelp() if ($arg =~ /^(-)?-help/i); # help/usage message $sectiontype = 0; # reset for later use for ($i=1;$i) following types else { # not a valid flag print "invalid option: ".substr($arg,$i,1)."\n"; dohelp(); # do help/usage message } } } elsif ($sectiontype != 0) { # expecting MIME types $arg =~ tr/ //d; @mimelist = split(",",$arg); # temporary array if (@mimelist == 0) { dohelp(); } # none? do help/usage msg foreach $mime (@mimelist) { # for each one, if ($mime !~ /\w\/\w/) { dohelp(); } else { # valid-looking? $mimes{lc($mime)} = $sectiontype; # set action to take } } $sectiontype = 0; # reset for next argument } } # If -i/-e/-f was last argument, we didn't get MIME types we were expecting. if ($sectiontype != 0) { dohelp(); } # do help/usage msg if ($echomime) { $cleanheaders = 0; } # -m overrides -c } sub show_options { print "$0 $version -- DEBUG mode ON -- options:\n"; print ($headers ? " WILL" : " Will NOT"); print " pass-through message's headers.\n"; print ($cleanheaders ? " WILL" : " Will NOT"); print " clean forwarded-sections' headers.\n"; print ($echomime ? " WILL" : " Will NOT"); print " leave message looking like a MIME message.\n"; print ($echobody ? " WILL" : " Will NOT"); print " pass-through non-MIME part of message body.\n"; print ($printremoved ? " WILL" : " Will NOT"); print " insert warning when any MIME sections are removed.\n"; print ($printnamed ? " WILL" : " Will NOT"); print " insert warning when named MIME sections are removed.\n"; } sub clean_text { # clean input lines local($cleanme) = @_; chomp $cleanme; # remove trailing \n chop $cleanme if (substr($cleanme,-1,1) eq "\r"); # and \r, as needed if ($DEBUG) { print "Translated cr/nl into nl...\n"; } # do i want to do anything else here?? return $cleanme; # and send it on back } sub process_final_boundary { $needsep = 1; # print final boudary (if -m) $sectiontype = 1; # just in case there's more if (@seplist>1) { pop @seplist; $sep = $seplist[@seplist-1]; if ($DEBUG) { print ">>> DROPPED BACK TO \"$sep\"\n"; } } } sub process_mime_boundary { $needsep = 1; # need a separator for this $printedremoved = 0; # reset flag marking printed if ($DEBUG) { print ">>> HIT SEPARATOR\n"; } # in case you care... ("token documentation") # $sectionid is the action to take with this MIME section # (1=echo,3=forward,anything else=ignore) # $sectionmark is the location within a section (0=in MIME headers, # >0=in MIME body. For MIME sections which are themselves messages # (as in the case of message/rfc822-style forwarded messages, $sectionmark # will be 1 in the forwarded message's headers, 2+ afterwards) # MIME boundaries mean we're moving onto a new section, so check # Content-Type field to identify section and set sectiontype as needed $done = 0; $spool = ""; $hiteof = 0; $mimesection = ""; $sectionname = ""; while (!$done && !$hiteof) { $hiteof = 1 unless ($intxt = ); # note down EOF if it happened $spool .= $intxt; # save mime headers $hassemi = index($intxt,";"); if ($hassemi != -1) { $intxt = substr($intxt,0,$hassemi); } if (!$mimesection) { if ($intxt =~ /^Content-Type: (.*);?/i) { # does it look right? $mimesection = lc($1); # save for later chop $mimesection if (substr($mimesection,-1,1) eq ";"); } } elsif (!$sectionname) { if ($intxt =~ /name=\"([^\"]+)\"/i) { $sectionname = $1; } } if ($intxt eq "\n") { $mimesection="text/plain" if (!$mimesection); # default per RFC 2045 $done = 1; # end of headers - exit loop } } die "Hit EOF scanning MIME headers" if ($hiteof); # we should have a Content-Type by this point -- is it multipart? # if so, record the boundary value on the stack if (index($mimesection,"multipart/") == 0) { if ($spool =~ /boundary="?(.*)"?;?\s*/im) { # should find it in there $sep = $1; $sep =~ tr/"//d; # spooled MIME headers push @seplist,$sep; # add to boundary stack } if ($DEBUG) { print ">>> NEW BOUNDARY \"$sep\"\n"; } } if ($DEBUG) { print ">>> SWITCHING TO: \"$mimesection\" ($mimes{$mimesection})\n"; } # assuming we got a good content-type header... $sectiontype = 2; # set sectiontype = MIME $sectionmark = 1; # set section location to body if (($mimes{$mimesection} == 1) || ($mimes{$mimesection} == 3)) { if ($needsep && $echomime) { # and we need to print a print "--@seplist[0]\n" unless ($mimes{$mimesection} == 3); # MIME separator line, do so $needsep = 0; # and reset need flag } if ($echomime && ($mimes{$mimesection} != 3)) { @spool_lines = split("\n",$spool); # convert to array foreach $spooled_line (@spool_lines) { # for each line spooled... print "$spooled_line\n"; # definitely print the line } } print "\n" unless ($mimes{$mimesection} != 3); } else { # not something we should print if (($printremoved || ($printnamed && $sectionname)) && !$printedremoved) { # look closely, they're different $printedremoved = 1; $removetext = $pr_template; $removetext =~ s/\{\}/$mimesection/ig; $removetext =~ s/SECTION/$sectionname/ig; print "$removetext\n"; } } } sub process_message_headers { if ($intxt =~ /^Content-Type:/i) { # if it's a MIME content-type def # this next line is wrong -- we don't necessarily want to echo # the input MIME type. but what to use instead? multipart/something? # text/plain? ... fix this at some point. print "$intxt" if ($echomime && $headers); # print it if we should if (!$sep) { # don't already have one? if ($intxt =~ /boundary="?(.*)"?;?\s*/i) { # find the boundary $sep = $1; $sep = substr($sep,1) if (substr($sep,0,1) eq "\""); $sep = substr($sep,0,index($sep,"\"")) if (index($sep,"\"") != -1); $sep =~ tr/"//d; } else { # it's not here $spool = ""; # start spooling $done = 0; # reset flag while (!$done) { if ($intxt = ) { die "Premature end of MIME header" if ($intxt eq "\n"); if ($intxt !~ /^\s+/) { # ok, fine ... no boundary recorded! $savethatinput = $intxt; return; } if ($intxt =~ /boundary="?(.*)"?;?\s*/i) { # found a boundary? $sep = $1; # yep - save it $sep = substr($sep,1) if (substr($sep,0,1) eq "\""); $sep = substr($sep,0,index($sep,"\"")) if (index($sep,"\"") != -1); $sep =~ tr/"//d; $done = 1; # now we're done, so print "$spool$intxt" if ($echomime && $headers); } else { $spool .= "$intxt"; # spool it up for later } } else { die "Premature end of input while reading headers"; } } } } else { # this would be a second Content-Type -- not kosher die "Saw two Content-Type headers in input!"; } chop $sep if (substr($sep,-1,1) eq ";"); # chop trailing ; if ($DEBUG) { print ">>> BOUNDARY IS \"$sep\"\n"; } push @seplist,$sep; # add to boundary stack $done = 0; while (!$done) { if ($intxt = ) { if ($intxt eq "\n") { $sectiontype = 1; $sectionmark = 0; print $intxt unless (!$headers); $done = 1; } elsif ($intxt =~ /^\S+/) { $savethatinput = $intxt; $done = 1; } else { print $intxt unless (!$echomime || !$headers); } } else { die "End of input in middle of headers!"; } } } else { if ($intxt eq "\n") { # watch for end of headers $sectiontype = 1; # state: now in message BODY $sectionmark = 0; # and at the start, no less! print $intxt unless (!$headers); # no extra \n if !printing hdrs } elsif ($headers) { # should we print ANY headers? if (($intxt !~ /^Content-/i) && # is this a *non* MIME header? ($intxt !~ /^Mime-/i)) { print $intxt; # then automatically print it } else { # but if it IS a MIME header, print $intxt if ($echomime); # only print it if we should } } } } sub process_nonmime_body { print "$intxt" if ($echobody || !$sep); } sub process_mime_section { $sectionid = $mimes{$mimesection}; if ($sectionid == 3) { # FORWARD section (echo with >) if ($sectionmark == 0) { # in section's MIME headers print "> $intxt" if ($echomime);# were we told to print MIME hdrs? } elsif ($sectionmark == 1) { # in forwarded message's headers if ($cleanheaders) { # cleaning them? if (($intxt =~ /^(From|Date|Subject|To|Cc): /i) || ($intxt eq "\n")) { # must preserve head/body sep (\n\n) print "> $intxt"; # yes + good header = print } } else { print "> $intxt"; # not cleaning, so always print } } else { # in the forwarded message's body print "> $intxt"; # so always print it } } elsif ($sectionid == 1) { # INCLUDE section (print it verbatim) if ($sectionmark == 0) { # in section's MIME headers print "$intxt" if ($echomime); # were we told to print MIME hdrs? } elsif ($sectionmark > 0) { # not in MIME headers print "$intxt"; # so always print it } } $sectionmark += 1 if ($intxt eq "\n"); # past MIME or message headers? } ############################################################################ sub dohelp() { print <] Expects an email message on STDIN and echoes the email to STDOUT except for MIME attachments. Options: -i -- comma-separated MIME types to include in output -e -- comma-separated MIME types to exclude from output -f -- comma-separated MIME types to "forward" (prepend "> ") -b -- echo the non-MIME part of the message body (default ignores it) -m -- echo MIME separators and message/section header info (default won't) -r -- print a warning when $0 removes MIME from the message -R -- print a warning when $0 removes named MIME sections -h -- doesn't echo the message headers (default will) -c -- only echo basic headers in forward (-f) MIME types (overridden by -m) Or: --help -- print this help message --version -- print version info and exits For -i, -e and -f options, command line arguments are parsed left-to-right, and later directives override earlier ones. Unless you explicitly exclude (-e) them, text/plain and message/delivery-status will be included (-i), and message/rfc822 and text/rfc822-headers will be forwarded (-f) by default. ZZEOF ; exit 1; } sub dovers() { print "stripmime version $version by Adam Glass \n"; print " This program is distributed with NO WARRANTY WHATSOEVER under "; print "the terms\n of the GNU Public License (GPL), details about which "; print "can be found at:\n "; print " http://www.gnu.org/copyleft/gpl.txt\n"; exit 2; }