#!/usr/bin/perl -T -w # # $Id: mbox-filter,v 1.5 2004/06/19 11:49:32 suter Exp $ # Copyright (C) 2002,2003 Mark Suter # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # [MJS 11 Sep 2002] Script to make glimpse indexing my email a more useful thing via .glimpse_filters # [MJS 17 Sep 2003] More generic - just output some plain text for whatever purpose use strict; use Getopt::Long; use Pod::Usage; use POSIX; use Mail::Box::Manager; use HTML::FormatText; use HTML::TreeBuilder; $ENV{'PATH'} = '/bin:/usr/bin'; my %opt = (man => 0, help => 0, root => undef, url => undef); GetOptions(\%opt, "man", "help", "root=s", "url=s") or pod2usage(0); $opt{man} and pod2usage(-exitval => 0, -verbose => 2); $opt{help} and pod2usage(0); scalar @ARGV >= 1 or pod2usage(0); # Some quick checks on our options defined($opt{root}) and defined($opt{url}) or pod2usage(0); ($opt{root}) = map { /(.+)/; $1 } $opt{root}; # Explicit trust! -d $opt{root} or die "$0: document root $opt{root} is not a directory.\n"; -w $opt{root} or die "$0: document root $opt{root} is not writable.\n"; $opt{url} =~ s{/$}{}; # remove a trailing slash # Parse a filename out of a string # (TODO: review security :TODO) sub get_filename { my $name = $_[0]->attribute('filename') || $_[0]->attribute('name') || POSIX::strftime("%Y%m%dT%H%M%S.bin", gmtime); $name =~ tr{-.A-Za-z0-9}{}cd; # Leave only good characters return $name; } # Recurse and print the text/plain parts sub show_content($); sub show_content($) { my $msg = shift; if ($msg->isMultipart) { if ($msg->body->mimeType eq 'multipart/alternative') { # Assume the alternative includes a text/plain foreach my $part (grep { $_->body->mimeType eq 'text/plain' } $msg->parts) { show_content($part); } } else { foreach my $part ($msg->parts) { show_content($part); } } } elsif ($msg->body->isText) { if ($msg->body->mimeType eq 'text/html') { print HTML::FormatText->new->format(HTML::TreeBuilder->new_from_content($msg->body->decoded)); } else { $msg->body->decoded->print; } } else { my $name = get_filename $msg->body->disposition; my $file = IO::File->new("$opt{root}/$name", "w") or die "$0: IO::File->new: $!\n"; $msg->print($file) or die "$0: error writing file: $!\n"; printf "\n*** %s/%s was attached (%d bytes)\n", $opt{url}, $name, $msg->size; } } # Simple output filter if (open FILTER, "-|") { while () { s/[^[:print:][:space:]]//g; print; } } # Loop over each file (untainting and checking before we start doing anything) foreach my $file (map { /(.+)/ and -f $1 and -r _ or die "mbox-prune: File $1 not readable!\n"; $1 } @ARGV) { # Build a "mailbox" my $mgr = Mail::Box::Manager->new(defaultTrace => 'ERRORS') or die "$0: Can't initialise Mail::Box::Manager: $!\n"; my $folder = $mgr->open(folder => $file, extract => 'LAZY', access => 'r') or die "$0: Can't open filter: $!\n"; # Iterate over each message foreach ($folder->messages) { print "Date: ", ($_->date or ""), "\nFrom: ", defined($_->from) ? defined(($_->from)[0]) ? ($_->from)[0]->format : "" : "", "\nTo: ", defined($_->destinations) ? join ", ", map $_->format, $_->destinations : "", "\nSubject: ", ($_->subject or ""), "\n\n"; show_content $_; print "\n", chr 12, "\n"; # Form Feed character } $folder->close; } __END__ =head1 NAME mbox-filter =head1 SYNOPSIS mbox-filter --root /var/www/attach --url http://www.example.org/attach mbox... =head1 OPTIONS =over 8 =item B<--man> Print the manual page and exit. =item B<--root> The document root where this script will place any attachments it does not convert into text. =item B<--url> The uniform resource locator corresponding to the document root given. =item B<--help> Print a brief help message and exit. =back =head1 DESCRIPTION B is a filter for "normal" mbox format files. It will print out some useful text based on the emails found. No modifications are attempted on the input. For email parts that doesn't have a ready text version, the part is written to the provided document root and a a reference is output using the provided url. =head1 EXIT CODES If B exits with a zero exit status and the correct output is on standard output. Nothing else should be printed to standard output. B will exit with a non-zero exit status if there was a fatal error. Both fatal and non-fatal errors will cause output on standard error. =head1 AUTHOR Mark Suter EFE =head1 COPYRIGHT Copyright (C) 2002,2003 Mark Suter EFE This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA or from the following webpage. http://www.gnu.org/licenses/gpl.txt =cut