#!/usr/bin/perl -T -w
#
# $Id: mbox-filter,v 1.5 2004/06/19 11:49:32 suter Exp $
# Copyright (C) 2002,2003 Mark Suter <suter@humbug.org.au>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
# [MJS 11 Sep 2002] Script to make glimpse indexing my email a more useful thing via .glimpse_filters
# [MJS 17 Sep 2003] More generic - just output some plain text for whatever purpose

use strict;
use Getopt::Long;
use Pod::Usage;
use POSIX;
use Mail::Box::Manager;
use HTML::FormatText;
use HTML::TreeBuilder;

$ENV{'PATH'} = '/bin:/usr/bin';
my %opt = (man => 0, help => 0, root => undef, url => undef);
GetOptions(\%opt, "man", "help", "root=s", "url=s") or pod2usage(0);
$opt{man} and pod2usage(-exitval => 0, -verbose => 2);
$opt{help} and pod2usage(0);
scalar @ARGV >= 1 or pod2usage(0);

# Some quick checks on our options
defined($opt{root}) and defined($opt{url}) or pod2usage(0);
($opt{root}) = map { /(.+)/; $1 } $opt{root};	# Explicit trust!
-d $opt{root} or die "$0: document root $opt{root} is not a directory.\n";
-w $opt{root} or die "$0: document root $opt{root} is not writable.\n";
$opt{url} =~ s{/$}{}; # remove a trailing slash

# Parse a filename out of a string
# (TODO: review security :TODO)
sub get_filename {
    my $name = $_[0]->attribute('filename') || $_[0]->attribute('name') || POSIX::strftime("%Y%m%dT%H%M%S.bin", gmtime);
    $name =~ tr{-.A-Za-z0-9}{}cd;	# Leave only good characters
    return $name;
}

# Recurse and print the text/plain parts
sub show_content($);
sub show_content($) {
    my $msg = shift;

    if ($msg->isMultipart) {
	if ($msg->body->mimeType eq 'multipart/alternative') {
	    # Assume the alternative includes a text/plain
	    foreach my $part (grep { $_->body->mimeType eq 'text/plain' } $msg->parts) {
		show_content($part);
	    }
	} else {
	    foreach my $part ($msg->parts) {
		show_content($part);
	    }
	}
    } elsif ($msg->body->isText) {
	if ($msg->body->mimeType eq 'text/html') {
	    print HTML::FormatText->new->format(HTML::TreeBuilder->new_from_content($msg->body->decoded));
	} else {
	    $msg->body->decoded->print;
	}
    } else {
	my $name = get_filename $msg->body->disposition;
	my $file = IO::File->new("$opt{root}/$name", "w") or die "$0: IO::File->new: $!\n";
	$msg->print($file) or die "$0: error writing file: $!\n";
	printf "\n*** %s/%s was attached (%d bytes)\n", $opt{url}, $name, $msg->size;
    }
}

# Simple output filter
if (open FILTER, "-|") {
    while (<FILTER>) {
	s/[^[:print:][:space:]]//g;
	print;
    }
}

# Loop over each file (untainting and checking before we start doing anything)
foreach my $file (map { /(.+)/ and -f $1 and -r _ or die "mbox-prune: File $1 not readable!\n"; $1 } @ARGV) {

    # Build a "mailbox"
    my $mgr = Mail::Box::Manager->new(defaultTrace => 'ERRORS') or die "$0: Can't initialise Mail::Box::Manager: $!\n";
    my $folder = $mgr->open(folder => $file, extract => 'LAZY', access => 'r') or die "$0: Can't open filter: $!\n";

    # Iterate over each message
    foreach ($folder->messages) {
	print "Date: ", ($_->date or ""),
	    "\nFrom: ", defined($_->from) ? defined(($_->from)[0]) ? ($_->from)[0]->format : "" : "",
	    "\nTo: ", defined($_->destinations) ? join ", ", map $_->format, $_->destinations : "",
	    "\nSubject: ", ($_->subject or ""), "\n\n";
	show_content $_;
	print "\n", chr 12, "\n";	# Form Feed character
    }

    $folder->close;
}

__END__

=head1 NAME

mbox-filter

=head1 SYNOPSIS

mbox-filter --root /var/www/attach --url http://www.example.org/attach mbox...

=head1 OPTIONS

=over 8

=item B<--man>

Print the manual page and exit.

=item B<--root>

The document root where this script will place any attachments
it does not convert into text.

=item B<--url>

The uniform resource locator corresponding to the document root given.

=item B<--help>

Print a brief help message and exit.

=back

=head1 DESCRIPTION

B<This program> is a filter for "normal" mbox format files.  It
will print out some useful text based on the emails found.  No
modifications are attempted on the input.

For email parts that doesn't have a ready text version, the part
is written to the provided document root and a a reference is
output using the provided url.

=head1 EXIT CODES

If B<this program> exits with a zero exit status and the correct
output is on standard output.  Nothing else should be printed to
standard output.

B<This program> will exit with a non-zero exit status if there
was a fatal error.  Both fatal and non-fatal errors will cause
output on standard error.

=head1 AUTHOR

Mark Suter E<lt>F<suter@humbug.org.au>E<gt>

=head1 COPYRIGHT

Copyright (C) 2002,2003 Mark Suter E<lt>F<suter@humbug.org.au>E<gt>

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA or from the following webpage.

    http://www.gnu.org/licenses/gpl.txt

=cut

