#!/usr/bin/perl -w
#
# $Id: mailq-summary,v 1.4 2004/06/19 11:49:32 suter Exp $
#
# Copyright (c) 2002,2003,2004 Mark Suter <mark.suter@miju.com.au>
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

use strict;
use Getopt::Long;
use POSIX qw(strftime);
use Pod::Usage;
use Time::Local;

# Fragments longer than this are not 'local'
use constant TRUNCATION_LENGTH => 25;

# Instead of installing a function like Date::Calc::Decode_Month
my %months = ( "Jan" => 0, "Feb" => 1, "Mar" => 2, "Apr" => 3, "May" => 4, "Jun" => 5,
               "Jul" => 6, "Aug" => 7, "Sep" => 8, "Oct" => 9, "Nov" => 10, "Dec" => 11);

# Each time unit, as compared to seconds, because they use differing bases.
my @time_units = reverse ( [ s => 1 ], [ m => 60 ], [ h => 60*60 ], [ d => 24*60*60 ], [ w => 7*24*60*60 ] );

# International System of Units (SI) prefixes
my @si_prefixes = ( [ "" => 0], [ K => 1 ], [ M => 2 ], [ G => 3 ], [ T => 4 ], [ P => 5 ], [ E => 6 ], );

# Process a single entry from "mailq -v" and update the variables passed in by reference.
sub process_record(\%\%\$@) {
    my ($opt, $records, $total_queued, @record) = @_;
    scalar @record >= 1 or return (); # Nothing to do with empty records

    # So we can warn sensibly if something goes awry
    my @copy = @record;

    # Split the line, possibly fixing the Priority+Month "feature" ;)
    chomp @record >= 1 or warn "$0: chomp didn't do anything to the input below!\n", @copy, "\n";
    my @parts = split ' ', shift @record, 7;
    scalar @parts == 6 and @parts = (@parts[0..1], split(/\+/, $parts[2]), @parts[3..5]);
    scalar @parts == 7 or die "$0: process_record: Can't split line properly: @parts\n";
    my ($queueid, $size, undef, $mon, $mday, $h_m, $sender) = @parts;

    # Handle the processing option
    not $opt->{processing} and $queueid =~ /\*$/ and return;

    # Get the timestamp
    my $time = undef;
    eval {
	$time = timelocal 0, reverse(split /:/, $h_m), $mday, $months{$mon},
	    (localtime)[5] - ((localtime)[4] < $months{$mon} ? 1 : 0);      # HACK: This year or last?
    };
    if ($@ or not defined $time) {
	warn "$0: timelocal croaked on input below: $@";
	warn @record, "\n";
	return;
    }

    # The remaining lines are recipients or reasons
    my %counted = ();
    foreach (@record) {
        # Skip the reason lines
        m/ ^ \s+ (?:8BITMIME)? \s+ \( (.+) \) \s* $ /ix and next;

        # Get a useful match (based on user's options)
	my $target = $opt->{field} eq "from" ? $sender : $_ ;
        my $match = "local_address";
	if ($target =~ m/ ^ \s* <? (.+) \@ (.+?) >? \s* $ /x) {
	    $match = $opt->{match} eq "domain" ? lc $2 : lc "$1\@$2";
	} elsif (length $target > TRUNCATION_LENGTH) {
	    $match = "truncated_address";
	}

	# Don't record one email against the same match twice in one record
        $counted{$match}++ and next;

	# Possibly approximate the match
	if ($opt->{ndots} > 0) {
	    my @parts = split /\./, $match;
	    $match = join ".", @parts[ -$opt->{ndots} .. -1 ] if scalar @parts > $opt->{ndots};
	}

        # Update the stats
        if (not exists $records->{$match}) {
            $records->{$match}[2] = $time;
            $records->{$match}[3] = $time;
        } else {
            $records->{$match}[2] = $time if $time < $records->{$match}[2];
            $records->{$match}[3] = $time if $time > $records->{$match}[3];
        }
        $records->{$match}[0]++;
        $records->{$match}[1] += $size;
    }

    $$total_queued++;

    return ();
}

# A sort sub for the domain sort described in the manual
sub domain_sort {
    my @b = reverse split /\.\@/, $b;
    foreach my $x (reverse split /\.\@/, $a) {
	my $y = shift @b or return 1;       # $b ran out
	$x cmp $y and return $x cmp $y;
    }
    return -1;                              # $a ran out
}

# Return with one prefix, for example, 3.5M or 35M
sub human_size($) {
    foreach (@si_prefixes) {
	my $result = $_[0] / $$_[1];
	my $log = $result > 0 ? int log($result)/log(10) : 0;
	$log == 0 and return sprintf "%3.1f$$_[0]", $result;
	$log <= 2 and return sprintf "%3.0f$$_[0]", $result;
    }
    die "$0: human_size: $_[0]: this error should not occur.\n";
}

# Return with two prefixes, that is, 0m5s, 45m23s, 5h9m, 3d8h and finally 96w4d.
sub human_time($) {
    for (my $i = 0; $i < @time_units; $i++) {
	my $result = int $_[0] / $time_units[$i][1];
	$result >= 1 and return sprintf("%d$time_units[$i][0]", $result) . ( defined($time_units[$i+1][0]) ? 
	    sprintf("%1.0f$time_units[$i+1][0]", ($_[0] % $time_units[$i][1]) / $time_units[$i+1][1]) : "" );
    }
    die "$0: human_time: $_[0]: this error should not occur.\n";
}

# Display the accumulated data
sub print_report(\%\%\$) {
    my ($opt, $records, $total_queued, @records) = @_;

    # Sort as requested
    foreach ($opt->{sort}) {
        /age/    and @records = sort { $records->{$a}[2] <=> $records->{$b}[2] } keys %{$records};
        /domain/ and @records = sort domain_sort keys %{$records};
        /number/ and @records = sort { $records->{$b}[0] <=> $records->{$a}[0] } keys %{$records};
        /size/   and @records = sort { $records->{$b}[1] <=> $records->{$a}[1] } keys %{$records};
    }

    # Print the report
    print "Total emails queued: $$total_queued\n";
    printf "%-40s %8s %10s %8s %8s\n", ($opt->{field} eq "to" ? "Recipient" : "Sender") . " " . ($opt->{match} eq "domain" ? "Domain" : "Email"), qw(Number Size Oldest Newest);
    printf "%-40s %8s %10s %8s %8s\n", "By $opt->{sort}", "", $opt->{human} ? ("", "", "") : ("bytes", "seconds", "seconds");
    foreach my $record (@records) {
        printf "%-40s %8d %10s %8s %8s\n",
            $record,
            $records->{$record}[0],
	    ( map { $opt->{human} ? human_size $_ : $_ } $records->{$record}[1] ),
	    map { $opt->{human} ? human_time (time - $_) : time - $_ }
		$records->{$record}[2], $records->{$record}[3];
    }
}

sub main () {
    # Defaults for the options
    my %opt = (man => 0, help => 0, sort => "age", ndots => 0, processing => 1, human => 1, base => "si", match => "domain", field => "to");

    # Process options and exit with error message if needed.
    $ENV{PATH} = "/bin:/usr/bin";
    GetOptions(\%opt, "man", "help", "sort=s", "ndots=i", "processing!", "human!", "base=s", "match=s", "field=s")
        or pod2usage(-exitval => 1, -verbose => 0);
    $opt{man} and pod2usage(-exitval => 0, -verbose => 2);
    $opt{help} and pod2usage(-exitval => 0, -verbose => 1);
    shift @ARGV and pod2usage(-exitval => 0, -verbose => 1);
    $opt{sort} =~ /age|domain|number|size/ or die "Invalid sort order: $opt{sort}\n";
    $opt{base} =~ /^(binary|si)$/ or die "Invalid base option: $opt{base}\n";
    $opt{match} =~ /^(domain|all)$/ or die "Invalid match option: $opt{match}\n";
    $opt{field} =~ /^(to|from)$/ or die "Invalid field option: $opt{field}\n";

    # Expand the prefixes according to the user's wishes.
    @si_prefixes = map { [ $$_[0], ( ($opt{base} eq "si" ? 10**3 : 2**10) ** $$_[1]) ] } @si_prefixes; 

    # Initialize summary
    my %records = ();
    my $total_queued = 0;

    # Pass each record to process_record()
    my @record = ();
    while (<>) {
        # Ignore the banners and totals
        next if m{ ^ ( -----Q-ID----- | \s* / \S+ / | \s+ Total \s requests: | \S+ \s+ Queue \s+ status ) }ix;

        # Ignore any completed jobs
        next if m{ ^ .{10,20} \s \(job \s completed\) }x;

        # Process the rest
        m{ ^ \s? \w+ [-*]? \s+ }x and @record = process_record(%opt, %records, $total_queued, @record);

        # Build up the record to process
        push @record, $_;
    }

    # Process last record
    @record = process_record(%opt, %records, $total_queued, @record);

    # Print the report
    print_report(%opt, %records, $total_queued);
}

main();

__END__

=head1 NAME

mailq-summary - Filter producing summary of sendmail's mailq output

=head1 SYNOPSIS

B<mailq> -v | B<mailq-summary> [ options ]

B<mailq-summary> --help

B<mailq-summary> --man

=head1 DESCRIPTION

B<This program> processes the output of sendmail's mailq tool to
produce a summary of the queued emails.  The summary has three
header lines and then continuous rows of data.

The verbose option to mailq is needed to avoid the truncation of
the sender and recipient email addresses.

=head1 OPTIONS

=over 8

=item B<--help>

Print this brief help message and exit.

=item B<--man>

Display the manual page and exit.

=item B<--match=domain|all>

Select how much to match when doing the summarisation, either
just the domain or the entire email address.  When matching the
domain, consider the B<--ndots> option.  The default is "domain".

=item B<--field=to|from>

Select whether to match means the recipients' addresses ("to") or
the senders' addresses ("from").  The default is "to".

=item B<--sort=age|domain|number|size>

Select the sort order for the report.  The sort is ascending and
the default is "age".

=over 8

=item age

Chronologically by the oldest email queued

=item domain

Alphabetically using the most significant parts of the domain, for
example: example.com, a.example.com, z.example.com, example.org,
a.example.org and then example.net.

=item number

Numerically by number of emails queued

=item size

Numerically by the total bytes of of emails queued

=back

=item B<--ndots=<number>>

When summarising by domains, take this many "parts" of the domain,
starting from the most significant, for example, a setting of 1 may
produce "au, com, net, org, uk." The default is 0, that is, unlimited.

=item B<--noprocessing>

Emails that are currently being processed are indicated by an asterisk
after the queue id, for example, "g1B4eZqd005652*"  By default, these
emails are included in the report.

This option allows you to exclude these emails, possibly giving a more
accurate indication of domains that are failing.

=item B<--nohuman>

By default, the report is presented in a Human Readable manner, that
is, using weeks, days, hours and minutes for time and prefixes for
sizes (see the B<--base> option).

This option allows you to access the raw data for additional
processing, that is, seconds and bytes.

=item B<--base=binary|si>

Select the base for the prefixes used in Human Readable output.
The default is "si".

=over 8

=item binary

The "traditional" base 2 prefixes: "K" is 2**10 and "M" is
2**20, 1024 and 1048576 respectively.

=item si

The International System of Units (SI) base 10 prefixes: "K" is 10**3
and "M" is 10**6, 1000 and 1000000 respectively.

=back

There is real confusion and the potential for incompatibility
in standards and in implemented systems surrounding the use of
prefixes.  See the following site for more information.

http://physics.nist.gov/cuu/Units/binary.html

=back

=head1 SUGGESTED USAGE

I recommend using the B<tee(1)> utility to allow you to make
several invocations of this script with different options, for
example,

    $ mailq -v | tee mailq-v.txt | mailq-summary
    $ mailq-summary --sort=domain < mailq-v.txt
    $ mailq-summary --sort=number < mailq-v.txt

=head1 EXIT STATUS

This script will exit with zero upon successfull completion, non-zero on any
error.  All debugging or diagnostic output is written to STDERR.  Please do
not ignore STDERR.

=head1 AUTHOR

Mark Suter E<lt>F<mark.suter@miju.com.au>E<gt>

=head1 COPYRIGHT

Copyright (c) 2002,2003,2004 Mark Suter <mark.suter@miju.com.au>

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
or from the following webpage.

    http://www.gnu.org/licenses/gpl.txt

=cut