#!/usr/bin/perl -w
# $Id: clean-up-html-code,v 1.2 2017/01/11 01:40:39 suter Exp $
#
# Copyright (c) 2001 Mark Suter <mjs@miju.com.au>  All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.

use strict;
use HTML::Clean;

## Purpose: run one HTML file through HTML::Clean with a hand-picked set
## of optimisations and, only if the result is shorter, overwrite the
## file in place with the cleaned markup.
##
## Usage: clean-up-html-code FILE
## Only the first command-line argument is used; any others are ignored.

## Our single file to play with
my $filename = $ARGV[0];

## Fail early with a usage message rather than with an obscure
## dereference error further down when no file was named.
if (!defined $filename || $filename eq '') {
    die "usage: $0 filename\n";
}

## Initialise our cleaner
my $h = HTML::Clean->new($filename)
    or die "$0: can't initialise HTML::Clean: $!\n";

## Select all optimisations
$h->level(9);

## Before we start: $d is a reference to the document text, so the
## original length must be recorded before strip() is called.
my $d = $h->data();
my $origlen = length($$d);

# Select what I want, comments from "perldoc HTML::Clean"
$h->strip({
    whitespace    => 0, # Remove excess whitespace
    shortertags   => 0, # <strong> -> <b>, etc..
    blink         => 1, # No blink tags.
    contenttype   => 0, # Remove default contenttype.
    comments      => 1, # Remove excess comments.
    entities      => 0, # &quot; -> ", etc.
    dequote       => 0, # remove quotes from tag parameters where possible.
    defcolor      => 1, # recode colors in shorter form. (#ffffff -> white, etc.)
    javascript    => 1, # remove excess spaces and newlines in javascript code.
    htmldefaults  => 0, # remove default values for some html tags
    lowercasetags => 1, # translate all HTML tags to lowercase
});

## Was it worth the effort?  This relies on strip() having rewritten the
## text that $d refers to, so the same reference is measured again.
my $newlen = length($$d);
if ($newlen < $origlen) {
    ## warn sprintf "%6d -> %6d %.1f%% %s\n", $origlen, $newlen, (100 * abs($origlen - $newlen)) / $origlen, $filename;

    ## Dump the cleaned code (we are working on a ttree-processed copy,
    ## so no backup is needed).  Three-argument open with a lexical
    ## handle keeps the filename from being parsed as part of the mode.
    open my $out, '>', $filename
        or die "$0: can't overwrite '$filename': $!\n";
    print {$out} $$d
        or die "$0: can't print to filehandle: $!\n";
    ## Buffered write errors only surface at close, so check it too.
    close $out
        or die "$0: can't close filehandle: $!\n";
}