#!/usr/bin/perl -w
# $Id: clean-up-html-code,v 1.1 2004/06/19 11:50:37 suter Exp $
#
# Copyright (c) 2001 Mark Suter  All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# Shrinks a single HTML file in place using HTML::Clean and prints a
# one-line size report (original length, new length, percent change,
# filename) to standard output.
#
# Possible usage:
#   find /var/www/www.example.com/ -iname \*.html -exec ~/bin/clean-up-html-code {} \;

use strict;
use HTML::Clean;

## Our single file to play with
my $filename = $ARGV[0];
defined $filename
    or die "usage: $0 FILE\n";

## Initialise our cleaner (direct method call rather than the
## ambiguous indirect-object form "new HTML::Clean(...)")
my $h = HTML::Clean->new($filename)
    or die "$0: can't initialise HTML::Clean: $!\n";

## Select all optimisations
$h->level(9);

## Before we start.  $d is a reference to the cleaner's data; this
## script relies on strip() updating that same buffer, which is why
## length($$d) is measured again afterwards.
my $d       = $h->data();
my $origlen = length($$d);

# Select what I want, comments from "perldoc HTML::Clean"
$h->strip({
    whitespace    => 1,    # Remove excess whitespace
    shortertags   => 0,    # <strong></strong> -> <b></b>, etc..
    blink         => 0,    # No blink tags.
    contenttype   => 0,    # Remove default contenttype.
    comments      => 1,    # Remove excess comments.
    entities      => 0,    # &quot; -> ", etc.
    dequote       => 0,    # remove quotes from tag parameters where possible.
    defcolor      => 1,    # recode colors in shorter form. (#ffffff -> white, etc.)
    javascript    => 1,    # remove excess spaces and newlines in javascript code.
    htmldefaults  => 0,    # remove default values for some html tags
    lowercasetags => 1,    # translate all HTML tags to lowercase
});

## Was it worth the effort?
my $newlen = length($$d);

# An empty input file has nothing to shrink; report 0% instead of
# dying with "Illegal division by zero".
my $percent = $origlen
    ? (100 * abs($origlen - $newlen)) / $origlen
    : 0;

printf "%6d -> %6d %2d%% %s\n", $origlen, $newlen, $percent, $filename;

## Dump the cleaned code (this is a processed copy, so no backup needed).
## Three-argument open with a lexical handle so that unusual filenames
## (leading/trailing whitespace, mode characters) are taken literally.
open my $output, '>', $filename
    or die "$0: can't overwrite '$filename': $!\n";
print {$output} $$d
    or die "$0: can't print to filehandle: $!\n";
close $output
    or die "$0: can't close filehandle: $!\n";