#!/usr/bin/perl -w
# -*- Perl -*-
###########################################################################
#  Copyright (C) 2004, 2005, 2006, 2008  R. Bernstein email: rocky@gnu.org
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with GNU Make; see the file COPYING.  If not, write to the
#   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
#   MA 02110-1301 USA.  You should have received a copy of the GNU
#   General Public License
###########################################################################
my $vcid ='$Id: fs-report.in,v 1.19 2008/07/03 13:57:28 rockyb Exp $';
# Things to do when a filesystem seems full...

sub usage {
  my($full_help) = @_;

  print "
usage:

   $program [OPTIONS..] [directory..]

   Gives a report of files system usages in order to glean why
   a filesystem might be full.

   If no directory is given use the current directory `.'.

options:
    --help                 -- print this help and exit
    --version              -- show a CVS version string and exit
    --debug *n*            -- give debugging output. The higher the
                              number, the more the output. Default is $debug.
    --size-lines *n*       -- Maximum number of lines output in the
                              \"largest files\" section.
    --newest-lines *n*     -- Maximum number of lines given in the
                              \"newest files\" section.
    --ls-lines *n*         -- Maximum number of lines given in the
                              top-level directory list section.
    --lines *n*            -- Maximum number of lines if no other limit
                              is specified. Default is $lines.
    --du                   -- Set program to use for running du. The default is
                              $du_program
    --find                 -- Set program to use for running find. The default:
                              $find_program
    --ls                   -- Show/omit computing \"list files\" section.
    --size                 -- Show/omit computing \"largest files\" section.
    --fast                 -- fast listing for large filesystems
    --[no]core             -- Show/omit computing \"core files\" section.
    --core-pat             -- pattern for core files to look for. The default:
                              $core_pat
    --newest               -- Show/omit computing \"newest files\" section.
    --[no]common           -- Show/omit computing intersection between
                              newest files and largest files.
    --file-time *n*        -- Report files newer than *n* minutes.
                              Default is $old_file_time minutes.
    --find-time *s*        -- Test to use on 'find' for getting time. 
                              Common possibilities are -atime, -mtime, -ctime,
                              -mmin, -newer *file*, -anewer *file*, or
                              -cnewer *file*. The default is -cmin
";
  exit 100;
}

use vars qw($0 $program $version $old_file_time $lines $separator
	    @intersect %seen $debug $do_core $do_ls $do_size $do_newest
	    $do_common $size_lines $fast $newest_lines $ls_lines
            $du_program $find_program $core_pat $du_a $find_time
            );

use strict;


# The name of the directory we're scanning on. Should be passed as an
# argument.
sub init();
sub process_options();
sub run_cmd($$$);
sub show_version();

init();
process_options();

foreach my $directory (@ARGV) {
  if (!chdir $directory) {
    print STDERR "Can't cd to $directory. Skipping...\n";
    next;
  }


  chomp(my $cwd = `pwd`);
  print "* * * $program on $cwd, Version $version * * *\n";

  if ($do_core && $core_pat ne 'nocore') {
    print $separator;
    print "core files ....\n";
    run_cmd("$find_program $directory -xdev -name $core_pat -type f -ls", $debug, $do_core);
  }

  if ($do_size) {
    print $separator;
    print "Largest files/directories\n";
    # Print the KB right justified if possible.
    print "     KB file/directory\n";
    print "------- ---------------------------------------\n";

    my $dir = $fast ? "$directory/*" : $directory;
    my $cmd = "$du_program $du_a -k -x  $dir | "
	. "sort -nr 2>/dev/null";
    print "$program running $cmd\n" if $debug;
    if (!open(OUTPUT, "$cmd|")) {
      print STDERR "Can't run $cmd: $!\n";
    } else {
      $size_lines = 100000 if $size_lines == 0;
      my $i=0;
      my $line;
      while ( defined($line=<OUTPUT>) && ($i++ < $size_lines) ) {
	my($size, $name) = split(/\s+/, $line);
	printf "%7d %s\n", $size, $name;
	$seen{$name} = 1;
      }
      close(OUTPUT);
    }
  }

  
  if ($do_newest && $find_time ne '') {
    my $time_opt;
    my $what_access='changed';
    print $separator;
    if ($find_time =~ /^-[acm]min/) {
      $time_opt = "$find_time -$old_file_time";
    } elsif ($find_time =~ /^-[acm]time/) {
      my $hours= $old_file_time / 60;
      $time_opt = "$find_time -$hours";
    }
    if ($find_time =~ /^-a/) {
	$what_access='accessed';
    } elsif ($find_time =~ /^-m/) {
	$what_access='modified';
    }
    if ( $old_file_time > 60 ) {
      my $hours= $old_file_time / 60;
      print "Files/directories $what_access within the last ${hours}+ hours...\n";
    } else {
      print "Files/directories $what_access within the last $old_file_time minutes...\n";
    }
    my $cmd = "$find_program $directory -xdev $time_opt -type f -ls";
    print "$program running $cmd\n" if $debug;
    if (!open(OUTPUT, "$cmd|")) {
      print STDERR "Can't run $cmd: $!\n";
    } else {
      $newest_lines = 100000 if $newest_lines == 0;
      my $i=0;
      my $line;
      while ( defined($line=<OUTPUT>) && ($i++ < $newest_lines) ) {
	my @fields=split(/\s+/, $line);
	my($junk, $inode, $nblocks, $mode, $nlinks, $user, $group, $size, 
	   $mon, $day, $time, $file);
	if (10 == $#fields) {
	    ($junk, $inode, $mode, $nlinks, $user, $group, $size, 
	     $mon, $day, $time, $file) = @fields;
	} elsif (11 == $#fields) {
	    ($junk, $inode, $nblocks, $mode, $nlinks, $user, $group, $size, 
	     $mon, $day, $time, $file) = @fields;
	} else {
	    # Do something intelligent here?
	    next;
	}
	print $line;
	if (defined($seen{$file})) {
	  push(@intersect, [$size, "$mon $day $time $file"]);
	}
      }
      close(OUTPUT);
    }
  }

  if ($do_common && $do_newest && $do_size) {
    print $separator;
    print "Files which are both big AND new...\n";
    @intersect = sort { $b->[0] <=> $a->[0] } @intersect;
    foreach my $r (@intersect) {
	printf "%7d %s\n", $r->[0], $r->[1];
    }
  }

  if ($do_ls) {
    print $separator;
    print "Top level listing\n";
    run_cmd("ls -l $directory", $debug, $ls_lines+1);
  }
}

exit 0;

sub init() {
  use File::Basename;
  $program = basename($0); # Who am I today, anyway?

  $version='1.7';
  # How many minutes for a file/directory to be considered recent?
  $old_file_time=360 ;

  # How many lines of output for various groups
  $lines=20;

  $separator = "=" x 40 . "\n";

  $debug = 0;
  $do_core = $do_size = $do_common = $do_newest = 1;
  $do_ls = 0;
  @intersect = ();
  $SIG{PIPE} = 'IGNORE';
  $du_program = "/usr/bin/du";
  $find_program = "/usr/bin/find";
  $find_time = '-cmin';
  $core_pat = 'core'
}

# The bane of programming.
sub process_options() {
  use Getopt::Long;
  my(@opt_cf);
  $Getopt::Long::autoabbrev = 1;
  my($help, $long_help, $show_version);

  my $result = &GetOptions
    (
     'common!'        => \$do_common,
     'core!'          => \$do_core,
     'core-pat=s'     => \$core_pat,
     'debug=n'        => \$debug,
     'doc'            => \$long_help,
     'du:s'           => \$du_program,
     'fast'           => \$fast,
     'file-time=n'    => \$old_file_time,
     'find-time=s'    => \$find_time,
     'find:s'         => \$find_program,
     'help'           => \$help,
     'lines=n'        => \$lines,
     'ls!'            => \$do_ls,
     'ls-lines=n'     => \$ls_lines,
     'newest!'        => \$do_newest,
     'newest-lines=n' => \$newest_lines,
     'size!'          => \$do_size,
     'size-lines=n'   => \$size_lines,
     'version'        => \$show_version,
    );
  show_version() if $show_version;

  usage(0) unless $result || @ARGV > 1;
  usage(1) if $help;
  podthis() if $long_help;

  # Set default lines
  $size_lines   = $lines if !defined($size_lines);
  $newest_lines = $lines if !defined($newest_lines);
  $ls_lines     = $lines if !defined($ls_lines);
  $du_a         = $fast ? '-s' : '-a';

  push(@ARGV, '.') if @ARGV==0;
}


sub run_cmd($$$) {
  my($cmd, $show_cmd, $lines) = @_;
  print "$program running $cmd\n" if $show_cmd;
  if (!open(OUTPUT, "$cmd|")) {
    print STDERR "Can't run $cmd: $!\n";
    return 0;
  } else {
    $lines = 100000 if $lines == 0;
    my $i=0;
    my $line;
    while ( defined($line=<OUTPUT>) && ($i++ < $lines) ) {
      print $line;
    }
    close(OUTPUT);
    return $i;
  }
}

# Show the CVS version id string and quit.
sub show_version() {
    print "$program $vcid\n";
    print "Version $version\n";
    exit 1;
}
sub podthis {
  use Pod::Text;
  $^W = 0;
  pod2text $0;
  exit 101;
}

=pod

=head1 NAME

fs-report - size report program

=head1 SYNOPSIS

B<fs-report> [I<options>...] [directory]

Gives a report of files system space in order to glean why
a filesystem might be full.

If no directory is given use the current directory `.'.

=head1 DESCRIPTION

Gives a report of files system space in order to glean why a
filesystem might be full. The report is in a number of sections: "core
files", "largest files and directories", "files created recently", the
intersection of the latter two sections, and just a listing of what is
in the directory. Any or all of these sections may be omitted from the
report.

"Core" files are created automatically when a program crashes. Their
usefulness is often short-lived, in getting a stack trace and perhaps
looking at some variables when the program died. It is not uncommon
that they get created when a program crashes and are subsequently
never used or used once and forgotten about. Since the core file
contains information about all of the variables in use when a program
died it is not uncommon for large programs to produce large core
files.

The section on "largest files and directories" suggests what might be
removed to give the most space back. fs-report takes care to line up
the magnitude of the file sizes in columns to make it easier to read.

Often what tipped a filesystem over the edge are the files that were
recently created. So there is a section showing which files are new.

After this we take the common files between what is large and what is
new. When something is listed here, it is often the culpret of why a
filesystem recently filled up.

Finally there is just a good ol' "ls" (turned off by default). Some
people find this useful.

=head2 OPTIONS

=over 4

=item --help

Give rudimentary help and exit

=item --version

show a CVS version string and exit

=item --debug I<integer>

give debugging output. The higher the
number, the more the output. Default is no debug information (0).

=item --size-lines I<integer>

Maximum number of lines output in the "largest files" section.

=item --newest-lines I<integer>

Maximum number of lines given in the "newest files" section.

=item --ls-lines I<integer>

Maximum number of lines given in the top-level directory list section.

=item --lines I<n>

Maximum number of lines if no other limit is specified.

=item --du I<path-to-du-program>

Set program to use for running du. The default is /usr/bin/du.

=item --find I<path-to-find-program>

Set program to use for running "find". The default is /usr/bin/find.

=item --ls | --nols

Show computing "list files" section. To omit this section (the default)
use --no-ls.

=item --size | --nosize

Show computing "largest files" section. To omit this section use
--no-size.

For this section to work, a suitable "du" program must have been set
or automatically configured.

=item --core | --nocore

Show a list of the"core files" that appear in this directory or a
subdirectory of this directory. To omit this section use --nocore.

For this section to work, a suitable "find" program must have been set
or automatically configured.

=item --newest | --nonewest

Show computing "newest files" section. To omit this section use
--no-newest.

=item --common | --nocommon

Compute the common files that are both "new" and "large".
To omit this section use --nocommon.

=item --file-time I<hours>

Set the file time for files listed in the "newest files" section. This
also has an effect on the files in the common section.

=item --find-time I<string>

Set the test used by "find" in the "newest files" section. The default
is -cmin, but other possibilites include (assuming your "find"
supports it:

=over 4

=item  -atime 

To set if file access is more recent than the value set by --file-time (or
its default value)

=item  -anewer I<file>

To set if a file access is more recent than I<file>.

=item  -cnewer I<file>

To set if a file status changed more recently than I<file>.

=item  -ctime

To set if a file status is more recent than the value set by --file-time (or
its default value).

=item  -mtime (modification time)

To set if file's data was modified more recently than the value set by
--file-time (or its default value).

=item  -newer I<file>

To set if the file was modified more recently than file.

=back

=back

=head1 BUGS

We probably need to do better with dealing with different du commands and
GNU find options.

=head1 SEE ALSO

My log rotation program L<http://recycle-logs.sourceforge.net> and
file removal/archival program L<http://rm-old-files.sourceforge.net>
might assist in maintenance the filesystem so it doesn't fill up.

=head1 AUTHOR

The current version is maintained (or not) by C<rocky@cpan.org>.

=head1 COPYRIGHT

  Copyright (C) 2004, 2005, 2006 Rocky Bernstein, email:
  <rocky@cpan.org>.  

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with GNU Make; see the file COPYING.  If not, write to the
  Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
  MA 02110-1301 USA.  You should have received a copy of the GNU
  General Public License

I<$Id: fs-report.in,v 1.19 2008/07/03 13:57:28 rockyb Exp $>

=cut

