#!/usr/bin/perl
#############################################################################
##
## File: build-tex-stats-table.pl
## Date Created: 2006-05-16
##
## Copyright (c) 2006 David D. Allen
##
## Permission is hereby granted, free of charge, to any person obtaining a
## copy of this software and associated documentation files (the "Software"),
## to deal in the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS IN THE SOFTWARE.
##
#############################################################################

# don't allow questionable usage of perl
use strict;

# Script body: reads every "*.csv" statistics file found in the input
# directory and writes a LaTeX table with one row per data set.
#
# Command line arguments (all required, in this order):
#    1. directory holding the "*.csv" statistics files
#    2. text placed inside the table's \caption
#    3. identifier placed inside the table's \label
#    4. path of the LaTeX file to write (overwritten if it exists)

# refuse to run with missing arguments; an undefined directory would
# otherwise make the glob in GetFinalStats() scan "/*.csv"
( scalar( @ARGV ) >= 4 ) ||
   die "Usage: $0 <input-stats-dir> <table-label> <table-id> <output-tex>\n";

# get the command line arguments
my ( $inputStatsDir, $tableLabel, $tableID, $outputTex ) = @ARGV;

# get stats on files
my $finalStats = GetFinalStats( $inputStatsDir );

# get the sorted list of data sets
my @dataSetNames = sort( keys( %{ $finalStats } ));

# open the output file (three-argument form so that the file name can never
# be interpreted as part of the open mode)
open( my $outputFh, '>', $outputTex ) ||
   die "Could not open output file \"$outputTex\": $!\n";

# output header
print {$outputFh} <<'DONE_HEADER';
   \begin{table}[hbp]
      \begin{center}
         \begin{tabular}{|l|r|r|r|}
            \hline
            Data Set & Classes & PD Percentage& FP Percentage \\
            \hline
            \hline
DONE_HEADER

# display stats on each data set
foreach my $dataSetName ( @dataSetNames )
{
   # get the raw stats; each entry is [ PD fraction, FP fraction, classes ]
   my ( $detPercent, $avgExtra, $classCount ) =
      @{ $finalStats->{ $dataSetName } };

   # display data set stats (the two fractions are shown as percentages)
   printf {$outputFh} "%s & %d & %0.1f\\%% & %0.1f\\%% \\\\\n\\hline\n",
      $dataSetName, $classCount, 100 * $detPercent, 100 * $avgExtra;
}  # display stats on each data set

# output trailer
print {$outputFh} <<"DONE_TRAILER";
         \\end{tabular}
      \\end{center}
      \\caption
      {
         $tableLabel
         \\label{$tableID}
      }
   \\end{table}
DONE_TRAILER

# close the file; buffered write errors only surface here, so check it
close( $outputFh ) ||
   die "Could not close output file \"$outputTex\": $!\n";


#============================================================================
#============================================================================
# GetFinalStats( $dirName )
#
# Collects the summary statistics from every "*.csv" file in a directory.
#
# Parameters:
#    $dirName - directory that is searched (non-recursively) for "*.csv"
#
# Returns a reference to a hash.  Each key is the value of column 1 of a
# row in the second data set of a file; each value is a reference to the
# array [ column 5, column 6, row count of the file's first data set ].
# The script body in this file reports these as the PD fraction, the FP
# fraction and the number of classes.  When several rows share the same
# column 1 value, the row processed last wins.
#
# Dies (inside GetDataSet) if one of the files cannot be opened.
sub GetFinalStats
{
   # get the parameters
   my ( $dirName ) = @_;

   # process each file
   my %fileStats;
   foreach my $fileName ( glob( "$dirName/*.csv" ) )
   {
      # the first data set holds the class results; only the number of
      # rows is needed, so a single column is extracted
      my $classData = GetDataSet( $fileName, 1, 1, [ 1 ] );
      my $classCount = scalar( @{ $classData } );

      # the second data set holds the summary results
      my $summaryData = GetDataSet( $fileName, 2, 1, [ 1, 5, 6 ] );

      # add one entry to the file stats per summary row
      foreach my $summaryRow ( @{ $summaryData } )
      {
         my ( $name, $column5, $column6 ) = @{ $summaryRow };
         $fileStats{ $name } = [ $column5, $column6, $classCount ];
      }  # add one entry to the file stats per summary row

      # no close here: GetDataSet opens and closes the file on each call
   }  # process each file

   # return the final file stats
   return( \%fileStats );
}  # GetFinalStats()


#============================================================================
#============================================================================
# GetDataSet( $fileName, $dataSet, $skipHeader, $indexArray )
#
# Extracts selected columns from one data set of a comma separated file.
# A file holds one or more data sets; two consecutive data sets are
# separated by one or more blank lines.  Lines may end in LF or CRLF.
#
# Parameters:
#    $fileName   - path of the file to read
#    $dataSet    - 1-based number of the data set to extract
#    $skipHeader - true if the first line of the file is a header line that
#                  must be discarded; the first non-blank line of every
#                  later data set is always discarded as a header
#    $indexArray - reference to an array of 1-based column numbers
#
# Returns a reference to an array with one entry per data line of the
# requested data set.  Each entry is a reference to an array holding the
# requested columns in the order given by $indexArray (undef for a column
# that the line does not have).  The result is empty if the file has fewer
# than $dataSet data sets.
#
# Dies if the file cannot be opened.
sub GetDataSet
{
   # get the parameters
   my ( $fileName, $dataSet, $skipHeader, $indexArray ) = @_;

   # open file (lexical handle, three-argument form)
   open( my $inputFh, '<', $fileName ) ||
      die "Could not open input file \"$fileName\": $!\n";

   # extract the data from the file
   my ( $currentDataSet, $skipBlankFlag ) = ( 0, 0 );
   my @fileData;
   while ( my $line = <$inputFh> )
   {
      # strip the line terminator (LF or CRLF) before looking at the line
      $line =~ s/\r?\n\z//;
      my $isBlank = ( $line eq '' );

      # skip blank lines if in the correct state; this has to happen before
      # the header check, otherwise a second separator line would be
      # consumed as the header and the real header would be read as data
      if ( $skipBlankFlag )
      {
         # skip line if it is blank, otherwise no more blank lines
         $isBlank and next;
         $skipBlankFlag = 0;
      }  # skip blank lines if in the correct state

      # skip header if in the correct state
      if ( $skipHeader )
      {
         # skip header
         $skipHeader = 0;
         next;
      }  # skip header if in the correct state

      # skip data sets if necessary
      if ( $currentDataSet < ( $dataSet - 1 ))
      {
         # move onto next data set when blank lines are encountered
         if ( $isBlank )
         {
            $skipBlankFlag = 1;
            $skipHeader = 1;
            ++$currentDataSet;
         }  # move onto next data set when blank lines are encountered

         # skip to next line
         next;
      }  # skip data sets if necessary

      # exit if done processing the data set
      ( $currentDataSet == ( $dataSet - 1 )) and $isBlank and last;

      # split the record from the line text
      my @record = split( /,/, $line );

      # grab values from specified columns (column numbers are 1-based)
      my @valueArray = map { $record[ $_ - 1 ] } @{ $indexArray };

      # store the value array
      push( @fileData, \@valueArray );
   }  # extract the data from the file

   # close the file
   close( $inputFh );

   # return the final file data
   return( \@fileData );
}  # GetDataSet()

