#!/usr/bin/perl
#############################################################################
##
## File: classStats.pl
## Date Created: 2006-03-25
##
## Copyright (c) 2006 David D. Allen
##
## Permission is hereby granted, free of charge, to any person obtaining a
## copy of this software and associated documentation files (the "Software"),
## to deal in the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS IN THE SOFTWARE.
##
#############################################################################

# don't allow questionable usage of perl, and report suspicious constructs
use strict;
use warnings;
use Scalar::Util qw( looks_like_number );

# get the command line arguments:
#    dataCsv      - input data CSV file; its first line is a header
#    resultFolder - folder that holds the per-record results CSV files
#    finalCsv     - output CSV file that receives the summary statistics
#    eraSize      - divisor used to turn a record's position into an era
#                   number
( @ARGV >= 4 ) or
   die "Usage: $0 <dataCsv> <resultFolder> <finalCsv> <eraSize>\n";
my ( $dataCsv, $resultFolder, $finalCsv, $eraSize ) = @ARGV;

# the era size is used as a divisor, so it has to be a positive number
( looks_like_number( $eraSize ) && ( $eraSize > 0 )) or
   die "Era size must be a positive number, got \"$eraSize\"\n";

# open the data CSV file (three-argument form, so the file name can never be
# interpreted as an open mode)
open( DATA_CSV, '<', $dataCsv ) or
   die "Could not open input data CSV file \"$dataCsv\": $!\n";

# skip header of data CSV
<DATA_CSV>;

# Read one column of one data set out of a results CSV file.
#
# The file is scanned line by line.  The first line of the file, and the
# line that directly follows a blank separator line, are skipped as headers.
# A blank line ends the current data set; any further blank lines after the
# skipped header line are ignored.  Reading stops at the blank line that
# ends the target data set, or at end of file.
#
# NOTE(review): the header skip is tested before the blank-line skip, so if
# two data sets are separated by more than one blank line the real header
# row is handled as a data row -- confirm the results files always use a
# single blank separator line.
#
# Parameters:
#    $resultsName         - path of the results CSV file
#    $targetResultDataSet - 1-based position of the data set to extract
#    $targetColumn        - 1-based column whose values are collected
#
# Returns the list of values from the target column, in file order (an
# entry is undef when a row has fewer fields than $targetColumn).
# Dies when the file cannot be opened.
sub readResultColumn
{
   my ( $resultsName, $targetResultDataSet, $targetColumn ) = @_;

   # open the results CSV file
   open( my $resultsFh, '<', $resultsName ) or
      die "Could not open results CSV file \"$resultsName\": $!\n";

   my ( $currentDataSet, $skipBlankFlag, $skipHeader ) = ( 0, 0, 1 );
   my @detectedArray;
   while ( my $line = <$resultsFh> )
   {
      # strip the line ending (LF or CRLF) so blank lines compare as empty
      $line =~ s/\r?\n?\z//;
      my $isBlank = ( $line eq "" );

      # skip header if in the correct state
      if ( $skipHeader )
      {
         $skipHeader = 0;
         next;
      }

      # skip blank lines if in the correct state
      if ( $skipBlankFlag )
      {
         # skip line if it is blank, otherwise no more blank lines
         $isBlank and next;
         $skipBlankFlag = 0;
      }

      # skip data sets that come before the target one
      if ( $currentDataSet < ( $targetResultDataSet - 1 ))
      {
         # a blank line ends the data set; expect a header next
         if ( $isBlank )
         {
            $skipBlankFlag = 1;
            $skipHeader = 1;
            ++$currentDataSet;
         }

         next;
      }

      # a blank line inside the target data set means it is finished
      ( $currentDataSet == ( $targetResultDataSet - 1 )) and $isBlank and
         last;

      # store the value from the target column
      my @fields = split( /,/, $line );
      push( @detectedArray, $fields[ $targetColumn - 1 ] );
   }

   # close the results CSV file
   close( $resultsFh );

   return @detectedArray;
}

# Classify a list of detected era numbers against the target era.
#
# The first detection that falls in the target era, or in the era right
# after it, marks the target as detected.  Every other detection (including
# later ones in those same eras) is counted as an extra detection.
#
# Parameters:
#    $targetEra     - era number in which the hold-out class appears
#    @detectedArray - era numbers at which a class was detected
#
# Returns ( $targetDetected, $extraCount ), where $targetDetected is 0 or 1.
sub countDetections
{
   my ( $targetEra, @detectedArray ) = @_;

   my ( $targetDetected, $extraCount ) = ( 0, 0 );
   for my $detectedEra ( @detectedArray )
   {
      if (( $targetDetected == 0 ) &&
          (( $detectedEra == $targetEra ) ||
           ( $detectedEra == ( $targetEra + 1 ))))
      {
         # mark target as detected
         $targetDetected = 1;
      }
      else
      {
         # count as incorrect extra detection
         ++$extraCount;
      }
   }

   return ( $targetDetected, $extraCount );
}

# read each line of data CSV, and collect final stats
#
# Per-class hashes are keyed by field 5 of the data record (index 4).  The
# "ExtraDetected" tallies count records with at least one extra detection;
# they feed the "extra percent" column, which is currently not written out.
my ( %classTotalCount, %classDetectedSum, %classExtraDetectedSum,
     %classExtraSum );
my ( $dataSetTotalCount, $dataSetDetectedSum, $dataExtraDetectedSum,
     $dataSetExtraSum ) = ( 0, 0, 0, 0 );
my $dataSetName;

# data set and column to pull from each results CSV file (both 1-based)
my $targetResultDataSet = 4;
my $targetColumn = 1;

# read line by line instead of loading the whole data CSV into memory
while ( my $dataLine = <DATA_CSV> )
{
   # strip the line ending (LF or CRLF) and ignore blank lines, which would
   # otherwise produce a bogus results file name
   $dataLine =~ s/\r?\n?\z//;
   next if ( $dataLine =~ /^\s*$/ );

   # split line record into fields
   my @record = split( /,/, $dataLine );

   # create results file name from the data
   my $resultsName = sprintf( "%s/%s-%02d-%02d.csv", $resultFolder,
      $record[0], $record[2], $record[3] );

   # get the data set name
   $dataSetName = $record[0];

   # calculate the era number that the hold-out class appears in
   my $targetEra = int((( $record[5] - 1 ) / $eraSize ) + 1 );

   # get the target column of data from the results CSV file
   my @detectedArray = readResultColumn( $resultsName,
      $targetResultDataSet, $targetColumn );

   # summarize stats based on locations of detected classes
   my ( $targetDetected, $extraCount ) =
      countDetections( $targetEra, @detectedArray );

   # store class stats
   ++$classTotalCount{ $record[4] };
   $classDetectedSum{ $record[4] } += $targetDetected;
   $classExtraSum{ $record[4] } += $extraCount;
   ( $extraCount > 0 ) and ++$classExtraDetectedSum{ $record[4] };

   # store data set stats
   ++$dataSetTotalCount;
   $dataSetDetectedSum += $targetDetected;
   $dataSetExtraSum += $extraCount;
   ( $extraCount > 0 ) and ++$dataExtraDetectedSum;
}  # read each line of data CSV

# close the data CSV file
close( DATA_CSV );

# Divide $sum by $count, returning 0 instead of dying when $count is zero
# (which happens for the data set totals when the data CSV had no records).
sub ratioOrZero
{
   my ( $sum, $count ) = @_;
   return $count ? ( $sum / $count ) : 0;
}

# get sorted list of classes
my @classNames = sort( keys( %classTotalCount ));

# open the final CSV file for writing (three-argument form, so the file name
# can never be interpreted as an open mode)
open( my $finalFh, '>', $finalCsv ) or
   die "Could not open output final CSV file \"$finalCsv\": $!\n";

# write header for class stats
# (an "extra percent" column between "correct percentage" and "average
# extra" is currently disabled)
print {$finalFh} "class,total count,correct detection,extra detection," .
   "correct percentage,average extra\n";

# write class stats, one row per class in sorted order
for my $className ( @classNames )
{
   my $totalCount = $classTotalCount{ $className };
   my $detectedSum = $classDetectedSum{ $className };
   my $extraSum = $classExtraSum{ $className };

   print {$finalFh} join( ",", $className, $totalCount, $detectedSum,
      $extraSum, ratioOrZero( $detectedSum, $totalCount ),
      ratioOrZero( $extraSum, $totalCount )), "\n";
}  # write class stats

# write header for data set stats, separated from class stats by a blank line
print {$finalFh} "\ndata set,total count,correct detection,extra detection," .
   "correct percentage,average extra\n";

# write data set stats; the name is empty and the ratios are 0 when the
# data CSV contained no records
print {$finalFh} join( ",",
   ( defined( $dataSetName ) ? $dataSetName : "" ), $dataSetTotalCount,
   $dataSetDetectedSum, $dataSetExtraSum,
   ratioOrZero( $dataSetDetectedSum, $dataSetTotalCount ),
   ratioOrZero( $dataSetExtraSum, $dataSetTotalCount )), "\n";

# close final CSV; buffered write errors only show up here, so check it
close( $finalFh ) or
   die "Could not close final CSV file \"$finalCsv\": $!\n";

