tests/diff-out - public/gem5 - Git at Google

 #!/usr/bin/perl
 # Copyright (c) 2001-2005 The Regents of The University of Michigan
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met: redistributions of source code must retain the above copyright
 # notice, this list of conditions and the following disclaimer;
 # redistributions in binary form must reproduce the above copyright
 # notice, this list of conditions and the following disclaimer in the
 # documentation and/or other materials provided with the distribution;
 # neither the name of the copyright holders nor the names of its
 # contributors may be used to endorse or promote products derived from
 # this software without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #
 # This script diffs two SimpleScalar statistics output files.
 #

 use Getopt::Std;

 # Parse the command-line switches.  The code below reads the results
 # from the package variables $opt_a, $opt_d, $opt_h, $opt_n and $opt_t
 # (-n and -t take an argument).
 getopts('adn:t:h');

 if (@ARGV < 2)
 {
     print "\nError: need two file arguments (<reference> <new>).\n";
     print "   Options: -d = Ignore distributions\n";
     print "            -a = Sort errors alphabetically (default: by percentage)\n";
     print "            -h = Diff header info separately from stats\n";
     print "            -n <num> = Print top <num> errors (default 20, 0 for all)\n";
     print "            -t <num> = Ignore errors below <num> percent (default 0)\n\n";

     # Status 0 is what this script reports for a clean comparison, so
     # a usage error has to exit with a failure status instead.
     exit 2;
 }

 # Three-argument open keeps the file name from being parsed for mode
 # characters ("<", ">", "|") or stripped of surrounding whitespace.
 # The REF and NEW handles are read later via \*REF and \*NEW.
 open(REF, '<', $ARGV[0]) or die "Error: can't open $ARGV[0].\n";
 open(NEW, '<', $ARGV[1]) or die "Error: can't open $ARGV[1].\n";


 #
 # Tunable limits, taken from the -t and -n switches when they are given
 #

 # Percentage error that a difference must exceed to be listed
 $err_thresh = 0;
 $err_thresh = $opt_t if defined($opt_t);

 # How many differences to list before cutting the report short
 $omit_count = 20;
 $omit_count = $opt_n if defined($opt_n);


 #
 # First copy everything up to the simulation statistics to a pair of
 # temporary files, stripping out date-related items, and do a plain
 # diff.  Any differences in the arguments are not necessarily an issue;
 # any differences in the program output should be caught by the EIO
 # mechanism if an EIO file is used.
 #

 # copy_header($inhandle, $outname)
 #
 # Copies lines from the open input handle $inhandle into the file named
 # $outname (created, or truncated if it already exists).  Copying stops
 # at the first line containing "Begin Simulation Statistics"; that line
 # is read from $inhandle but not written.  Lines starting with one of
 # the prefixes in the pattern below are left out because they can vary
 # from run to run.  $inhandle is left open so the caller can keep
 # reading after the marker.
 #
 # Dies if $outname cannot be opened or closed.

 sub copy_header
 {
     my ($inhandle, $outname) = @_;

     # Lexical handle and three-argument open: no global OUTPUT handle
     # is shared between calls, and $outname is never parsed for mode
     # characters.
     open(my $outhandle, '>', $outname)
       or die "Error: can't open $outname.\n";

     # Keep the read loop from overwriting the caller's $_.
     local $_;

     while (<$inhandle>)
     {
         # strip out lines that can vary
         next if /^(command line:|M5 compiled on |M5 simulation started |M5 executing on )/;
         last if /Begin Simulation Statistics/;
         print $outhandle $_;
     }

     # Buffered write errors only show up when the handle is closed.
     close($outhandle) or die "Error: can't close $outname.\n";
 }

 if ($opt_h) {

     # Diff header separately from stats

     # Let File::Temp create the scratch files: it picks an unpredictable
     # name and creates the file exclusively, whereas a fixed
     # "/tmp/<name>.$$" path can be pre-created or symlinked by someone
     # else.  Only the names are kept ($refheader / $newheader are
     # unlinked by cleanup()); copy_header() reopens them by name.
     require File::Temp;
     my ($reftmp, $newtmp);
     ($reftmp, $refheader) =
       File::Temp::tempfile('smt-test.refheader.XXXXXX', DIR => '/tmp');
     ($newtmp, $newheader) =
       File::Temp::tempfile('smt-test.newheader.XXXXXX', DIR => '/tmp');
     close($reftmp);
     close($newtmp);

     # Each call consumes its handle up to the start of the statistics,
     # so the later statistics parse continues from that point.
     copy_header(\*REF, $refheader);
     copy_header(\*NEW, $newheader);

     print "\n===== Header and program output differences =====\n\n";

     print `diff $refheader $newheader`;

     print "\n===== Statistics differences =====\n\n";
 }

 #
 # Now parse statistics
 #

 #
 # parse_file($stathandle)
 #
 # Reads statistics lines from the open filehandle $stathandle until end
 # of file or a line containing "End Simulation Statistics", closes the
 # handle, and returns a reference to a hash mapping each statistic name
 # to its value (kept as the text found in the file).
 #
 #  - "Memory usage: N KBytes" is stored under the key 'memory usage'.
 #  - "<name>.start_dist" stores <name> with value 0 and opens a
 #    distribution that lasts until a line containing ".end_dist".
 #  - Inside a distribution, .min_value/.max_value lines are stored like
 #    ordinary stats, and bucket lines ("<label> <count> <pct>%") are
 #    stored as "<name>::<label>" => <count>.  When -d ($opt_d) is set,
 #    everything inside a distribution is skipped.
 #  - Any other line is split into a name (first word) and a value (the
 #    rest of the line).
 #
 # Lines that are empty once the trailing "# comment" is removed, or that
 # fit none of the formats above, are skipped.  Storing them (under an
 # empty or partial name with no value) would make the comparison report
 # a missing statistic even for two identical files.
 #
 sub parse_file
 {
     my ($stathandle) = @_;

     my $in_dist;        # name of the distribution being read, if any
     my %stats;          # statistic name => value

     # Keep the read loop from overwriting the caller's $_.
     local $_;

     while (<$stathandle>)
     {
         next if /^\s*$/;        # skip blank lines
         last if /End Simulation Statistics/;

         s/ *#.*//;              # strip comments
         next if /^\s*$/;        # the line held nothing but a comment

         my ($stat, $value);

         if (/^Memory usage: (\d+) KBytes/) {
             ($stat, $value) = ('memory usage', $1);
         }
         elsif (defined($in_dist)) {
             if (/\.end_dist/) {
                 # end line of distribution: leave distribution mode
                 $in_dist = undef;
                 next;
             }

             next if $opt_d;     # ignoring distributions

             if (/\.(?:min|max)_value/) {
                 # treat these like normal stats
                 ($stat, $value) = /^(\S+)\s+(.*)/;
             }
             elsif (/^(\S+(?:.*\S)?)\s+(\d+)\s+\d+\.\d+%/) {
                 # bucket line: qualify the label with the distribution
                 ($stat, $value) = ($in_dist . '::' . $1, $2);
             }
         }
         elsif (/(.*)\.start_dist/) {
             # start line of distribution: remember its name so the
             # following bucket lines can be qualified with it
             $in_dist = $1;
             ($stat, $value) = ($1, 0);
         }
         else {
             ($stat, $value) = /^(\S+)\s+(.*)/;
         }

         # Nothing recognizable on this line: do not record it.
         next unless defined($stat);

         $stats{$stat} = $value;
     }

     close($stathandle);
     return \%stats;
 }


 #
 # pct_diff($old, $new): change from $old to $new, expressed as a
 # percentage of $old.
 #
 sub pct_diff
 {
     my ($old, $new) = @_;

     # No percentage can be taken of a zero baseline: report 0 when the
     # new value is zero as well, otherwise the fixed value 9999.
     if ($old == 0) {
         return 0 if $new == 0;
         return 9999;
     }

     my $delta = $new - $old;
     return 100 * $delta / $old;
 }


 #
 # Statistics whose changes are never counted as errors: they relate to
 # simulator performance rather than correctness.
 #
 %ignore = map { $_ => 1 } qw(
     host_seconds
     host_tick_rate
     host_inst_rate
     host_op_rate
     host_mem_usage
 );

 #
 # Key statistics: a stat whose name contains one of these strings is
 # always shown in the summary
 #  ==> give the names here WITHOUT trailing thread ID
 #
 @key_stat_list = qw(
     ipc
     committedInsts
     committedOps
     sim_insts
     sim_ops
     sim_ticks
     host_inst_rate
     host_mem_usage
 );

 # All key names joined into one regex alternation
 $key_stat_pattern = join('|', @key_stat_list);

 # Text that appears in place of a value when the simulator hit a
 # divide by zero
 $divbyzero = '<err: divide by zero>';

 # Largest percentage difference seen, as a magnitude
 $max_err_mag = 0;

 # Read all statistics of the reference file, then of the new file
 $refhash = parse_file(\*REF);
 $newhash = parse_file(\*NEW);

 # Walk the reference stats in name order and compare each one with its
 # counterpart in the new file.
 foreach my $name (sort keys %$refhash)
 {
     my $ref_val = $refhash->{$name};
     my $new_val = $newhash->{$name};

     # stat missing from new file: note it and move on
     unless (defined($new_val)) {
         push @missing_stats, $name;
         next;
     }

     # key statistics are recorded whether or not they changed
     push @key_stats, [$name, $ref_val, $new_val]
       if $name =~ /($key_stat_pattern)/o;

     # Only stats that are not on the ignore list and whose text differs
     # need a closer look.
     unless ($ignore{$name} or $ref_val eq $new_val) {
         if ($ref_val eq $divbyzero || $new_val eq $divbyzero) {
             # a divide by zero on either side cannot be quantified,
             # so just show both values
             print "$name: $ref_val --> $new_val\n";
         }
         else {
             my $pct = pct_diff($ref_val, $new_val);
             my $mag = abs($pct);

             push @errs, [$name, $ref_val, $new_val, $pct]
               if $mag > $err_thresh;

             $max_err_mag = $mag if $mag > $max_err_mag;
         }
     }

     # whatever stays in the new hash afterwards is an added stat
     delete $newhash->{$name};
 }


 #
 # Comparison finished: print the summary, starting with the largest
 # error, the column headings and the key statistics.
 #

 printf("Maximum error magnitude: %+f%%\n\n", $max_err_mag);

 printf("  %-30s %10s %10s %10s   %7s\n",
        ' ', 'Reference', 'New Value', 'Abs Diff', 'Pct Chg');

 print "Key statistics:\n\n";

 foreach my $entry (@key_stats)
 {
     my ($name, $ref_val, $new_val) = @$entry;

     # Show as many decimals as the reference value was written with
     my $dot = rindex($ref_val, '.');
     my $decimals = 0;
     $decimals = length($ref_val) - $dot - 1 if $dot >= 0;
     my $num = "%10.${decimals}f";

     # reference, new value, absolute difference, percent change
     printf("  %-30s $num $num $num  %+7.2f%%\n",
            $name, $ref_val, $new_val,
            $new_val - $ref_val, pct_diff($ref_val, $new_val));
 }

 # List the recorded differences (those above the -t threshold).
 #
 # %g rather than %d so that a fractional threshold such as 0.5 is
 # shown as given instead of being truncated to 0.
 printf("\nDifferences > %g%%:\n\n", $err_thresh);

 if ($opt_a) {
     # leave stats sorted alphabetically, doesn't make sense to cut them off
     $omit_count = 0;
 } else {
     # sort differences by percent change, largest magnitude first
     @errs = sort { abs($$b[3]) <=> abs($$a[3]) } @errs;
 }

 my $num_errs = 0;

 foreach my $err (@errs)
 {
     my ($statname, $refvalue, $newvalue, $reldiff) = @$err;

     # Use as many decimals as the more precise of the two values
     my $pointpos1 = rindex($refvalue, '.');
     my $digits1 = ($pointpos1 < 0) ? 0 : (length($refvalue) - $pointpos1 - 1);
     my $pointpos2 = rindex($newvalue, '.');
     my $digits2 = ($pointpos2 < 0) ? 0 : (length($newvalue) - $pointpos2 - 1);
     my $digits = ($digits1 > $digits2) ? $digits1 : $digits2;
     my $fmt = "%10.${digits}f";

     # print differing values with absolute and relative error
     printf("  %-30s $fmt $fmt $fmt  %+7.2f%%\n",
            $statname, $refvalue, $newvalue, $newvalue - $refvalue, $reldiff);

     # only print top N errors (an $omit_count of 0 means no limit)
     if ($omit_count > 0 && ++$num_errs >= $omit_count)
     {
         # Say that errors were omitted only when some really remain.
         if ($num_errs < scalar(@errs)) {
             print "[... showing top $omit_count errors only, additional errors omitted ...]\n";
         }
         last;
     }
 }

 #
 # Report stats that the reference file has but the new file lacks,
 # together with their reference values
 #
 $missing_stats = @missing_stats;    # number of missing stats

 if ($missing_stats)
 {
     print "\nMissing $missing_stats reference statistics:\n\n";
     foreach my $name (@missing_stats)
     {
         printf("  %-50s    %s\n", $name, $refhash->{$name});
     }
 }

 #
 # Every stat still present in the new-file hash has no counterpart in
 # the reference file: report those as added, in name order
 #

 @added_stats = keys %$newhash;
 $added_stats = @added_stats;        # number of added stats

 if ($added_stats)
 {
     print "\nFound $added_stats new statistics:\n\n";
     foreach my $name (sort @added_stats)
     {
         printf("  %-50s    %s\n", $name, $newhash->{$name});
     }
 }

 cleanup();

 # Exit codes:
 #   2 - a reference stat is missing, or the maximum error is non-zero
 #   1 - no such errors, but the new file has additional stats
 #   0 - no such errors and no additional stats
 if ($missing_stats != 0 || $max_err_mag != 0.0) {
     exit 2;
 }
 exit(($added_stats == 0) ? 0 : 1);

 # Delete the temporary header files.  $refheader and $newheader are
 # only set when the -h header diff ran; unset names are skipped.
 sub cleanup
 {
     foreach my $tmpfile ($refheader, $newheader) {
         unlink($tmpfile) if ($tmpfile);
     }
 }
	#!/usr/bin/perl
	# Copyright (c) 2001-2005 The Regents of The University of Michigan
	# All rights reserved.
	#
	# Redistribution and use in source and binary forms, with or without
	# modification, are permitted provided that the following conditions are
	# met: redistributions of source code must retain the above copyright
	# notice, this list of conditions and the following disclaimer;
	# redistributions in binary form must reproduce the above copyright
	# notice, this list of conditions and the following disclaimer in the
	# documentation and/or other materials provided with the distribution;
	# neither the name of the copyright holders nor the names of its
	# contributors may be used to endorse or promote products derived from
	# this software without specific prior written permission.
	#
	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	#
	# This script diffs two SimpleScalar statistics output files.
	#

	use Getopt::Std;

	# Parse the command-line switches.  The code below reads the results
	# from the package variables $opt_a, $opt_d, $opt_h, $opt_n and $opt_t
	# (-n and -t take an argument).
	getopts('adn:t:h');

	if (@ARGV < 2)
	{
	    print "\nError: need two file arguments (<reference> <new>).\n";
	    print " Options: -d = Ignore distributions\n";
	    print " -a = Sort errors alphabetically (default: by percentage)\n";
	    print " -h = Diff header info separately from stats\n";
	    print " -n <num> = Print top <num> errors (default 20, 0 for all)\n";
	    print " -t <num> = Ignore errors below <num> percent (default 0)\n\n";

	    # Status 0 is what this script reports for a clean comparison, so
	    # a usage error has to exit with a failure status instead.
	    exit 2;
	}

	# Three-argument open keeps the file name from being parsed for mode
	# characters ("<", ">", "|") or stripped of surrounding whitespace.
	# The REF and NEW handles are read later via \*REF and \*NEW.
	open(REF, '<', $ARGV[0]) or die "Error: can't open $ARGV[0].\n";
	open(NEW, '<', $ARGV[1]) or die "Error: can't open $ARGV[1].\n";


	#
	# Tunable limits, taken from the -t and -n switches when they are given
	#

	# Percentage error that a difference must exceed to be listed
	$err_thresh = 0;
	$err_thresh = $opt_t if defined($opt_t);

	# How many differences to list before cutting the report short
	$omit_count = 20;
	$omit_count = $opt_n if defined($opt_n);


	#
	# First copy everything up to the simulation statistics to a pair of
	# temporary files, stripping out date-related items, and do a plain
	# diff. Any differences in the arguments are not necessarily an issue;
	# any differences in the program output should be caught by the EIO
	# mechanism if an EIO file is used.
	#

	# copy_header($inhandle, $outname)
	#
	# Copies lines from the open input handle $inhandle into the file named
	# $outname (created, or truncated if it already exists).  Copying stops
	# at the first line containing "Begin Simulation Statistics"; that line
	# is read from $inhandle but not written.  Lines starting with one of
	# the prefixes in the pattern below are left out because they can vary
	# from run to run.  $inhandle is left open so the caller can keep
	# reading after the marker.
	#
	# Dies if $outname cannot be opened or closed.

	sub copy_header
	{
	    my ($inhandle, $outname) = @_;

	    # Lexical handle and three-argument open: no global OUTPUT handle
	    # is shared between calls, and $outname is never parsed for mode
	    # characters.
	    open(my $outhandle, '>', $outname)
	      or die "Error: can't open $outname.\n";

	    # Keep the read loop from overwriting the caller's $_.
	    local $_;

	    while (<$inhandle>)
	    {
	        # strip out lines that can vary; the bars are regex
	        # alternation (an escaped "\|" would match a literal pipe)
	        next if /^(command line:|M5 compiled on |M5 simulation started |M5 executing on )/;
	        last if /Begin Simulation Statistics/;
	        print $outhandle $_;
	    }

	    # Buffered write errors only show up when the handle is closed.
	    close($outhandle) or die "Error: can't close $outname.\n";
	}

	if ($opt_h) {

	    # Diff header separately from stats

	    # Let File::Temp create the scratch files: it picks an unpredictable
	    # name and creates the file exclusively, whereas a fixed
	    # "/tmp/<name>.$$" path can be pre-created or symlinked by someone
	    # else.  Only the names are kept ($refheader / $newheader are
	    # unlinked by cleanup()); copy_header() reopens them by name.
	    require File::Temp;
	    my ($reftmp, $newtmp);
	    ($reftmp, $refheader) =
	      File::Temp::tempfile('smt-test.refheader.XXXXXX', DIR => '/tmp');
	    ($newtmp, $newheader) =
	      File::Temp::tempfile('smt-test.newheader.XXXXXX', DIR => '/tmp');
	    close($reftmp);
	    close($newtmp);

	    # Each call consumes its handle up to the start of the statistics,
	    # so the later statistics parse continues from that point.
	    copy_header(\*REF, $refheader);
	    copy_header(\*NEW, $newheader);

	    print "\n===== Header and program output differences =====\n\n";

	    print `diff $refheader $newheader`;

	    print "\n===== Statistics differences =====\n\n";
	}

	#
	# Now parse statistics
	#

	#
	# parse_file($stathandle)
	#
	# Reads statistics lines from the open filehandle $stathandle until end
	# of file or a line containing "End Simulation Statistics", closes the
	# handle, and returns a reference to a hash mapping each statistic name
	# to its value (kept as the text found in the file).
	#
	#  - "Memory usage: N KBytes" is stored under the key 'memory usage'.
	#  - "<name>.start_dist" stores <name> with value 0 and opens a
	#    distribution that lasts until a line containing ".end_dist".
	#  - Inside a distribution, .min_value/.max_value lines are stored like
	#    ordinary stats, and bucket lines ("<label> <count> <pct>%") are
	#    stored as "<name>::<label>" => <count>.  When -d ($opt_d) is set,
	#    everything inside a distribution is skipped.
	#  - Any other line is split into a name (first word) and a value (the
	#    rest of the line).
	#
	# Lines that are empty once the trailing "# comment" is removed, or that
	# fit none of the formats above, are skipped.  Storing them (under an
	# empty or partial name with no value) would make the comparison report
	# a missing statistic even for two identical files.
	#
	sub parse_file
	{
	    my ($stathandle) = @_;

	    my $in_dist;        # name of the distribution being read, if any
	    my %stats;          # statistic name => value

	    # Keep the read loop from overwriting the caller's $_.
	    local $_;

	    while (<$stathandle>)
	    {
	        next if /^\s*$/;        # skip blank lines
	        last if /End Simulation Statistics/;

	        # strip comments: any spaces before the "#" and everything
	        # after it, up to the end of the line
	        s/ *#.*//;
	        next if /^\s*$/;        # the line held nothing but a comment

	        my ($stat, $value);

	        if (/^Memory usage: (\d+) KBytes/) {
	            ($stat, $value) = ('memory usage', $1);
	        }
	        elsif (defined($in_dist)) {
	            if (/\.end_dist/) {
	                # end line of distribution: leave distribution mode
	                $in_dist = undef;
	                next;
	            }

	            next if $opt_d;     # ignoring distributions

	            if (/\.(?:min|max)_value/) {
	                # treat these like normal stats
	                ($stat, $value) = /^(\S+)\s+(.*)/;
	            }
	            elsif (/^(\S+(?:.*\S)?)\s+(\d+)\s+\d+\.\d+%/) {
	                # bucket line: qualify the label with the distribution
	                ($stat, $value) = ($in_dist . '::' . $1, $2);
	            }
	        }
	        elsif (/(.*)\.start_dist/) {
	            # start line of distribution: remember its name so the
	            # following bucket lines can be qualified with it
	            $in_dist = $1;
	            ($stat, $value) = ($1, 0);
	        }
	        else {
	            ($stat, $value) = /^(\S+)\s+(.*)/;
	        }

	        # Nothing recognizable on this line: do not record it.
	        next unless defined($stat);

	        $stats{$stat} = $value;
	    }

	    close($stathandle);
	    return \%stats;
	}


	#
	# pct_diff($old, $new): change from $old to $new, expressed as a
	# percentage of $old.
	#
	sub pct_diff
	{
	    my ($old, $new) = @_;

	    # No percentage can be taken of a zero baseline: report 0 when the
	    # new value is zero as well, otherwise the fixed value 9999.
	    if ($old == 0) {
	        return 0 if $new == 0;
	        return 9999;
	    }

	    my $delta = $new - $old;
	    return 100 * $delta / $old;
	}


	#
	# Statistics whose changes are never counted as errors: they relate to
	# simulator performance rather than correctness.
	#
	%ignore = map { $_ => 1 } qw(
	    host_seconds
	    host_tick_rate
	    host_inst_rate
	    host_op_rate
	    host_mem_usage
	);

	#
	# Key statistics: a stat whose name contains one of these strings is
	# always shown in the summary
	# ==> give the names here WITHOUT trailing thread ID
	#
	@key_stat_list = qw(
	    ipc
	    committedInsts
	    committedOps
	    sim_insts
	    sim_ops
	    sim_ticks
	    host_inst_rate
	    host_mem_usage
	);

	# All key names joined into one regex alternation.  The separator
	# must be a bare "|": joined with "\|" the pattern would demand
	# literal pipe characters and match no stat name at all.
	$key_stat_pattern = join('|', @key_stat_list);

	# Text that appears in place of a value when the simulator hit a
	# divide by zero
	$divbyzero = '<err: divide by zero>';

	# Largest percentage difference seen, as a magnitude
	$max_err_mag = 0;

	# Read all statistics of the reference file, then of the new file
	$refhash = parse_file(\*REF);
	$newhash = parse_file(\*NEW);

	# Walk the reference stats in name order and compare each one with its
	# counterpart in the new file.
	foreach my $name (sort keys %$refhash)
	{
	    my $ref_val = $refhash->{$name};
	    my $new_val = $newhash->{$name};

	    # stat missing from new file: note it and move on
	    unless (defined($new_val)) {
	        push @missing_stats, $name;
	        next;
	    }

	    # key statistics are recorded whether or not they changed
	    push @key_stats, [$name, $ref_val, $new_val]
	      if $name =~ /($key_stat_pattern)/o;

	    # Only stats that are not on the ignore list and whose text differs
	    # need a closer look.
	    unless ($ignore{$name} or $ref_val eq $new_val) {
	        # "||" is the logical-or operator; written as "\|\|" the
	        # script does not compile.
	        if ($ref_val eq $divbyzero || $new_val eq $divbyzero) {
	            # a divide by zero on either side cannot be quantified,
	            # so just show both values
	            print "$name: $ref_val --> $new_val\n";
	        }
	        else {
	            my $pct = pct_diff($ref_val, $new_val);
	            my $mag = abs($pct);

	            push @errs, [$name, $ref_val, $new_val, $pct]
	              if $mag > $err_thresh;

	            $max_err_mag = $mag if $mag > $max_err_mag;
	        }
	    }

	    # whatever stays in the new hash afterwards is an added stat
	    delete $newhash->{$name};
	}


	#
	# Comparison finished: print the summary, starting with the largest
	# error, the column headings and the key statistics.
	#

	printf("Maximum error magnitude: %+f%%\n\n", $max_err_mag);

	printf(" %-30s %10s %10s %10s %7s\n",
	       ' ', 'Reference', 'New Value', 'Abs Diff', 'Pct Chg');

	print "Key statistics:\n\n";

	foreach my $entry (@key_stats)
	{
	    my ($name, $ref_val, $new_val) = @$entry;

	    # Show as many decimals as the reference value was written with
	    my $dot = rindex($ref_val, '.');
	    my $decimals = 0;
	    $decimals = length($ref_val) - $dot - 1 if $dot >= 0;
	    my $num = "%10.${decimals}f";

	    # reference, new value, absolute difference, percent change
	    printf(" %-30s $num $num $num %+7.2f%%\n",
	           $name, $ref_val, $new_val,
	           $new_val - $ref_val, pct_diff($ref_val, $new_val));
	}

	# List the recorded differences (those above the -t threshold).
	#
	# %g rather than %d so that a fractional threshold such as 0.5 is
	# shown as given instead of being truncated to 0.
	printf("\nDifferences > %g%%:\n\n", $err_thresh);

	if ($opt_a) {
	    # leave stats sorted alphabetically, doesn't make sense to cut them off
	    $omit_count = 0;
	} else {
	    # sort differences by percent change, largest magnitude first
	    @errs = sort { abs($$b[3]) <=> abs($$a[3]) } @errs;
	}

	my $num_errs = 0;

	foreach my $err (@errs)
	{
	    my ($statname, $refvalue, $newvalue, $reldiff) = @$err;

	    # Use as many decimals as the more precise of the two values
	    my $pointpos1 = rindex($refvalue, '.');
	    my $digits1 = ($pointpos1 < 0) ? 0 : (length($refvalue) - $pointpos1 - 1);
	    my $pointpos2 = rindex($newvalue, '.');
	    my $digits2 = ($pointpos2 < 0) ? 0 : (length($newvalue) - $pointpos2 - 1);
	    my $digits = ($digits1 > $digits2) ? $digits1 : $digits2;
	    my $fmt = "%10.${digits}f";

	    # print differing values with absolute and relative error
	    printf(" %-30s $fmt $fmt $fmt %+7.2f%%\n",
	           $statname, $refvalue, $newvalue, $newvalue - $refvalue, $reldiff);

	    # only print top N errors (an $omit_count of 0 means no limit)
	    if ($omit_count > 0 && ++$num_errs >= $omit_count)
	    {
	        # Say that errors were omitted only when some really remain.
	        if ($num_errs < scalar(@errs)) {
	            print "[... showing top $omit_count errors only, additional errors omitted ...]\n";
	        }
	        last;
	    }
	}

	#
	# Report stats that the reference file has but the new file lacks,
	# together with their reference values
	#
	$missing_stats = @missing_stats;    # number of missing stats

	if ($missing_stats)
	{
	    print "\nMissing $missing_stats reference statistics:\n\n";
	    foreach my $name (@missing_stats)
	    {
	        printf(" %-50s %s\n", $name, $refhash->{$name});
	    }
	}

	#
	# Every stat still present in the new-file hash has no counterpart in
	# the reference file: report those as added, in name order
	#

	@added_stats = keys %$newhash;
	$added_stats = @added_stats;        # number of added stats

	if ($added_stats)
	{
	    print "\nFound $added_stats new statistics:\n\n";
	    foreach my $name (sort @added_stats)
	    {
	        printf(" %-50s %s\n", $name, $newhash->{$name});
	    }
	}

	cleanup();

	# Exit codes:
	#   2 - a reference stat is missing, or the maximum error is non-zero
	#   1 - no such errors, but the new file has additional stats
	#   0 - no such errors and no additional stats
	if ($missing_stats != 0 || $max_err_mag != 0.0) {
	    exit 2;
	}
	exit(($added_stats == 0) ? 0 : 1);

	# Delete the temporary header files.  $refheader and $newheader are
	# only set when the -h header diff ran; unset names are skipped.
	sub cleanup
	{
	    foreach my $tmpfile ($refheader, $newheader) {
	        unlink($tmpfile) if ($tmpfile);
	    }
	}