blob: 3bfb4f5d2ebec05a5c9bbe896aa7cadcb19fc446 [file] [log] [blame]
#!/bin/bash
#########################################################################################
##Software Implementation, Non-parametric Hierarchical Performance Testing (HPT)
##Authors: Yue Wu (Email: wuyue@ict.ac.cn) and Tianshi Chen
##Institute of Computing Technology, Chinese Academy of SCiences
##Beijing 100190, China
##Last Update: 17th June, 2013
##HPT on the web: http://novel.ict.ac.cn/tchen/hpt/
##
##Reference: T. Chen, Y. Chen, Q. Guo, O. Temam, Y. Wu, and W. Hu,
##"Statistical Performance Comparisons of Computers",
##In Proceedings of HPCA-18, New Orleans, Louisiana, 2012.
##
##Provided for non-commercial research and educational use only.
##Not for reproduction or distribution or commercial use.
##
##
##Please retain the above information when you reuse the code in your own tool.
#########################################################################################
# Path of the config file: first command-line argument, or "parameter.cfg"
# when the argument is missing or empty.
configfile=${1:-parameter.cfg}

# Global tunables read by the functions below.
ACC=20 # number of digits after the decimal point
acc_maxsu=2                       # decimals used when maxspeedup formats its result
NCOL_BASELINE=5                   # largest "repeat" main accepts with the default baseline
select1_table_file=table_select1  # lookup table read by select1
select2_table_file=table_select2  # lookup table read by ranksum_table
#########################################################################
# Math Part
#########################################################################
display_matrix() {
  # FUNCTION: print a row-major data table, one matrix row per output line
  # FORMAT: display_matrix nrow ncol MTRX name
  #         (MTRX is the NAME of a flat array holding nrow*ncol values)
  # OUTPUT: (None) -- text is written to stdout only
  local n_rows=$1
  local n_cols=$2
  local src_ref="$3[@]"
  # Copy the caller's array by name (word-split on purpose, as values are numbers).
  local cells=(${!src_ref})
  local r c

  printf "\n========================================\n"
  printf "Data matrix($4):\n"
  for ((r = 0; r < n_rows; r++)); do
    for ((c = 0; c < n_cols; c++)); do
      printf "%12.3f " ${cells[r*n_cols+c]}
    done
    printf "\n"
  done
}
combination() {
  # FUNCTION: print "n choose m"; for example `combination 4 2` prints 6
  # FORMAT: combination n m
  # OUTPUT: the binomial coefficient on stdout (use as `combination n m`)
  # NOTE: the result is printed with printf rather than `expr`, because
  #       `expr 0` exits with status 1 and would make a legitimate zero
  #       result (m > n) look like a failure.
  local remaining=$1
  local picks=$2
  local result=1
  local step
  for ((step = 1; step <= picks; step++)); do
    # result holds C(n, step-1) here, so the division below is always exact.
    result=$((result * remaining / step))
    remaining=$((remaining - 1))
  done
  printf '%s\n' "$result"
}
cdfnorm() {
  # FUNCTION: polynomial approximation of the standard normal cumulative
  #           distribution function, evaluated with bc
  # FORMAT: cdfnorm x
  # OUTPUT: ret_cdf (global), printed with 24 decimals
  local ACC=24 # working precision for this function only (shadows the global)
  local x=$1
  local a1=0.31938153
  local a2=-0.356563782
  local a3=1.781477937
  local a4=-1.821255978
  local a5=1.330274429
  local Pi=3.141592653589793238462643
  local magnitude=$x
  local k poly tail

  # The polynomial is evaluated on |x|.
  if [ $(echo "$x<0" | bc) -eq 1 ]; then
    magnitude=$(echo "scale=$ACC;(-1)*$x" | bc)
  fi
  k=$(echo "scale=$ACC;1/(1+0.2316419*$magnitude)" | bc)
  poly=$(echo "(((($a5*$k+$a4)*$k+$a3)*$k+$a2)*$k+$a1)*$k" | bc)
  tail=$(echo "scale=$ACC;e(0-$magnitude*$magnitude/2)*$poly/sqrt(2*$Pi)" | bc -l)
  # "tail" is the upper-tail mass beyond |x|; flip it for positive x.
  if [[ $(echo "$x>0" | bc) -eq 1 ]]; then
    tail=$(echo "scale=$ACC;1-$tail" | bc)
  fi
  ret_cdf=$(printf "%.${ACC}f" $tail)
}
qnorm() {
  # FUNCTION: quantile function of the standard normal distribution,
  #           approximated as sign * sqrt(y * P(y)) with y = -ln(4p(1-p)) and
  #           P a polynomial with the hard-coded coefficients below
  # FORMAT: qnorm p        (0 < p < 1)
  # OUTPUT: ret_qnorm (global); it is unset and status 1 is returned when p
  #         is out of range
  # NOTE: every working variable is local. The loop index in particular must
  #       not leak: bash scoping is dynamic, so a non-local "i" here would
  #       overwrite the loop counter of a caller further up the stack.
  local p=$1
  local y u pow coef
  local b=(1.570796288 0.03706987906 -0.0008364353589 -0.0002250947176 0.000006841218299 0.000005824238515 -0.00000104527497 0.00000008360937017 -0.000000003231081277 0.00000000003657763036 0.0000000000006936233982)

  if [[ $(echo "$p<=0" | bc) -eq 1 ]] || [[ $(echo "$p>=1" | bc) -eq 1 ]]; then
    echo "[Error][in function \"qnorm\"] the value of p is out of range."
    unset ret_qnorm
    return 1
  fi
  if [[ $(echo "$p==0.5" | bc) -eq 1 ]]; then
    ret_qnorm=0
    return 0
  fi

  y=$(echo "scale=$ACC;-l(4*$p*(1-$p))" | bc -l)
  u=0
  pow=1
  # Accumulate sum_k b[k] * y^(k+1).
  for coef in "${b[@]}"; do
    pow=$(echo "scale=$ACC;$pow*$y" | bc)
    u=$(echo "scale=$ACC;$u+$pow*$coef" | bc)
  done
  u=$(echo "scale=$ACC;sqrt($u)" | bc)
  # The approximation yields the magnitude; quantiles below the median are negative.
  if [[ $(echo "$p<0.5" | bc) -eq 1 ]]; then
    u=$(echo "scale=$ACC;$u*(-1)" | bc)
  fi
  ret_qnorm=$u
}
get_rank() {
  # FUNCTION: compute, for every element of a sequence, its rank (1 + number
  #           of strictly smaller elements) and how many elements share its value
  #           e.g. seq_x=(22 33 11 22) gives seq_rank=(2 4 1 2), seq_rep=(2 1 1 2)
  # FORMAT: get_rank len seq_x seq_rank seq_rep      (all three are array NAMES)
  # OUTPUT: the arrays named by seq_rank and seq_rep
  local gr_len=$1
  local gr_src="$2[@]"
  local gr_vals=(${!gr_src})
  local gr_a gr_b gr_less gr_same

  ##### CAUTION: whatever the output arrays held before is discarded here #####
  unset "$3"
  unset "$4"

  for ((gr_a = 0; gr_a < gr_len; gr_a++)); do
    gr_less=0
    gr_same=0
    # Values may be decimals, so comparisons are delegated to bc.
    for ((gr_b = 0; gr_b < gr_len; gr_b++)); do
      if [[ $(echo "${gr_vals[gr_b]}<${gr_vals[gr_a]}" | bc) -eq 1 ]]; then
        gr_less=$((gr_less + 1))
      elif [[ $(echo "${gr_vals[gr_b]}==${gr_vals[gr_a]}" | bc) -eq 1 ]]; then
        gr_same=$((gr_same + 1))
      fi
    done
    eval "$3[$gr_a]=$((gr_less + 1))"
    eval "$4[$gr_a]=$gr_same"
  done
}
get_ranksum() {
  # FUNCTION: sum the tie-adjusted ranks of elements start..end (inclusive);
  #           each element contributes rank + (rep - 1) / 2
  #           e.g. start=0, end=1, seq_rank=(2 4 1 2), seq_rep=(2 1 1 2)
  #           gives ret_ranksum = 2.5 + 4 = 6.5
  # FORMAT: get_ranksum start end seq_rank seq_rep   (last two are array NAMES)
  # OUTPUT: ret_ranksum (global)
  local grs_rank_ref="$3[@]"
  local grs_rep_ref="$4[@]"
  local grs_rank=(${!grs_rank_ref})
  local grs_rep=(${!grs_rep_ref})
  local grs_idx
  local grs_total=0

  for ((grs_idx = $1; grs_idx <= $2; grs_idx++)); do
    # One decimal is sufficient: contributions are multiples of 0.5.
    grs_total=$(echo "scale=1;$grs_total+${grs_rank[grs_idx]}+(${grs_rep[grs_idx]}-1)/2" | bc)
  done
  ret_ranksum=$grs_total
}
thexth() {
  # FUNCTION: find the "x"th smallest number of {x_0 x_1 ... x_(len-1)} by
  #           repeatedly partitioning around the first element (the same
  #           idea as quick sort, written as a loop)
  # FORMAT: thexth len xth x_0 x_1 x_2 ... x_(len-1)
  # OUTPUT: ret_thexth (global)
  local len=$1
  local want=$2
  local pool=("${@:3:len}")
  local pivot cand
  local smaller larger

  while :; do
    pivot=${pool[0]}
    smaller=()
    larger=()
    # Elements equal to the pivot go to neither side.
    for cand in "${pool[@]:1}"; do
      if [[ $(echo "$cand<$pivot" | bc) -eq 1 ]]; then
        smaller+=("$cand")
      elif [[ $(echo "$cand>$pivot" | bc) -eq 1 ]]; then
        larger+=("$cand")
      fi
    done

    if [[ $((len - ${#larger[@]})) -lt $want ]]; then
      # Fewer than "want" elements are <= pivot: continue in the upper part.
      want=$((want - len + ${#larger[@]}))
      pool=("${larger[@]}")
      len=${#larger[@]}
    elif [[ ${#smaller[@]} -lt $want ]]; then
      # The target falls on the pivot (or one of its duplicates).
      ret_thexth=$pivot
      return
    else
      pool=("${smaller[@]}")
      len=${#smaller[@]}
    fi
  done
}
get_median() {
  # FUNCTION: median of elements start..end (inclusive) of the array seq_x;
  #           for an even count it is the mean of the two middle elements
  # FORMAT: get_median start end seq_x        (seq_x is an array NAME)
  # OUTPUT: ret_median (global)
  local gm_first=$1
  local gm_last=$2
  local gm_src="$3[@]"
  local gm_all=(${!gm_src})
  local gm_len=$((gm_last - gm_first + 1))
  local gm_window=("${gm_all[@]:gm_first:gm_len}")
  local gm_low

  if [[ $((gm_len % 2)) -eq 1 ]]; then
    thexth $gm_len $((gm_len / 2 + 1)) ${gm_window[@]}
    ret_median=$ret_thexth
    return
  fi

  # Even count: fetch both middle order statistics and average them.
  thexth $gm_len $((gm_len / 2)) ${gm_window[@]}
  gm_low=$ret_thexth
  ret_median=$gm_low
  thexth $gm_len $((gm_len / 2 + 1)) ${gm_window[@]}
  ret_median=$(echo "scale=$ACC;($ret_thexth+$gm_low)/2" | bc)
}
prepare_one_row() {
  # FUNCTION: seq_x holds col1+col2 values. The first col1 values form the
  #           left part and the last col2 values the right part. Ranks are
  #           computed over the whole sequence; the function then reports the
  #           rank-sum and the median of each part.
  # FORMAT: prepare_one_row col1 col2 seq_x      (seq_x is an array NAME)
  # OUTPUT: ml, mr (medians) and wl, wr (rank-sums), all globals
  local por_left_last=$(($1 - 1))
  local por_right_last=$(($1 + $2 - 1))
  local por_ref="$3[@]"
  local por_x=(${!por_ref})
  local por_rank por_rep

  # get_rank discards and refills the two output arrays.
  get_rank $(($1 + $2)) por_x por_rank por_rep

  get_ranksum 0 $por_left_last por_rank por_rep
  wl=$ret_ranksum
  get_ranksum $1 $por_right_last por_rank por_rep
  wr=$ret_ranksum

  get_median 0 $por_left_last por_x
  ml=$ret_median
  get_median $1 $por_right_last por_x
  mr=$ret_median
}
select1() {
  # FUNCTION: return the number of subsets of {1,2,...,n} whose element sum is
  #           no greater than target, looked up in $select1_table_file.
  #           A table row looks like "[n] 0:c0 1:c1 2:c2 ..." where "s:c"
  #           gives the count c for sum bound s.
  #           When target is an integer the result is the mean of the counts
  #           for target and target-1 (the count for -1 being 0); otherwise
  #           it is the count for floor(target).
  # FORMAT: select1 n target
  # OUTPUT: ret_select1 (global); -1 when the table has no row for n; empty
  #         when n >= 25 (no lookup is attempted in that case)
  local n=$1
  local target=$2
  local found=0
  local itg at_floor below result HEAD CONTENT

  if [[ $n -lt 25 ]]; then
    # floor(target) for non-negative targets, computed via bc because the
    # target may carry decimals.
    itg=$(echo "scale=2;$target+1" | bc)
    itg=${itg%.*}
    itg=$((itg - 1))

    # The row header is compared as a literal string: left unquoted, "[n]"
    # would be a glob that can expand to a file name in the working directory.
    while read -r HEAD CONTENT; do
      if [[ "$HEAD" == "[$n]" ]]; then
        found=1
        break
      fi
    done <"$select1_table_file"

    if [[ $found -eq 1 ]]; then
      # Normalise whitespace and add a leading blank so that every entry,
      # including the first one, is preceded by a space.
      CONTENT=" $(echo $CONTENT)"
      at_floor=${CONTENT##* $itg:}
      at_floor=${at_floor%% *}
      if [[ $(echo "$itg==$target" | bc) -eq 1 ]]; then
        if [[ $((itg - 1)) -lt 0 ]]; then
          below=0
        else
          below=${CONTENT##* $((itg - 1)):}
          below=${below%% *}
        fi
        result=$(echo "scale=2;$at_floor/2+$below/2" | bc)
      else
        result=$at_floor
      fi
    else
      echo "[Error][in function \"select1\"] Cannot find the entry."
      result=-1
    fi
  fi
  ret_select1=$result
}
ranksum_table() {
  # FUNCTION: compute the lower and upper critical rank-sums used by the
  #           rank-sum test for two samples of size n each, at significance
  #           level alpha per tail.
  #           3 <= n <= 12 : looked up in $select2_table_file, whose rows look
  #                          like "[n] pos:count pos:count ..." with counts
  #                          increasing along the row.
  #           n > 12       : normal approximation using qnorm.
  #           n < 3        : nothing is computed.
  # FORMAT: ranksum_table n alpha
  # OUTPUT: ret_rst_n ret_rst_alpha ret_rst_lower ret_rst_upper (globals);
  #         all four are set to -1 when the table cannot be used.
  #         The previous result is reused when n and alpha are unchanged.
  local n=$1
  local alpha=$2
  local tag=0 crossed=0
  local HEAD CONTENT pos value res tmp bound tmppos tmpvalue
  local mu stddev

  if [[ $(echo "$alpha<=0" | bc) -eq 1 ]] || [[ $(echo "$alpha>=0.5" | bc) -eq 1 ]]; then
    echo "[Error][in function \"ranksum_table\"] Alpha value($alpha) is out of range"
  fi

  # Cached result from the previous call.
  if [[ -n "$ret_rst_n" && -n "$ret_rst_alpha" ]] \
    && [[ $n -eq $ret_rst_n ]] \
    && [[ $(echo "$alpha==$ret_rst_alpha" | bc) -eq 1 ]]; then
    return
  fi

  if [[ $n -le 12 ]] && [[ $n -ge 3 ]]; then
    # Literal header match; an unquoted "[n]" would be treated as a glob.
    while read -r HEAD CONTENT; do
      if [[ "$HEAD" == "[$n]" ]]; then
        tag=1
        break
      fi
    done <"$select2_table_file"
    if [[ $tag -eq 0 ]]; then
      echo "[Error][in function \"ranksum_table\"] Cannot find the entry."
      ret_rst_upper=-1; ret_rst_lower=-1; ret_rst_n=-1; ret_rst_alpha=-1
      return
    fi

    # bound = alpha * C(2n, n): the number of arrangements allowed in one tail.
    bound=$(combination $((n * 2)) $n)
    bound=$(echo "scale=$ACC;$bound*$alpha" | bc)

    # Walk the row until the first count above the bound. (tmppos, tmpvalue)
    # remembers the last entry at or below it.
    res=$CONTENT
    while [[ "$res" == *:* ]]; do
      pos=${res%%:*}
      res=${res#*:}
      value=${res%% *}
      if [[ "$res" == *" "* ]]; then
        res=${res#* }
      else
        res=""
      fi
      if [[ $(echo "$value<=$bound" | bc) -eq 1 ]]; then
        tmppos=$pos
        tmpvalue=$value
      else
        crossed=1
        break
      fi
    done
    if [[ $crossed -eq 0 ]]; then
      # The row ended without exceeding the bound; stop instead of looping forever.
      echo "[Error][in function \"ranksum_table\"] Table entry for n=$n never exceeds the bound."
      ret_rst_upper=-1; ret_rst_lower=-1; ret_rst_n=-1; ret_rst_alpha=-1
      return
    fi
    if [[ -z "$tmppos" ]]; then
      # Even the first entry exceeds the bound. Use a virtual predecessor
      # (one position earlier, count 0) so the rounding rule below stays
      # well defined instead of feeding empty operands to bc.
      tmppos=$((pos - 1))
      tmpvalue=0
    fi

    # Pick whichever neighbouring entry is closer to the bound.
    if [[ $(echo "$value+$tmpvalue<=2*$bound" | bc) -eq 1 ]]; then
      ret_rst_lower=$pos
      ret_rst_upper=$((n * (n * 2 + 1) - ret_rst_lower))
    else
      ret_rst_lower=$(echo "scale=2;$tmppos+0.5" | bc)
      ret_rst_upper=$(echo "scale=2;$n*($n*2+1)-$ret_rst_lower" | bc)
    fi
    ret_rst_n=$n
    ret_rst_alpha=$alpha
  elif [[ $n -gt 12 ]]; then
    qnorm $alpha
    mu=$(echo "scale=$ACC;$n*($n*2+1)/2" | bc)
    stddev=$(echo "scale=$ACC;sqrt((2*$n+1)/3)*$n/2" | bc)
    # qnorm(alpha) is negative for alpha < 0.5, hence tmp is negative:
    # adding it gives the lower bound, subtracting it gives the upper bound.
    tmp=$(echo "scale=$ACC;$ret_qnorm*$stddev" | bc)
    ret_rst_lower=$(echo "scale=$ACC;$mu+($tmp)" | bc)
    ret_rst_upper=$(echo "scale=$ACC;$mu-($tmp)" | bc)
    ret_rst_n=$n
    ret_rst_alpha=$alpha
  fi
}
unibench() {
  # FUNCTION: uni-bench level test on seq_x, which holds n values of one
  #           sample followed by n values of the other, at significance level
  #           alpha. Yields median_l - median_r when the left rank-sum lies
  #           outside the critical bounds, and 0 otherwise. With n <= 2 no
  #           test is run and the median difference is returned directly.
  # FORMAT: unibench n alpha seq_x             (seq_x is an array NAME)
  # OUTPUT: ret_unibench (global)
  local n=$1
  local alpha=$2
  local ub_ref="$3[@]"
  local ub_x=(${!ub_ref})

  prepare_one_row $n $n ub_x # sets wl, wr, ml, mr
  local target=$wl
  ret_unibench=0

  if [[ $n -le 2 ]]; then
    ret_unibench=$(echo "$ml-$mr" | bc)
    return
  fi

  ranksum_table $n $alpha
  if [[ $(echo "$target<=$ret_rst_lower" | bc) -eq 1 ]] \
    || [[ $(echo "$target>=$ret_rst_upper" | bc) -eq 1 ]]; then
    ret_unibench=$(echo "$ml-$mr" | bc)
  fi
}
crossbench() {
  # FUNCTION: cross-bench level test. seq_x is the sequence of median
  #           differences (length "row"). Values are ranked by magnitude;
  #           tie-adjusted ranks are summed into wp for positive entries and
  #           wn for negative ones, while a zero entry gives half of its rank
  #           to each. The result is derived from wp: via select1 when
  #           row < 25, via the normal approximation (cdfnorm) otherwise.
  # FORMAT: crossbench row seq_x               (seq_x is an array NAME)
  # OUTPUT: ret_crossbench ret_cb_wp ret_cb_wn (globals)
  local row=$1
  local cb_ref="$2[@]"
  local cb_x=(${!cb_ref})
  local cb_rank cb_rep sign
  local i tmp1 tmp2
  local wp=0
  local wn=0

  # Remember each sign, then replace the value by its magnitude.
  for ((i = 0; i < row; i++)); do
    if [[ $(echo "${cb_x[i]}<0" | bc) -eq 1 ]]; then
      sign[i]=-1
      cb_x[i]=$(echo "scale=$ACC;${cb_x[i]}*(-1)" | bc)
    elif [[ $(echo "${cb_x[i]}>0" | bc) -eq 1 ]]; then
      sign[i]=1
    else
      sign[i]=0
    fi
  done

  get_rank $row cb_x cb_rank cb_rep

  for ((i = 0; i < row; i++)); do
    case ${sign[i]} in
      1)
        wp=$(echo "scale=$ACC;$wp+${cb_rank[i]}+${cb_rep[i]}/2-1/2" | bc)
        ;;
      -1)
        wn=$(echo "scale=$ACC;$wn+${cb_rank[i]}+${cb_rep[i]}/2-1/2" | bc)
        ;;
      *)
        wp=$(echo "scale=$ACC;$wp+${cb_rank[i]}/2+${cb_rep[i]}/4-1/4" | bc)
        wn=$(echo "scale=$ACC;$wn+${cb_rank[i]}/2+${cb_rep[i]}/4-1/4" | bc)
        ;;
    esac
  done

  # Same sums as the previous call: keep the previous ret_crossbench.
  if [[ -n "$ret_cb_wp" ]] && [[ -n "$ret_cb_wn" ]]; then
    if [[ $(echo "$ret_cb_wp==$wp" | bc) -eq 1 ]] && [[ $(echo "$ret_cb_wn==$wn" | bc) -eq 1 ]]; then
      return
    fi
  fi
  ret_cb_wp=$wp
  ret_cb_wn=$wn

  if [[ $row -lt 25 ]]; then
    select1 $row $wp
    tmp1=$ret_select1
    tmp2=$((2 ** row))
    ret_crossbench=$(echo "scale=$ACC;$tmp1/$tmp2" | bc)
  else
    tmp1=$(echo "scale=$ACC;$wp-$row*($row+1)/4" | bc)
    tmp2=$(echo "scale=$ACC;sqrt($row*($row+1)*(2*$row+1)/24)" | bc)
    tmp1=$(echo "scale=$ACC;$tmp1/$tmp2" | bc)
    cdfnorm $tmp1
    ret_crossbench=$ret_cdf
  fi
}
hpt_basic() {
  # FUNCTION: run the HPT on two row*col data matrices. Every value of MTRX1
  #           is first multiplied by "multi". Each row pair goes through the
  #           uni-bench test (significance level alpha_uni); the resulting
  #           per-row median differences are fed to the cross-bench test.
  # FORMAT: hpt_basic row col multi alpha_uni MTRX1 MTRX2
  #         (MTRX1 and MTRX2 are NAMES of flat row-major arrays)
  # OUTPUT: ret_hpt_basic ret_hpt_wp ret_hpt_wn (globals)
  local row=$1
  local col=$2
  local multi=$3
  local alpha_uni=$4
  local m1_ref="$5[@]"
  local m2_ref="$6[@]"
  local MTRX1=(${!m1_ref})
  local MTRX2=(${!m2_ref})
  local hpt_x meddiff
  local i j

  for ((i = 0; i < row; i++)); do
    # Start every row from an empty buffer: first "col" slots hold the scaled
    # MTRX1 row, the next "col" slots the MTRX2 row.
    hpt_x=()
    for ((j = 0; j < col; j++)); do
      hpt_x[j]=$(echo "scale=$ACC;$multi*${MTRX1[i*col+j]}" | bc)
      hpt_x[j+col]=${MTRX2[i*col+j]}
    done
    unibench $col $alpha_uni hpt_x
    meddiff[i]=$ret_unibench
  done

  crossbench $row meddiff
  ret_hpt_basic=$ret_crossbench
  ret_hpt_wp=$ret_cb_wp
  ret_hpt_wn=$ret_cb_wn
}
_maxspeedup_holds() {
  # FUNCTION: private helper of maxspeedup; tells whether the candidate
  #           speedup $1 still meets the requested reliability.
  #           It reads row, col, alpha, reliability, bool_wp_smaller, MMTRX1
  #           and MMTRX2 from the calling maxspeedup through dynamic scoping.
  # FORMAT: _maxspeedup_holds speedup
  # OUTPUT: exit status 0 when the criterion holds, non-zero otherwise
  local factor=$1
  if [[ $bool_wp_smaller -eq 1 ]]; then
    hpt_basic $row $col $factor $alpha MMTRX1 MMTRX2
    [[ $(echo "$ret_hpt_basic<1-$reliability" | bc) -eq 1 ]]
  else
    # In this direction the first matrix is scaled by the reciprocal.
    factor=$(echo "scale=$ACC;1/$factor" | bc)
    hpt_basic $row $col $factor $alpha MMTRX1 MMTRX2
    [[ $(echo "$ret_hpt_basic>$reliability" | bc) -eq 1 ]]
  fi
}

maxspeedup() {
  # FUNCTION: find the largest speedup (searched below 10) that still holds
  #           at the given reliability. The value is determined one decimal
  #           digit at a time, each digit by bisection, and finally formatted
  #           with acc_maxsu decimals. Most parameters have the same meaning
  #           as in hpt_basic.
  # FORMAT: maxspeedup row col reliability alpha_uni bool_wp_smaller MTRX1 MTRX2
  # OUTPUT: ret_maxspeedup (global); -2 when reliability < 0.5 (skipped),
  #         -1 when the criterion still holds at speedup 10
  local row=$1
  local col=$2
  local reliability=$3
  local alpha=$4
  local bool_wp_smaller=$5
  local mm1_ref="$6[@]"
  local mm2_ref="$7[@]"
  local MMTRX1=(${!mm1_ref})
  local MMTRX2=(${!mm2_ref})
  local su basesu myscale min max mid step

  if [[ $(echo "$reliability<0.5" | bc) -eq 1 ]]; then
    echo "[Warning: Skip Calculating][in function \"maxspeedup\"] The reliability value($reliability), which is less than 0.5, will lead to a meaningless conclusion. Skip calculating."
    ret_maxspeedup=-2
    return
  fi

  if _maxspeedup_holds 10; then
    echo "[Error][in function \"maxspeedup\"] Overflow: the maximum_speedup is beyond the upper bound 10. Stop calculating."
    ret_maxspeedup=-1
    return
  fi

  step=-1    # -1 is the integer digit, 0..acc_maxsu the decimal digits
  myscale=1  # weight of the digit currently being searched
  min=1
  max=10
  basesu=0   # digits fixed so far
  while [[ $step -le $acc_maxsu ]]; do
    mid=$(((max - min) / 2 + min))
    su=$(echo "scale=$ACC;$basesu+$myscale*$mid" | bc)
    if _maxspeedup_holds "$su"; then
      min=$mid
    else
      max=$mid
    fi
    if [[ $min -eq $((max - 1)) ]]; then
      # Digit settled: fix it and move to the next decimal place.
      basesu=$(echo "scale=$ACC;$basesu+$min*$myscale" | bc)
      myscale=$(echo "scale=$ACC;$myscale/10" | bc)
      step=$((step + 1))
      min=0
      max=10
    fi
  done
  ret_maxspeedup=$(printf "%.${acc_maxsu}f" $basesu)
}
#########################################################################
# Math Part END
#########################################################################
#########################################################################
# Readfile Part
#########################################################################
read_config()
{
# FUNCTION: set all the key parameters by reading the config file.
# FORMAT: read_config cfg_file printinfo(display the config info when printinfo=1 and do not display when 0)
# OUTPUT: $? (1 for Error and 0 for No_error)
# GLOBALS WRITTEN (none of the working variables is local):
#   AutoCompareOff, name1, name2, loggroup2, outputfile, repeat, alpha_uni,
#   speedup[], reliability[], trineseq[], len_trineseq, current_status, i3
# SIDE EFFECTS: a LogGroup1/LogGroup2 line rotates the directory
#   loggroup1/loggroup2 in the working directory (the previous *.old copy is
#   deleted) and copies the listed files into a fresh directory.
# PARSING MODEL: a small state machine driven by current_status:
#   0 = outside any section, 1 = [Basic Info], 2 = [HPT Parameters],
#   3 = [Workload Settings], 4 = [Default Baseline Statistics], -1 = end marker seen.
local cfgfile=$1
local printinfo=$2
if [[ $# -ne 2 ]]; then
echo "[Error][in function \"read_config\"] Format error."
return 1
fi
current_status=0
AutoCompareOff=0
i3=0
len_trineseq=-1
# NOTE(review): cnt, matched, ReadTriple, ReadDefaultMTRX and n_col_default are
# assigned here but not read anywhere else in this function.
cnt=0
matched=0
ReadTriple=-1
ReadDefaultMTRX=-1
n_col_default=5
# Key names recognised in [Basic Info] (str_1_*) and [HPT Parameters] (str_2_*).
# Keys are matched as a prefix of the line.
str_1_1=HaveCompareGroup
str_1_2=Name1
str_1_3=LogGroup1
str_1_4=Name2
str_1_5=LogGroup2
str_1_6=output_file
str_1_7=DefaultBaselineName
str_1_8=DefaultBaselineStatisticsLog
str_2_1=repeat
str_2_2=alpha_unibench
str_2_3=speedup
str_2_4=reliability
printf "Reading NPT parameters at `date`...\n"
while read LINE; do
# The unquoted echo collapses runs of whitespace. It also lets the shell
# expand glob characters contained in the line.
LINE=`echo $LINE`
# Everything from the first "##" onwards is a comment.
if [[ "$LINE" == *"##"* ]]; then
LINE=${LINE%%##*}
LINE=`echo $LINE`
fi
if [[ "$LINE" == "" ]]; then
continue
fi
if [[ "$LINE" == "[CONFIG FILE END]" ]]; then
current_status=-1
break
elif [[ $current_status -eq 0 ]]; then
# Outside a section only section headers matter; any other line is skipped.
if [[ "$LINE" == "[Basic Info]" ]]; then
current_status=1
elif [[ "$LINE" == "[HPT Parameters]" ]]; then
current_status=2
elif [[ "$LINE" == "[Workload Settings]" ]]; then
current_status=3
elif [[ "$LINE" == "[Default Baseline Statistics]" ]]; then
# NOTE(review): no branch below handles status 4, so once this header is
# seen every following line is ignored until "[CONFIG FILE END]".
current_status=4
fi
continue
elif [[ $current_status -eq 1 ]]; then
# ---- [Basic Info]: each value is what follows "KEY =" ----
if [[ "$LINE" == "[Basic Info End]" ]]; then
current_status=0
continue
elif [[ "${LINE:0:${#str_1_1}}" == $str_1_1 ]]; then
content=${LINE##HaveCompareGroup}
content=`echo $content`
content=${content##=}
content=`echo $content`
# AutoCompareOff=1 means the config supplies its own second group; 0 means
# the default baseline is used (see the Name2/LogGroup2/Default* keys below).
if [[ "$content" == "TRUE" ]] || [[ "$content" == "1" ]]; then
AutoCompareOff=1
else
AutoCompareOff=0
fi
elif [[ "${LINE:0:${#str_1_2}}" == $str_1_2 ]]; then
content=${LINE##Name1}
content=`echo $content`
content=${content##=}
name1=`echo $content`
elif [[ "${LINE:0:${#str_1_3}}" == $str_1_3 ]]; then
# Rotate ./loggroup1 to ./loggroup1.old (dropping an older backup), then
# collect the regular files listed by `ls <value>` into a new ./loggroup1.
if [ -d loggroup1 ]; then
if [ -d loggroup1.old ]; then
rm -rf loggroup1.old
fi
mv loggroup1 loggroup1.old
fi
mkdir loggroup1
content=${LINE##LogGroup1}
content=`echo $content`
content=${content##=}
content=`echo $content`
for file in `ls $content`; do
if [ -f $file ]; then
cp $file loggroup1/
fi
done
elif [[ "${LINE:0:${#str_1_4}}" == $str_1_4 ]]; then
# Name2 and LogGroup2 only apply when a compare group is configured.
if [[ $AutoCompareOff -eq 1 ]]; then
content=${LINE##Name2}
content=`echo $content`
content=${content##=}
name2=`echo $content`
fi
elif [[ "${LINE:0:${#str_1_5}}" == $str_1_5 ]]; then
if [[ $AutoCompareOff -eq 1 ]]; then
# Same rotate-and-copy procedure as for LogGroup1.
if [ -d loggroup2 ]; then
if [ -d loggroup2.old ]; then
rm -rf loggroup2.old
fi
mv loggroup2 loggroup2.old
fi
mkdir loggroup2
content=${LINE##LogGroup2}
content=`echo $content`
content=${content##=}
content=`echo $content`
for file in `ls $content`; do
if [ -f $file ]; then
cp $file loggroup2/
fi
done
# eval loggroup2=(`ls $content`)
fi
elif [[ "${LINE:0:${#str_1_7}}" == $str_1_7 ]]; then
# The DefaultBaseline* keys only apply when no compare group is configured;
# they fill name2 and loggroup2 (here a file path, not a directory).
if [[ $AutoCompareOff -eq 0 ]]; then
content=${LINE##DefaultBaselineName}
content=`echo $content`
content=${content##=}
name2=`echo $content`
fi
elif [[ "${LINE:0:${#str_1_8}}" == $str_1_8 ]]; then
if [[ $AutoCompareOff -eq 0 ]]; then
content=${LINE##DefaultBaselineStatisticsLog}
content=`echo $content`
content=${content##=}
loggroup2=`echo $content`
fi
elif [[ "${LINE:0:${#str_1_6}}" == $str_1_6 ]]; then
content=${LINE##output_file}
content=`echo $content`
content=${content##=}
outputfile=`echo $content`
fi
elif [[ $current_status -eq 2 ]]; then
# ---- [HPT Parameters] ----
if [[ "$LINE" == "[HPT Parameters End]" ]]; then
current_status=0
continue
elif [[ "${LINE:0:${#str_2_1}}" == $str_2_1 ]]; then
content=${LINE##repeat}
content=`echo $content`
content=${content##=}
repeat=`echo $content`
elif [[ "${LINE:0:${#str_2_2}}" == $str_2_2 ]]; then
content=${LINE##alpha_unibench}
content=`echo $content`
content=${content##=}
alpha_uni=`echo $content`
elif [[ "${LINE:0:${#str_2_3}}" == $str_2_3 ]]; then
content=${LINE##speedup}
content=`echo $content`
content=${content##=}
content=`echo $content`
# A whitespace-separated list becomes the array speedup[].
eval speedup=(\$content)
elif [[ "${LINE:0:${#str_2_4}}" == $str_2_4 ]]; then
content=${LINE##reliability}
content=`echo $content`
content=${content##=}
content=`echo $content`
# A whitespace-separated list becomes the array reliability[].
eval reliability=(\$content)
fi
elif [[ $current_status -eq 3 ]]; then
# ---- [Workload Settings]: first word is the workload id, last word is an
# on/off switch; only workloads whose switch equals 1 are kept ----
if [[ "$LINE" == "[Workload Settings End]" ]]; then
len_trineseq=$i3
current_status=0
continue
else
trine=${LINE%% *}
switch=${LINE##* }
if [[ $switch -eq 1 ]]; then
trineseq[$i3]=$trine
i3=$(($i3+1))
fi
fi
fi
done<$cfgfile
# Post-processing of the uni-bench significance level according to sample size.
if [[ $repeat -le 2 ]]; then
echo "[Warning] Then uni-bench level test will be skipped because of the small sample size(n_col<3)"
alpha_uni=NaN
# NOTE(review): the test below is "repeat < 5" while the message says
# "n_col<=5"; confirm which boundary is intended.
elif [[ `echo "$alpha_uni!=0.1"|bc` -eq 1 ]] && [[ $repeat -lt 5 ]]; then
echo "[Warning] The alpha value of uni-bench level is set to 0.1 by force because of the small sample size(n_col<=5)"
alpha_uni=0.1
fi
printf "Complete reading NPT parameters at `date`\n\n"
# "printinfo" is evaluated as an arithmetic variable name here.
if [[ printinfo -eq 1 ]]; then
check_cfg_info
fi
return 0
}
readmatrix() {
  # FUNCTION: fill the matrix "var_matrix" (row*col, row-major) with run times
  #           taken from every log file in the directory "filegroup_dir".
  #           A "[PARSEC] [========== Running benchmark NAME [...]" line
  #           announces a run; the next "real XmY.YYYs" line gives its time,
  #           stored in seconds. The workload id is NAME-inputsize-thread,
  #           with thread and inputsize parsed from the log file name
  #           (.../run_<thread>_<inputsize>_*.log). Only ids listed in
  #           seq_trine are collected, at most col samples each.
  # FORMAT: readmatrix row col seq_trine filegroup_dir var_matrix
  #         (seq_trine and var_matrix are array NAMES; var_matrix must not
  #          clash with the local variable names used below)
  # OUTPUT: $? (1 for Error and 0 for No_error) (var_matrix)
  # NOTE: the scratch file loadmatrix.tmp in the working directory is removed
  #       on every exit path. File names are parsed from grep's "file:" prefix,
  #       which grep only prints when it is given more than one file.
  local row=$1
  local col=$2
  local rm_ref="$3[@]"
  local seq_trine=(${!rm_ref})
  local filegroup_dir=$4
  local matrixname=$5
  local i j cnt LINE filename
  local thread inputsize tmpstr pos jth mvalue svalue timevalue
  local gotcha=0   # 1 while a benchmark header is waiting for its "real" line
  local totalcnt=0
  local failed=0
  local str1="[PARSEC] [========== Running benchmark "

  echo "Reading $filegroup_dir at `date`..."
  # Validate before touching the file system.
  if [[ ${#seq_trine[@]} -ne $row ]]; then
    echo "[Error][in function \"readmatrix\"] Variant \"row\" does not match!"
    return 1
  fi
  grep -E '\[PARSEC\] \[========== Running benchmark |real' "$filegroup_dir"/* >loadmatrix.tmp

  for ((i = 0; i < row; i++)); do
    cnt[i]=0
  done

  while read -r LINE; do
    # Split grep's "path/run_<thread>_<inputsize>_....log:<text>" prefix.
    filename=${LINE%\.log*}
    thread=${filename##*/}
    thread=${thread#*run_}
    thread=${thread%%_*}
    inputsize=${filename##*/}
    inputsize=${inputsize#*run_*_}
    inputsize=${inputsize%%_*}
    LINE=${LINE#*\.log:}

    if [[ "${LINE:0:${#str1}}" == "$str1" ]]; then
      if [[ $gotcha -eq 1 ]]; then
        # Two headers in a row without a timing line in between.
        echo "[Error][in function \"readmatrix\"] Incorrect log file format!"
        failed=1
        break
      fi
      tmpstr=`expr match "$LINE" '.\+benchmark\ \(.\+\)\ \[.*'`
      tmpstr="$tmpstr-$inputsize-$thread"
      for ((j = 0; j < row; j++)); do
        if [[ "$tmpstr" == "${seq_trine[j]}" ]] && [[ ${cnt[j]} -lt $col ]]; then
          pos=$((col * j + ${cnt[j]}))
          jth=$j
          gotcha=1
          break
        fi
      done
    elif [[ "${LINE:0:4}" == "real" ]] && [[ $gotcha -eq 1 ]]; then
      LINE=`echo $LINE`
      mvalue=`expr match "$LINE" '.\+\ \(.\+\)m.*'`
      svalue=`expr match "$LINE" '.\+*m\(.\+\)s.*'`
      timevalue=$(echo "scale=3;$mvalue*60+$svalue" | bc)
      eval "$matrixname[$pos]=$timevalue"
      totalcnt=$((totalcnt + 1))
      cnt[jth]=$((${cnt[jth]} + 1))
      gotcha=0
    fi
    # Stop as soon as every cell has been filled.
    if [[ $totalcnt -eq $((row * col)) ]]; then
      break
    fi
  done <loadmatrix.tmp
  rm -f loadmatrix.tmp

  if [[ $failed -eq 1 ]]; then
    return 1
  fi
  # Every workload must have contributed exactly col samples.
  for ((i = 0; i < row; i++)); do
    if [[ ${cnt[i]} -ne $col ]]; then
      echo "[Error][in function \"readmatrix\"] Insufficient data!"
      return 1
    fi
  done
  echo "Complete reading $filegroup_dir at `date`"
}
read_default() {
  # FUNCTION: read the default baseline matrix from file "filename" into the
  #           array "matrixname". Every line of the file is
  #           "<workload-id> v1 v2 ..."; for each id listed in seq_trine the
  #           first n_col values are stored in the row matching the id's
  #           position in seq_trine. Blank lines are skipped.
  # FORMAT: read_default n_col seq_trine filename matrixname
  #         (seq_trine and matrixname are array NAMES)
  # OUTPUT: (matrixname); $? is 0 once every workload has been found, and 1
  #         when the file is missing or some workload is absent from it
  local n_col=$1
  local rd_ref="$2[@]"
  local seq_trine=(${!rd_ref})
  local filename=$3
  local trine numseq i tmp val
  local cnt=0

  if [[ ! -f "$filename" ]]; then
    echo "[Error][in function \"read_default\"] Can not locate $filename!"
    return 1
  fi
  echo "Loading default baseline matrix..."

  while read -r trine numseq; do
    [[ -n "$trine" ]] || continue
    for ((i = 0; i < ${#seq_trine[@]}; i++)); do
      # Quoted on both sides: a literal comparison that also survives
      # unusual ids.
      if [[ "${seq_trine[i]}" == "$trine" ]]; then
        cnt=$((cnt + 1))
        tmp=0
        # Word-splitting of numseq is intended: one word per sample.
        for val in $numseq; do
          eval "$4[$((i * n_col + tmp))]=$val"
          tmp=$((tmp + 1))
          if [[ $tmp -ge $n_col ]]; then
            break
          fi
        done
        break
      fi
    done
    if [[ $cnt -eq ${#seq_trine[@]} ]]; then
      return 0
    fi
  done <"$filename"

  echo "[Error][in function \"read_default\"] Can not locate all the workload when searching the baseline file."
  return 1
}
#########################################################################
# Readfile Part End
#########################################################################
check_cfg_info() {
  # FUNCTION: print the configuration currently held in the global variables
  #           (compare-group flag, machine names, log sources, output file,
  #           repeat, alpha, speedup list, reliability list, workload list)
  # FORMAT: check_cfg_info
  # OUTPUT: (None) -- the checklist is written to stdout
  printf '%s\n' "===========[Parameter Info Checklist]=========="
  printf '%s\n' "HaveCompareGroup: $AutoCompareOff"
  printf '%s\n' "Machine1: $name1"
  printf '%s\n' "Machine2: $name2"
  printf '%s\n' "DataLog1: $(ls loggroup1)"
  printf '\n'

  # With a compare group, loggroup2 is a directory of collected logs;
  # otherwise the variable loggroup2 holds the baseline file path.
  case "$AutoCompareOff" in
    1 | TRUE) printf '%s\n' "DataLog2: $(ls loggroup2)" ;;
    *) printf '%s\n' "DataLog2: ${loggroup2}" ;;
  esac

  printf '%s\n' "Outputfile: $outputfile"
  printf '%s\n' "repeat: $repeat"
  printf '%s\n' "alpha: $alpha_uni"

  if [[ ${#speedup[@]} -ne 0 ]]; then
    printf '%s\n' "Speedup(${#speedup[@]}):${speedup[*]}"
  else
    printf '%s\n' "Speedup: None"
  fi
  if [[ ${#reliability[@]} -ne 0 ]]; then
    printf '%s\n' "Reliability(${#reliability[@]}):${reliability[*]}"
  else
    printf '%s\n' "Reliability: None"
  fi

  printf '%s\n' "Benchmark-Inputsize-Nthread(${#trineseq[@]} in total):"
  echo ${trineseq[@]}
  printf '%s\n' "================[Checklist End]================"
  printf '\n'
}
main() {
  # FUNCTION: the whole HPTest: read the config file, load both data
  #           matrices, compute the reliability of the comparison, then the
  #           reliability for each configured speedup and the maximum speedup
  #           for each configured reliability. Results are printed and also
  #           written to the configured output file.
  # FORMAT: main cfgfile
  # OUTPUT: $? (0 on success, 1 on any error)
  local row n_mul tmp_reli t_name1 t_name2 better su reli

  printf "\n=================================================================\n"
  printf "========================= HPT ==============================\n"
  printf "=================================================================\n\n"
  if [[ $# -ne 1 ]]; then
    echo "[Error][in function \"main\"] the standard format should be \"bash hpt.sh cfgfile\""
    return 1
  fi

  read_config "$1" 1 || return 1
  if [[ -z "$outputfile" ]]; then
    # Without this check the redirections below fail one by one.
    echo "[Error][in function \"main\"] output_file is not set in the config file."
    return 1
  fi

  # Start a fresh report file.
  {
    echo "================================================================="
    echo "========================= HPT =============================="
    echo "================================================================="
    echo ""
    echo "[Date] `date`"
    echo ""
    check_cfg_info
  } >"$outputfile"

  row=${#trineseq[@]}
  if [[ $AutoCompareOff -eq 0 ]] && [[ $repeat -gt $NCOL_BASELINE ]]; then
    echo "[Overflow] repeat times(which is $repeat) should be no larger than NCOL_BASELINE($NCOL_BASELINE) when using default baseline as comparison."
    return 1
  fi

  # Load the two matrices; every loader failure aborts with status 1.
  readmatrix $row $repeat trineseq loggroup1 matrix_a || return 1
  if [[ $AutoCompareOff -eq 1 ]]; then
    readmatrix $row $repeat trineseq loggroup2 matrix_b || return 1
  else
    read_default $repeat trineseq "$loggroup2" matrix_b || return 1
  fi

  display_matrix $row $repeat matrix_a "$name1" | tee -a "$outputfile"
  display_matrix $row $repeat matrix_b "$name2" | tee -a "$outputfile"
  echo "" >>"$outputfile"

  printf "\nAnalyzing and calculating, this may take a few minutes...\n\n"
  # The first run is timed so the user gets an estimate for the whole job.
  time hpt_basic $row $repeat 1 $alpha_uni matrix_a matrix_b
  printf "===========================================================================================\n"
  n_mul=$((${#speedup[@]} + (acc_maxsu + 1) * 4 * ${#reliability[@]}))
  printf "[Time estimating] The whole process will complete in about $n_mul times the above time.\n"
  printf "===========================================================================================\n\n"

  # The side with the smaller rank-sum is reported as the better machine.
  if [[ $(echo "$ret_hpt_wp<$ret_hpt_wn" | bc) -eq 1 ]]; then
    tmp_reli=$(echo "scale=4;1-$ret_hpt_basic" | bc)
    t_name1="machine_A($name1)"
    t_name2="machine_B($name2)"
    better=1
  else
    tmp_reli=$(echo "scale=4;$ret_hpt_basic" | bc)
    t_name1="machine_B($name2)"
    t_name2="machine_A($name1)"
    better=0
  fi
  # Names are passed as %s arguments so that a "%" in a machine name cannot
  # corrupt the format string.
  printf "[Comparison][original]\n%s is better than %s with reliability %.4f.\n\n" \
    "$t_name1" "$t_name2" "$tmp_reli" | tee -a "$outputfile"

  for su in "${speedup[@]}"; do
    if [[ $better -eq 1 ]]; then
      hpt_basic $row $repeat $su $alpha_uni matrix_a matrix_b
    else
      hpt_basic $row $repeat $su $alpha_uni matrix_b matrix_a
    fi
    tmp_reli=$(echo "scale=4;1-$ret_hpt_basic" | bc)
    {
      printf "[Comparison][speedup=%.4f]\n" "$su"
      printf "The reliability of that %s outperforms %s with speedup %.4f is %.4f.\n\n" \
        "$t_name1" "$t_name2" "$su" "$tmp_reli"
    } | tee -a "$outputfile"
  done

  for reli in "${reliability[@]}"; do
    maxspeedup $row $repeat $reli $alpha_uni $better matrix_a matrix_b
    {
      printf "[Comparison][reliability=%.4f]\n" "$reli"
      # Negative values are the "skipped"/"overflow" markers of maxspeedup.
      if [[ $(echo "$ret_maxspeedup>0" | bc) -eq 1 ]]; then
        printf "The maximum speedup of %s over %s is %.${acc_maxsu}f with the given reliability %.4f.\n\n" \
          "$t_name1" "$t_name2" "$ret_maxspeedup" "$reli"
      else
        printf "Skipped\n\n"
      fi
    } | tee -a "$outputfile"
  done

  printf "\n\n[HPT COMPLETED] `date`\n" >>"$outputfile"
  rm -f loadmatrix.tmp
  return 0
}
# Entry point. "configfile" is set at the top of the script to the first
# command-line argument, or to "parameter.cfg" when none is given; passing it
# here (quoted) lets that default take effect and keeps paths with spaces intact.
main "$configfile"