!-------------------------------------------------------------------------!
!                                                                         !
!        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3         !
!                                                                         !
!                                   E P                                   !
!                                                                         !
!-------------------------------------------------------------------------!
!                                                                         !
!    This benchmark is part of the NAS Parallel Benchmark 3.3 suite.      !
!    It is described in NAS Technical Reports 95-020 and 02-007           !
!                                                                         !
!    Permission to use, copy, distribute and modify this software         !
!    for any purpose with or without fee is hereby granted.  We           !
!    request, however, that all derived work reference the NAS            !
!    Parallel Benchmarks 3.3. This software is provided "as is"           !
!    without express or implied warranty.                                 !
!                                                                         !
!    Information on NPB 3.3, including the technical report, the          !
!    original specifications, source code, results and information        !
!    on how to submit new results, is available at:                       !
!                                                                         !
!           http://www.nas.nasa.gov/Software/NPB/                         !
!                                                                         !
!    Send comments or suggestions to  npb@nas.nasa.gov                    !
!                                                                         !
!          NAS Parallel Benchmarks Group                                  !
!          NASA Ames Research Center                                      !
!          Mail Stop: T27A-1                                              !
!          Moffett Field, CA   94035-1000                                 !
!                                                                         !
!          E-mail:  npb@nas.nasa.gov                                      !
!          Fax:     (650) 604-3957                                        !
!                                                                         !
!-------------------------------------------------------------------------!
c---------------------------------------------------------------------
c
c   Authors: P. O. Frederickson
c            D. H. Bailey
c            A. C. Woo
c            R. F. Van der Wijngaart
c---------------------------------------------------------------------
c---------------------------------------------------------------------
program EMBAR
c---------------------------------------------------------------------
C
c This is the MPI version of the APP Benchmark 1,
c the "embarassingly parallel" benchmark.
c
c
c M is the Log_2 of the number of complex pairs of uniform (0, 1) random
c numbers. MK is the Log_2 of the size of each batch of uniform random
c numbers. MK can be set for convenience on a given system, since it does
c not affect the results.
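c
c The derived parameters below follow from this: MM = M - MK, so that
c NN = 2^MM is the total number of batches and NK = 2^MK is the number
c of complex pairs (2*NK uniform numbers) in each batch.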
implicit none
include 'npbparams.h'
include 'mpinpb.h'
double precision Mops, epsilon, a, s, t1, t2, t3, t4, x, x1,
> x2, q, sx, sy, tm, an, tt, gc, dum(3),
> timer_read
double precision sx_verify_value, sy_verify_value, sx_err, sy_err
integer mk, mm, nn, nk, nq, np, ierr, node, no_nodes,
> i, ik, kk, l, k, nit, ierrcode, no_large_nodes,
> np_add, k_offset, j
logical verified, timers_enabled
external randlc, timer_read
double precision randlc, qq
character*15 size
integer fstatus
integer t_total, t_gpairs, t_randn, t_rcomm, t_last
parameter (t_total=1, t_gpairs=2, t_randn=3, t_rcomm=4, t_last=4)
double precision tsum(t_last+2), t1m(t_last+2),
> tming(t_last+2), tmaxg(t_last+2)
character t_recs(t_last+2)*8
parameter (mk = 16, mm = m - mk, nn = 2 ** mm,
> nk = 2 ** mk, nq = 10, epsilon=1.d-8,
> a = 1220703125.d0, s = 271828183.d0)
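c
c a = 5^13 is the multiplier and s the seed of the linear congruential
c generator x_{k+1} = a * x_k (mod 2^46) implemented by randlc/vranlc.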
common/storage/ x(2*nk), q(0:nq-1), qq(10000)
data dum /1.d0, 1.d0, 1.d0/
data t_recs/'total', 'gpairs', 'randn', 'rcomm',
> ' totcomp', ' totcomm'/
call mpi_init(ierr)
call mpi_comm_rank(MPI_COMM_WORLD,node,ierr)
call mpi_comm_size(MPI_COMM_WORLD,no_nodes,ierr)
root = 0
if (.not. convertdouble) then
dp_type = MPI_DOUBLE_PRECISION
else
dp_type = MPI_REAL
endif
if (node.eq.root) then
c Because the size of the problem is too large to store in a 32-bit
c integer for some classes, we put it into a string for printing.
c The floating-point edit descriptor inserts a decimal point, which
c has to be stripped off (internal file write).
write(*, 1000)
write(size, '(f15.0)' ) 2.d0**(m+1)
j = 15
if (size(j:j) .eq. '.') j = j - 1
write (*,1001) size(1:j)
write(*, 1003) no_nodes
1000 format(/,' NAS Parallel Benchmarks 3.3 -- EP Benchmark',/)
1001 format(' Number of random numbers generated: ', a15)
1003 format(' Number of active processes: ', 2x, i13, /)
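c The mere presence of a file named 'timer.flag' in the working
c directory enables the section timers; its contents are ignored.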
open (unit=2,file='timer.flag',status='old',iostat=fstatus)
timers_enabled = .false.
if (fstatus .eq. 0) then
timers_enabled = .true.
close(2)
endif
endif
call mpi_bcast(timers_enabled, 1, MPI_LOGICAL, root,
> MPI_COMM_WORLD, ierr)
verified = .false.
c Compute the number of "batches" of random number pairs generated
c per processor. Adjust if the number of processors does not evenly
c divide the total number of batches.
np = nn / no_nodes
no_large_nodes = mod(nn, no_nodes)
if (node .lt. no_large_nodes) then
np_add = 1
else
np_add = 0
endif
np = np + np_add
if (np .eq. 0) then
write (6, 1) no_nodes, nn
1 format ('Too many nodes:',2i6)
ierrcode = 1
call mpi_abort(MPI_COMM_WORLD,ierrcode,ierr)
stop
endif
c Call the random number generator functions and initialize
c the x-array to reduce the effects of paging on the timings.
c Also, call all mathematical functions that are used. Make
c sure these initializations cannot be eliminated as dead code.
call vranlc(0, dum(1), dum(2), dum(3))
dum(1) = randlc(dum(2), dum(3))
do 5 i = 1, 2*nk
x(i) = -1.d99
5 continue
Mops = log(sqrt(abs(max(1.d0,1.d0))))
c---------------------------------------------------------------------
c Synchronize before placing time stamp
c---------------------------------------------------------------------
do i = 1, t_last
call timer_clear(i)
end do
call mpi_barrier(MPI_COMM_WORLD, ierr)
call timer_start(1)
t1 = a
call vranlc(0, t1, a, x)
c Compute AN = A ^ (2 * NK) (mod 2^46).
t1 = a
do 100 i = 1, mk + 1
t2 = randlc(t1, t1)
100 continue
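c Each randlc(t1, t1) call squares t1 modulo 2^46, so after mk+1
c squarings t1 = a^(2^(mk+1)) = a^(2*nk) (mod 2^46).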
an = t1
tt = s
gc = 0.d0
sx = 0.d0
sy = 0.d0
do 110 i = 0, nq - 1
q(i) = 0.d0
110 continue
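c sx and sy accumulate the sums of the Gaussian deviates, q(0:nq-1)
c the annulus counts; gc will hold the global count of accepted pairs.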
c Each iteration of this loop may be performed independently. We
c compute the k offsets separately because some nodes have more
c batches to generate than others.
if (np_add .eq. 1) then
k_offset = node * np -1
else
k_offset = no_large_nodes*(np+1) + (node-no_large_nodes)*np -1
endif
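c With kk = k_offset + k below, batches are numbered 0 .. nn-1
c globally: the first no_large_nodes ranks hold one extra batch each,
c and the remaining ranks start after those larger blocks.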
do 150 k = 1, np
kk = k_offset + k
t1 = s
t2 = an
c Find starting seed t1 for this kk.
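c This is modular binary exponentiation: t2 holds successive squarings
c of an and is multiplied into t1 for each set bit of kk, leaving
c t1 = s * an^kk (mod 2^46), i.e. the seed advanced 2*nk*kk positions
c into the random sequence.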
do 120 i = 1, 100
ik = kk / 2
if (2 * ik .ne. kk) t3 = randlc(t1, t2)
if (ik .eq. 0) goto 130
t3 = randlc(t2, t2)
kk = ik
120 continue
c Compute uniform pseudorandom numbers.
130 continue
if (timers_enabled) call timer_start(t_randn)
call vranlc(2 * nk, t1, a, x)
if (timers_enabled) call timer_stop(t_randn)
c Compute Gaussian deviates by the acceptance-rejection method and
c tally counts in concentric square annuli. This loop is not
c vectorizable.
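c (This is the Marsaglia polar method: for (x1,x2) uniform on the
c square and inside the unit disk, t1 .le. 1, the transform yields two
c independent Gaussian deviates t3 and t4; annulus l counts pairs with
c l .le. max(|t3|,|t4|) .lt. l+1.)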
if (timers_enabled) call timer_start(t_gpairs)
do 140 i = 1, nk
x1 = 2.d0 * x(2*i-1) - 1.d0
x2 = 2.d0 * x(2*i) - 1.d0
t1 = x1 ** 2 + x2 ** 2
if (t1 .le. 1.d0) then
t2 = sqrt(-2.d0 * log(t1) / t1)
t3 = (x1 * t2)
t4 = (x2 * t2)
l = max(abs(t3), abs(t4))
q(l) = q(l) + 1.d0
sx = sx + t3
sy = sy + t4
endif
140 continue
if (timers_enabled) call timer_stop(t_gpairs)
150 continue
if (timers_enabled) call timer_start(t_rcomm)
call mpi_allreduce(sx, x, 1, dp_type,
> MPI_SUM, MPI_COMM_WORLD, ierr)
sx = x(1)
call mpi_allreduce(sy, x, 1, dp_type,
> MPI_SUM, MPI_COMM_WORLD, ierr)
sy = x(1)
call mpi_allreduce(q, x, nq, dp_type,
> MPI_SUM, MPI_COMM_WORLD, ierr)
if (timers_enabled) call timer_stop(t_rcomm)
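c x(1:nq) served as scratch for the reduction; copy the global
c annulus counts back into q.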
do i = 1, nq
q(i-1) = x(i)
enddo
do 160 i = 0, nq - 1
gc = gc + q(i)
160 continue
call timer_stop(1)
tm = timer_read(1)
call mpi_allreduce(tm, x, 1, dp_type,
> MPI_MAX, MPI_COMM_WORLD, ierr)
tm = x(1)
if (node.eq.root) then
nit=0
verified = .true.
if (m.eq.24) then
sx_verify_value = -3.247834652034740D+3
sy_verify_value = -6.958407078382297D+3
elseif (m.eq.25) then
sx_verify_value = -2.863319731645753D+3
sy_verify_value = -6.320053679109499D+3
elseif (m.eq.28) then
sx_verify_value = -4.295875165629892D+3
sy_verify_value = -1.580732573678431D+4
elseif (m.eq.30) then
sx_verify_value = 4.033815542441498D+4
sy_verify_value = -2.660669192809235D+4
elseif (m.eq.32) then
sx_verify_value = 4.764367927995374D+4
sy_verify_value = -8.084072988043731D+4
elseif (m.eq.36) then
sx_verify_value = 1.982481200946593D+5
sy_verify_value = -1.020596636361769D+5
elseif (m.eq.40) then
sx_verify_value = -5.319717441530D+05
sy_verify_value = -3.688834557731D+05
else
verified = .false.
endif
if (verified) then
sx_err = abs((sx - sx_verify_value)/sx_verify_value)
sy_err = abs((sy - sy_verify_value)/sy_verify_value)
verified = ((sx_err.le.epsilon) .and. (sy_err.le.epsilon))
endif
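c Mops reports millions of random numbers (2^(m+1) in total)
c generated per second.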
Mops = 2.d0**(m+1)/tm/1000000.d0
write (6,11) tm, m, gc, sx, sy, (i, q(i), i = 0, nq - 1)
11 format ('EP Benchmark Results:'//'CPU Time =',f10.4/'N = 2^',
> i5/'No. Gaussian Pairs =',f15.0/'Sums = ',1p,2d25.15/
> 'Counts:'/(i3,0p,f15.0))
call print_results('EP', class, m+1, 0, 0, nit, npm,
> no_nodes, tm, Mops,
> 'Random numbers generated',
> verified, npbversion, compiletime, cs1,
> cs2, cs3, cs4, cs5, cs6, cs7)
endif
if (.not.timers_enabled) goto 999
do i = 1, t_last
t1m(i) = timer_read(i)
end do
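c Slots t_last+1 and t_last+2 hold derived times: total computation
c (total minus communication) and total communication, matching the
c 'totcomp' and 'totcomm' labels in t_recs.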
t1m(t_last+2) = t1m(t_rcomm)
t1m(t_last+1) = t1m(t_total) - t1m(t_last+2)
call MPI_Reduce(t1m, tsum, t_last+2, dp_type, MPI_SUM,
> 0, MPI_COMM_WORLD, ierr)
call MPI_Reduce(t1m, tming, t_last+2, dp_type, MPI_MIN,
> 0, MPI_COMM_WORLD, ierr)
call MPI_Reduce(t1m, tmaxg, t_last+2, dp_type, MPI_MAX,
> 0, MPI_COMM_WORLD, ierr)
if (node .eq. 0) then
write(*, 800) no_nodes
do i = 1, t_last+2
tsum(i) = tsum(i) / no_nodes
write(*, 810) i, t_recs(i), tming(i), tmaxg(i), tsum(i)
end do
endif
800 format(' nprocs =', i6, 11x, 'minimum', 5x, 'maximum',
> 5x, 'average')
810 format(' timer ', i2, '(', A8, ') :', 3(2x,f10.4))
999 continue
call mpi_finalize(ierr)
end