| /*************************************************************************/ |
| /* */ |
| /* Copyright (c) 1994 Stanford University */ |
| /* */ |
| /* All rights reserved. */ |
| /* */ |
| /* Permission is given to use, copy, and modify this software for any */ |
| /* non-commercial purpose as long as this copyright notice is not */ |
| /* removed. All other uses, including redistribution in whole or in */ |
| /* part, are forbidden without prior written permission. */ |
| /* */ |
| /* This software is provided with absolutely no warranty and no */ |
| /* support. */ |
| /* */ |
| /*************************************************************************/ |
| |
| /*************************************************************************/ |
| /* */ |
| /* SPLASH Ocean Code */ |
| /* */ |
| /* This application studies the role of eddy and boundary currents in */ |
| /* influencing large-scale ocean movements. This implementation uses */ |
| /* dynamically allocated four-dimensional arrays for grid data storage. */ |
| /* */ |
| /* Command line options: */ |
| /* */ |
| /* -nN : Simulate NxN ocean. N must be (power of 2)+2. */ |
| /* -pP : P = number of processors. P must be power of 2. */ |
| /* -eE : E = error tolerance for iterative relaxation. */ |
| /* -rR : R = distance between grid points in meters. */ |
| /* -tT : T = timestep in seconds. */ |
| /* -s : Print timing statistics. */ |
| /* -o : Print out relaxation residual values. */ |
| /* -h : Print out command line options. */ |
| /* */ |
| /* Default: OCEAN -n130 -p1 -e1e-7 -r20000.0 -t28800.0 */ |
| /* */ |
| /* NOTE: This code works under both the FORK and SPROC models. */ |
| /* */ |
| /*************************************************************************/ |
| |
| #ifdef ENABLE_PARSEC_HOOKS |
| #include <hooks.h> |
| #endif |
| MAIN_ENV |
| |
| #define DEFAULT_N 258 |
| #define DEFAULT_P 1 |
| #define DEFAULT_E 1e-7 |
| #define DEFAULT_T 28800.0 |
| #define DEFAULT_R 20000.0 |
| #define UP 0 |
| #define DOWN 1 |
| #define LEFT 2 |
| #define RIGHT 3 |
| #define UPLEFT 4 |
| #define UPRIGHT 5 |
| #define DOWNLEFT 6 |
| #define DOWNRIGHT 7 |
| #define PAGE_SIZE 4096 |
| |
| #include <stdio.h> |
| #include <math.h> |
| #include <time.h> |
| #include <stdlib.h> |
| #include "decs.h" |
| |
| struct multi_struct *multi; |
| struct global_struct *global; |
| struct locks_struct *locks; |
| struct bars_struct *bars; |
| |
| double ****psi; |
| double ****psim; |
| double ***psium; |
| double ***psilm; |
| double ***psib; |
| double ***ga; |
| double ***gb; |
| double ****work1; |
| double ***work2; |
| double ***work3; |
| double ****work4; |
| double ****work5; |
| double ***work6; |
| double ****work7; |
| double ****temparray; |
| double ***tauz; |
| double ***oldga; |
| double ***oldgb; |
| double *f; |
| double ****q_multi; |
| double ****rhs_multi; |
| |
| long nprocs = DEFAULT_P; |
| double h1 = 1000.0; |
| double h3 = 4000.0; |
| double h = 5000.0; |
| double lf = -5.12e11; |
| double res = DEFAULT_R; |
| double dtau = DEFAULT_T; |
| double f0 = 8.3e-5; |
| double beta = 2.0e-11; |
| double gpr = 0.02; |
| long im = DEFAULT_N; |
| long jm; |
| double tolerance = DEFAULT_E; |
| double eig2; |
| double ysca; |
| long jmm1; |
| double pi; |
| double t0 = 0.5e-4 ; |
| double outday0 = 1.0; |
| double outday1 = 2.0; |
| double outday2 = 2.0; |
| double outday3 = 2.0; |
| double factjacob; |
| double factlap; |
| long numlev; |
| long *imx; |
| long *jmx; |
| double *lev_res; |
| double *lev_tol; |
| double maxwork = 10000.0; |
| |
| struct Global_Private *gp; |
| |
| double *i_int_coeff; |
| double *j_int_coeff; |
| long xprocs; |
| long yprocs; |
| long *xpts_per_proc; |
| long *ypts_per_proc; |
| long minlevel; |
| long do_stats = 0; |
| long do_output = 0; |
| |
| int main(int argc, char *argv[]) |
| { |
| #ifdef ENABLE_PARSEC_HOOKS |
| __parsec_bench_begin (__splash2_ocean_cp); |
| #endif |
| long i; |
| long j; |
| long k; |
| long x_part; |
| long y_part; |
| long d_size; |
| long itemp; |
| long jtemp; |
| double procsqrt; |
| long temp = 0; |
| double min_total; |
| double max_total; |
| double avg_total; |
| double min_multi; |
| double max_multi; |
| double avg_multi; |
| double min_frac; |
| double max_frac; |
| double avg_frac; |
| long ch; |
| extern char *optarg; |
| unsigned long computeend; |
| unsigned long start; |
| |
| CLOCK(start) |
| |
| while ((ch = getopt(argc, argv, "n:p:e:r:t:soh")) != -1) { |
| switch(ch) { |
| case 'n': im = atoi(optarg); |
| if (log_2(im-2) == -1) { |
| printerr("Grid must be ((power of 2)+2) in each dimension\n"); |
| exit(-1); |
| } |
| break; |
| case 'p': nprocs = atoi(optarg); |
| if (nprocs < 1) { |
| printerr("P must be >= 1\n"); |
| exit(-1); |
| } |
| if (log_2(nprocs) == -1) { |
| printerr("P must be a power of 2\n"); |
| exit(-1); |
| } |
| break; |
| case 'e': tolerance = atof(optarg); break; |
| case 'r': res = atof(optarg); break; |
| case 't': dtau = atof(optarg); break; |
| case 's': do_stats = !do_stats; break; |
| case 'o': do_output = !do_output; break; |
| case 'h': printf("Usage: OCEAN <options>\n\n"); |
| printf("options:\n"); |
| printf(" -nN : Simulate NxN ocean. N must be (power of 2)+2.\n"); |
| printf(" -pP : P = number of processors. P must be power of 2.\n"); |
| printf(" -eE : E = error tolerance for iterative relaxation.\n"); |
| printf(" -rR : R = distance between grid points in meters.\n"); |
| printf(" -tT : T = timestep in seconds.\n"); |
| printf(" -s : Print timing statistics.\n"); |
| printf(" -o : Print out relaxation residual values.\n"); |
| printf(" -h : Print out command line options.\n\n"); |
| printf("Default: OCEAN -n%1d -p%1d -e%1g -r%1g -t%1g\n", |
| DEFAULT_N,DEFAULT_P,DEFAULT_E,DEFAULT_R,DEFAULT_T); |
| exit(0); |
| break; |
| } |
| } |
| |
| MAIN_INITENV(,60000000) |
| |
| jm = im; |
| printf("\n"); |
| printf("Ocean simulation with W-cycle multigrid solver\n"); |
| printf(" Processors : %1ld\n",nprocs); |
| printf(" Grid size : %1ld x %1ld\n",im,jm); |
| printf(" Grid resolution (meters) : %0.2f\n",res); |
| printf(" Time between relaxations (seconds) : %0.0f\n",dtau); |
| printf(" Error tolerance : %0.7g\n",tolerance); |
| printf("\n"); |
| |
| xprocs = 0; |
| yprocs = 0; |
| procsqrt = sqrt((double) nprocs); |
| j = (long) procsqrt; |
| while ((xprocs == 0) && (j > 0)) { |
| k = nprocs / j; |
| if (k * j == nprocs) { |
| if (k > j) { |
| xprocs = j; |
| yprocs = k; |
| } else { |
| xprocs = k; |
| yprocs = j; |
| } |
| } |
| j--; |
| } |
| if (xprocs == 0) { |
| printerr("Could not find factors for subblocking\n"); |
| exit(-1); |
| } |
| |
| minlevel = 0; |
| itemp = 1; |
| jtemp = 1; |
| numlev = 0; |
| minlevel = 0; |
| while (itemp < (im-2)) { |
| itemp = itemp*2; |
| jtemp = jtemp*2; |
| if ((itemp/yprocs > 1) && (jtemp/xprocs > 1)) { |
| numlev++; |
| } |
| } |
| |
| if (numlev == 0) { |
| printerr("Must have at least 2 grid points per processor in each dimension\n"); |
| exit(-1); |
| } |
| |
| imx = (long *) G_MALLOC(numlev*sizeof(long)); |
| jmx = (long *) G_MALLOC(numlev*sizeof(long)); |
| lev_res = (double *) G_MALLOC(numlev*sizeof(double)); |
| lev_tol = (double *) G_MALLOC(numlev*sizeof(double)); |
| i_int_coeff = (double *) G_MALLOC(numlev*sizeof(double)); |
| j_int_coeff = (double *) G_MALLOC(numlev*sizeof(double)); |
| xpts_per_proc = (long *) G_MALLOC(numlev*sizeof(long)); |
| ypts_per_proc = (long *) G_MALLOC(numlev*sizeof(long)); |
| |
| imx[numlev-1] = im; |
| jmx[numlev-1] = jm; |
| lev_res[numlev-1] = res; |
| lev_tol[numlev-1] = tolerance; |
| |
| for (i=numlev-2;i>=0;i--) { |
| imx[i] = ((imx[i+1] - 2) / 2) + 2; |
| jmx[i] = ((jmx[i+1] - 2) / 2) + 2; |
| lev_res[i] = lev_res[i+1] * 2; |
| } |
| |
| for (i=0;i<numlev;i++) { |
| xpts_per_proc[i] = (jmx[i]-2) / xprocs; |
| ypts_per_proc[i] = (imx[i]-2) / yprocs; |
| } |
| for (i=numlev-1;i>=0;i--) { |
| if ((xpts_per_proc[i] < 2) || (ypts_per_proc[i] < 2)) { |
| minlevel = i+1; |
| break; |
| } |
| } |
| |
| for (i=0;i<numlev;i++) { |
| temp += imx[i]; |
| } |
| temp = 0; |
| j = 0; |
| for (k=0;k<numlev;k++) { |
| for (i=0;i<imx[k];i++) { |
| j++; |
| temp += jmx[k]; |
| } |
| } |
| |
| d_size = nprocs*sizeof(double ***); |
| psi = (double ****) G_MALLOC(d_size); |
| psim = (double ****) G_MALLOC(d_size); |
| work1 = (double ****) G_MALLOC(d_size); |
| work4 = (double ****) G_MALLOC(d_size); |
| work5 = (double ****) G_MALLOC(d_size); |
| work7 = (double ****) G_MALLOC(d_size); |
| temparray = (double ****) G_MALLOC(d_size); |
| |
| d_size = 2*sizeof(double **); |
| for (i=0;i<nprocs;i++) { |
| psi[i] = (double ***) G_MALLOC(d_size); |
| psim[i] = (double ***) G_MALLOC(d_size); |
| work1[i] = (double ***) G_MALLOC(d_size); |
| work4[i] = (double ***) G_MALLOC(d_size); |
| work5[i] = (double ***) G_MALLOC(d_size); |
| work7[i] = (double ***) G_MALLOC(d_size); |
| temparray[i] = (double ***) G_MALLOC(d_size); |
| } |
| |
| d_size = nprocs*sizeof(double **); |
| psium = (double ***) G_MALLOC(d_size); |
| psilm = (double ***) G_MALLOC(d_size); |
| psib = (double ***) G_MALLOC(d_size); |
| ga = (double ***) G_MALLOC(d_size); |
| gb = (double ***) G_MALLOC(d_size); |
| work2 = (double ***) G_MALLOC(d_size); |
| work3 = (double ***) G_MALLOC(d_size); |
| work6 = (double ***) G_MALLOC(d_size); |
| tauz = (double ***) G_MALLOC(d_size); |
| oldga = (double ***) G_MALLOC(d_size); |
| oldgb = (double ***) G_MALLOC(d_size); |
| |
| gp = (struct Global_Private *) G_MALLOC((nprocs+1)*sizeof(struct Global_Private)); |
| for (i=0;i<nprocs;i++) { |
| gp[i].rel_num_x = (long *) G_MALLOC(numlev*sizeof(long)); |
| gp[i].rel_num_y = (long *) G_MALLOC(numlev*sizeof(long)); |
| gp[i].eist = (long *) G_MALLOC(numlev*sizeof(long)); |
| gp[i].ejst = (long *) G_MALLOC(numlev*sizeof(long)); |
| gp[i].oist = (long *) G_MALLOC(numlev*sizeof(long)); |
| gp[i].ojst = (long *) G_MALLOC(numlev*sizeof(long)); |
| gp[i].rlist = (long *) G_MALLOC(numlev*sizeof(long)); |
| gp[i].rljst = (long *) G_MALLOC(numlev*sizeof(long)); |
| gp[i].rlien = (long *) G_MALLOC(numlev*sizeof(long)); |
| gp[i].rljen = (long *) G_MALLOC(numlev*sizeof(long)); |
| gp[i].multi_time = 0; |
| gp[i].total_time = 0; |
| } |
| |
| subblock(); |
| |
| x_part = (jm - 2)/xprocs + 2; |
| y_part = (im - 2)/yprocs + 2; |
| |
| d_size = x_part*y_part*sizeof(double) + y_part*sizeof(double *); |
| |
| global = (struct global_struct *) G_MALLOC(sizeof(struct global_struct)); |
| for (i=0;i<nprocs;i++) { |
| psi[i][0] = (double **) G_MALLOC(d_size); |
| psi[i][1] = (double **) G_MALLOC(d_size); |
| psim[i][0] = (double **) G_MALLOC(d_size); |
| psim[i][1] = (double **) G_MALLOC(d_size); |
| psium[i] = (double **) G_MALLOC(d_size); |
| psilm[i] = (double **) G_MALLOC(d_size); |
| psib[i] = (double **) G_MALLOC(d_size); |
| ga[i] = (double **) G_MALLOC(d_size); |
| gb[i] = (double **) G_MALLOC(d_size); |
| work1[i][0] = (double **) G_MALLOC(d_size); |
| work1[i][1] = (double **) G_MALLOC(d_size); |
| work2[i] = (double **) G_MALLOC(d_size); |
| work3[i] = (double **) G_MALLOC(d_size); |
| work4[i][0] = (double **) G_MALLOC(d_size); |
| work4[i][1] = (double **) G_MALLOC(d_size); |
| work5[i][0] = (double **) G_MALLOC(d_size); |
| work5[i][1] = (double **) G_MALLOC(d_size); |
| work6[i] = (double **) G_MALLOC(d_size); |
| work7[i][0] = (double **) G_MALLOC(d_size); |
| work7[i][1] = (double **) G_MALLOC(d_size); |
| temparray[i][0] = (double **) G_MALLOC(d_size); |
| temparray[i][1] = (double **) G_MALLOC(d_size); |
| tauz[i] = (double **) G_MALLOC(d_size); |
| oldga[i] = (double **) G_MALLOC(d_size); |
| oldgb[i] = (double **) G_MALLOC(d_size); |
| } |
| f = (double *) G_MALLOC(im*sizeof(double)); |
| |
| multi = (struct multi_struct *) G_MALLOC(sizeof(struct multi_struct)); |
| |
| d_size = numlev*sizeof(double **); |
| if (numlev%2 == 1) { /* To make sure that the actual data |
| starts double word aligned, add an extra |
| pointer */ |
| d_size += sizeof(double **); |
| } |
| for (i=0;i<numlev;i++) { |
| d_size += ((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+ |
| ((imx[i]-2)/yprocs+2)*sizeof(double *); |
| } |
| |
| d_size *= nprocs; |
| |
| if (nprocs%2 == 1) { /* To make sure that the actual data |
| starts double word aligned, add an extra |
| pointer */ |
| d_size += sizeof(double ***); |
| } |
| |
| d_size += nprocs*sizeof(double ***); |
| q_multi = (double ****) G_MALLOC(d_size); |
| rhs_multi = (double ****) G_MALLOC(d_size); |
| |
| locks = (struct locks_struct *) G_MALLOC(sizeof(struct locks_struct)); |
| bars = (struct bars_struct *) G_MALLOC(sizeof(struct bars_struct)); |
| |
| LOCKINIT(locks->idlock) |
| LOCKINIT(locks->psiailock) |
| LOCKINIT(locks->psibilock) |
| LOCKINIT(locks->donelock) |
| LOCKINIT(locks->error_lock) |
| LOCKINIT(locks->bar_lock) |
| |
| #if defined(MULTIPLE_BARRIERS) |
| BARINIT(bars->iteration, nprocs) |
| BARINIT(bars->gsudn, nprocs) |
| BARINIT(bars->p_setup, nprocs) |
| BARINIT(bars->p_redph, nprocs) |
| BARINIT(bars->p_soln, nprocs) |
| BARINIT(bars->p_subph, nprocs) |
| BARINIT(bars->sl_prini, nprocs) |
| BARINIT(bars->sl_psini, nprocs) |
| BARINIT(bars->sl_onetime, nprocs) |
| BARINIT(bars->sl_phase_1, nprocs) |
| BARINIT(bars->sl_phase_2, nprocs) |
| BARINIT(bars->sl_phase_3, nprocs) |
| BARINIT(bars->sl_phase_4, nprocs) |
| BARINIT(bars->sl_phase_5, nprocs) |
| BARINIT(bars->sl_phase_6, nprocs) |
| BARINIT(bars->sl_phase_7, nprocs) |
| BARINIT(bars->sl_phase_8, nprocs) |
| BARINIT(bars->sl_phase_9, nprocs) |
| BARINIT(bars->sl_phase_10, nprocs) |
| BARINIT(bars->error_barrier, nprocs) |
| #else |
| BARINIT(bars->barrier, nprocs) |
| #endif |
| |
| link_all(); |
| |
| multi->err_multi = 0.0; |
| i_int_coeff[0] = 0.0; |
| j_int_coeff[0] = 0.0; |
| for (i=0;i<numlev;i++) { |
| i_int_coeff[i] = 1.0/(imx[i]-1); |
| j_int_coeff[i] = 1.0/(jmx[i]-1); |
| } |
| |
| /* initialize constants and variables |
| |
| id is a global shared variable that has fetch-and-add operations |
| performed on it by processes to obtain their pids. */ |
| |
| global->id = 0; |
| global->psibi = 0.0; |
| pi = atan(1.0); |
| pi = 4.*pi; |
| |
| factjacob = -1./(12.*res*res); |
| factlap = 1./(res*res); |
| eig2 = -h*f0*f0/(h1*h3*gpr); |
| |
| jmm1 = jm-1 ; |
| ysca = ((double) jmm1)*res ; |
| |
| im = (imx[numlev-1]-2)/yprocs + 2; |
| jm = (jmx[numlev-1]-2)/xprocs + 2; |
| |
| if (do_output) { |
| printf(" MULTIGRID OUTPUTS\n"); |
| } |
| |
| #ifdef ENABLE_PARSEC_HOOKS |
| __parsec_roi_begin(); |
| #endif |
| CREATE(slave, nprocs); |
| WAIT_FOR_END(nprocs); |
| #ifdef ENABLE_PARSEC_HOOKS |
| __parsec_roi_end(); |
| #endif |
| CLOCK(computeend) |
| |
| printf("\n"); |
| printf(" PROCESS STATISTICS\n"); |
| printf(" Total Multigrid Multigrid\n"); |
| printf(" Proc Time Time Fraction\n"); |
| printf(" 0 %15.0f %15.0f %10.3f\n", gp[0].total_time,gp[0].multi_time, gp[0].multi_time/gp[0].total_time); |
| |
| if (do_stats) { |
| min_total = max_total = avg_total = gp[0].total_time; |
| min_multi = max_multi = avg_multi = gp[0].multi_time; |
| min_frac = max_frac = avg_frac = gp[0].multi_time/gp[0].total_time; |
| for (i=1;i<nprocs;i++) { |
| if (gp[i].total_time > max_total) { |
| max_total = gp[i].total_time; |
| } |
| if (gp[i].total_time < min_total) { |
| min_total = gp[i].total_time; |
| } |
| if (gp[i].multi_time > max_multi) { |
| max_multi = gp[i].multi_time; |
| } |
| if (gp[i].multi_time < min_multi) { |
| min_multi = gp[i].multi_time; |
| } |
| if (gp[i].multi_time/gp[i].total_time > max_frac) { |
| max_frac = gp[i].multi_time/gp[i].total_time; |
| } |
| if (gp[i].multi_time/gp[i].total_time < min_frac) { |
| min_frac = gp[i].multi_time/gp[i].total_time; |
| } |
| avg_total += gp[i].total_time; |
| avg_multi += gp[i].multi_time; |
| avg_frac += gp[i].multi_time/gp[i].total_time; |
| } |
| avg_total = avg_total / nprocs; |
| avg_multi = avg_multi / nprocs; |
| avg_frac = avg_frac / nprocs; |
| for (i=1;i<nprocs;i++) { |
| printf(" %3ld %15.0f %15.0f %10.3f\n", i,gp[i].total_time,gp[i].multi_time, gp[i].multi_time/gp[i].total_time); |
| } |
| printf(" Avg %15.0f %15.0f %10.3f\n", avg_total,avg_multi,avg_frac); |
| printf(" Min %15.0f %15.0f %10.3f\n", min_total,min_multi,min_frac); |
| printf(" Max %15.0f %15.0f %10.3f\n", max_total,max_multi,max_frac); |
| } |
| printf("\n"); |
| |
| global->starttime = start; |
| printf(" TIMING INFORMATION\n"); |
| printf("Start time : %16lu\n", global->starttime); |
| printf("Initialization finish time : %16lu\n", global->trackstart); |
| printf("Overall finish time : %16lu\n", computeend); |
| printf("Total time with initialization : %16lu\n", computeend-global->starttime); |
| printf("Total time without initialization : %16lu\n", computeend-global->trackstart); |
| printf(" (excludes first timestep)\n"); |
| printf("\n"); |
| |
| MAIN_END |
| #ifdef ENABLE_PARSEC_HOOKS |
| __parsec_bench_end(); |
| #endif |
| } |
| |
| long log_2(long number) |
| { |
| long cumulative = 1; |
| long out = 0; |
| long done = 0; |
| |
| while ((cumulative < number) && (!done) && (out < 50)) { |
| if (cumulative == number) { |
| done = 1; |
| } else { |
| cumulative = cumulative * 2; |
| out ++; |
| } |
| } |
| |
| if (cumulative == number) { |
| return(out); |
| } else { |
| return(-1); |
| } |
| } |
| |
| void printerr(char *s) |
| { |
| fprintf(stderr,"ERROR: %s\n",s); |
| } |
| |