| /*************************************************************************/ |
| /* */ |
| /* SPLASH Ocean Code */ |
| /* */ |
| /* This application studies the role of eddy and boundary currents in */ |
| /* influencing large-scale ocean movements. This implementation uses */ |
| /* statically allocated two-dimensional arrays for grid data storage. */ |
| /* */ |
| /* Command line options: */ |
| /* */ |
| /* -nN : Simulate NxN ocean. N must be (power of 2)+2. */ |
| /* -pP : P = number of processors. P must be power of 2. */ |
| /* -eE : E = error tolerance for iterative relaxation. */ |
| /* -rR : R = distance between grid points in meters. */ |
| /* -tT : T = timestep in seconds. */ |
| /* -s : Print timing statistics. */ |
| /* -o : Print out relaxation residual values. */ |
| /* -h : Print out command line options. */ |
| /* */ |
| /* Default: OCEAN -n130 -p1 -e1e-7 -r20000.0 -t28800.0 */ |
| /* */ |
| /* NOTE: This code works under both the FORK and SPROC models. */ |
| /* */ |
| /*************************************************************************/ |
| |
| #include <stdio.h> |
| #include <math.h> |
| #include <time.h> |
| #include <stdlib.h> |
| #include "decs.h" |
| |
| #ifdef ENABLE_PARSEC_HOOKS |
| #include <hooks.h> |
| #endif |
| MAIN_ENV |
| |
| #define DEFAULT_N 258 |
| #define DEFAULT_P 1 |
| #define DEFAULT_E 1e-7 |
| #define DEFAULT_T 28800.0 |
| #define DEFAULT_R 20000.0 |
| #define PAGE_SIZE 4096 |
| |
| struct global_struct *global; |
| struct fields_struct *fields; |
| struct fields2_struct *fields2; |
| struct wrk1_struct *wrk1; |
| struct wrk3_struct *wrk3; |
| struct wrk2_struct *wrk2; |
| struct wrk4_struct *wrk4; |
| struct wrk6_struct *wrk6; |
| struct wrk5_struct *wrk5; |
| struct frcng_struct *frcng; |
| struct iter_struct *iter; |
| struct guess_struct *guess; |
| struct multi_struct *multi; |
| struct locks_struct *locks; |
| struct bars_struct *bars; |
| |
| long startcol[2][INPROCS]; |
| long nprocs = DEFAULT_P; |
| long startrow[2][INPROCS]; |
| double h1 = 1000.0; |
| double h3 = 4000.0; |
| double h = 5000.0; |
| double lf = -5.12e11; |
| double eps = 0; |
| double res = DEFAULT_R; |
| double dtau = DEFAULT_T; |
| double f0 = 8.3e-5; |
| double beta = 2.0e-11; |
| double gpr = 0.02; |
| long im = DEFAULT_N; |
| long jm; |
| double tolerance = DEFAULT_E; |
| double eig2; |
| double ysca; |
| long jmm1; |
| double pi; |
| double t0 = 0.5e-4 ; |
| double outday0 = 1.0; |
| double outday1 = 2.0; |
| double outday2 = 2.0; |
| double outday3 = 2.0; |
| double factjacob; |
| double factlap; |
| long numlev; |
| long minlev; |
| long imx[MAX_LEVELS]; |
| long jmx[MAX_LEVELS]; |
| double lev_res[MAX_LEVELS]; |
| double lev_tol[MAX_LEVELS]; |
| double maxwork = 10000.0; |
| |
| struct Global_Private *gp; |
| |
| double i_int_coeff[MAX_LEVELS]; |
| double j_int_coeff[MAX_LEVELS]; |
| long xprocs; |
| long yprocs; |
| long do_stats = 0; |
| long do_output = 0; |
| |
| int main(int argc, char *argv[]) |
| { |
| #ifdef ENABLE_PARSEC_HOOKS |
| __parsec_bench_begin (__splash2_ocean_ncp); |
| #endif |
| long i; |
| long j; |
| long xextra; |
| long xportion; |
| long yextra; |
| long yportion; |
| long lower; |
| double procsqrt; |
| long k; |
| long logtest; |
| long my_num; |
| unsigned long computeend; |
| double min_total; |
| double max_total; |
| double avg_total; |
| double min_multi; |
| double max_multi; |
| double avg_multi; |
| double min_frac; |
| double max_frac; |
| double avg_frac; |
| extern char *optarg; |
| long ch; |
| unsigned long start; |
| double *tmp_ptr; |
| |
| CLOCK(start) |
| |
| while ((ch = getopt(argc, argv, "n:p:e:r:t:soh")) != -1) { |
| switch(ch) { |
| case 'n': im = atoi(optarg); |
| if (im > IMAX) { |
| printerr("Max grid size exceeded\n"); |
| exit(-1); |
| } |
| if (log_2(im-2) == -1) { |
| printerr("Grid must be ((power of 2)+2) in each dimension\n"); |
| exit(-1); |
| } |
| break; |
| case 'p': nprocs = atoi(optarg); |
| if (nprocs < 1) { |
| printerr("P must be >= 1\n"); |
| exit(-1); |
| } |
| if (log_2(nprocs) == -1) { |
| printerr("P must be a power of 2\n"); |
| exit(-1); |
| } |
| break; |
| case 'e': tolerance = atof(optarg); break; |
| case 'r': res = atof(optarg); break; |
| case 't': dtau = atof(optarg); break; |
| case 's': do_stats = !do_stats; break; |
| case 'o': do_output = !do_output; break; |
| case 'h': printf("Usage: OCEAN <options>\n\n"); |
| printf("options:\n"); |
| printf(" -nN : Simulate NxN ocean. N must be (power of 2)+2.\n"); |
| printf(" -pP : P = number of processors. P must be power of 2.\n"); |
| printf(" -eE : E = error tolerance for iterative relaxation.\n"); |
| printf(" -rR : R = distance between grid points in meters.\n"); |
| printf(" -tT : T = timestep in seconds.\n"); |
| printf(" -s : Print timing statistics.\n"); |
| printf(" -o : Print out relaxation residual values.\n"); |
| printf(" -h : Print out command line options.\n\n"); |
| printf("Default: OCEAN -n%1d -p%1d -e%1g -r%1g -t%1g\n", |
| DEFAULT_N,DEFAULT_P,DEFAULT_E,DEFAULT_R,DEFAULT_T); |
| exit(0); |
| break; |
| } |
| } |
| |
| MAIN_INITENV(,60000000) |
| |
| |
| |
| logtest = im-2; |
| numlev = 1; |
| while (logtest != 1) { |
| if (logtest%2 != 0) { |
| printerr("Cannot determine number of multigrid levels\n"); |
| exit(-1); |
| } |
| logtest = logtest / 2; |
| numlev++; |
| } |
| |
| if (numlev > MAX_LEVELS) { |
| printerr("Max grid levels exceeded for multigrid\n"); |
| exit(-1); |
| } |
| |
| jm = im; |
| printf("\n"); |
| printf("Ocean simulation with W-cycle multigrid solver\n"); |
| printf(" Processors : %1ld\n",nprocs); |
| printf(" Grid size : %1ld x %1ld\n",im,jm); |
| printf(" Grid resolution (meters) : %0.2f\n",res); |
| printf(" Time between relaxations (seconds) : %0.0f\n",dtau); |
| printf(" Error tolerance : %0.7g\n",tolerance); |
| printf("\n"); |
| |
| gp = (struct Global_Private *) G_MALLOC((nprocs+1)*sizeof(struct Global_Private)); |
| for (i=0;i<nprocs;i++) { |
| gp[i].multi_time = 0; |
| gp[i].total_time = 0; |
| } |
| global = (struct global_struct *) G_MALLOC(sizeof(struct global_struct)); |
| fields = (struct fields_struct *) G_MALLOC(sizeof(struct fields_struct)); |
| fields2 = (struct fields2_struct *) G_MALLOC(sizeof(struct fields2_struct)); |
| wrk1 = (struct wrk1_struct *) G_MALLOC(sizeof(struct wrk1_struct)); |
| wrk3 = (struct wrk3_struct *) G_MALLOC(sizeof(struct wrk3_struct)); |
| wrk2 = (struct wrk2_struct *) G_MALLOC(sizeof(struct wrk2_struct)); |
| wrk4 = (struct wrk4_struct *) G_MALLOC(sizeof(struct wrk4_struct)); |
| wrk6 = (struct wrk6_struct *) G_MALLOC(sizeof(struct wrk6_struct)); |
| wrk5 = (struct wrk5_struct *) G_MALLOC(sizeof(struct wrk5_struct)); |
| frcng = (struct frcng_struct *) G_MALLOC(sizeof(struct frcng_struct)); |
| iter = (struct iter_struct *) G_MALLOC(sizeof(struct iter_struct)); |
| guess = (struct guess_struct *) G_MALLOC(sizeof(struct guess_struct)); |
| multi = (struct multi_struct *) G_MALLOC(sizeof(struct multi_struct)); |
| locks = (struct locks_struct *) G_MALLOC(sizeof(struct locks_struct)); |
| bars = (struct bars_struct *) G_MALLOC(sizeof(struct bars_struct)); |
| |
| LOCKINIT(locks->idlock) |
| LOCKINIT(locks->psiailock) |
| LOCKINIT(locks->psibilock) |
| LOCKINIT(locks->donelock) |
| LOCKINIT(locks->error_lock) |
| LOCKINIT(locks->bar_lock) |
| |
| #if defined(MULTIPLE_BARRIERS) |
| BARINIT(bars->iteration, nprocs) |
| BARINIT(bars->gsudn, nprocs) |
| BARINIT(bars->p_setup, nprocs) |
| BARINIT(bars->p_redph, nprocs) |
| BARINIT(bars->p_soln, nprocs) |
| BARINIT(bars->p_subph, nprocs) |
| BARINIT(bars->sl_prini, nprocs) |
| BARINIT(bars->sl_psini, nprocs) |
| BARINIT(bars->sl_onetime, nprocs) |
| BARINIT(bars->sl_phase_1, nprocs) |
| BARINIT(bars->sl_phase_2, nprocs) |
| BARINIT(bars->sl_phase_3, nprocs) |
| BARINIT(bars->sl_phase_4, nprocs) |
| BARINIT(bars->sl_phase_5, nprocs) |
| BARINIT(bars->sl_phase_6, nprocs) |
| BARINIT(bars->sl_phase_7, nprocs) |
| BARINIT(bars->sl_phase_8, nprocs) |
| BARINIT(bars->sl_phase_9, nprocs) |
| BARINIT(bars->sl_phase_10, nprocs) |
| BARINIT(bars->error_barrier, nprocs) |
| #else |
| BARINIT(bars->barrier, nprocs) |
| #endif |
| |
| imx[numlev-1] = im; |
| jmx[numlev-1] = jm; |
| lev_res[numlev-1] = res; |
| lev_tol[numlev-1] = tolerance; |
| multi->err_multi = 0.0; |
| multi->numspin = 0; |
| for (i=0;i<nprocs;i++) { |
| multi->spinflag[i] = 0; |
| } |
| |
| for (i=numlev-2;i>=0;i--) { |
| imx[i] = ((imx[i+1] - 2) / 2) + 2; |
| jmx[i] = ((jmx[i+1] - 2) / 2) + 2; |
| lev_res[i] = lev_res[i+1] * 2; |
| } |
| |
| xprocs = 0; |
| yprocs = 0; |
| procsqrt = sqrt((double) nprocs); |
| j = (long) procsqrt; |
| while ((xprocs == 0) && (j > 0)) { |
| k = nprocs / j; |
| if (k * j == nprocs) { |
| if (k > j) { |
| xprocs = j; |
| yprocs = k; |
| } else { |
| xprocs = k; |
| yprocs = j; |
| } |
| } |
| j--; |
| } |
| if (xprocs == 0) { |
| printerr("Could not find factors for subblocking\n"); |
| exit(-1); |
| } |
| |
| /* Determine starting coord and number of points to process in */ |
| /* each direction */ |
| |
| for (i=0;i<numlev;i++) { |
| xportion = (jmx[i] - 2) / xprocs; |
| xextra = (jmx[i] - 2) % xprocs; |
| for (j=0;j<xprocs;j++) { |
| if (xextra == 0) { |
| for (k=0;k<yprocs;k++) { |
| gp[k*xprocs+j].rel_start_x[i] = j * xportion + 1; |
| gp[k*xprocs+j].rel_num_x[i] = xportion; |
| } |
| } else { |
| if (j + 1 > xextra) { |
| for (k=0;k<yprocs;k++) { |
| lower = xextra * (xportion + 1); |
| gp[k*xprocs+j].rel_start_x[i] = lower + (j - xextra) * xportion + 1; |
| gp[k*xprocs+j].rel_num_x[i] = xportion; |
| } |
| } else { |
| for (k=0;k<yprocs;k++) { |
| gp[k*xprocs+j].rel_start_x[i] = j * (xportion + 1) + 1; |
| gp[k*xprocs+j].rel_num_x[i] = xportion + 1; |
| } |
| } |
| } |
| } |
| yportion = (imx[i] - 2) / yprocs; |
| yextra = (imx[i] - 2) % yprocs; |
| for (j=0;j<yprocs;j++) { |
| if (yextra == 0) { |
| for (k=0;k<xprocs;k++) { |
| gp[j*xprocs+k].rel_start_y[i] = j * yportion + 1; |
| gp[j*xprocs+k].rel_num_y[i] = yportion; |
| } |
| } else { |
| if (j + 1 > yextra) { |
| for (k=0;k<xprocs;k++) { |
| lower = yextra * (yportion + 1); |
| gp[j*xprocs+k].rel_start_y[i] = lower + (j - yextra) * yportion + 1; |
| gp[j*xprocs+k].rel_num_y[i] = yportion; |
| } |
| } else { |
| for (k=0;k<xprocs;k++) { |
| gp[j*xprocs+k].rel_start_y[i] = j * (yportion + 1) + 1; |
| gp[j*xprocs+k].rel_num_y[i] = yportion + 1; |
| } |
| } |
| } |
| } |
| } |
| |
| i_int_coeff[0] = 0.0; |
| j_int_coeff[0] = 0.0; |
| for (i=0;i<numlev;i++) { |
| i_int_coeff[i] = 1.0/(imx[i]-1); |
| j_int_coeff[i] = 1.0/(jmx[i]-1); |
| } |
| |
| for (my_num=0;my_num<nprocs;my_num++) { |
| for (i=0;i<numlev;i++) { |
| gp[my_num].rlist[i] = gp[my_num].rel_start_y[i]; |
| gp[my_num].rljst[i] = gp[my_num].rel_start_x[i]; |
| gp[my_num].rlien[i] = gp[my_num].rlist[i] + gp[my_num].rel_num_y[i] - 1; |
| gp[my_num].rljen[i] = gp[my_num].rljst[i] + gp[my_num].rel_num_x[i] - 1; |
| gp[my_num].iist[i] = gp[my_num].rel_start_y[i]; |
| gp[my_num].ijst[i] = gp[my_num].rel_start_x[i]; |
| gp[my_num].iien[i] = gp[my_num].iist[i] + gp[my_num].rel_num_y[i] - 1; |
| gp[my_num].ijen[i] = gp[my_num].ijst[i] + gp[my_num].rel_num_x[i] - 1; |
| gp[my_num].pist[i] = gp[my_num].rel_start_y[i]; |
| gp[my_num].pjst[i] = gp[my_num].rel_start_x[i]; |
| gp[my_num].pien[i] = gp[my_num].pist[i] + gp[my_num].rel_num_y[i] - 1; |
| gp[my_num].pjen[i] = gp[my_num].pjst[i] + gp[my_num].rel_num_x[i] - 1; |
| |
| if (gp[my_num].pist[i] == 1) { |
| gp[my_num].pist[i] = 0; |
| } |
| if (gp[my_num].pjst[i] == 1) { |
| gp[my_num].pjst[i] = 0; |
| } |
| if (gp[my_num].pien[i] == imx[i] - 2) { |
| gp[my_num].pien[i] = imx[i]-1; |
| } |
| if (gp[my_num].pjen[i] == jmx[i] - 2) { |
| gp[my_num].pjen[i] = jmx[i]-1; |
| } |
| |
| if (gp[my_num].rlist[i] % 2 == 0) { |
| gp[my_num].eist[i] = gp[my_num].rlist[i]; |
| gp[my_num].oist[i] = gp[my_num].rlist[i] + 1; |
| } else { |
| gp[my_num].eist[i] = gp[my_num].rlist[i] + 1; |
| gp[my_num].oist[i] = gp[my_num].rlist[i]; |
| } |
| if (gp[my_num].rljst[i] % 2 == 0) { |
| gp[my_num].ejst[i] = gp[my_num].rljst[i]; |
| gp[my_num].ojst[i] = gp[my_num].rljst[i] + 1; |
| } else { |
| gp[my_num].ejst[i] = gp[my_num].rljst[i] + 1; |
| gp[my_num].ojst[i] = gp[my_num].rljst[i]; |
| } |
| if (gp[my_num].rlien[i] == imx[i]-2) { |
| gp[my_num].rlien[i] = gp[my_num].rlien[i] - 1; |
| if (gp[my_num].rlien[i] % 2 == 0) { |
| gp[my_num].ojest[i] = gp[my_num].ojst[i]; |
| gp[my_num].ejest[i] = gp[my_num].ejst[i]; |
| } else { |
| gp[my_num].ojest[i] = gp[my_num].ejst[i]; |
| gp[my_num].ejest[i] = gp[my_num].ojst[i]; |
| } |
| } |
| if (gp[my_num].rljen[i] == jmx[i]-2) { |
| gp[my_num].rljen[i] = gp[my_num].rljen[i] - 1; |
| if (gp[my_num].rljen[i] % 2 == 0) { |
| gp[my_num].oiest[i] = gp[my_num].oist[i]; |
| gp[my_num].eiest[i] = gp[my_num].eist[i]; |
| } else { |
| gp[my_num].oiest[i] = gp[my_num].eist[i]; |
| gp[my_num].eiest[i] = gp[my_num].oist[i]; |
| } |
| } |
| } |
| } |
| |
| /* initialize constants and variables |
| |
| id is a global shared variable that has fetch-and-add operations |
| performed on it by processes to obtain their pids. */ |
| |
| global->id = 0; |
| global->psibi = 0.0; |
| pi = atan(1.0); |
| pi = 4.*pi; |
| |
| factjacob = -1./(12.*res*res); |
| factlap = 1./(res*res); |
| eig2 = -h*f0*f0/(h1*h3*gpr); |
| jmm1 = jm-1 ; |
| ysca = ((double) jmm1)*res ; |
| for (i=0;i<im;i++) { |
| for (j=0;j<jm;j++) { |
| guess->oldga[i][j] = 0.0; |
| guess->oldgb[i][j] = 0.0; |
| } |
| } |
| |
| if (do_output) { |
| printf(" MULTIGRID OUTPUTS\n"); |
| } |
| |
| #ifdef ENABLE_PARSEC_HOOKS |
| __parsec_roi_begin(); |
| #endif |
| CREATE(slave, nprocs); |
| WAIT_FOR_END(nprocs); |
| #ifdef ENABLE_PARSEC_HOOKS |
| __parsec_roi_end(); |
| #endif |
| CLOCK(computeend) |
| |
| printf("\n"); |
| printf(" PROCESS STATISTICS\n"); |
| printf(" Total Multigrid Multigrid\n"); |
| printf(" Proc Time Time Fraction\n"); |
| printf(" 0 %15.0f %15.0f %10.3f\n", gp[0].total_time,gp[0].multi_time, gp[0].multi_time/gp[0].total_time); |
| |
| if (do_stats) { |
| min_total = max_total = avg_total = gp[0].total_time; |
| min_multi = max_multi = avg_multi = gp[0].multi_time; |
| min_frac = max_frac = avg_frac = gp[0].multi_time/gp[0].total_time; |
| for (i=1;i<nprocs;i++) { |
| if (gp[i].total_time > max_total) { |
| max_total = gp[i].total_time; |
| } |
| if (gp[i].total_time < min_total) { |
| min_total = gp[i].total_time; |
| } |
| if (gp[i].multi_time > max_multi) { |
| max_multi = gp[i].multi_time; |
| } |
| if (gp[i].multi_time < min_multi) { |
| min_multi = gp[i].multi_time; |
| } |
| if (gp[i].multi_time/gp[i].total_time > max_frac) { |
| max_frac = gp[i].multi_time/gp[i].total_time; |
| } |
| if (gp[i].multi_time/gp[i].total_time < min_frac) { |
| min_frac = gp[i].multi_time/gp[i].total_time; |
| } |
| avg_total += gp[i].total_time; |
| avg_multi += gp[i].multi_time; |
| avg_frac += gp[i].multi_time/gp[i].total_time; |
| } |
| avg_total = avg_total / nprocs; |
| avg_multi = avg_multi / nprocs; |
| avg_frac = avg_frac / nprocs; |
| for (i=1;i<nprocs;i++) { |
| printf(" %3ld %15.0f %15.0f %10.3f\n", i, gp[i].total_time, gp[i].multi_time, gp[i].multi_time/gp[i].total_time); |
| } |
| printf(" Avg %15.0f %15.0f %10.3f\n", avg_total,avg_multi,avg_frac); |
| printf(" Min %15.0f %15.0f %10.3f\n", min_total,min_multi,min_frac); |
| printf(" Max %15.0f %15.0f %10.3f\n", max_total,max_multi,max_frac); |
| } |
| printf("\n"); |
| |
| global->starttime = start; |
| printf(" TIMING INFORMATION\n"); |
| printf("Start time : %16lu\n", global->starttime); |
| printf("Initialization finish time : %16lu\n", global->trackstart); |
| printf("Overall finish time : %16lu\n", computeend); |
| printf("Total time with initialization : %16lu\n", computeend-global->starttime); |
| printf("Total time without initialization : %16lu\n", computeend-global->trackstart); |
| printf(" (excludes first timestep)\n"); |
| printf("\n"); |
| |
| MAIN_END |
| #ifdef ENABLE_PARSEC_HOOKS |
| __parsec_bench_end(); |
| #endif |
| } |
| |
| long log_2(long number) |
| { |
| long cumulative = 1; |
| long out = 0; |
| long done = 0; |
| |
| while ((cumulative < number) && (!done) && (out < 50)) { |
| if (cumulative == number) { |
| done = 1; |
| } else { |
| cumulative = cumulative * 2; |
| out ++; |
| } |
| } |
| |
| if (cumulative == number) { |
| return(out); |
| } else { |
| return(-1); |
| } |
| } |
| |
| void printerr(char *s) |
| { |
| fprintf(stderr,"ERROR: %s\n",s); |
| } |