| /*************************************************************************/ |
| /* */ |
| /* Copyright (c) 1994 Stanford University */ |
| /* */ |
| /* All rights reserved. */ |
| /* */ |
| /* Permission is given to use, copy, and modify this software for any */ |
| /* non-commercial purpose as long as this copyright notice is not */ |
| /* removed. All other uses, including redistribution in whole or in */ |
| /* part, are forbidden without prior written permission. */ |
| /* */ |
| /* This software is provided with absolutely no warranty and no */ |
| /* support. */ |
| /* */ |
| /*************************************************************************/ |
| |
/* Shared memory implementation of the multigrid method.
   The implementation uses red-black Gauss-Seidel relaxation
   iterations, W cycles, and the method of half-injection for
   residual computation. */
| |
| EXTERN_ENV |
| |
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "decs.h"
| |
/* Routines defined in this file.  Parameter names match the definitions. */
void multig(int my_id);
void relax(int k, double *err, int color, int my_num);
void rescal(int kf, int my_num);
void intadd(int kc, int my_num);
void putz(int k, int my_num);
void copy_borders(int k, int pid);
void copy_rhs_borders(int k, int procid);
void copy_red(int k, int procid);
void copy_black(int k, int procid);

/* Declared here; its definition is not in this part of the file. */
void printerr(char *);
| |
| void multig(my_id) |
| |
| /* perform multigrid (w cycles) */ |
| |
| int my_id; |
| |
| { |
| int iter; |
| double wu; |
| double errp; |
| int m; |
| int flag1; |
| int flag2; |
| int k; |
| int my_num; |
| double wmax; |
| double local_err; |
| double red_local_err; |
| double black_local_err; |
| double g_error; |
| int i; |
| int j; |
| |
| flag1 = 0; |
| flag2 = 0; |
| iter = 0; |
| m = numlev-1; |
| wmax = maxwork; |
| my_num = my_id; |
| wu = 0.0; |
| |
| k = m; |
| g_error = 1.0e30; |
| while ((!flag1) && (!flag2)) { |
| errp = g_error; |
| iter++; |
| if (my_num == MASTER) { |
| multi->err_multi = 0.0; |
| } |
| |
| /* barrier to make sure all procs have finished intadd or rescal */ |
| /* before proceeding with relaxation */ |
| |
| BARRIER(bars->error_barrier,nprocs) |
| copy_black(k,my_num); |
| |
| relax(k,&red_local_err,RED_ITER,my_num); |
| |
| /* barrier to make sure all red computations have been performed */ |
| |
| BARRIER(bars->error_barrier,nprocs) |
| copy_red(k,my_num); |
| |
| relax(k,&black_local_err,BLACK_ITER,my_num); |
| |
| /* compute max local error from red_local_err and black_local_err */ |
| |
| if (red_local_err > black_local_err) { |
| local_err = red_local_err; |
| } else { |
| local_err = black_local_err; |
| } |
| |
| /* update the global error if necessary */ |
| |
| LOCK(locks->error_lock) |
| if (local_err > multi->err_multi) { |
| multi->err_multi = local_err; |
| } |
| UNLOCK(locks->error_lock) |
| |
| /* a single relaxation sweep at the finest level is one unit of */ |
| /* work */ |
| |
| wu+=pow((double)4.0,(double)k-m); |
| |
| /* barrier to make sure all processors have checked local error */ |
| |
| BARRIER(bars->error_barrier,nprocs) |
| |
| g_error = multi->err_multi; |
| |
| /* barrier to make sure master does not cycle back to top of loop */ |
| /* and reset global->err before we read it and decide what to do */ |
| |
| BARRIER(bars->error_barrier,nprocs) |
| |
| if (g_error >= lev_tol[k]) { |
| if (wu > wmax) { |
| /* max work exceeded */ |
| flag1 = 1; |
| fprintf(stderr,"ERROR: Maximum work limit %0.5f exceeded\n",wmax); |
| exit(-1); |
| } else { |
| /* if we have not converged */ |
| if ((k != 0) && (g_error/errp >= 0.6) && |
| (k > minlevel)) { |
| /* if need to go to coarser grid */ |
| |
| copy_borders(k,my_num); |
| copy_rhs_borders(k,my_num); |
| |
| /* This bar is needed because the routine rescal uses the neighbor's |
| border points to compute s4. We must ensure that the neighbor's |
| border points have been written before we try computing the new |
| rescal values */ |
| |
| BARRIER(bars->error_barrier,nprocs) |
| |
| rescal(k,my_num); |
| |
| /* transfer residual to rhs of coarser grid */ |
| lev_tol[k-1] = 0.3 * g_error; |
| k = k-1; |
| putz(k,my_num); |
| /* make initial guess on coarser grid zero */ |
| g_error = 1.0e30; |
| } |
| } |
| } else { |
| /* if we have converged at this level */ |
| if (k == m) { |
| /* if finest grid, we are done */ |
| flag2 = 1; |
| } else { |
| /* else go to next finest grid */ |
| |
| copy_borders(k,my_num); |
| |
| intadd(k,my_num); |
| /* changes the grid values at the finer level. rhs at finer level */ |
| /* remains what it already is */ |
| k++; |
| g_error = 1.0e30; |
| } |
| } |
| } |
| if (do_output) { |
| if (my_num == MASTER) { |
| printf("iter %d, level %d, residual norm %12.8e, work = %7.3f\n", |
| iter,k,multi->err_multi,wu); |
| } |
| } |
| } |
| |
| void relax(k,err,color,my_num) |
| |
| int k; |
| double *err; |
| int color; |
| int my_num; |
| |
| /* perform red or black iteration (not both) */ |
| |
| { |
| int i; |
| int j; |
| int iend; |
| int jend; |
| int oddistart; |
| int oddjstart; |
| int evenistart; |
| int evenjstart; |
| double a; |
| double h; |
| double factor; |
| double maxerr; |
| double newerr; |
| double oldval; |
| double newval; |
| double **t2a; |
| double **t2b; |
| double *t1a; |
| double *t1b; |
| double *t1c; |
| double *t1d; |
| |
| i = 0; |
| j = 0; |
| |
| *err = 0.0; |
| h = lev_res[k]; |
| |
| /* points whose sum of row and col index is even do a red iteration, */ |
| /* others do a black */ |
| |
| evenistart = gp[my_num].eist[k]; |
| evenjstart = gp[my_num].ejst[k]; |
| oddistart = gp[my_num].oist[k]; |
| oddjstart = gp[my_num].ojst[k]; |
| |
| iend = gp[my_num].rlien[k]; |
| jend = gp[my_num].rljen[k]; |
| |
| factor = 4.0 - eig2 * h * h ; |
| maxerr = 0.0; |
| t2a = (double **) q_multi[my_num][k]; |
| t2b = (double **) rhs_multi[my_num][k]; |
| if (color == RED_ITER) { |
| for (i=evenistart;i<iend;i+=2) { |
| t1a = (double *) t2a[i]; |
| t1b = (double *) t2b[i]; |
| t1c = (double *) t2a[i-1]; |
| t1d = (double *) t2a[i+1]; |
| for (j=evenjstart;j<jend;j+=2) { |
| a = t1a[j+1] + t1a[j-1] + |
| t1c[j] + t1d[j] - |
| t1b[j] ; |
| oldval = t1a[j]; |
| newval = a / factor; |
| newerr = oldval - newval; |
| t1a[j] = newval; |
| if (fabs(newerr) > maxerr) { |
| maxerr = fabs(newerr); |
| } |
| } |
| } |
| for (i=oddistart;i<iend;i+=2) { |
| t1a = (double *) t2a[i]; |
| t1b = (double *) t2b[i]; |
| t1c = (double *) t2a[i-1]; |
| t1d = (double *) t2a[i+1]; |
| for (j=oddjstart;j<jend;j+=2) { |
| a = t1a[j+1] + t1a[j-1] + |
| t1c[j] + t1d[j] - |
| t1b[j] ; |
| oldval = t1a[j]; |
| newval = a / factor; |
| newerr = oldval - newval; |
| t1a[j] = newval; |
| if (fabs(newerr) > maxerr) { |
| maxerr = fabs(newerr); |
| } |
| } |
| } |
| } else if (color == BLACK_ITER) { |
| for (i=evenistart;i<iend;i+=2) { |
| t1a = (double *) t2a[i]; |
| t1b = (double *) t2b[i]; |
| t1c = (double *) t2a[i-1]; |
| t1d = (double *) t2a[i+1]; |
| for (j=oddjstart;j<jend;j+=2) { |
| a = t1a[j+1] + t1a[j-1] + |
| t1c[j] + t1d[j] - |
| t1b[j] ; |
| oldval = t1a[j]; |
| newval = a / factor; |
| newerr = oldval - newval; |
| t1a[j] = newval; |
| if (fabs(newerr) > maxerr) { |
| maxerr = fabs(newerr); |
| } |
| } |
| } |
| for (i=oddistart;i<iend;i+=2) { |
| t1a = (double *) t2a[i]; |
| t1b = (double *) t2b[i]; |
| t1c = (double *) t2a[i-1]; |
| t1d = (double *) t2a[i+1]; |
| for (j=evenjstart;j<jend;j+=2) { |
| a = t1a[j+1] + t1a[j-1] + |
| t1c[j] + t1d[j] - |
| t1b[j] ; |
| oldval = t1a[j]; |
| newval = a / factor; |
| newerr = oldval - newval; |
| t1a[j] = newval; |
| if (fabs(newerr) > maxerr) { |
| maxerr = fabs(newerr); |
| } |
| } |
| } |
| } |
| *err = maxerr; |
| } |
| |
| void rescal(kf,my_num) |
| |
| /* perform half-injection to next coarsest level */ |
| |
| int kf; |
| int my_num; |
| |
| { |
| int ic; |
| int if17; |
| int jf; |
| int jc; |
| int krc; |
| int istart; |
| int iend; |
| int jstart; |
| int jend; |
| double hf; |
| double hc; |
| double s; |
| double s1; |
| double s2; |
| double s3; |
| double s4; |
| double factor; |
| double h; |
| double int1; |
| double int2; |
| double i_int_factor; |
| double j_int_factor; |
| double int_val; |
| int i_off; |
| int j_off; |
| int up_proc; |
| int left_proc; |
| int im; |
| int jm; |
| double temp; |
| double temp2; |
| double **t2a; |
| double **t2b; |
| double **t2c; |
| double *t1a; |
| double *t1b; |
| double *t1c; |
| double *t1d; |
| double *t1e; |
| double *t1f; |
| double *t1g; |
| double *t1h; |
| |
| krc = kf - 1; |
| hc = lev_res[krc]; |
| hf = lev_res[kf]; |
| i_off = gp[my_num].rownum*ypts_per_proc[krc]; |
| j_off = gp[my_num].colnum*xpts_per_proc[krc]; |
| up_proc = gp[my_num].neighbors[UP]; |
| left_proc = gp[my_num].neighbors[LEFT]; |
| im = (imx[kf]-2)/yprocs; |
| jm = (jmx[kf]-2)/xprocs; |
| |
| istart = gp[my_num].rlist[krc]; |
| jstart = gp[my_num].rljst[krc]; |
| iend = gp[my_num].rlien[krc] - 1; |
| jend = gp[my_num].rljen[krc] - 1; |
| |
| factor = 4.0 - eig2 * hf * hf; |
| |
| t2a = (double **) q_multi[my_num][kf]; |
| t2b = (double **) rhs_multi[my_num][kf]; |
| t2c = (double **) rhs_multi[my_num][krc]; |
| if17=2*(istart-1); |
| for(ic=istart;ic<=iend;ic++) { |
| if17+=2; |
| i_int_factor = (ic+i_off) * i_int_coeff[krc] * 0.5; |
| jf = 2 * (jstart - 1); |
| t1a = (double *) t2a[if17]; |
| t1b = (double *) t2b[if17]; |
| t1c = (double *) t2c[ic]; |
| t1d = (double *) t2a[if17-1]; |
| t1e = (double *) t2a[if17+1]; |
| t1f = (double *) t2a[if17-2]; |
| t1g = (double *) t2a[if17-3]; |
| t1h = (double *) t2b[if17-2]; |
| for(jc=jstart;jc<=jend;jc++) { |
| jf+=2; |
| j_int_factor = (jc+j_off)*j_int_coeff[krc] * 0.5; |
| |
| /* method of half-injection uses 2.0 instead of 4.0 */ |
| |
| /* do bilinear interpolation */ |
| s = t1a[jf+1] + t1a[jf-1] + t1d[jf] + t1e[jf]; |
| s1 = 2.0 * (t1b[jf] - s + factor * t1a[jf]); |
| if (((if17 == 2) && (gp[my_num].neighbors[UP] == -1)) || |
| ((jf == 2) && (gp[my_num].neighbors[LEFT] == -1))) { |
| s2 = 0; |
| s3 = 0; |
| s4 = 0; |
| } else if ((if17 == 2) || (jf == 2)) { |
| if (jf == 2) { |
| temp = q_multi[left_proc][kf][if17][jm-1]; |
| } else { |
| temp = t1a[jf-3]; |
| } |
| s = t1a[jf-1] + temp + t1d[jf-2] + t1e[jf-2]; |
| s2 = 2.0 * (t1b[jf-2] - s + factor * t1a[jf-2]); |
| if (if17 == 2) { |
| temp = q_multi[up_proc][kf][im-1][jf]; |
| } else { |
| temp = t1g[jf]; |
| } |
| s = t1f[jf+1]+ t1f[jf-1]+ temp + t1d[jf]; |
| s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]); |
| if (jf == 2) { |
| temp = q_multi[left_proc][kf][if17-2][jm-1]; |
| } else { |
| temp = t1f[jf-3]; |
| } |
| if (if17 == 2) { |
| temp2 = q_multi[up_proc][kf][im-1][jf-2]; |
| } else { |
| temp2 = t1g[jf-2]; |
| } |
| s = t1f[jf-1]+ temp + temp2 + t1d[jf-2]; |
| s4 = 2.0 * (t1h[jf-2] - s + factor * t1f[jf-2]); |
| } else { |
| s = t1a[jf-1] + t1a[jf-3] + t1d[jf-2] + t1e[jf-2]; |
| s2 = 2.0 * (t1b[jf-2] - s + factor * t1a[jf-2]); |
| s = t1f[jf+1]+ t1f[jf-1]+ t1g[jf] + t1d[jf]; |
| s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]); |
| s = t1f[jf-1]+ t1f[jf-3]+ t1g[jf-2]+ t1d[jf-2]; |
| s4 = 2.0 * (t1h[jf-2] - s + factor * t1f[jf-2]); |
| } |
| int1 = j_int_factor*s4 + (1.0-j_int_factor)*s3; |
| int2 = j_int_factor*s2 + (1.0-j_int_factor)*s1; |
| int_val = i_int_factor*int1+(1.0-i_int_factor)*int2; |
| t1c[jc] = i_int_factor*int1+(1.0-i_int_factor)*int2; |
| } |
| } |
| } |
| |
| void intadd(kc,my_num) |
| |
| /* perform interpolation and addition to next finest grid */ |
| |
| int kc; |
| int my_num; |
| |
| { |
| int ic; |
| int if17; |
| int jf; |
| int jc; |
| int i; |
| int kf; |
| int istart; |
| int jstart; |
| int iend; |
| int jend; |
| double a; |
| double am; |
| double hc; |
| double hf; |
| double int1; |
| double int2; |
| double i_int_factor1; |
| double j_int_factor1; |
| double i_int_factor2; |
| double j_int_factor2; |
| double int_val; |
| int i_off; |
| int j_off; |
| double **t2a; |
| double **t2b; |
| double *t1a; |
| double *t1b; |
| double *t1c; |
| double *t1d; |
| double *t1e; |
| |
| kf = kc + 1; |
| hc = lev_res[kc]; |
| hf = lev_res[kf]; |
| |
| istart = gp[my_num].rlist[kc]; |
| jstart = gp[my_num].rljst[kc]; |
| iend = gp[my_num].rlien[kc] - 1; |
| jend = gp[my_num].rljen[kc] - 1; |
| i_off = gp[my_num].rownum*ypts_per_proc[kc]; |
| j_off = gp[my_num].colnum*xpts_per_proc[kc]; |
| |
| t2a = (double **) q_multi[my_num][kc]; |
| t2b = (double **) q_multi[my_num][kf]; |
| if17 = 2*(istart-1); |
| for(ic=istart;ic<=iend;ic++) { |
| if17+=2; |
| i_int_factor1= ((imx[kc]-2)-(ic+i_off-1)) * (i_int_coeff[kf]); |
| i_int_factor2= (ic+i_off) * i_int_coeff[kf]; |
| jf = 2*(jstart-1); |
| |
| t1a = (double *) t2a[ic]; |
| t1b = (double *) t2a[ic-1]; |
| t1c = (double *) t2a[ic+1]; |
| t1d = (double *) t2b[if17]; |
| t1e = (double *) t2b[if17-1]; |
| for(jc=jstart;jc<=jend;jc++) { |
| jf+=2; |
| j_int_factor1= ((jmx[kc]-2)-(jc+j_off-1)) * (j_int_coeff[kf]); |
| j_int_factor2= (jc+j_off) * j_int_coeff[kf]; |
| |
| int1 = j_int_factor1*t1a[jc-1] + (1.0-j_int_factor1)*t1a[jc]; |
| int2 = j_int_factor1*t1b[jc-1] + (1.0-j_int_factor1)*t1b[jc]; |
| t1e[jf-1] += i_int_factor1*int2 + (1.0-i_int_factor1)*int1; |
| int2 = j_int_factor1*t1c[jc-1] + (1.0-j_int_factor1)*t1c[jc]; |
| t1d[jf-1] += i_int_factor2*int2 + (1.0-i_int_factor2)*int1; |
| int1 = j_int_factor2*t1a[jc+1] + (1.0-j_int_factor2)*t1a[jc]; |
| int2 = j_int_factor2*t1b[jc+1] + (1.0-j_int_factor2)*t1b[jc]; |
| t1e[jf] += i_int_factor1*int2 + (1.0-i_int_factor1)*int1; |
| int2 = j_int_factor2*t1c[jc+1] + (1.0-j_int_factor2)*t1c[jc]; |
| t1d[jf] += i_int_factor2*int2 + (1.0-i_int_factor2)*int1; |
| } |
| } |
| } |
| |
| void putz(k,my_num) |
| |
| /* initialize a grid to zero in parallel */ |
| |
| int k; |
| int my_num; |
| |
| { |
| int i; |
| int j; |
| int istart; |
| int jstart; |
| int iend; |
| int jend; |
| double **t2a; |
| double *t1a; |
| |
| istart = gp[my_num].rlist[k]; |
| jstart = gp[my_num].rljst[k]; |
| iend = gp[my_num].rlien[k]; |
| jend = gp[my_num].rljen[k]; |
| |
| t2a = (double **) q_multi[my_num][k]; |
| for (i=istart;i<=iend;i++) { |
| t1a = (double *) t2a[i]; |
| for (j=jstart;j<=jend;j++) { |
| t1a[j] = 0.0; |
| } |
| } |
| } |
| |
| void copy_borders(k,pid) |
| |
| int k; |
| int pid; |
| |
| { |
| int i; |
| int j; |
| int jj; |
| int im; |
| int jm; |
| int lastrow; |
| int lastcol; |
| double **t2a; |
| double **t2b; |
| double *t1a; |
| double *t1b; |
| |
| im = (imx[k]-2)/yprocs + 2; |
| jm = (jmx[k]-2)/xprocs + 2; |
| lastrow = (imx[k]-2)/yprocs; |
| lastcol = (jmx[k]-2)/xprocs; |
| |
| t2a = (double **) q_multi[pid][k]; |
| jj = gp[pid].neighbors[UPLEFT]; |
| if (jj != -1) { |
| t2a[0][0]=q_multi[jj][k][im-2][jm-2]; |
| } |
| jj = gp[pid].neighbors[UPRIGHT]; |
| if (jj != -1) { |
| t2a[0][jm-1]=q_multi[jj][k][im-2][1]; |
| } |
| jj = gp[pid].neighbors[DOWNLEFT]; |
| if (jj != -1) { |
| t2a[im-1][0]=q_multi[jj][k][1][jm-2]; |
| } |
| jj = gp[pid].neighbors[DOWNRIGHT]; |
| if (jj != -1) { |
| t2a[im-1][jm-1]=q_multi[jj][k][1][1]; |
| } |
| |
| if (gp[pid].neighbors[UP] == -1) { |
| jj = gp[pid].neighbors[LEFT]; |
| if (jj != -1) { |
| t2a[0][0] = q_multi[jj][k][0][jm-2]; |
| } else { |
| jj = gp[pid].neighbors[DOWN]; |
| if (jj != -1) { |
| t2a[im-1][0] = q_multi[jj][k][1][0]; |
| } |
| } |
| jj = gp[pid].neighbors[RIGHT]; |
| if (jj != -1) { |
| t2a[0][jm-1] = q_multi[jj][k][0][1]; |
| } else { |
| jj = gp[pid].neighbors[DOWN]; |
| if (jj != -1) { |
| t2a[im-1][jm-1] = q_multi[jj][k][1][jm-1]; |
| } |
| } |
| } else if (gp[pid].neighbors[DOWN] == -1) { |
| jj = gp[pid].neighbors[LEFT]; |
| if (jj != -1) { |
| t2a[im-1][0] = q_multi[jj][k][im-1][jm-2]; |
| } else { |
| jj = gp[pid].neighbors[UP]; |
| if (jj != -1) { |
| t2a[0][0] = q_multi[jj][k][im-2][0]; |
| } |
| } |
| jj = gp[pid].neighbors[RIGHT]; |
| if (jj != -1) { |
| t2a[im-1][jm-1] = q_multi[jj][k][im-1][1]; |
| } else { |
| jj = gp[pid].neighbors[UP]; |
| if (jj != -1) { |
| t2a[0][jm-1] = q_multi[jj][k][im-2][jm-1]; |
| } |
| } |
| } else if (gp[pid].neighbors[LEFT] == -1) { |
| jj = gp[pid].neighbors[UP]; |
| if (jj != -1) { |
| t2a[0][0] = q_multi[jj][k][im-2][0]; |
| } |
| jj = gp[pid].neighbors[DOWN]; |
| if (jj != -1) { |
| t2a[im-1][0] = q_multi[jj][k][1][0]; |
| } |
| } else if (gp[pid].neighbors[RIGHT] == -1) { |
| jj = gp[pid].neighbors[UP]; |
| if (jj != -1) { |
| t2a[0][jm-1] = q_multi[jj][k][im-2][jm-1]; |
| } |
| jj = gp[pid].neighbors[DOWN]; |
| if (jj != -1) { |
| t2a[im-1][jm-1] = q_multi[jj][k][1][jm-1]; |
| } |
| } |
| |
| j = gp[pid].neighbors[UP]; |
| if (j != -1) { |
| t1a = (double *) t2a[0]; |
| t1b = (double *) q_multi[j][k][im-2]; |
| for (i=1;i<=lastcol;i++) { |
| t1a[i] = t1b[i]; |
| } |
| } |
| j = gp[pid].neighbors[DOWN]; |
| if (j != -1) { |
| t1a = (double *) t2a[im-1]; |
| t1b = (double *) q_multi[j][k][1]; |
| for (i=1;i<=lastcol;i++) { |
| t1a[i] = t1b[i]; |
| } |
| } |
| j = gp[pid].neighbors[LEFT]; |
| if (j != -1) { |
| t2b = (double **) q_multi[j][k]; |
| for (i=1;i<=lastrow;i++) { |
| t2a[i][0] = t2b[i][jm-2]; |
| } |
| } |
| j = gp[pid].neighbors[RIGHT]; |
| if (j != -1) { |
| t2b = (double **) q_multi[j][k]; |
| for (i=1;i<=lastrow;i++) { |
| t2a[i][jm-1] = t2b[i][1]; |
| } |
| } |
| |
| } |
| |
| void copy_rhs_borders(k,procid) |
| |
| int k; |
| int procid; |
| |
| { |
| int i; |
| int j; |
| int im; |
| int jm; |
| int lastrow; |
| int lastcol; |
| double **t2a; |
| double **t2b; |
| double *t1a; |
| double *t1b; |
| |
| im = (imx[k]-2)/yprocs+2; |
| jm = (jmx[k]-2)/xprocs+2; |
| lastrow = (imx[k]-2)/yprocs; |
| lastcol = (jmx[k]-2)/xprocs; |
| |
| t2a = (double **) rhs_multi[procid][k]; |
| if (gp[procid].neighbors[UPLEFT] != -1) { |
| j = gp[procid].neighbors[UPLEFT]; |
| t2a[0][0] = rhs_multi[j][k][im-2][jm-2]; |
| } |
| |
| if (gp[procid].neighbors[UP] != -1) { |
| j = gp[procid].neighbors[UP]; |
| if (j != -1) { |
| t1a = (double *) t2a[0]; |
| t1b = (double *) rhs_multi[j][k][im-2]; |
| for (i=2;i<=lastcol;i+=2) { |
| t1a[i] = t1b[i]; |
| } |
| } |
| } |
| if (gp[procid].neighbors[LEFT] != -1) { |
| j = gp[procid].neighbors[LEFT]; |
| if (j != -1) { |
| t2b = (double **) rhs_multi[j][k]; |
| for (i=2;i<=lastrow;i+=2) { |
| t2a[i][0] = t2b[i][jm-2]; |
| } |
| } |
| } |
| } |
| |
| void copy_red(k,procid) |
| |
| int k; |
| int procid; |
| |
| { |
| int i; |
| int j; |
| int im; |
| int jm; |
| int lastrow; |
| int lastcol; |
| double **t2a; |
| double **t2b; |
| double *t1a; |
| double *t1b; |
| |
| im = (imx[k]-2)/yprocs+2; |
| jm = (jmx[k]-2)/xprocs+2; |
| lastrow = (imx[k]-2)/yprocs; |
| lastcol = (jmx[k]-2)/xprocs; |
| |
| t2a = (double **) q_multi[procid][k]; |
| j = gp[procid].neighbors[UP]; |
| if (j != -1) { |
| t1a = (double *) t2a[0]; |
| t1b = (double *) q_multi[j][k][im-2]; |
| for (i=2;i<=lastcol;i+=2) { |
| t1a[i] = t1b[i]; |
| } |
| } |
| j = gp[procid].neighbors[DOWN]; |
| if (j != -1) { |
| t1a = (double *) t2a[im-1]; |
| t1b = (double *) q_multi[j][k][1]; |
| for (i=1;i<=lastcol;i+=2) { |
| t1a[i] = t1b[i]; |
| } |
| } |
| j = gp[procid].neighbors[LEFT]; |
| if (j != -1) { |
| t2b = (double **) q_multi[j][k]; |
| for (i=2;i<=lastrow;i+=2) { |
| t2a[i][0] = t2b[i][jm-2]; |
| } |
| } |
| j = gp[procid].neighbors[RIGHT]; |
| if (j != -1) { |
| t2b = (double **) q_multi[j][k]; |
| for (i=1;i<=lastrow;i+=2) { |
| t2a[i][jm-1] = t2b[i][1]; |
| } |
| } |
| } |
| |
| void copy_black(k,procid) |
| |
| int k; |
| int procid; |
| |
| { |
| int i; |
| int j; |
| int im; |
| int jm; |
| int lastrow; |
| int lastcol; |
| double **t2a; |
| double **t2b; |
| double *t1a; |
| double *t1b; |
| |
| im = (imx[k]-2)/yprocs+2; |
| jm = (jmx[k]-2)/xprocs+2; |
| lastrow = (imx[k]-2)/yprocs; |
| lastcol = (jmx[k]-2)/xprocs; |
| |
| t2a = (double **) q_multi[procid][k]; |
| j = gp[procid].neighbors[UP]; |
| if (j != -1) { |
| t1a = (double *) t2a[0]; |
| t1b = (double *) q_multi[j][k][im-2]; |
| for (i=1;i<=lastcol;i+=2) { |
| t1a[i] = t1b[i]; |
| } |
| } |
| j = gp[procid].neighbors[DOWN]; |
| if (j != -1) { |
| t1a = (double *) t2a[im-1]; |
| t1b = (double *) q_multi[j][k][1]; |
| for (i=2;i<=lastcol;i+=2) { |
| t1a[i] = t1b[i]; |
| } |
| } |
| j = gp[procid].neighbors[LEFT]; |
| if (j != -1) { |
| t2b = (double **) q_multi[j][k]; |
| for (i=1;i<=lastrow;i+=2) { |
| t2a[i][0] = t2b[i][jm-2]; |
| } |
| } |
| j = gp[procid].neighbors[RIGHT]; |
| if (j != -1) { |
| t2b = (double **) q_multi[j][k]; |
| for (i=2;i<=lastrow;i+=2) { |
| t2a[i][jm-1] = t2b[i][1]; |
| } |
| } |
| } |
| |