blob: 76b750213d8965f610831bb5a59bc196f9716342 [file] [log] [blame]
/*************************************************************************/
/* */
/* Copyright (c) 1994 Stanford University */
/* */
/* All rights reserved. */
/* */
/* Permission is given to use, copy, and modify this software for any */
/* non-commercial purpose as long as this copyright notice is not */
/* removed. All other uses, including redistribution in whole or in */
/* part, are forbidden without prior written permission. */
/* */
/* This software is provided with absolutely no warranty and no */
/* support. */
/* */
/*************************************************************************/
/* shared memory implementation of the multigrid method
implementation uses red-black gauss-seidel relaxation
iterations, w cycles, and the method of half-injection for
residual computation */
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <stdlib.h>
#include "decs.h"
/* perform multigrid (w cycles) */
void multig(long my_id)
{
long iter;
double wu;
double errp;
long m;
long minlevel;
long flag1;
long flag2;
long k;
long my_num;
double wmax;
double local_err;
double red_local_err;
double black_local_err;
double g_error;
flag1 = 0;
flag2 = 0;
iter = 0;
m = numlev-1;
wmax = maxwork;
minlevel = minlev;
my_num = my_id;
wu = 0.0;
k = m;
g_error = 1.0e30;
while ((!flag1) && (!flag2)) {
errp = g_error;
iter++;
if (my_num == MASTER) {
multi->err_multi = 0.0;
}
/* barrier to make sure all procs have finished intadd or rescal */
/* before proceeding with relaxation */
#if defined(MULTIPLE_BARRIERS)
BARRIER(bars->error_barrier,nprocs)
#else
BARRIER(bars->barrier,nprocs)
#endif
relax(k,&red_local_err,RED_ITER,my_num);
/* barrier to make sure all red computations have been performed */
#if defined(MULTIPLE_BARRIERS)
BARRIER(bars->error_barrier,nprocs)
#else
BARRIER(bars->barrier,nprocs)
#endif
relax(k,&black_local_err,BLACK_ITER,my_num);
/* compute max local error from red_local_err and black_local_err */
if (red_local_err > black_local_err) {
local_err = red_local_err;
} else {
local_err = black_local_err;
}
/* update the global error if necessary */
LOCK(locks->error_lock)
if (local_err > multi->err_multi) {
multi->err_multi = local_err;
}
UNLOCK(locks->error_lock)
/* a single relaxation sweep at the finest level is one unit of */
/* work */
wu+=pow((double)4.0,(double)k-m);
/* barrier to make sure all processors have checked local error */
#if defined(MULTIPLE_BARRIERS)
BARRIER(bars->error_barrier,nprocs)
#else
BARRIER(bars->barrier,nprocs)
#endif
g_error = multi->err_multi;
/* barrier to make sure master does not cycle back to top of loop */
/* and reset global->err before we read it and decide what to do */
#if defined(MULTIPLE_BARRIERS)
BARRIER(bars->error_barrier,nprocs)
#else
BARRIER(bars->barrier,nprocs)
#endif
if (g_error >= lev_tol[k]) {
if (wu > wmax) {
/* max work exceeded */
flag1 = 1;
fprintf(stderr,"ERROR: Maximum work limit %0.5f exceeded\n",wmax);
exit(-1);
} else {
/* if we have not converged */
if ((k != 1) && (g_error/errp >= 0.6) && (k > minlevel)) {
/* if need to go to coarser grid */
rescal(k,my_num);
/* transfer residual to rhs of coarser grid */
lev_tol[k-1] = 0.3 * g_error;
k = k-1;
putz(k,my_num);
/* make initial guess on coarser grid zero */
g_error = 1.0e30;
}
}
} else {
/* if we have converged at this level */
if (k == m) {
/* if finest grid, we are done */
flag2 = 1;
} else {
/* else go to next finest grid */
intadd(k,my_num);
k++;
g_error = 1.0e30;
}
}
}
if (do_output) {
if (my_num == MASTER) {
printf("iter %ld, level %ld, residual norm %12.8e, work = %7.3f\n", iter,k,multi->err_multi,wu);
}
}
}
/* perform red or black iteration (not both) */
void relax(long k, double *err, long color, long my_num)
{
long i;
long j;
long iend;
long jend;
long oddistart;
long oddjstart;
long evenistart;
long evenjstart;
long oddiendst;
long eveniendst;
long oddjendst;
long evenjendst;
double a;
double h;
double factor;
double maxerr;
double newerr;
double oldval;
double newval;
i = 0;
j = 0;
*err = 0.0;
h = lev_res[k];
/* points whose sum of row and col index is even do a red iteration, */
/* others do a black */
evenistart = gp[my_num].eist[k];
evenjstart = gp[my_num].ejst[k];
oddistart = gp[my_num].oist[k];
oddjstart = gp[my_num].ojst[k];
eveniendst = gp[my_num].eiest[k];
evenjendst = gp[my_num].ejest[k];
oddiendst = gp[my_num].oiest[k];
oddjendst = gp[my_num].ojest[k];
iend = gp[my_num].rel_start_y[k] + gp[my_num].rel_num_y[k];
jend = gp[my_num].rel_start_x[k] + gp[my_num].rel_num_x[k];
factor = 4.0 - eig2 * h * h ;
maxerr = 0.0;
if (color == RED_ITER) {
for (i=evenistart;i<iend;i+=2) {
for (j=evenjstart;j<jend;j+=2) {
a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
multi->rhs_multi[k][i][j] ;
oldval = multi->q_multi[k][i][j];
newval = a / factor;
newerr = oldval - newval;
multi->q_multi[k][i][j] = newval;
if (fabs(newerr) > maxerr) {
maxerr = fabs(newerr);
}
}
}
for (i=oddistart;i<iend;i+=2) {
for (j=oddjstart;j<jend;j+=2) {
a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
multi->rhs_multi[k][i][j] ;
oldval = multi->q_multi[k][i][j];
newval = a / factor;
newerr = oldval - newval;
multi->q_multi[k][i][j] = newval;
if (fabs(newerr) > maxerr) {
maxerr = fabs(newerr);
}
}
}
} else if (color == BLACK_ITER) {
for (i=evenistart;i<iend;i+=2) {
for (j=oddjstart;j<jend;j+=2) {
a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
multi->rhs_multi[k][i][j] ;
oldval = multi->q_multi[k][i][j];
newval = a / factor;
newerr = oldval - newval;
multi->q_multi[k][i][j] = newval;
if (fabs(newerr) > maxerr) {
maxerr = fabs(newerr);
}
}
}
for (i=oddistart;i<iend;i+=2) {
for (j=evenjstart;j<jend;j+=2) {
a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
multi->rhs_multi[k][i][j] ;
oldval = multi->q_multi[k][i][j];
newval = a / factor;
newerr = oldval - newval;
multi->q_multi[k][i][j] = newval;
if (fabs(newerr) > maxerr) {
maxerr = fabs(newerr);
}
}
}
}
*err = maxerr;
}
/* perform half-injection to next coarsest level */
void rescal(long kf, long my_num)
{
long ic;
long if17;
long jf;
long jc;
long krc;
long istart;
long iend;
long jstart;
long jend;
double hf;
double hc;
double s;
double s1;
double s2;
double s3;
double s4;
double factor;
double int1;
double int2;
double i_int_factor;
double j_int_factor;
double int_val;
krc = kf - 1;
hc = lev_res[krc];
hf = lev_res[kf];
istart = gp[my_num].rlist[krc];
jstart = gp[my_num].rljst[krc];
iend = gp[my_num].rlien[krc];
jend = gp[my_num].rljen[krc];
iend = gp[my_num].rel_start_y[krc] + gp[my_num].rel_num_y[krc] - 1;
jend = gp[my_num].rel_start_x[krc] + gp[my_num].rel_num_x[krc] - 1;
factor = 4.0 - eig2 * hf * hf;
if17=2*(istart-1);
for(ic=istart;ic<=iend;ic++) {
if17+=2;
i_int_factor = ic * i_int_coeff[krc] * 0.5;
jf = 2 * (jstart - 1);
for(jc=jstart;jc<=jend;jc++) {
jf+=2;
j_int_factor = jc*j_int_coeff[krc] * 0.5;
/* method of half-injection uses 2.0 instead of 4.0 */
s = multi->q_multi[kf][if17][jf+1] + multi->q_multi[kf][if17][jf-1] +
multi->q_multi[kf][if17-1][jf] + multi->q_multi[kf][if17+1][jf];
s1 = 2.0 * (multi->rhs_multi[kf][if17][jf] - s +
factor * multi->q_multi[kf][if17][jf]);
if ((if17 == 2) || (jf ==2)) {
s2 = 0;
} else {
s = multi->q_multi[kf][if17][jf-1] + multi->q_multi[kf][if17][jf-3] +
multi->q_multi[kf][if17-1][jf-2] + multi->q_multi[kf][if17+1][jf-2];
s2 = 2.0 * (multi->rhs_multi[kf][if17][jf-2] - s +
factor * multi->q_multi[kf][if17][jf-2]);
}
if ((if17 == 2) || (jf ==2)) {
s3 = 0;
} else {
s = multi->q_multi[kf][if17-2][jf+1] + multi->q_multi[kf][if17-2][jf-1] +
multi->q_multi[kf][if17-3][jf] + multi->q_multi[kf][if17-1][jf];
s3 = 2.0 * (multi->rhs_multi[kf][if17-2][jf] - s +
factor * multi->q_multi[kf][if17-2][jf]);
}
if ((if17 == 2) || (jf ==2)) {
s4 = 0;
} else {
s = multi->q_multi[kf][if17-2][jf-1] + multi->q_multi[kf][if17-2][jf-3] +
multi->q_multi[kf][if17-3][jf-2] + multi->q_multi[kf][if17-1][jf-2];
s4 = 2.0 * (multi->rhs_multi[kf][if17-2][jf-2] - s +
factor * multi->q_multi[kf][if17-2][jf-2]);
}
int1 = j_int_factor*s4 + (1.0-j_int_factor)*s3;
int2 = j_int_factor*s2 + (1.0-j_int_factor)*s1;
int_val = i_int_factor*int1+(1.0-i_int_factor)*int2;
multi->rhs_multi[krc][ic][jc] = i_int_factor*int1+(1.0-i_int_factor)*int2;
}
}
}
/* perform interpolation and addition to next finest grid */
void intadd(long kc, long my_num)
{
long ic;
long if17;
long jf;
long jc;
long kf;
long istart;
long jstart;
long iend;
long jend;
double hc;
double hf;
long ifine1;
long ifine2;
long jfine1;
long jfine2;
double int1;
double int2;
double i_int_factor1;
double j_int_factor1;
double i_int_factor2;
double j_int_factor2;
kf = kc + 1;
hc = lev_res[kc];
hf = lev_res[kf];
istart = gp[my_num].iist[kc];
jstart = gp[my_num].ijst[kc];
iend = gp[my_num].iien[kc];
jend = gp[my_num].ijen[kc];
istart = gp[my_num].rel_start_y[kc];
jstart = gp[my_num].rel_start_x[kc];
iend = gp[my_num].rel_start_y[kc] + gp[my_num].rel_num_y[kc] - 1;
jend = gp[my_num].rel_start_x[kc] + gp[my_num].rel_num_x[kc] - 1;
if17 = 2*(istart-1);
for(ic=istart;ic<=iend;ic++) {
if17+=2;
ifine1 = if17-1;
ifine2 = if17;
i_int_factor1= ((imx[kc]-2)-(ic-1)) * (i_int_coeff[kf]);
i_int_factor2= ic * i_int_coeff[kf];
jf = 2*(jstart-1);
for(jc=jstart;jc<=jend;jc++) {
jf+=2;
jfine1 = jf-1;
jfine2 = jf;
j_int_factor1= ((jmx[kc]-2)-(jc-1)) * (j_int_coeff[kf]);
j_int_factor2= jc * j_int_coeff[kf];
int1 = j_int_factor1*multi->q_multi[kc][ic][jc-1] +
(1.0-j_int_factor1)*multi->q_multi[kc][ic][jc];
int2 = j_int_factor1*multi->q_multi[kc][ic-1][jc-1] +
(1.0-j_int_factor1)*multi->q_multi[kc][ic-1][jc];
multi->q_multi[kf][if17-1][jf-1] += i_int_factor1*int2 +
(1.0-i_int_factor1)*int1;
int2 = j_int_factor1*multi->q_multi[kc][ic+1][jc-1] +
(1.0-j_int_factor1)*multi->q_multi[kc][ic+1][jc];
multi->q_multi[kf][if17][jf-1] += i_int_factor2*int2 +
(1.0-i_int_factor2)*int1;
int1 = j_int_factor2*multi->q_multi[kc][ic][jc+1] +
(1.0-j_int_factor2)*multi->q_multi[kc][ic][jc];
int2 = j_int_factor2*multi->q_multi[kc][ic-1][jc+1] +
(1.0-j_int_factor2)*multi->q_multi[kc][ic-1][jc];
multi->q_multi[kf][if17-1][jf] += i_int_factor1*int2 +
(1.0-i_int_factor1)*int1;
int2 = j_int_factor2*multi->q_multi[kc][ic+1][jc+1] +
(1.0-j_int_factor2)*multi->q_multi[kc][ic+1][jc];
multi->q_multi[kf][if17][jf] += i_int_factor2*int2 +
(1.0-i_int_factor2)*int1;
}
}
}
/* initialize a grid to zero in parallel */
void putz(long k, long my_num)
{
long i;
long j;
long istart;
long jstart;
long iend;
long jend;
istart = gp[my_num].pist[k];
jstart = gp[my_num].pjst[k];
iend = gp[my_num].pien[k];
jend = gp[my_num].pjen[k];
for (i=istart;i<=iend;i++) {
for (j=jstart;j<=jend;j++) {
multi->q_multi[k][i][j] = 0.0;
}
}
}