src/parsec/disk-image/parsec/parsec-benchmark/ext/splash2x/apps/ocean_ncp/src/multi.C - public/gem5-resources - Git at Google

 /*************************************************************************/
 /*                                                                       */
 /*  Copyright (c) 1994 Stanford University                               */
 /*                                                                       */
 /*  All rights reserved.                                                 */
 /*                                                                       */
 /*  Permission is given to use, copy, and modify this software for any   */
 /*  non-commercial purpose as long as this copyright notice is not       */
 /*  removed.  All other uses, including redistribution in whole or in    */
 /*  part, are forbidden without prior written permission.                */
 /*                                                                       */
 /*  This software is provided with absolutely no warranty and no         */
 /*  support.                                                             */
 /*                                                                       */
 /*************************************************************************/

 /* shared memory implementation of the multigrid method
    implementation uses red-black gauss-seidel relaxation
    iterations, w cycles, and the method of half-injection for
    residual computation */

 #include <stdio.h>
 #include <math.h>
 #include <time.h>
 #include <stdlib.h>
 #include "decs.h"

 /* perform multigrid (w cycles)                                     */
 void multig(long my_id)
 {
    long iter;
    double wu;
    double errp;
    long m;
    long minlevel;
    long flag1;
    long flag2;
    long k;
    long my_num;
    double wmax;
    double local_err;
    double red_local_err;
    double black_local_err;
    double g_error;

    flag1 = 0;
    flag2 = 0;
    iter = 0;
    m = numlev-1;
    wmax = maxwork;
    minlevel = minlev;
    my_num = my_id;
    wu = 0.0;

    k = m;
    g_error = 1.0e30;
    while ((!flag1) && (!flag2)) {
      errp = g_error;
      iter++;
      if (my_num == MASTER) {
        multi->err_multi = 0.0;
      }

 /* barrier to make sure all procs have finished intadd or rescal   */
 /* before proceeding with relaxation                               */

 #if defined(MULTIPLE_BARRIERS)
      BARRIER(bars->error_barrier,nprocs)
 #else
      BARRIER(bars->barrier,nprocs)
 #endif
      relax(k,&red_local_err,RED_ITER,my_num);

 /* barrier to make sure all red computations have been performed   */

 #if defined(MULTIPLE_BARRIERS)
      BARRIER(bars->error_barrier,nprocs)
 #else
      BARRIER(bars->barrier,nprocs)
 #endif
      relax(k,&black_local_err,BLACK_ITER,my_num);

 /* compute max local error from red_local_err and black_local_err  */

      if (red_local_err > black_local_err) {
        local_err = red_local_err;
      } else {
        local_err = black_local_err;
      }

 /* update the global error if necessary                            */

      LOCK(locks->error_lock)
      if (local_err > multi->err_multi) {
        multi->err_multi = local_err;
      }
      UNLOCK(locks->error_lock)

 /* a single relaxation sweep at the finest level is one unit of    */
 /* work                                                            */

      wu+=pow((double)4.0,(double)k-m);

 /* barrier to make sure all processors have checked local error    */

 #if defined(MULTIPLE_BARRIERS)
      BARRIER(bars->error_barrier,nprocs)
 #else
      BARRIER(bars->barrier,nprocs)
 #endif
      g_error = multi->err_multi;

 /* barrier to make sure master does not cycle back to top of loop  */
 /* and reset global->err before we read it and decide what to do   */

 #if defined(MULTIPLE_BARRIERS)
      BARRIER(bars->error_barrier,nprocs)
 #else
      BARRIER(bars->barrier,nprocs)
 #endif
      if (g_error >= lev_tol[k]) {
        if (wu > wmax) {
 /* max work exceeded                                               */
          flag1 = 1;
          fprintf(stderr,"ERROR: Maximum work limit %0.5f exceeded\n",wmax);
          exit(-1);
        } else {
 /* if we have not converged                                        */
          if ((k != 1) && (g_error/errp >= 0.6) && (k > minlevel)) {
 /* if need to go to coarser grid                                   */
            rescal(k,my_num);
 /* transfer residual to rhs of coarser grid                        */
            lev_tol[k-1] = 0.3 * g_error;
            k = k-1;
            putz(k,my_num);
 /* make initial guess on coarser grid zero                         */
            g_error = 1.0e30;
          }
        }
      } else {
 /* if we have converged at this level                              */
        if (k == m) {
 /* if finest grid, we are done                                     */
          flag2 = 1;
        } else {
 /* else go to next finest grid                                     */
          intadd(k,my_num);
          k++;
          g_error = 1.0e30;
        }
      }
    }
    if (do_output) {
      if (my_num == MASTER) {
        printf("iter %ld, level %ld, residual norm %12.8e, work = %7.3f\n", iter,k,multi->err_multi,wu);
      }
    }
 }

 /* perform red or black iteration (not both)                    */
 void relax(long k, double *err, long color, long my_num)
 {
    long i;
    long j;
    long iend;
    long jend;
    long oddistart;
    long oddjstart;
    long evenistart;
    long evenjstart;
    long oddiendst;
    long eveniendst;
    long oddjendst;
    long evenjendst;
    double a;
    double h;
    double factor;
    double maxerr;
    double newerr;
    double oldval;
    double newval;

    i = 0;
    j = 0;

    *err = 0.0;
    h = lev_res[k];

 /* points whose sum of row and col index is even do a red iteration, */
 /* others do a black				                     */

    evenistart = gp[my_num].eist[k];
    evenjstart = gp[my_num].ejst[k];
    oddistart = gp[my_num].oist[k];
    oddjstart = gp[my_num].ojst[k];
    eveniendst = gp[my_num].eiest[k];
    evenjendst = gp[my_num].ejest[k];
    oddiendst = gp[my_num].oiest[k];
    oddjendst = gp[my_num].ojest[k];

    iend = gp[my_num].rel_start_y[k] + gp[my_num].rel_num_y[k];
    jend = gp[my_num].rel_start_x[k] + gp[my_num].rel_num_x[k];

    factor = 4.0 - eig2 * h * h ;
    maxerr = 0.0;
    if (color == RED_ITER) {
      for (i=evenistart;i<iend;i+=2) {
        for (j=evenjstart;j<jend;j+=2) {
          a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
 	     multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
 	     multi->rhs_multi[k][i][j] ;
          oldval = multi->q_multi[k][i][j];
          newval = a / factor;
          newerr = oldval - newval;
          multi->q_multi[k][i][j] = newval;
          if (fabs(newerr) > maxerr) {
            maxerr = fabs(newerr);
          }
        }
      }
      for (i=oddistart;i<iend;i+=2) {
        for (j=oddjstart;j<jend;j+=2) {
          a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
 	     multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
 	     multi->rhs_multi[k][i][j] ;
          oldval = multi->q_multi[k][i][j];
          newval = a / factor;
          newerr = oldval - newval;
          multi->q_multi[k][i][j] = newval;
          if (fabs(newerr) > maxerr) {
            maxerr = fabs(newerr);
          }
        }
      }
    } else if (color == BLACK_ITER) {
      for (i=evenistart;i<iend;i+=2) {
        for (j=oddjstart;j<jend;j+=2) {
          a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
 	     multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
 	     multi->rhs_multi[k][i][j] ;
          oldval = multi->q_multi[k][i][j];
          newval = a / factor;
          newerr = oldval - newval;
          multi->q_multi[k][i][j] = newval;
          if (fabs(newerr) > maxerr) {
            maxerr = fabs(newerr);
          }
        }
      }
      for (i=oddistart;i<iend;i+=2) {
        for (j=evenjstart;j<jend;j+=2) {
          a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
 	     multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
 	     multi->rhs_multi[k][i][j] ;
          oldval = multi->q_multi[k][i][j];
          newval = a / factor;
          newerr = oldval - newval;
          multi->q_multi[k][i][j] = newval;
          if (fabs(newerr) > maxerr) {
            maxerr = fabs(newerr);
          }
        }
      }
    }
    *err = maxerr;
 }

 /* perform half-injection to next coarsest level                */
 void rescal(long kf, long my_num)
 {
    long ic;
    long if17;
    long jf;
    long jc;
    long krc;
    long istart;
    long iend;
    long jstart;
    long jend;
    double hf;
    double hc;
    double s;
    double s1;
    double s2;
    double s3;
    double s4;
    double factor;
    double int1;
    double int2;
    double i_int_factor;
    double j_int_factor;
    double int_val;

    krc = kf - 1;
    hc = lev_res[krc];
    hf = lev_res[kf];

    istart = gp[my_num].rlist[krc];
    jstart = gp[my_num].rljst[krc];
    iend = gp[my_num].rlien[krc];
    jend = gp[my_num].rljen[krc];
    iend = gp[my_num].rel_start_y[krc] + gp[my_num].rel_num_y[krc] - 1;
    jend = gp[my_num].rel_start_x[krc] + gp[my_num].rel_num_x[krc] - 1;

    factor = 4.0 - eig2 * hf * hf;

    if17=2*(istart-1);
    for(ic=istart;ic<=iend;ic++) {
      if17+=2;
      i_int_factor = ic * i_int_coeff[krc] * 0.5;
      jf = 2 * (jstart - 1);
      for(jc=jstart;jc<=jend;jc++) {
        jf+=2;
        j_int_factor = jc*j_int_coeff[krc] * 0.5;
 /* method of half-injection uses 2.0 instead of 4.0 */
        s = multi->q_multi[kf][if17][jf+1] + multi->q_multi[kf][if17][jf-1] +
            multi->q_multi[kf][if17-1][jf] + multi->q_multi[kf][if17+1][jf];
        s1 = 2.0 * (multi->rhs_multi[kf][if17][jf] - s +
 		   factor * multi->q_multi[kf][if17][jf]);
        if ((if17 == 2) || (jf ==2)) {
          s2 = 0;
        } else {
          s = multi->q_multi[kf][if17][jf-1] + multi->q_multi[kf][if17][jf-3] +
              multi->q_multi[kf][if17-1][jf-2] + multi->q_multi[kf][if17+1][jf-2];
          s2 = 2.0 * (multi->rhs_multi[kf][if17][jf-2] - s +
 	  	   factor * multi->q_multi[kf][if17][jf-2]);
        }
        if ((if17 == 2) || (jf ==2)) {
          s3 = 0;
        } else {
          s = multi->q_multi[kf][if17-2][jf+1] + multi->q_multi[kf][if17-2][jf-1] +
              multi->q_multi[kf][if17-3][jf] + multi->q_multi[kf][if17-1][jf];
          s3 = 2.0 * (multi->rhs_multi[kf][if17-2][jf] - s +
 		     factor * multi->q_multi[kf][if17-2][jf]);
        }
        if ((if17 == 2) || (jf ==2)) {
          s4 = 0;
        } else {
          s = multi->q_multi[kf][if17-2][jf-1] + multi->q_multi[kf][if17-2][jf-3] +
          multi->q_multi[kf][if17-3][jf-2] + multi->q_multi[kf][if17-1][jf-2];
          s4 = 2.0 * (multi->rhs_multi[kf][if17-2][jf-2] - s +
 		   factor * multi->q_multi[kf][if17-2][jf-2]);
        }
        int1 = j_int_factor*s4 + (1.0-j_int_factor)*s3;
        int2 = j_int_factor*s2 + (1.0-j_int_factor)*s1;
        int_val = i_int_factor*int1+(1.0-i_int_factor)*int2;
        multi->rhs_multi[krc][ic][jc] = i_int_factor*int1+(1.0-i_int_factor)*int2;
      }
    }
 }

 /* perform interpolation and addition to next finest grid       */
 void intadd(long kc, long my_num)
 {
    long ic;
    long if17;
    long jf;
    long jc;
    long kf;
    long istart;
    long jstart;
    long iend;
    long jend;
    double hc;
    double hf;
    long ifine1;
    long ifine2;
    long jfine1;
    long jfine2;
    double int1;
    double int2;
    double i_int_factor1;
    double j_int_factor1;
    double i_int_factor2;
    double j_int_factor2;

    kf = kc + 1;
    hc = lev_res[kc];
    hf = lev_res[kf];

    istart = gp[my_num].iist[kc];
    jstart = gp[my_num].ijst[kc];
    iend = gp[my_num].iien[kc];
    jend = gp[my_num].ijen[kc];

    istart = gp[my_num].rel_start_y[kc];
    jstart = gp[my_num].rel_start_x[kc];
    iend = gp[my_num].rel_start_y[kc] + gp[my_num].rel_num_y[kc] - 1;
    jend = gp[my_num].rel_start_x[kc] + gp[my_num].rel_num_x[kc] - 1;
    if17 = 2*(istart-1);
    for(ic=istart;ic<=iend;ic++) {

      if17+=2;
      ifine1 = if17-1;
      ifine2 = if17;
      i_int_factor1= ((imx[kc]-2)-(ic-1)) * (i_int_coeff[kf]);
      i_int_factor2= ic * i_int_coeff[kf];

      jf = 2*(jstart-1);

      for(jc=jstart;jc<=jend;jc++) {
        jf+=2;
        jfine1 = jf-1;
        jfine2 = jf;
        j_int_factor1= ((jmx[kc]-2)-(jc-1)) * (j_int_coeff[kf]);
        j_int_factor2= jc * j_int_coeff[kf];

        int1 = j_int_factor1*multi->q_multi[kc][ic][jc-1] +
 	      (1.0-j_int_factor1)*multi->q_multi[kc][ic][jc];
        int2 = j_int_factor1*multi->q_multi[kc][ic-1][jc-1] +
 	      (1.0-j_int_factor1)*multi->q_multi[kc][ic-1][jc];
        multi->q_multi[kf][if17-1][jf-1] += i_int_factor1*int2 +
 	      (1.0-i_int_factor1)*int1;
        int2 = j_int_factor1*multi->q_multi[kc][ic+1][jc-1] +
 	      (1.0-j_int_factor1)*multi->q_multi[kc][ic+1][jc];
        multi->q_multi[kf][if17][jf-1] += i_int_factor2*int2 +
 	      (1.0-i_int_factor2)*int1;
        int1 = j_int_factor2*multi->q_multi[kc][ic][jc+1] +
 	      (1.0-j_int_factor2)*multi->q_multi[kc][ic][jc];
        int2 = j_int_factor2*multi->q_multi[kc][ic-1][jc+1] +
 	      (1.0-j_int_factor2)*multi->q_multi[kc][ic-1][jc];
        multi->q_multi[kf][if17-1][jf] += i_int_factor1*int2 +
 	      (1.0-i_int_factor1)*int1;
        int2 = j_int_factor2*multi->q_multi[kc][ic+1][jc+1] +
 	      (1.0-j_int_factor2)*multi->q_multi[kc][ic+1][jc];
        multi->q_multi[kf][if17][jf] += i_int_factor2*int2 +
 	      (1.0-i_int_factor2)*int1;
      }
    }
 }

 /* initialize a grid to zero in parallel                        */
 void putz(long k, long my_num)
 {
    long i;
    long j;
    long istart;
    long jstart;
    long iend;
    long jend;

    istart = gp[my_num].pist[k];
    jstart = gp[my_num].pjst[k];
    iend = gp[my_num].pien[k];
    jend = gp[my_num].pjen[k];
    for (i=istart;i<=iend;i++) {
      for (j=jstart;j<=jend;j++) {
        multi->q_multi[k][i][j] = 0.0;
      }
    }
 }
	/*************************************************************************/
	/* */
	/* Copyright (c) 1994 Stanford University */
	/* */
	/* All rights reserved. */
	/* */
	/* Permission is given to use, copy, and modify this software for any */
	/* non-commercial purpose as long as this copyright notice is not */
	/* removed. All other uses, including redistribution in whole or in */
	/* part, are forbidden without prior written permission. */
	/* */
	/* This software is provided with absolutely no warranty and no */
	/* support. */
	/* */
	/*************************************************************************/

	/* shared memory implementation of the multigrid method
	implementation uses red-black gauss-seidel relaxation
	iterations, w cycles, and the method of half-injection for
	residual computation */

	#include <stdio.h>
	#include <math.h>
	#include <time.h>
	#include <stdlib.h>
	#include "decs.h"

	/* perform multigrid (w cycles) */
	void multig(long my_id)
	{
	long iter;
	double wu;
	double errp;
	long m;
	long minlevel;
	long flag1;
	long flag2;
	long k;
	long my_num;
	double wmax;
	double local_err;
	double red_local_err;
	double black_local_err;
	double g_error;

	flag1 = 0;
	flag2 = 0;
	iter = 0;
	m = numlev-1;
	wmax = maxwork;
	minlevel = minlev;
	my_num = my_id;
	wu = 0.0;

	k = m;
	g_error = 1.0e30;
	while ((!flag1) && (!flag2)) {
	errp = g_error;
	iter++;
	if (my_num == MASTER) {
	multi->err_multi = 0.0;
	}

	/* barrier to make sure all procs have finished intadd or rescal */
	/* before proceeding with relaxation */

	#if defined(MULTIPLE_BARRIERS)
	BARRIER(bars->error_barrier,nprocs)
	#else
	BARRIER(bars->barrier,nprocs)
	#endif
	relax(k,&red_local_err,RED_ITER,my_num);

	/* barrier to make sure all red computations have been performed */

	#if defined(MULTIPLE_BARRIERS)
	BARRIER(bars->error_barrier,nprocs)
	#else
	BARRIER(bars->barrier,nprocs)
	#endif
	relax(k,&black_local_err,BLACK_ITER,my_num);

	/* compute max local error from red_local_err and black_local_err */

	if (red_local_err > black_local_err) {
	local_err = red_local_err;
	} else {
	local_err = black_local_err;
	}

	/* update the global error if necessary */

	LOCK(locks->error_lock)
	if (local_err > multi->err_multi) {
	multi->err_multi = local_err;
	}
	UNLOCK(locks->error_lock)

	/* a single relaxation sweep at the finest level is one unit of */
	/* work */

	wu+=pow((double)4.0,(double)k-m);

	/* barrier to make sure all processors have checked local error */

	#if defined(MULTIPLE_BARRIERS)
	BARRIER(bars->error_barrier,nprocs)
	#else
	BARRIER(bars->barrier,nprocs)
	#endif
	g_error = multi->err_multi;

	/* barrier to make sure master does not cycle back to top of loop */
	/* and reset global->err before we read it and decide what to do */

	#if defined(MULTIPLE_BARRIERS)
	BARRIER(bars->error_barrier,nprocs)
	#else
	BARRIER(bars->barrier,nprocs)
	#endif
	if (g_error >= lev_tol[k]) {
	if (wu > wmax) {
	/* max work exceeded */
	flag1 = 1;
	fprintf(stderr,"ERROR: Maximum work limit %0.5f exceeded\n",wmax);
	exit(-1);
	} else {
	/* if we have not converged */
	if ((k != 1) && (g_error/errp >= 0.6) && (k > minlevel)) {
	/* if need to go to coarser grid */
	rescal(k,my_num);
	/* transfer residual to rhs of coarser grid */
	lev_tol[k-1] = 0.3 * g_error;
	k = k-1;
	putz(k,my_num);
	/* make initial guess on coarser grid zero */
	g_error = 1.0e30;
	}
	}
	} else {
	/* if we have converged at this level */
	if (k == m) {
	/* if finest grid, we are done */
	flag2 = 1;
	} else {
	/* else go to next finest grid */
	intadd(k,my_num);
	k++;
	g_error = 1.0e30;
	}
	}
	}
	if (do_output) {
	if (my_num == MASTER) {
	printf("iter %ld, level %ld, residual norm %12.8e, work = %7.3f\n", iter,k,multi->err_multi,wu);
	}
	}
	}

	/* perform red or black iteration (not both) */
	void relax(long k, double *err, long color, long my_num)
	{
	long i;
	long j;
	long iend;
	long jend;
	long oddistart;
	long oddjstart;
	long evenistart;
	long evenjstart;
	long oddiendst;
	long eveniendst;
	long oddjendst;
	long evenjendst;
	double a;
	double h;
	double factor;
	double maxerr;
	double newerr;
	double oldval;
	double newval;

	i = 0;
	j = 0;

	*err = 0.0;
	h = lev_res[k];

	/* points whose sum of row and col index is even do a red iteration, */
	/* others do a black */

	evenistart = gp[my_num].eist[k];
	evenjstart = gp[my_num].ejst[k];
	oddistart = gp[my_num].oist[k];
	oddjstart = gp[my_num].ojst[k];
	eveniendst = gp[my_num].eiest[k];
	evenjendst = gp[my_num].ejest[k];
	oddiendst = gp[my_num].oiest[k];
	oddjendst = gp[my_num].ojest[k];

	iend = gp[my_num].rel_start_y[k] + gp[my_num].rel_num_y[k];
	jend = gp[my_num].rel_start_x[k] + gp[my_num].rel_num_x[k];

	factor = 4.0 - eig2 * h * h ;
	maxerr = 0.0;
	if (color == RED_ITER) {
	for (i=evenistart;i<iend;i+=2) {
	for (j=evenjstart;j<jend;j+=2) {
	a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
	multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
	multi->rhs_multi[k][i][j] ;
	oldval = multi->q_multi[k][i][j];
	newval = a / factor;
	newerr = oldval - newval;
	multi->q_multi[k][i][j] = newval;
	if (fabs(newerr) > maxerr) {
	maxerr = fabs(newerr);
	}
	}
	}
	for (i=oddistart;i<iend;i+=2) {
	for (j=oddjstart;j<jend;j+=2) {
	a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
	multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
	multi->rhs_multi[k][i][j] ;
	oldval = multi->q_multi[k][i][j];
	newval = a / factor;
	newerr = oldval - newval;
	multi->q_multi[k][i][j] = newval;
	if (fabs(newerr) > maxerr) {
	maxerr = fabs(newerr);
	}
	}
	}
	} else if (color == BLACK_ITER) {
	for (i=evenistart;i<iend;i+=2) {
	for (j=oddjstart;j<jend;j+=2) {
	a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
	multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
	multi->rhs_multi[k][i][j] ;
	oldval = multi->q_multi[k][i][j];
	newval = a / factor;
	newerr = oldval - newval;
	multi->q_multi[k][i][j] = newval;
	if (fabs(newerr) > maxerr) {
	maxerr = fabs(newerr);
	}
	}
	}
	for (i=oddistart;i<iend;i+=2) {
	for (j=evenjstart;j<jend;j+=2) {
	a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
	multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
	multi->rhs_multi[k][i][j] ;
	oldval = multi->q_multi[k][i][j];
	newval = a / factor;
	newerr = oldval - newval;
	multi->q_multi[k][i][j] = newval;
	if (fabs(newerr) > maxerr) {
	maxerr = fabs(newerr);
	}
	}
	}
	}
	*err = maxerr;
	}

	/* perform half-injection to next coarsest level */
	void rescal(long kf, long my_num)
	{
	long ic;
	long if17;
	long jf;
	long jc;
	long krc;
	long istart;
	long iend;
	long jstart;
	long jend;
	double hf;
	double hc;
	double s;
	double s1;
	double s2;
	double s3;
	double s4;
	double factor;
	double int1;
	double int2;
	double i_int_factor;
	double j_int_factor;
	double int_val;

	krc = kf - 1;
	hc = lev_res[krc];
	hf = lev_res[kf];

	istart = gp[my_num].rlist[krc];
	jstart = gp[my_num].rljst[krc];
	iend = gp[my_num].rlien[krc];
	jend = gp[my_num].rljen[krc];
	iend = gp[my_num].rel_start_y[krc] + gp[my_num].rel_num_y[krc] - 1;
	jend = gp[my_num].rel_start_x[krc] + gp[my_num].rel_num_x[krc] - 1;

	factor = 4.0 - eig2 * hf * hf;

	if17=2*(istart-1);
	for(ic=istart;ic<=iend;ic++) {
	if17+=2;
	i_int_factor = ic * i_int_coeff[krc] * 0.5;
	jf = 2 * (jstart - 1);
	for(jc=jstart;jc<=jend;jc++) {
	jf+=2;
	j_int_factor = jcj_int_coeff[krc] 0.5;
	/* method of half-injection uses 2.0 instead of 4.0 */
	s = multi->q_multi[kf][if17][jf+1] + multi->q_multi[kf][if17][jf-1] +
	multi->q_multi[kf][if17-1][jf] + multi->q_multi[kf][if17+1][jf];
	s1 = 2.0 * (multi->rhs_multi[kf][if17][jf] - s +
	factor * multi->q_multi[kf][if17][jf]);
	if ((if17 == 2) \|\| (jf ==2)) {
	s2 = 0;
	} else {
	s = multi->q_multi[kf][if17][jf-1] + multi->q_multi[kf][if17][jf-3] +
	multi->q_multi[kf][if17-1][jf-2] + multi->q_multi[kf][if17+1][jf-2];
	s2 = 2.0 * (multi->rhs_multi[kf][if17][jf-2] - s +
	factor * multi->q_multi[kf][if17][jf-2]);
	}
	if ((if17 == 2) \|\| (jf ==2)) {
	s3 = 0;
	} else {
	s = multi->q_multi[kf][if17-2][jf+1] + multi->q_multi[kf][if17-2][jf-1] +
	multi->q_multi[kf][if17-3][jf] + multi->q_multi[kf][if17-1][jf];
	s3 = 2.0 * (multi->rhs_multi[kf][if17-2][jf] - s +
	factor * multi->q_multi[kf][if17-2][jf]);
	}
	if ((if17 == 2) \|\| (jf ==2)) {
	s4 = 0;
	} else {
	s = multi->q_multi[kf][if17-2][jf-1] + multi->q_multi[kf][if17-2][jf-3] +
	multi->q_multi[kf][if17-3][jf-2] + multi->q_multi[kf][if17-1][jf-2];
	s4 = 2.0 * (multi->rhs_multi[kf][if17-2][jf-2] - s +
	factor * multi->q_multi[kf][if17-2][jf-2]);
	}
	int1 = j_int_factors4 + (1.0-j_int_factor)s3;
	int2 = j_int_factors2 + (1.0-j_int_factor)s1;
	int_val = i_int_factorint1+(1.0-i_int_factor)int2;
	multi->rhs_multi[krc][ic][jc] = i_int_factorint1+(1.0-i_int_factor)int2;
	}
	}
	}

	/* perform interpolation and addition to next finest grid */
	void intadd(long kc, long my_num)
	{
	long ic;
	long if17;
	long jf;
	long jc;
	long kf;
	long istart;
	long jstart;
	long iend;
	long jend;
	double hc;
	double hf;
	long ifine1;
	long ifine2;
	long jfine1;
	long jfine2;
	double int1;
	double int2;
	double i_int_factor1;
	double j_int_factor1;
	double i_int_factor2;
	double j_int_factor2;

	kf = kc + 1;
	hc = lev_res[kc];
	hf = lev_res[kf];

	istart = gp[my_num].iist[kc];
	jstart = gp[my_num].ijst[kc];
	iend = gp[my_num].iien[kc];
	jend = gp[my_num].ijen[kc];

	istart = gp[my_num].rel_start_y[kc];
	jstart = gp[my_num].rel_start_x[kc];
	iend = gp[my_num].rel_start_y[kc] + gp[my_num].rel_num_y[kc] - 1;
	jend = gp[my_num].rel_start_x[kc] + gp[my_num].rel_num_x[kc] - 1;
	if17 = 2*(istart-1);
	for(ic=istart;ic<=iend;ic++) {

	if17+=2;
	ifine1 = if17-1;
	ifine2 = if17;
	i_int_factor1= ((imx[kc]-2)-(ic-1)) * (i_int_coeff[kf]);
	i_int_factor2= ic * i_int_coeff[kf];

	jf = 2*(jstart-1);

	for(jc=jstart;jc<=jend;jc++) {
	jf+=2;
	jfine1 = jf-1;
	jfine2 = jf;
	j_int_factor1= ((jmx[kc]-2)-(jc-1)) * (j_int_coeff[kf]);
	j_int_factor2= jc * j_int_coeff[kf];

	int1 = j_int_factor1*multi->q_multi[kc][ic][jc-1] +
	(1.0-j_int_factor1)*multi->q_multi[kc][ic][jc];
	int2 = j_int_factor1*multi->q_multi[kc][ic-1][jc-1] +
	(1.0-j_int_factor1)*multi->q_multi[kc][ic-1][jc];
	multi->q_multi[kf][if17-1][jf-1] += i_int_factor1*int2 +
	(1.0-i_int_factor1)*int1;
	int2 = j_int_factor1*multi->q_multi[kc][ic+1][jc-1] +
	(1.0-j_int_factor1)*multi->q_multi[kc][ic+1][jc];
	multi->q_multi[kf][if17][jf-1] += i_int_factor2*int2 +
	(1.0-i_int_factor2)*int1;
	int1 = j_int_factor2*multi->q_multi[kc][ic][jc+1] +
	(1.0-j_int_factor2)*multi->q_multi[kc][ic][jc];
	int2 = j_int_factor2*multi->q_multi[kc][ic-1][jc+1] +
	(1.0-j_int_factor2)*multi->q_multi[kc][ic-1][jc];
	multi->q_multi[kf][if17-1][jf] += i_int_factor1*int2 +
	(1.0-i_int_factor1)*int1;
	int2 = j_int_factor2*multi->q_multi[kc][ic+1][jc+1] +
	(1.0-j_int_factor2)*multi->q_multi[kc][ic+1][jc];
	multi->q_multi[kf][if17][jf] += i_int_factor2*int2 +
	(1.0-i_int_factor2)*int1;
	}
	}
	}

	/* initialize a grid to zero in parallel */
	void putz(long k, long my_num)
	{
	long i;
	long j;
	long istart;
	long jstart;
	long iend;
	long jend;

	istart = gp[my_num].pist[k];
	jstart = gp[my_num].pjst[k];
	iend = gp[my_num].pien[k];
	jend = gp[my_num].pjen[k];
	for (i=istart;i<=iend;i++) {
	for (j=jstart;j<=jend;j++) {
	multi->q_multi[k][i][j] = 0.0;
	}
	}
	}