src/parsec/disk-image/parsec/parsec-benchmark/pkgs/apps/vips/src/libvips/convolution/im_conv.c - public/gem5-resources - Git at Google

 /* im_conv
  *
  * Copyright: 1990, N. Dessipris.
  *
  * Author: Nicos Dessipris & Kirk Martinez
  * Written on: 29/04/1991
  * Modified on: 19/05/1991
  * 8/7/93 JC
  *      - adapted for partial v2
  *      - memory leaks fixed
  *      - ANSIfied
  * 23/7/93 JC
  *	- inner loop unrolled with a switch - 25% speed-up!
  * 13/12/93 JC
  *	- tiny rounding error removed
  * 7/10/94 JC
  *	- new IM_ARRAY() macro
  *	- various simplifications
  *	- evalend callback added
  * 1/2/95 JC
  *	- use of IM_REGION_ADDR() updated
  *	- output size was incorrect! see comment below
  *	- bug with large non-square matricies fixed too
  *	- uses new im_embed() function
  * 13/7/98 JC
  *	- wierd bug ... im_free_imask is no longer directly called for close
  *	  callback, caused SIGKILL on solaris 2.6 ... linker bug?
  * 9/3/01 JC
  *	- reworked and simplified, about 10% faster
  *	- slightly better range clipping
  * 27/7/01 JC
  *	- reject masks with scale == 0
  * 7/4/04
  *	- im_conv() now uses im_embed() with edge stretching on the input, not
  *	  the output
  *	- sets Xoffset / Yoffset
  * 11/11/05
  * 	- simpler inner loop avoids gcc4 bug
  * 7/11/07
  * 	- new evalstart/end callbacks
  * 12/5/08
  * 	- int rounding was +1 too much, argh
  * 	- only rebuild the buffer offsets if bpl changes
  * 5/4/09
  * 	- tiny speedups and cleanups
  * 	- add restrict, though it doesn't seem to help gcc
  * 12/11/09
  * 	- only check for non-zero elements once
  * 	- add mask-all-zero check
  * 	- cleanups
  * 3/2/10
  * 	- gtkdoc
  * 	- more cleanups
  */

 /*

     This file is part of VIPS.

     VIPS is free software; you can redistribute it and/or modify
     it under the terms of the GNU Lesser General Public License as published by
     the Free Software Foundation; either version 2 of the License, or
     (at your option) any later version.

     This program is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU Lesser General Public License for more details.

     You should have received a copy of the GNU Lesser General Public License
     along with this program; if not, write to the Free Software
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

  */

 /*

     These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk

  */

 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif /*HAVE_CONFIG_H*/
 #include <vips/intl.h>

 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>

 #include <vips/vips.h>

 #ifdef WITH_DMALLOC
 #include <dmalloc.h>
 #endif /*WITH_DMALLOC*/

 /* Our parameters ... we take a copy of the mask argument, plus we make a
  * smaller version with the zeros squeezed out.
  */
 typedef struct {
 	IMAGE *in;
 	IMAGE *out;
 	INTMASK *mask;		/* Copy of mask arg */

 	int nnz;		/* Number of non-zero mask elements */
 	int *coeff;		/* Array of non-zero mask coefficients */
 	int *coeff_pos;		/* Index of each nnz element in mask->coeff */

 	int underflow;		/* Global underflow/overflow counts */
 	int overflow;
 } Conv;

 static int
 conv_close( Conv *conv )
 {
 	IM_FREEF( im_free_imask, conv->mask );

         return( 0 );
 }

 static int
 conv_evalstart( Conv *conv )
 {
 	/* Reset underflow/overflow count.
 	 */
 	conv->overflow = 0;
 	conv->underflow = 0;

         return( 0 );
 }

 static int
 conv_evalend( Conv *conv )
 {
 	/* Print underflow/overflow count.
 	 */
 	if( conv->overflow || conv->underflow )
 		im_warn( "im_conv",
 			_( "%d overflows and %d underflows detected" ),
 			conv->overflow, conv->underflow );

         return( 0 );
 }

 static Conv *
 conv_new( IMAGE *in, IMAGE *out, INTMASK *mask )
 {
         Conv *conv = IM_NEW( out, Conv );
 	const int ne = mask->xsize * mask->ysize;
         int i;

         if( !conv )
                 return( NULL );

         conv->in = in;
         conv->out = out;
         conv->mask = NULL;
         conv->nnz = 0;
         conv->coeff = NULL;
         conv->coeff_pos = NULL;
         conv->underflow = 0;
         conv->overflow = 0;

         if( im_add_close_callback( out,
 		(im_callback_fn) conv_close, conv, NULL ) ||
 		im_add_close_callback( out,
 			(im_callback_fn) conv_evalstart, conv, NULL ) ||
 		im_add_close_callback( out,
 			(im_callback_fn) conv_evalend, conv, NULL ) ||
         	!(conv->coeff = IM_ARRAY( out, ne, int )) ||
         	!(conv->coeff_pos = IM_ARRAY( out, ne, int )) ||
         	!(conv->mask = im_dup_imask( mask, "conv_mask" )) )
                 return( NULL );

         /* Find non-zero mask elements.
          */
         for( i = 0; i < ne; i++ )
                 if( mask->coeff[i] ) {
 			conv->coeff[conv->nnz] = mask->coeff[i];
 			conv->coeff_pos[conv->nnz] = i;
 			conv->nnz += 1;
 		}

 	/* Was the whole mask zero? We must have at least 1 element in there:
 	 * set it to zero.
 	 */
 	if( conv->nnz == 0 ) {
 		conv->coeff[0] = mask->coeff[0];
 		conv->coeff_pos[0] = 0;
 		conv->nnz = 1;
 	}

         return( conv );
 }

 /* Our sequence value.
  */
 typedef struct {
 	Conv *conv;
 	REGION *ir;		/* Input region */

 	int *offsets;		/* Offsets for each non-zero matrix element */
 	PEL **pts;		/* Per-non-zero mask element pointers */

 	int underflow;		/* Underflow/overflow counts */
 	int overflow;

 	int last_bpl;		/* Avoid recalcing offsets, if we can */
 } ConvSequence;

 /* Free a sequence value.
  */
 static int
 conv_stop( void *vseq, void *a, void *b )
 {
 	ConvSequence *seq = (ConvSequence *) vseq;
 	Conv *conv = (Conv *) b;

 	/* Add local under/over counts to global counts.
 	 */
 	conv->overflow += seq->overflow;
 	conv->underflow += seq->underflow;

 	IM_FREEF( im_region_free, seq->ir );

 	return( 0 );
 }

 /* Convolution start function.
  */
 static void *
 conv_start( IMAGE *out, void *a, void *b )
 {
 	IMAGE *in = (IMAGE *) a;
 	Conv *conv = (Conv *) b;
 	ConvSequence *seq;

 	if( !(seq = IM_NEW( out, ConvSequence )) )
 		return( NULL );

 	/* Init!
 	 */
 	seq->conv = conv;
 	seq->ir = NULL;
 	seq->pts = NULL;
 	seq->underflow = 0;
 	seq->overflow = 0;
 	seq->last_bpl = -1;

 	/* Attach region and arrays.
 	 */
 	seq->ir = im_region_create( in );
 	seq->offsets = IM_ARRAY( out, conv->nnz, int );
 	seq->pts = IM_ARRAY( out, conv->nnz, PEL * );
 	if( !seq->ir || !seq->offsets || !seq->pts ) {
 		conv_stop( seq, in, conv );
 		return( NULL );
 	}

 	return( seq );
 }

 #define INNER { \
 	sum += t[i] * p[i][x]; \
 	i += 1; \
 }

 /* INT inner loops.
  */
 #define CONV_INT( TYPE, IM_CLIP ) { \
 	TYPE ** restrict p = (TYPE **) seq->pts; \
 	TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
 	\
 	for( x = 0; x < sz; x++ ) {  \
 		int sum; \
 		int i; \
  		\
 		sum = 0; \
 		i = 0; \
 		IM_UNROLL( conv->nnz, INNER ); \
  		\
 		sum = ((sum + rounding) / mask->scale) + mask->offset; \
  		\
 		IM_CLIP; \
 		\
 		q[x] = sum;  \
 	}  \
 }

 /* FLOAT inner loops.
  */
 #define CONV_FLOAT( TYPE ) { \
 	TYPE ** restrict p = (TYPE **) seq->pts; \
 	TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
 	\
 	for( x = 0; x < sz; x++ ) {  \
 		double sum; \
 		int i; \
  		\
 		sum = 0; \
 		i = 0; \
 		IM_UNROLL( conv->nnz, INNER ); \
  		\
 		sum = (sum / mask->scale) + mask->offset; \
 		\
 		q[x] = sum;  \
 	}  \
 }

 /* Convolve!
  */
 static int
 conv_gen( REGION *or, void *vseq, void *a, void *b )
 {
 	ConvSequence *seq = (ConvSequence *) vseq;
 	IMAGE *in = (IMAGE *) a;
 	Conv *conv = (Conv *) b;
 	REGION *ir = seq->ir;
 	INTMASK *mask = conv->mask;
 	int * restrict t = conv->coeff;

 	/* You might think this should be (scale + 1) / 2, but then we'd be
 	 * adding one for scale == 1.
 	 */
 	int rounding = mask->scale / 2;

 	Rect *r = &or->valid;
 	Rect s;
 	int le = r->left;
 	int to = r->top;
 	int bo = IM_RECT_BOTTOM( r );
 	int sz = IM_REGION_N_ELEMENTS( or );

 	int x, y, z, i;

 	/* Prepare the section of the input image we need. A little larger
 	 * than the section of the output image we are producing.
 	 */
 	s = *r;
 	s.width += mask->xsize - 1;
 	s.height += mask->ysize - 1;
 	if( im_prepare( ir, &s ) )
 		return( -1 );

         /* Fill offset array. Only do this if the bpl has changed since the
 	 * previous im_prepare().
 	 */
 	if( seq->last_bpl != IM_REGION_LSKIP( ir ) ) {
 		seq->last_bpl = IM_REGION_LSKIP( ir );

 		for( i = 0; i < conv->nnz; i++ ) {
 			z = conv->coeff_pos[i];
 			x = z % conv->mask->xsize;
 			y = z / conv->mask->xsize;

 			seq->offsets[i] =
 				IM_REGION_ADDR( ir, x + le, y + to ) -
 				IM_REGION_ADDR( ir, le, to );
 		}
 	}

 	for( y = to; y < bo; y++ ) {
 		/* Init pts for this line of PELs.
 		 */
                 for( z = 0; z < conv->nnz; z++ )
                         seq->pts[z] = seq->offsets[z] +
                                 (PEL *) IM_REGION_ADDR( ir, le, y );

 		switch( in->BandFmt ) {
 		case IM_BANDFMT_UCHAR:
 			CONV_INT( unsigned char, IM_CLIP_UCHAR( sum, seq ) );
 			break;

 		case IM_BANDFMT_CHAR:
 			CONV_INT( signed char, IM_CLIP_CHAR( sum, seq ) );
 			break;

 		case IM_BANDFMT_USHORT:
 			CONV_INT( unsigned short, IM_CLIP_USHORT( sum, seq ) );
 			break;

 		case IM_BANDFMT_SHORT:
 			CONV_INT( signed short, IM_CLIP_SHORT( sum, seq ) );
 			break;

 		case IM_BANDFMT_UINT:
 			CONV_INT( unsigned int, IM_CLIP_NONE( sum, seq ) );
 			break;

 		case IM_BANDFMT_INT:
 			CONV_INT( signed int, IM_CLIP_NONE( sum, seq ) );
 			break;

 		case IM_BANDFMT_FLOAT:
 			CONV_FLOAT( float );
 			break;

 		case IM_BANDFMT_DOUBLE:
 			CONV_FLOAT( double );
 			break;

 		default:
 			g_assert( 0 );
 		}
 	}

 	return( 0 );
 }

 int
 im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
 {
 	Conv *conv;

 	/* Check parameters.
 	 */
 	if( im_piocheck( in, out ) ||
 		im_check_uncoded( "im_conv", in ) ||
 		im_check_noncomplex( "im_conv", in ) ||
 		im_check_imask( "im_conv", mask ) )
 		return( -1 );
 	if( mask->scale == 0 ) {
 		im_error( "im_conv", "%s", "mask scale must be non-zero" );
 		return( -1 );
 	}
 	if( !(conv = conv_new( in, out, mask )) )
 		return( -1 );

 	/* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output
 	 * would be 1x1.
 	 */
 	if( im_cp_desc( out, in ) )
 		return( -1 );
 	out->Xsize -= mask->xsize - 1;
 	out->Ysize -= mask->ysize - 1;
 	if( out->Xsize <= 0 || out->Ysize <= 0 ) {
 		im_error( "im_conv", "%s", _( "image too small for mask" ) );
 		return( -1 );
 	}

 	/* Set demand hints. FATSTRIP is good for us, as THINSTRIP will cause
 	 * too many recalculations on overlaps.
 	 */
 	if( im_demand_hint( out, IM_FATSTRIP, in, NULL ) ||
 		im_generate( out, conv_start, conv_gen, conv_stop, in, conv ) )
 		return( -1 );

 	out->Xoffset = -mask->xsize / 2;
 	out->Yoffset = -mask->ysize / 2;

 	return( 0 );
 }

 /**
  * im_conv:
  * @in: input image
  * @out: output image
  * @mask: convolution mask
  *
  * Convolve @in with @mask using integer arithmetic. The output image
  * always has the same #VipsBandFmt as the input image. Non-complex images
  * only.
  *
  * Each output pixel is
  * calculated as sigma[i]{pixel[i] * mask[i]} / scale + offset, where scale
  * and offset are part of @mask. For integer @in, the division by scale
  * includes round-to-nearest.
  *
  * See also: im_conv_f(), im_convsep(), im_create_imaskv().
  *
  * Returns: 0 on success, -1 on error
  */
 int
 im_conv( IMAGE *in, IMAGE *out, INTMASK *mask )
 {
 	IMAGE *t1 = im_open_local( out, "im_conv intermediate", "p" );

 	if( !t1 ||
 		im_embed( in, t1, 1, mask->xsize / 2, mask->ysize / 2,
 			in->Xsize + mask->xsize - 1,
 			in->Ysize + mask->ysize - 1 ) ||
 		im_conv_raw( t1, out, mask ) )
 		return( -1 );

 	out->Xoffset = 0;
 	out->Yoffset = 0;

 	return( 0 );
 }
	/* im_conv
	*
	* Copyright: 1990, N. Dessipris.
	*
	* Author: Nicos Dessipris & Kirk Martinez
	* Written on: 29/04/1991
	* Modified on: 19/05/1991
	* 8/7/93 JC
	* - adapted for partial v2
	* - memory leaks fixed
	* - ANSIfied
	* 23/7/93 JC
	* - inner loop unrolled with a switch - 25% speed-up!
	* 13/12/93 JC
	* - tiny rounding error removed
	* 7/10/94 JC
	* - new IM_ARRAY() macro
	* - various simplifications
	* - evalend callback added
	* 1/2/95 JC
	* - use of IM_REGION_ADDR() updated
	* - output size was incorrect! see comment below
	* - bug with large non-square matricies fixed too
	* - uses new im_embed() function
	* 13/7/98 JC
	* - wierd bug ... im_free_imask is no longer directly called for close
	* callback, caused SIGKILL on solaris 2.6 ... linker bug?
	* 9/3/01 JC
	* - reworked and simplified, about 10% faster
	* - slightly better range clipping
	* 27/7/01 JC
	* - reject masks with scale == 0
	* 7/4/04
	* - im_conv() now uses im_embed() with edge stretching on the input, not
	* the output
	* - sets Xoffset / Yoffset
	* 11/11/05
	* - simpler inner loop avoids gcc4 bug
	* 7/11/07
	* - new evalstart/end callbacks
	* 12/5/08
	* - int rounding was +1 too much, argh
	* - only rebuild the buffer offsets if bpl changes
	* 5/4/09
	* - tiny speedups and cleanups
	* - add restrict, though it doesn't seem to help gcc
	* 12/11/09
	* - only check for non-zero elements once
	* - add mask-all-zero check
	* - cleanups
	* 3/2/10
	* - gtkdoc
	* - more cleanups
	*/

	/*

	This file is part of VIPS.

	VIPS is free software; you can redistribute it and/or modify
	it under the terms of the GNU Lesser General Public License as published by
	the Free Software Foundation; either version 2 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU Lesser General Public License for more details.

	You should have received a copy of the GNU Lesser General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

	*/

	/*

	These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk

	*/

	#ifdef HAVE_CONFIG_H
	#include <config.h>
	#endif /HAVE_CONFIG_H/
	#include <vips/intl.h>

	#include <stdio.h>
	#include <stdlib.h>
	#include <limits.h>

	#include <vips/vips.h>

	#ifdef WITH_DMALLOC
	#include <dmalloc.h>
	#endif /WITH_DMALLOC/

	/* Our parameters ... we take a copy of the mask argument, plus we make a
	* smaller version with the zeros squeezed out.
	*/
	typedef struct {
	IMAGE *in;
	IMAGE *out;
	INTMASK mask; / Copy of mask arg */

	int nnz; /* Number of non-zero mask elements */
	int coeff; / Array of non-zero mask coefficients */
	int coeff_pos; / Index of each nnz element in mask->coeff */

	int underflow; /* Global underflow/overflow counts */
	int overflow;
	} Conv;

	static int
	conv_close( Conv *conv )
	{
	IM_FREEF( im_free_imask, conv->mask );

	return( 0 );
	}

	static int
	conv_evalstart( Conv *conv )
	{
	/* Reset underflow/overflow count.
	*/
	conv->overflow = 0;
	conv->underflow = 0;

	return( 0 );
	}

	static int
	conv_evalend( Conv *conv )
	{
	/* Print underflow/overflow count.
	*/
	if( conv->overflow \|\| conv->underflow )
	im_warn( "im_conv",
	_( "%d overflows and %d underflows detected" ),
	conv->overflow, conv->underflow );

	return( 0 );
	}

	static Conv *
	conv_new( IMAGE in, IMAGE out, INTMASK *mask )
	{
	Conv *conv = IM_NEW( out, Conv );
	const int ne = mask->xsize * mask->ysize;
	int i;

	if( !conv )
	return( NULL );

	conv->in = in;
	conv->out = out;
	conv->mask = NULL;
	conv->nnz = 0;
	conv->coeff = NULL;
	conv->coeff_pos = NULL;
	conv->underflow = 0;
	conv->overflow = 0;

	if( im_add_close_callback( out,
	(im_callback_fn) conv_close, conv, NULL ) \|\|
	im_add_close_callback( out,
	(im_callback_fn) conv_evalstart, conv, NULL ) \|\|
	im_add_close_callback( out,
	(im_callback_fn) conv_evalend, conv, NULL ) \|\|
	!(conv->coeff = IM_ARRAY( out, ne, int )) \|\|
	!(conv->coeff_pos = IM_ARRAY( out, ne, int )) \|\|
	!(conv->mask = im_dup_imask( mask, "conv_mask" )) )
	return( NULL );

	/* Find non-zero mask elements.
	*/
	for( i = 0; i < ne; i++ )
	if( mask->coeff[i] ) {
	conv->coeff[conv->nnz] = mask->coeff[i];
	conv->coeff_pos[conv->nnz] = i;
	conv->nnz += 1;
	}

	/* Was the whole mask zero? We must have at least 1 element in there:
	* set it to zero.
	*/
	if( conv->nnz == 0 ) {
	conv->coeff[0] = mask->coeff[0];
	conv->coeff_pos[0] = 0;
	conv->nnz = 1;
	}

	return( conv );
	}

	/* Our sequence value.
	*/
	typedef struct {
	Conv *conv;
	REGION ir; / Input region */

	int offsets; / Offsets for each non-zero matrix element */
	PEL *pts; / Per-non-zero mask element pointers */

	int underflow; /* Underflow/overflow counts */
	int overflow;

	int last_bpl; /* Avoid recalcing offsets, if we can */
	} ConvSequence;

	/* Free a sequence value.
	*/
	static int
	conv_stop( void vseq, void a, void *b )
	{
	ConvSequence seq = (ConvSequence ) vseq;
	Conv conv = (Conv ) b;

	/* Add local under/over counts to global counts.
	*/
	conv->overflow += seq->overflow;
	conv->underflow += seq->underflow;

	IM_FREEF( im_region_free, seq->ir );

	return( 0 );
	}

	/* Convolution start function.
	*/
	static void *
	conv_start( IMAGE out, void a, void *b )
	{
	IMAGE in = (IMAGE ) a;
	Conv conv = (Conv ) b;
	ConvSequence *seq;

	if( !(seq = IM_NEW( out, ConvSequence )) )
	return( NULL );

	/* Init!
	*/
	seq->conv = conv;
	seq->ir = NULL;
	seq->pts = NULL;
	seq->underflow = 0;
	seq->overflow = 0;
	seq->last_bpl = -1;

	/* Attach region and arrays.
	*/
	seq->ir = im_region_create( in );
	seq->offsets = IM_ARRAY( out, conv->nnz, int );
	seq->pts = IM_ARRAY( out, conv->nnz, PEL * );
	if( !seq->ir \|\| !seq->offsets \|\| !seq->pts ) {
	conv_stop( seq, in, conv );
	return( NULL );
	}

	return( seq );
	}

	#define INNER { \
	sum += t[i] * p[i][x]; \
	i += 1; \
	}

	/* INT inner loops.
	*/
	#define CONV_INT( TYPE, IM_CLIP ) { \
	TYPE restrict p = (TYPE ) seq->pts; \
	TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
	\
	for( x = 0; x < sz; x++ ) { \
	int sum; \
	int i; \
	\
	sum = 0; \
	i = 0; \
	IM_UNROLL( conv->nnz, INNER ); \
	\
	sum = ((sum + rounding) / mask->scale) + mask->offset; \
	\
	IM_CLIP; \
	\
	q[x] = sum; \
	} \
	}

	/* FLOAT inner loops.
	*/
	#define CONV_FLOAT( TYPE ) { \
	TYPE restrict p = (TYPE ) seq->pts; \
	TYPE * restrict q = (TYPE *) IM_REGION_ADDR( or, le, y ); \
	\
	for( x = 0; x < sz; x++ ) { \
	double sum; \
	int i; \
	\
	sum = 0; \
	i = 0; \
	IM_UNROLL( conv->nnz, INNER ); \
	\
	sum = (sum / mask->scale) + mask->offset; \
	\
	q[x] = sum; \
	} \
	}

	/* Convolve!
	*/
	static int
	conv_gen( REGION or, void vseq, void a, void b )
	{
	ConvSequence seq = (ConvSequence ) vseq;
	IMAGE in = (IMAGE ) a;
	Conv conv = (Conv ) b;
	REGION *ir = seq->ir;
	INTMASK *mask = conv->mask;
	int * restrict t = conv->coeff;

	/* You might think this should be (scale + 1) / 2, but then we'd be
	* adding one for scale == 1.
	*/
	int rounding = mask->scale / 2;

	Rect *r = &or->valid;
	Rect s;
	int le = r->left;
	int to = r->top;
	int bo = IM_RECT_BOTTOM( r );
	int sz = IM_REGION_N_ELEMENTS( or );

	int x, y, z, i;

	/* Prepare the section of the input image we need. A little larger
	* than the section of the output image we are producing.
	*/
	s = *r;
	s.width += mask->xsize - 1;
	s.height += mask->ysize - 1;
	if( im_prepare( ir, &s ) )
	return( -1 );

	/* Fill offset array. Only do this if the bpl has changed since the
	* previous im_prepare().
	*/
	if( seq->last_bpl != IM_REGION_LSKIP( ir ) ) {
	seq->last_bpl = IM_REGION_LSKIP( ir );

	for( i = 0; i < conv->nnz; i++ ) {
	z = conv->coeff_pos[i];
	x = z % conv->mask->xsize;
	y = z / conv->mask->xsize;

	seq->offsets[i] =
	IM_REGION_ADDR( ir, x + le, y + to ) -
	IM_REGION_ADDR( ir, le, to );
	}
	}

	for( y = to; y < bo; y++ ) {
	/* Init pts for this line of PELs.
	*/
	for( z = 0; z < conv->nnz; z++ )
	seq->pts[z] = seq->offsets[z] +
	(PEL *) IM_REGION_ADDR( ir, le, y );

	switch( in->BandFmt ) {
	case IM_BANDFMT_UCHAR:
	CONV_INT( unsigned char, IM_CLIP_UCHAR( sum, seq ) );
	break;

	case IM_BANDFMT_CHAR:
	CONV_INT( signed char, IM_CLIP_CHAR( sum, seq ) );
	break;

	case IM_BANDFMT_USHORT:
	CONV_INT( unsigned short, IM_CLIP_USHORT( sum, seq ) );
	break;

	case IM_BANDFMT_SHORT:
	CONV_INT( signed short, IM_CLIP_SHORT( sum, seq ) );
	break;

	case IM_BANDFMT_UINT:
	CONV_INT( unsigned int, IM_CLIP_NONE( sum, seq ) );
	break;

	case IM_BANDFMT_INT:
	CONV_INT( signed int, IM_CLIP_NONE( sum, seq ) );
	break;

	case IM_BANDFMT_FLOAT:
	CONV_FLOAT( float );
	break;

	case IM_BANDFMT_DOUBLE:
	CONV_FLOAT( double );
	break;

	default:
	g_assert( 0 );
	}
	}

	return( 0 );
	}

	int
	im_conv_raw( IMAGE in, IMAGE out, INTMASK *mask )
	{
	Conv *conv;

	/* Check parameters.
	*/
	if( im_piocheck( in, out ) \|\|
	im_check_uncoded( "im_conv", in ) \|\|
	im_check_noncomplex( "im_conv", in ) \|\|
	im_check_imask( "im_conv", mask ) )
	return( -1 );
	if( mask->scale == 0 ) {
	im_error( "im_conv", "%s", "mask scale must be non-zero" );
	return( -1 );
	}
	if( !(conv = conv_new( in, out, mask )) )
	return( -1 );

	/* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output
	* would be 1x1.
	*/
	if( im_cp_desc( out, in ) )
	return( -1 );
	out->Xsize -= mask->xsize - 1;
	out->Ysize -= mask->ysize - 1;
	if( out->Xsize <= 0 \|\| out->Ysize <= 0 ) {
	im_error( "im_conv", "%s", _( "image too small for mask" ) );
	return( -1 );
	}

	/* Set demand hints. FATSTRIP is good for us, as THINSTRIP will cause
	* too many recalculations on overlaps.
	*/
	if( im_demand_hint( out, IM_FATSTRIP, in, NULL ) \|\|
	im_generate( out, conv_start, conv_gen, conv_stop, in, conv ) )
	return( -1 );

	out->Xoffset = -mask->xsize / 2;
	out->Yoffset = -mask->ysize / 2;

	return( 0 );
	}

	/**
	* im_conv:
	* @in: input image
	* @out: output image
	* @mask: convolution mask
	*
	* Convolve @in with @mask using integer arithmetic. The output image
	* always has the same #VipsBandFmt as the input image. Non-complex images
	* only.
	*
	* Each output pixel is
	* calculated as sigma[i]{pixel[i] * mask[i]} / scale + offset, where scale
	* and offset are part of @mask. For integer @in, the division by scale
	* includes round-to-nearest.
	*
	* See also: im_conv_f(), im_convsep(), im_create_imaskv().
	*
	* Returns: 0 on success, -1 on error
	*/
	int
	im_conv( IMAGE in, IMAGE out, INTMASK *mask )
	{
	IMAGE *t1 = im_open_local( out, "im_conv intermediate", "p" );

	if( !t1 \|\|
	im_embed( in, t1, 1, mask->xsize / 2, mask->ysize / 2,
	in->Xsize + mask->xsize - 1,
	in->Ysize + mask->ysize - 1 ) \|\|
	im_conv_raw( t1, out, mask ) )
	return( -1 );

	out->Xoffset = 0;
	out->Yoffset = 0;

	return( 0 );
	}