File Coverage

lib/PDL/ImageND-pp-convolveND.c

Criterion	Covered	Total	%
statement	60	513	11.7
branch	42	634	6.6
condition			n/a
subroutine			n/a
pod			n/a
total	102	1147	8.8

line	stmt	bran	code
1
2			#line 453 "lib/PDL/PP.pm"
3			/*
4			* THIS FILE WAS GENERATED BY PDL::PP from lib/PDL/ImageND.pd! Do not modify!
5			*/
6
7			#define PDL_FREE_CODE(trans, destroy, comp_free_code, ntpriv_free_code) \
8			if (destroy) { \
9			comp_free_code \
10			} \
11			if ((trans)->dims_redone) { \
12			ntpriv_free_code \
13			}
14
15			#include "EXTERN.h"
16			#include "perl.h"
17			#include "XSUB.h"
18			#include "pdl.h"
19			#include "pdlcore.h"
20			#define PDL PDL_ImageND
21			extern Core* PDL; /* Structure hold core C functions */
22
23			#line 1846 "lib/PDL/PP.pm"
24			typedef struct pdl_params_convolveND {
25			#line 26 "lib/PDL/ImageND-pp-convolveND.c"
26			pdl *k;
27			pdl *aa;
28			pdl *a;
29			} pdl_params_convolveND;
30
31
32			#line 1857 "lib/PDL/PP.pm"
33			pdl_error pdl_convolveND_readdata(pdl_trans *__privtrans) {
34			pdl_error PDL_err = {0, NULL, 0};
35			#line 36 "lib/PDL/ImageND-pp-convolveND.c"
36	3		pdl_params_convolveND *__params = __privtrans->params; (void)__params;
37	3	50	if (!__privtrans->broadcast.incs) return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "broadcast.incs NULL");
38			/* broadcastloop declarations */
39			int __brcloopval;
40			register PDL_Indx __tind0,__tind1; /* counters along dim */
41	3		register PDL_Indx __tnpdls = __privtrans->broadcast.npdls;
42			/* dims here are how many steps along those dims */
43	3		register PDL_Indx __tinc0_k0 = PDL_BRC_INC(__privtrans->broadcast.incs,__tnpdls,0,0);
44	3		register PDL_Indx __tinc1_k0 = PDL_BRC_INC(__privtrans->broadcast.incs,__tnpdls,0,1);
45			#define PDL_BROADCASTLOOP_START_convolveND_readdata PDL_BROADCASTLOOP_START( \
46			readdata, \
47			__privtrans->broadcast, \
48			__privtrans->vtable, \
49			k0_datap += __offsp[0]; \
50			, \
51			( ,k0_datap += __tinc1_k0 - __tinc0_k0 * __tdims0 \
52			), \
53			( ,k0_datap += __tinc0_k0 \
54			) \
55			)
56			#define PDL_BROADCASTLOOP_END_convolveND_readdata PDL_BROADCASTLOOP_END( \
57			__privtrans->broadcast, \
58			k0_datap -= __tinc1_k0 * __tdims1 + __offsp[0]; \
59			)
60			#ifndef PDL_DECLARE_PARAMS_convolveND_1
61			#define PDL_DECLARE_PARAMS_convolveND_1(PDL_TYPE_OP,PDL_PPSYM_OP) \
62			PDL_DECLARE_PARAMETER(PDL_TYPE_OP, k0, (__privtrans->pdls[0]), 1, PDL_PPSYM_OP)
63			#endif
64			#define PDL_IF_BAD(t,f) f
65	3		switch (__privtrans->__datatype) { /* Start generic switch */
66	0		case PDL_SB: {
67	0	0	PDL_DECLARE_PARAMS_convolveND_1(PDL_SByte,A)
		0
		0
68	0	0	PDL_BROADCASTLOOP_START_convolveND_readdata {/*
		0
		0
		0
		0
		0
		0
69			* Direct convolution
70			*
71			* Because the kernel is usually the smaller of the two arrays to be convolved,
72			* we broadcast kernel-first to keep it in the processor's cache. The strategy:
73			* work on a padded copy of the original image, so that (even with boundary
74			* conditions) the geometry of the kernel is linearly related to the input
75			* array. Otherwise, follow the path blazed by Karl in convolve(): keep track
76			* of the offsets for each kernel element in a flattened original PDL.
77			*
78			* The first (PP) argument is a dummy that's only used to set the GENERIC()
79			* macro. The other three arguments should all have the same type as the
80			* first arguments, and are all passed in as SVs. They are: the kernel,
81			* the padded copy of the input PDL, and a pre-allocated output PDL. The
82			* input PDL should be padded by the dimensionality of the kernel.
83			*
84			*/
85
86			PDL_Indx i;
87	0		pdl k = __params->k, a = __params->a, *aa = __params->aa;
88	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(aa));
89	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(a));
90	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(k));
91
92	0		PDL_Indx ndims = aa->ndims;
93	0	0	if(ndims != k->ndims \|\| ndims != aa->ndims)
		0
94	0		return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
95
96	0		PDL_Indx koffs[k->nvals];
97	0		PDL_SByte kvals[k->nvals];
98	0		PDL_Indx ivec[ndims];
99
100			/************************************/
101			/* Fill up the koffs & kvals arrays */
102			/* koffs gets relative offsets into aa for each kernel value; */
103			/* kvals gets the kernel values in the same order (flattened) */
104	0		PDL_Indx *koff = koffs;
105	0		PDL_SByte *kval = kvals;
106	0		PDL_SByte aptr = (PDL_SByte )k->data + k->nvals - 1;
107
108	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
109	0		PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
110	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
111	0	0	for (i=0; i < ndims; i++) {
112	0		incs[i*npdls + 0] = k->dimincs[i];
113	0		incs[i*npdls + 1] = aa->dimincs[i];
114			}
115			do {
116	0		kval++ = aptr[-offs[0]]; / Copy kernel value into kernel list */
117	0		koff++ = offs[1]; / Copy current aa offset into koffs list */
118	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
119			} while (1);
120
121			/******************************/
122			/* Now do the actual convolution: for each vector in a, */
123			/* accumulate the appropriate aa-sum and stick it into a. */
124	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
125	0		aptr = a->data;
126	0		PDL_SByte *aaptr = aa->data;
127	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
128	0	0	for (i=0; i < ndims; i++) incs[inpdls + 0] = a->dimincs[i]; / got aa already */
129	0		do {
130	0		PDL_SByte acc = 0;
131	0		koff = koffs;
132	0		kval = kvals;
133	0	0	for (i=0;invals;i++)
134	0		acc += aaptr[offs[1] + koff++] (*kval++);
135	0		aptr[offs[0]] = acc;
136	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
137			} while (1);
138	0		PDL->changed(a, PDL_PARENTDATACHANGED, 0);
139	0	0	}PDL_BROADCASTLOOP_END_convolveND_readdata
		0
140	0		} break;
141	0		case PDL_B: {
142	0	0	PDL_DECLARE_PARAMS_convolveND_1(PDL_Byte,B)
		0
		0
143	0	0	PDL_BROADCASTLOOP_START_convolveND_readdata {/*
		0
		0
		0
		0
		0
		0
144			* Direct convolution
145			*
146			* Because the kernel is usually the smaller of the two arrays to be convolved,
147			* we broadcast kernel-first to keep it in the processor's cache. The strategy:
148			* work on a padded copy of the original image, so that (even with boundary
149			* conditions) the geometry of the kernel is linearly related to the input
150			* array. Otherwise, follow the path blazed by Karl in convolve(): keep track
151			* of the offsets for each kernel element in a flattened original PDL.
152			*
153			* The first (PP) argument is a dummy that's only used to set the GENERIC()
154			* macro. The other three arguments should all have the same type as the
155			* first arguments, and are all passed in as SVs. They are: the kernel,
156			* the padded copy of the input PDL, and a pre-allocated output PDL. The
157			* input PDL should be padded by the dimensionality of the kernel.
158			*
159			*/
160
161			PDL_Indx i;
162	0		pdl k = __params->k, a = __params->a, *aa = __params->aa;
163	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(aa));
164	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(a));
165	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(k));
166
167	0		PDL_Indx ndims = aa->ndims;
168	0	0	if(ndims != k->ndims \|\| ndims != aa->ndims)
		0
169	0		return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
170
171	0		PDL_Indx koffs[k->nvals];
172	0		PDL_Byte kvals[k->nvals];
173	0		PDL_Indx ivec[ndims];
174
175			/************************************/
176			/* Fill up the koffs & kvals arrays */
177			/* koffs gets relative offsets into aa for each kernel value; */
178			/* kvals gets the kernel values in the same order (flattened) */
179	0		PDL_Indx *koff = koffs;
180	0		PDL_Byte *kval = kvals;
181	0		PDL_Byte aptr = (PDL_Byte )k->data + k->nvals - 1;
182
183	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
184	0		PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
185	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
186	0	0	for (i=0; i < ndims; i++) {
187	0		incs[i*npdls + 0] = k->dimincs[i];
188	0		incs[i*npdls + 1] = aa->dimincs[i];
189			}
190			do {
191	0		kval++ = aptr[-offs[0]]; / Copy kernel value into kernel list */
192	0		koff++ = offs[1]; / Copy current aa offset into koffs list */
193	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
194			} while (1);
195
196			/******************************/
197			/* Now do the actual convolution: for each vector in a, */
198			/* accumulate the appropriate aa-sum and stick it into a. */
199	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
200	0		aptr = a->data;
201	0		PDL_Byte *aaptr = aa->data;
202	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
203	0	0	for (i=0; i < ndims; i++) incs[inpdls + 0] = a->dimincs[i]; / got aa already */
204	0		do {
205	0		PDL_Byte acc = 0;
206	0		koff = koffs;
207	0		kval = kvals;
208	0	0	for (i=0;invals;i++)
209	0		acc += aaptr[offs[1] + koff++] (*kval++);
210	0		aptr[offs[0]] = acc;
211	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
212			} while (1);
213	0		PDL->changed(a, PDL_PARENTDATACHANGED, 0);
214	0	0	}PDL_BROADCASTLOOP_END_convolveND_readdata
		0
215	0		} break;
216	0		case PDL_S: {
217	0	0	PDL_DECLARE_PARAMS_convolveND_1(PDL_Short,S)
		0
		0
218	0	0	PDL_BROADCASTLOOP_START_convolveND_readdata {/*
		0
		0
		0
		0
		0
		0
219			* Direct convolution
220			*
221			* Because the kernel is usually the smaller of the two arrays to be convolved,
222			* we broadcast kernel-first to keep it in the processor's cache. The strategy:
223			* work on a padded copy of the original image, so that (even with boundary
224			* conditions) the geometry of the kernel is linearly related to the input
225			* array. Otherwise, follow the path blazed by Karl in convolve(): keep track
226			* of the offsets for each kernel element in a flattened original PDL.
227			*
228			* The first (PP) argument is a dummy that's only used to set the GENERIC()
229			* macro. The other three arguments should all have the same type as the
230			* first arguments, and are all passed in as SVs. They are: the kernel,
231			* the padded copy of the input PDL, and a pre-allocated output PDL. The
232			* input PDL should be padded by the dimensionality of the kernel.
233			*
234			*/
235
236			PDL_Indx i;
237	0		pdl k = __params->k, a = __params->a, *aa = __params->aa;
238	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(aa));
239	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(a));
240	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(k));
241
242	0		PDL_Indx ndims = aa->ndims;
243	0	0	if(ndims != k->ndims \|\| ndims != aa->ndims)
		0
244	0		return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
245
246	0		PDL_Indx koffs[k->nvals];
247	0		PDL_Short kvals[k->nvals];
248	0		PDL_Indx ivec[ndims];
249
250			/************************************/
251			/* Fill up the koffs & kvals arrays */
252			/* koffs gets relative offsets into aa for each kernel value; */
253			/* kvals gets the kernel values in the same order (flattened) */
254	0		PDL_Indx *koff = koffs;
255	0		PDL_Short *kval = kvals;
256	0		PDL_Short aptr = (PDL_Short )k->data + k->nvals - 1;
257
258	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
259	0		PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
260	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
261	0	0	for (i=0; i < ndims; i++) {
262	0		incs[i*npdls + 0] = k->dimincs[i];
263	0		incs[i*npdls + 1] = aa->dimincs[i];
264			}
265			do {
266	0		kval++ = aptr[-offs[0]]; / Copy kernel value into kernel list */
267	0		koff++ = offs[1]; / Copy current aa offset into koffs list */
268	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
269			} while (1);
270
271			/******************************/
272			/* Now do the actual convolution: for each vector in a, */
273			/* accumulate the appropriate aa-sum and stick it into a. */
274	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
275	0		aptr = a->data;
276	0		PDL_Short *aaptr = aa->data;
277	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
278	0	0	for (i=0; i < ndims; i++) incs[inpdls + 0] = a->dimincs[i]; / got aa already */
279	0		do {
280	0		PDL_Short acc = 0;
281	0		koff = koffs;
282	0		kval = kvals;
283	0	0	for (i=0;invals;i++)
284	0		acc += aaptr[offs[1] + koff++] (*kval++);
285	0		aptr[offs[0]] = acc;
286	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
287			} while (1);
288	0		PDL->changed(a, PDL_PARENTDATACHANGED, 0);
289	0	0	}PDL_BROADCASTLOOP_END_convolveND_readdata
		0
290	0		} break;
291	0		case PDL_US: {
292	0	0	PDL_DECLARE_PARAMS_convolveND_1(PDL_Ushort,U)
		0
		0
293	0	0	PDL_BROADCASTLOOP_START_convolveND_readdata {/*
		0
		0
		0
		0
		0
		0
294			* Direct convolution
295			*
296			* Because the kernel is usually the smaller of the two arrays to be convolved,
297			* we broadcast kernel-first to keep it in the processor's cache. The strategy:
298			* work on a padded copy of the original image, so that (even with boundary
299			* conditions) the geometry of the kernel is linearly related to the input
300			* array. Otherwise, follow the path blazed by Karl in convolve(): keep track
301			* of the offsets for each kernel element in a flattened original PDL.
302			*
303			* The first (PP) argument is a dummy that's only used to set the GENERIC()
304			* macro. The other three arguments should all have the same type as the
305			* first arguments, and are all passed in as SVs. They are: the kernel,
306			* the padded copy of the input PDL, and a pre-allocated output PDL. The
307			* input PDL should be padded by the dimensionality of the kernel.
308			*
309			*/
310
311			PDL_Indx i;
312	0		pdl k = __params->k, a = __params->a, *aa = __params->aa;
313	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(aa));
314	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(a));
315	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(k));
316
317	0		PDL_Indx ndims = aa->ndims;
318	0	0	if(ndims != k->ndims \|\| ndims != aa->ndims)
		0
319	0		return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
320
321	0		PDL_Indx koffs[k->nvals];
322	0		PDL_Ushort kvals[k->nvals];
323	0		PDL_Indx ivec[ndims];
324
325			/************************************/
326			/* Fill up the koffs & kvals arrays */
327			/* koffs gets relative offsets into aa for each kernel value; */
328			/* kvals gets the kernel values in the same order (flattened) */
329	0		PDL_Indx *koff = koffs;
330	0		PDL_Ushort *kval = kvals;
331	0		PDL_Ushort aptr = (PDL_Ushort )k->data + k->nvals - 1;
332
333	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
334	0		PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
335	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
336	0	0	for (i=0; i < ndims; i++) {
337	0		incs[i*npdls + 0] = k->dimincs[i];
338	0		incs[i*npdls + 1] = aa->dimincs[i];
339			}
340			do {
341	0		kval++ = aptr[-offs[0]]; / Copy kernel value into kernel list */
342	0		koff++ = offs[1]; / Copy current aa offset into koffs list */
343	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
344			} while (1);
345
346			/******************************/
347			/* Now do the actual convolution: for each vector in a, */
348			/* accumulate the appropriate aa-sum and stick it into a. */
349	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
350	0		aptr = a->data;
351	0		PDL_Ushort *aaptr = aa->data;
352	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
353	0	0	for (i=0; i < ndims; i++) incs[inpdls + 0] = a->dimincs[i]; / got aa already */
354	0		do {
355	0		PDL_Ushort acc = 0;
356	0		koff = koffs;
357	0		kval = kvals;
358	0	0	for (i=0;invals;i++)
359	0		acc += aaptr[offs[1] + koff++] (*kval++);
360	0		aptr[offs[0]] = acc;
361	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
362			} while (1);
363	0		PDL->changed(a, PDL_PARENTDATACHANGED, 0);
364	0	0	}PDL_BROADCASTLOOP_END_convolveND_readdata
		0
365	0		} break;
366	0		case PDL_L: {
367	0	0	PDL_DECLARE_PARAMS_convolveND_1(PDL_Long,L)
		0
		0
368	0	0	PDL_BROADCASTLOOP_START_convolveND_readdata {/*
		0
		0
		0
		0
		0
		0
369			* Direct convolution
370			*
371			* Because the kernel is usually the smaller of the two arrays to be convolved,
372			* we broadcast kernel-first to keep it in the processor's cache. The strategy:
373			* work on a padded copy of the original image, so that (even with boundary
374			* conditions) the geometry of the kernel is linearly related to the input
375			* array. Otherwise, follow the path blazed by Karl in convolve(): keep track
376			* of the offsets for each kernel element in a flattened original PDL.
377			*
378			* The first (PP) argument is a dummy that's only used to set the GENERIC()
379			* macro. The other three arguments should all have the same type as the
380			* first arguments, and are all passed in as SVs. They are: the kernel,
381			* the padded copy of the input PDL, and a pre-allocated output PDL. The
382			* input PDL should be padded by the dimensionality of the kernel.
383			*
384			*/
385
386			PDL_Indx i;
387	0		pdl k = __params->k, a = __params->a, *aa = __params->aa;
388	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(aa));
389	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(a));
390	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(k));
391
392	0		PDL_Indx ndims = aa->ndims;
393	0	0	if(ndims != k->ndims \|\| ndims != aa->ndims)
		0
394	0		return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
395
396	0		PDL_Indx koffs[k->nvals];
397	0		PDL_Long kvals[k->nvals];
398	0		PDL_Indx ivec[ndims];
399
400			/************************************/
401			/* Fill up the koffs & kvals arrays */
402			/* koffs gets relative offsets into aa for each kernel value; */
403			/* kvals gets the kernel values in the same order (flattened) */
404	0		PDL_Indx *koff = koffs;
405	0		PDL_Long *kval = kvals;
406	0		PDL_Long aptr = (PDL_Long )k->data + k->nvals - 1;
407
408	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
409	0		PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
410	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
411	0	0	for (i=0; i < ndims; i++) {
412	0		incs[i*npdls + 0] = k->dimincs[i];
413	0		incs[i*npdls + 1] = aa->dimincs[i];
414			}
415			do {
416	0		kval++ = aptr[-offs[0]]; / Copy kernel value into kernel list */
417	0		koff++ = offs[1]; / Copy current aa offset into koffs list */
418	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
419			} while (1);
420
421			/******************************/
422			/* Now do the actual convolution: for each vector in a, */
423			/* accumulate the appropriate aa-sum and stick it into a. */
424	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
425	0		aptr = a->data;
426	0		PDL_Long *aaptr = aa->data;
427	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
428	0	0	for (i=0; i < ndims; i++) incs[inpdls + 0] = a->dimincs[i]; / got aa already */
429	0		do {
430	0		PDL_Long acc = 0;
431	0		koff = koffs;
432	0		kval = kvals;
433	0	0	for (i=0;invals;i++)
434	0		acc += aaptr[offs[1] + koff++] (*kval++);
435	0		aptr[offs[0]] = acc;
436	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
437			} while (1);
438	0		PDL->changed(a, PDL_PARENTDATACHANGED, 0);
439	0	0	}PDL_BROADCASTLOOP_END_convolveND_readdata
		0
440	0		} break;
441	0		case PDL_UL: {
442	0	0	PDL_DECLARE_PARAMS_convolveND_1(PDL_ULong,K)
		0
		0
443	0	0	PDL_BROADCASTLOOP_START_convolveND_readdata {/*
		0
		0
		0
		0
		0
		0
444			* Direct convolution
445			*
446			* Because the kernel is usually the smaller of the two arrays to be convolved,
447			* we broadcast kernel-first to keep it in the processor's cache. The strategy:
448			* work on a padded copy of the original image, so that (even with boundary
449			* conditions) the geometry of the kernel is linearly related to the input
450			* array. Otherwise, follow the path blazed by Karl in convolve(): keep track
451			* of the offsets for each kernel element in a flattened original PDL.
452			*
453			* The first (PP) argument is a dummy that's only used to set the GENERIC()
454			* macro. The other three arguments should all have the same type as the
455			* first arguments, and are all passed in as SVs. They are: the kernel,
456			* the padded copy of the input PDL, and a pre-allocated output PDL. The
457			* input PDL should be padded by the dimensionality of the kernel.
458			*
459			*/
460
461			PDL_Indx i;
462	0		pdl k = __params->k, a = __params->a, *aa = __params->aa;
463	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(aa));
464	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(a));
465	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(k));
466
467	0		PDL_Indx ndims = aa->ndims;
468	0	0	if(ndims != k->ndims \|\| ndims != aa->ndims)
		0
469	0		return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
470
471	0		PDL_Indx koffs[k->nvals];
472	0		PDL_ULong kvals[k->nvals];
473	0		PDL_Indx ivec[ndims];
474
475			/************************************/
476			/* Fill up the koffs & kvals arrays */
477			/* koffs gets relative offsets into aa for each kernel value; */
478			/* kvals gets the kernel values in the same order (flattened) */
479	0		PDL_Indx *koff = koffs;
480	0		PDL_ULong *kval = kvals;
481	0		PDL_ULong aptr = (PDL_ULong )k->data + k->nvals - 1;
482
483	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
484	0		PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
485	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
486	0	0	for (i=0; i < ndims; i++) {
487	0		incs[i*npdls + 0] = k->dimincs[i];
488	0		incs[i*npdls + 1] = aa->dimincs[i];
489			}
490			do {
491	0		kval++ = aptr[-offs[0]]; / Copy kernel value into kernel list */
492	0		koff++ = offs[1]; / Copy current aa offset into koffs list */
493	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
494			} while (1);
495
496			/******************************/
497			/* Now do the actual convolution: for each vector in a, */
498			/* accumulate the appropriate aa-sum and stick it into a. */
499	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
500	0		aptr = a->data;
501	0		PDL_ULong *aaptr = aa->data;
502	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
503	0	0	for (i=0; i < ndims; i++) incs[inpdls + 0] = a->dimincs[i]; / got aa already */
504	0		do {
505	0		PDL_ULong acc = 0;
506	0		koff = koffs;
507	0		kval = kvals;
508	0	0	for (i=0;invals;i++)
509	0		acc += aaptr[offs[1] + koff++] (*kval++);
510	0		aptr[offs[0]] = acc;
511	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
512			} while (1);
513	0		PDL->changed(a, PDL_PARENTDATACHANGED, 0);
514	0	0	}PDL_BROADCASTLOOP_END_convolveND_readdata
		0
515	0		} break;
516	0		case PDL_IND: {
517	0	0	PDL_DECLARE_PARAMS_convolveND_1(PDL_Indx,N)
		0
		0
518	0	0	PDL_BROADCASTLOOP_START_convolveND_readdata {/*
		0
		0
		0
		0
		0
		0
519			* Direct convolution
520			*
521			* Because the kernel is usually the smaller of the two arrays to be convolved,
522			* we broadcast kernel-first to keep it in the processor's cache. The strategy:
523			* work on a padded copy of the original image, so that (even with boundary
524			* conditions) the geometry of the kernel is linearly related to the input
525			* array. Otherwise, follow the path blazed by Karl in convolve(): keep track
526			* of the offsets for each kernel element in a flattened original PDL.
527			*
528			* The first (PP) argument is a dummy that's only used to set the GENERIC()
529			* macro. The other three arguments should all have the same type as the
530			* first arguments, and are all passed in as SVs. They are: the kernel,
531			* the padded copy of the input PDL, and a pre-allocated output PDL. The
532			* input PDL should be padded by the dimensionality of the kernel.
533			*
534			*/
535
536			PDL_Indx i;
537	0		pdl k = __params->k, a = __params->a, *aa = __params->aa;
538	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(aa));
539	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(a));
540	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(k));
541
542	0		PDL_Indx ndims = aa->ndims;
543	0	0	if(ndims != k->ndims \|\| ndims != aa->ndims)
		0
544	0		return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
545
546	0		PDL_Indx koffs[k->nvals];
547	0		PDL_Indx kvals[k->nvals];
548	0		PDL_Indx ivec[ndims];
549
550			/************************************/
551			/* Fill up the koffs & kvals arrays */
552			/* koffs gets relative offsets into aa for each kernel value; */
553			/* kvals gets the kernel values in the same order (flattened) */
554	0		PDL_Indx *koff = koffs;
555	0		PDL_Indx *kval = kvals;
556	0		PDL_Indx aptr = (PDL_Indx )k->data + k->nvals - 1;
557
558	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
559	0		PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
560	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
561	0	0	for (i=0; i < ndims; i++) {
562	0		incs[i*npdls + 0] = k->dimincs[i];
563	0		incs[i*npdls + 1] = aa->dimincs[i];
564			}
565			do {
566	0		kval++ = aptr[-offs[0]]; / Copy kernel value into kernel list */
567	0		koff++ = offs[1]; / Copy current aa offset into koffs list */
568	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
569			} while (1);
570
571			/******************************/
572			/* Now do the actual convolution: for each vector in a, */
573			/* accumulate the appropriate aa-sum and stick it into a. */
574	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
575	0		aptr = a->data;
576	0		PDL_Indx *aaptr = aa->data;
577	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
578	0	0	for (i=0; i < ndims; i++) incs[inpdls + 0] = a->dimincs[i]; / got aa already */
579	0		do {
580	0		PDL_Indx acc = 0;
581	0		koff = koffs;
582	0		kval = kvals;
583	0	0	for (i=0;invals;i++)
584	0		acc += aaptr[offs[1] + koff++] (*kval++);
585	0		aptr[offs[0]] = acc;
586	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
587			} while (1);
588	0		PDL->changed(a, PDL_PARENTDATACHANGED, 0);
589	0	0	}PDL_BROADCASTLOOP_END_convolveND_readdata
		0
590	0		} break;
591	0		case PDL_ULL: {
592	0	0	PDL_DECLARE_PARAMS_convolveND_1(PDL_ULongLong,P)
		0
		0
593	0	0	PDL_BROADCASTLOOP_START_convolveND_readdata {/*
		0
		0
		0
		0
		0
		0
594			* Direct convolution
595			*
596			* Because the kernel is usually the smaller of the two arrays to be convolved,
597			* we broadcast kernel-first to keep it in the processor's cache. The strategy:
598			* work on a padded copy of the original image, so that (even with boundary
599			* conditions) the geometry of the kernel is linearly related to the input
600			* array. Otherwise, follow the path blazed by Karl in convolve(): keep track
601			* of the offsets for each kernel element in a flattened original PDL.
602			*
603			* The first (PP) argument is a dummy that's only used to set the GENERIC()
604			* macro. The other three arguments should all have the same type as the
605			* first arguments, and are all passed in as SVs. They are: the kernel,
606			* the padded copy of the input PDL, and a pre-allocated output PDL. The
607			* input PDL should be padded by the dimensionality of the kernel.
608			*
609			*/
610
611			PDL_Indx i;
612	0		pdl k = __params->k, a = __params->a, *aa = __params->aa;
613	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(aa));
614	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(a));
615	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(k));
616
617	0		PDL_Indx ndims = aa->ndims;
618	0	0	if(ndims != k->ndims \|\| ndims != aa->ndims)
		0
619	0		return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
620
621	0		PDL_Indx koffs[k->nvals];
622	0		PDL_ULongLong kvals[k->nvals];
623	0		PDL_Indx ivec[ndims];
624
625			/************************************/
626			/* Fill up the koffs & kvals arrays */
627			/* koffs gets relative offsets into aa for each kernel value; */
628			/* kvals gets the kernel values in the same order (flattened) */
629	0		PDL_Indx *koff = koffs;
630	0		PDL_ULongLong *kval = kvals;
631	0		PDL_ULongLong aptr = (PDL_ULongLong )k->data + k->nvals - 1;
632
633	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
634	0		PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
635	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
636	0	0	for (i=0; i < ndims; i++) {
637	0		incs[i*npdls + 0] = k->dimincs[i];
638	0		incs[i*npdls + 1] = aa->dimincs[i];
639			}
640			do {
641	0		kval++ = aptr[-offs[0]]; / Copy kernel value into kernel list */
642	0		koff++ = offs[1]; / Copy current aa offset into koffs list */
643	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
644			} while (1);
645
646			/******************************/
647			/* Now do the actual convolution: for each vector in a, */
648			/* accumulate the appropriate aa-sum and stick it into a. */
649	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
650	0		aptr = a->data;
651	0		PDL_ULongLong *aaptr = aa->data;
652	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
653	0	0	for (i=0; i < ndims; i++) incs[inpdls + 0] = a->dimincs[i]; / got aa already */
654	0		do {
655	0		PDL_ULongLong acc = 0;
656	0		koff = koffs;
657	0		kval = kvals;
658	0	0	for (i=0;invals;i++)
659	0		acc += aaptr[offs[1] + koff++] (*kval++);
660	0		aptr[offs[0]] = acc;
661	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
662			} while (1);
663	0		PDL->changed(a, PDL_PARENTDATACHANGED, 0);
664	0	0	}PDL_BROADCASTLOOP_END_convolveND_readdata
		0
665	0		} break;
666	0		case PDL_LL: {
667	0	0	PDL_DECLARE_PARAMS_convolveND_1(PDL_LongLong,Q)
		0
		0
668	0	0	PDL_BROADCASTLOOP_START_convolveND_readdata {/*
		0
		0
		0
		0
		0
		0
669			* Direct convolution
670			*
671			* Because the kernel is usually the smaller of the two arrays to be convolved,
672			* we broadcast kernel-first to keep it in the processor's cache. The strategy:
673			* work on a padded copy of the original image, so that (even with boundary
674			* conditions) the geometry of the kernel is linearly related to the input
675			* array. Otherwise, follow the path blazed by Karl in convolve(): keep track
676			* of the offsets for each kernel element in a flattened original PDL.
677			*
678			* The first (PP) argument is a dummy that's only used to set the GENERIC()
679			* macro. The other three arguments should all have the same type as the
680			* first arguments, and are all passed in as SVs. They are: the kernel,
681			* the padded copy of the input PDL, and a pre-allocated output PDL. The
682			* input PDL should be padded by the dimensionality of the kernel.
683			*
684			*/
685
686			PDL_Indx i;
687	0		pdl k = __params->k, a = __params->a, *aa = __params->aa;
688	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(aa));
689	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(a));
690	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(k));
691
692	0		PDL_Indx ndims = aa->ndims;
693	0	0	if(ndims != k->ndims \|\| ndims != aa->ndims)
		0
694	0		return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
695
696	0		PDL_Indx koffs[k->nvals];
697	0		PDL_LongLong kvals[k->nvals];
698	0		PDL_Indx ivec[ndims];
699
700			/************************************/
701			/* Fill up the koffs & kvals arrays */
702			/* koffs gets relative offsets into aa for each kernel value; */
703			/* kvals gets the kernel values in the same order (flattened) */
704	0		PDL_Indx *koff = koffs;
705	0		PDL_LongLong *kval = kvals;
706	0		PDL_LongLong aptr = (PDL_LongLong )k->data + k->nvals - 1;
707
708	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
709	0		PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
710	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
711	0	0	for (i=0; i < ndims; i++) {
712	0		incs[i*npdls + 0] = k->dimincs[i];
713	0		incs[i*npdls + 1] = aa->dimincs[i];
714			}
715			do {
716	0		kval++ = aptr[-offs[0]]; / Copy kernel value into kernel list */
717	0		koff++ = offs[1]; / Copy current aa offset into koffs list */
718	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
719			} while (1);
720
721			/******************************/
722			/* Now do the actual convolution: for each vector in a, */
723			/* accumulate the appropriate aa-sum and stick it into a. */
724	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
725	0		aptr = a->data;
726	0		PDL_LongLong *aaptr = aa->data;
727	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
728	0	0	for (i=0; i < ndims; i++) incs[inpdls + 0] = a->dimincs[i]; / got aa already */
729	0		do {
730	0		PDL_LongLong acc = 0;
731	0		koff = koffs;
732	0		kval = kvals;
733	0	0	for (i=0;invals;i++)
734	0		acc += aaptr[offs[1] + koff++] (*kval++);
735	0		aptr[offs[0]] = acc;
736	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
737			} while (1);
738	0		PDL->changed(a, PDL_PARENTDATACHANGED, 0);
739	0	0	}PDL_BROADCASTLOOP_END_convolveND_readdata
		0
740	0		} break;
741	0		case PDL_F: {
742	0	0	PDL_DECLARE_PARAMS_convolveND_1(PDL_Float,F)
		0
		0
743	0	0	PDL_BROADCASTLOOP_START_convolveND_readdata {/*
		0
		0
		0
		0
		0
		0
744			* Direct convolution
745			*
746			* Because the kernel is usually the smaller of the two arrays to be convolved,
747			* we broadcast kernel-first to keep it in the processor's cache. The strategy:
748			* work on a padded copy of the original image, so that (even with boundary
749			* conditions) the geometry of the kernel is linearly related to the input
750			* array. Otherwise, follow the path blazed by Karl in convolve(): keep track
751			* of the offsets for each kernel element in a flattened original PDL.
752			*
753			* The first (PP) argument is a dummy that's only used to set the GENERIC()
754			* macro. The other three arguments should all have the same type as the
755			* first arguments, and are all passed in as SVs. They are: the kernel,
756			* the padded copy of the input PDL, and a pre-allocated output PDL. The
757			* input PDL should be padded by the dimensionality of the kernel.
758			*
759			*/
760
761			PDL_Indx i;
762	0		pdl k = __params->k, a = __params->a, *aa = __params->aa;
763	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(aa));
764	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(a));
765	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(k));
766
767	0		PDL_Indx ndims = aa->ndims;
768	0	0	if(ndims != k->ndims \|\| ndims != aa->ndims)
		0
769	0		return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
770
771	0		PDL_Indx koffs[k->nvals];
772	0		PDL_Float kvals[k->nvals];
773	0		PDL_Indx ivec[ndims];
774
775			/************************************/
776			/* Fill up the koffs & kvals arrays */
777			/* koffs gets relative offsets into aa for each kernel value; */
778			/* kvals gets the kernel values in the same order (flattened) */
779	0		PDL_Indx *koff = koffs;
780	0		PDL_Float *kval = kvals;
781	0		PDL_Float aptr = (PDL_Float )k->data + k->nvals - 1;
782
783	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
784	0		PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
785	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
786	0	0	for (i=0; i < ndims; i++) {
787	0		incs[i*npdls + 0] = k->dimincs[i];
788	0		incs[i*npdls + 1] = aa->dimincs[i];
789			}
790			do {
791	0		kval++ = aptr[-offs[0]]; / Copy kernel value into kernel list */
792	0		koff++ = offs[1]; / Copy current aa offset into koffs list */
793	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
794			} while (1);
795
796			/******************************/
797			/* Now do the actual convolution: for each vector in a, */
798			/* accumulate the appropriate aa-sum and stick it into a. */
799	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
800	0		aptr = a->data;
801	0		PDL_Float *aaptr = aa->data;
802	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
803	0	0	for (i=0; i < ndims; i++) incs[inpdls + 0] = a->dimincs[i]; / got aa already */
804	0		do {
805	0		PDL_Float acc = 0;
806	0		koff = koffs;
807	0		kval = kvals;
808	0	0	for (i=0;invals;i++)
809	0		acc += aaptr[offs[1] + koff++] (*kval++);
810	0		aptr[offs[0]] = acc;
811	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
812			} while (1);
813	0		PDL->changed(a, PDL_PARENTDATACHANGED, 0);
814	0	0	}PDL_BROADCASTLOOP_END_convolveND_readdata
		0
815	0		} break;
816	3		case PDL_D: {
817	3	50	PDL_DECLARE_PARAMS_convolveND_1(PDL_Double,D)
		50
		50
818	12	50	PDL_BROADCASTLOOP_START_convolveND_readdata {/*
		50
		50
		50
		50
		100
		100
819			* Direct convolution
820			*
821			* Because the kernel is usually the smaller of the two arrays to be convolved,
822			* we broadcast kernel-first to keep it in the processor's cache. The strategy:
823			* work on a padded copy of the original image, so that (even with boundary
824			* conditions) the geometry of the kernel is linearly related to the input
825			* array. Otherwise, follow the path blazed by Karl in convolve(): keep track
826			* of the offsets for each kernel element in a flattened original PDL.
827			*
828			* The first (PP) argument is a dummy that's only used to set the GENERIC()
829			* macro. The other three arguments should all have the same type as the
830			* first arguments, and are all passed in as SVs. They are: the kernel,
831			* the padded copy of the input PDL, and a pre-allocated output PDL. The
832			* input PDL should be padded by the dimensionality of the kernel.
833			*
834			*/
835
836			PDL_Indx i;
837	3		pdl k = __params->k, a = __params->a, *aa = __params->aa;
838	3	50	PDL_RETERROR(PDL_err, PDL->make_physical(aa));
839	3	50	PDL_RETERROR(PDL_err, PDL->make_physical(a));
840	3	50	PDL_RETERROR(PDL_err, PDL->make_physical(k));
841
842	3		PDL_Indx ndims = aa->ndims;
843	3	50	if(ndims != k->ndims \|\| ndims != aa->ndims)
		50
844	0		return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
845
846	3		PDL_Indx koffs[k->nvals];
847	3		PDL_Double kvals[k->nvals];
848	3		PDL_Indx ivec[ndims];
849
850			/************************************/
851			/* Fill up the koffs & kvals arrays */
852			/* koffs gets relative offsets into aa for each kernel value; */
853			/* kvals gets the kernel values in the same order (flattened) */
854	3		PDL_Indx *koff = koffs;
855	3		PDL_Double *kval = kvals;
856	3		PDL_Double aptr = (PDL_Double )k->data + k->nvals - 1;
857
858	9	100	for (i=0; i < ndims; i++) ivec[i] = 0;
859	3		PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
860	9	100	for (i=0; i < npdls; i++) offs[i] = 0;
861	9	100	for (i=0; i < ndims; i++) {
862	6		incs[i*npdls + 0] = k->dimincs[i];
863	6		incs[i*npdls + 1] = aa->dimincs[i];
864			}
865			do {
866	12		kval++ = aptr[-offs[0]]; / Copy kernel value into kernel list */
867	12		koff++ = offs[1]; / Copy current aa offset into koffs list */
868	12	100	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
869			} while (1);
870
871			/******************************/
872			/* Now do the actual convolution: for each vector in a, */
873			/* accumulate the appropriate aa-sum and stick it into a. */
874	9	100	for (i=0; i < ndims; i++) ivec[i] = 0;
875	3		aptr = a->data;
876	3		PDL_Double *aaptr = aa->data;
877	9	100	for (i=0; i < npdls; i++) offs[i] = 0;
878	9	100	for (i=0; i < ndims; i++) incs[inpdls + 0] = a->dimincs[i]; / got aa already */
879	105		do {
880	108		PDL_Double acc = 0;
881	108		koff = koffs;
882	108		kval = kvals;
883	540	100	for (i=0;invals;i++)
884	432		acc += aaptr[offs[1] + koff++] (*kval++);
885	108		aptr[offs[0]] = acc;
886	108	100	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
887			} while (1);
888	3		PDL->changed(a, PDL_PARENTDATACHANGED, 0);
889	3	50	}PDL_BROADCASTLOOP_END_convolveND_readdata
		50
890	3		} break;
891	0		case PDL_LD: {
892	0	0	PDL_DECLARE_PARAMS_convolveND_1(PDL_LDouble,E)
		0
		0
893	0	0	PDL_BROADCASTLOOP_START_convolveND_readdata {/*
		0
		0
		0
		0
		0
		0
894			* Direct convolution
895			*
896			* Because the kernel is usually the smaller of the two arrays to be convolved,
897			* we broadcast kernel-first to keep it in the processor's cache. The strategy:
898			* work on a padded copy of the original image, so that (even with boundary
899			* conditions) the geometry of the kernel is linearly related to the input
900			* array. Otherwise, follow the path blazed by Karl in convolve(): keep track
901			* of the offsets for each kernel element in a flattened original PDL.
902			*
903			* The first (PP) argument is a dummy that's only used to set the GENERIC()
904			* macro. The other three arguments should all have the same type as the
905			* first arguments, and are all passed in as SVs. They are: the kernel,
906			* the padded copy of the input PDL, and a pre-allocated output PDL. The
907			* input PDL should be padded by the dimensionality of the kernel.
908			*
909			*/
910
911			PDL_Indx i;
912	0		pdl k = __params->k, a = __params->a, *aa = __params->aa;
913	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(aa));
914	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(a));
915	0	0	PDL_RETERROR(PDL_err, PDL->make_physical(k));
916
917	0		PDL_Indx ndims = aa->ndims;
918	0	0	if(ndims != k->ndims \|\| ndims != aa->ndims)
		0
919	0		return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
920
921	0		PDL_Indx koffs[k->nvals];
922	0		PDL_LDouble kvals[k->nvals];
923	0		PDL_Indx ivec[ndims];
924
925			/************************************/
926			/* Fill up the koffs & kvals arrays */
927			/* koffs gets relative offsets into aa for each kernel value; */
928			/* kvals gets the kernel values in the same order (flattened) */
929	0		PDL_Indx *koff = koffs;
930	0		PDL_LDouble *kval = kvals;
931	0		PDL_LDouble aptr = (PDL_LDouble )k->data + k->nvals - 1;
932
933	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
934	0		PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
935	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
936	0	0	for (i=0; i < ndims; i++) {
937	0		incs[i*npdls + 0] = k->dimincs[i];
938	0		incs[i*npdls + 1] = aa->dimincs[i];
939			}
940			do {
941	0		kval++ = aptr[-offs[0]]; / Copy kernel value into kernel list */
942	0		koff++ = offs[1]; / Copy current aa offset into koffs list */
943	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
944			} while (1);
945
946			/******************************/
947			/* Now do the actual convolution: for each vector in a, */
948			/* accumulate the appropriate aa-sum and stick it into a. */
949	0	0	for (i=0; i < ndims; i++) ivec[i] = 0;
950	0		aptr = a->data;
951	0		PDL_LDouble *aaptr = aa->data;
952	0	0	for (i=0; i < npdls; i++) offs[i] = 0;
953	0	0	for (i=0; i < ndims; i++) incs[inpdls + 0] = a->dimincs[i]; / got aa already */
954	0		do {
955	0		PDL_LDouble acc = 0;
956	0		koff = koffs;
957	0		kval = kvals;
958	0	0	for (i=0;invals;i++)
959	0		acc += aaptr[offs[1] + koff++] (*kval++);
960	0		aptr[offs[0]] = acc;
961	0	0	if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
962			} while (1);
963	0		PDL->changed(a, PDL_PARENTDATACHANGED, 0);
964	0	0	}PDL_BROADCASTLOOP_END_convolveND_readdata
		0
965	0		} break;
966	0		default: return PDL->make_error(PDL_EUSERERROR, "PP INTERNAL ERROR in convolveND: unhandled datatype(%d), only handles (ABSULKNPQFDE)! PLEASE MAKE A BUG REPORT\n", __privtrans->__datatype);
967			}
968			#undef PDL_IF_BAD
969	3		return PDL_err;
970			}
971
972			static pdl_datatypes pdl_convolveND_vtable_gentypes[] = { PDL_SB, PDL_B, PDL_S, PDL_US, PDL_L, PDL_UL, PDL_IND, PDL_ULL, PDL_LL, PDL_F, PDL_D, PDL_LD, -1 };
973			static PDL_Indx pdl_convolveND_vtable_realdims[] = { 0 };
974			static char *pdl_convolveND_vtable_parnames[] = { "k0" };
975			static short pdl_convolveND_vtable_parflags[] = {
976			0
977			};
978			static pdl_datatypes pdl_convolveND_vtable_partypes[] = { -1 };
979			static PDL_Indx pdl_convolveND_vtable_realdims_starts[] = { 0 };
980			static PDL_Indx pdl_convolveND_vtable_realdims_ind_ids[] = { 0 };
981			static char *pdl_convolveND_vtable_indnames[] = { "" };
982			pdl_transvtable pdl_convolveND_vtable = {
983			PDL_TRANS_DO_BROADCAST, 0, pdl_convolveND_vtable_gentypes, 1, 1, NULL /CORE21/,
984			pdl_convolveND_vtable_realdims, pdl_convolveND_vtable_parnames,
985			pdl_convolveND_vtable_parflags, pdl_convolveND_vtable_partypes,
986			pdl_convolveND_vtable_realdims_starts, pdl_convolveND_vtable_realdims_ind_ids, 0,
987			0, pdl_convolveND_vtable_indnames,
988			NULL, pdl_convolveND_readdata, NULL,
989			NULL,
990			sizeof(pdl_params_convolveND),"PDL::ImageND::convolveND"
991			};
992
993
994	3		pdl_error pdl_run_convolveND(pdl k0,pdl k,pdl aa,pdl a) {
995	3		pdl_error PDL_err = {0, NULL, 0};
996	3	50	if (!PDL) return (pdl_error){PDL_EFATAL, "PDL core struct is NULL, can't continue",0};
997	3		pdl_trans *__privtrans = PDL->create_trans(&pdl_convolveND_vtable);
998	3	50	if (!__privtrans) return PDL->make_error_simple(PDL_EFATAL, "Couldn't create trans");
999	3		pdl_params_convolveND *__params = __privtrans->params;
1000	3		__privtrans->pdls[0] = k0;
1001	3	50	PDL_RETERROR(PDL_err, PDL->type_coerce(__privtrans));
1002	3		(__params->k) = (k); /* CType.get_copy */
1003	3		(__params->aa) = (aa); /* CType.get_copy */
1004	3		(__params->a) = (a); /* CType.get_copy */
1005	3	50	PDL_RETERROR(PDL_err, PDL->make_trans_mutual(__privtrans));
1006	3		return PDL_err;
1007			}