File Coverage

lib/PDL/ImageND-pp-convolveND.c
Criterion Covered Total %
statement 60 513 11.7
branch 42 634 6.6
condition n/a
subroutine n/a
pod n/a
total 102 1147 8.8


line stmt bran cond sub pod time code
1              
2             #line 453 "lib/PDL/PP.pm"
3             /*
4             * THIS FILE WAS GENERATED BY PDL::PP from lib/PDL/ImageND.pd! Do not modify!
5             */
6              
7             #define PDL_FREE_CODE(trans, destroy, comp_free_code, ntpriv_free_code) \
8             if (destroy) { \
9             comp_free_code \
10             } \
11             if ((trans)->dims_redone) { \
12             ntpriv_free_code \
13             }
14              
15             #include "EXTERN.h"
16             #include "perl.h"
17             #include "XSUB.h"
18             #include "pdl.h"
19             #include "pdlcore.h"
20             #define PDL PDL_ImageND
21             extern Core* PDL; /* Structure hold core C functions */
22              
23             #line 1846 "lib/PDL/PP.pm"
24             typedef struct pdl_params_convolveND {
25             #line 26 "lib/PDL/ImageND-pp-convolveND.c"
26             pdl *k;
27             pdl *aa;
28             pdl *a;
29             } pdl_params_convolveND;
30              
31              
32             #line 1857 "lib/PDL/PP.pm"
33             pdl_error pdl_convolveND_readdata(pdl_trans *__privtrans) {
34             pdl_error PDL_err = {0, NULL, 0};
35             #line 36 "lib/PDL/ImageND-pp-convolveND.c"
36 3           pdl_params_convolveND *__params = __privtrans->params; (void)__params;
37 3 50         if (!__privtrans->broadcast.incs) return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "broadcast.incs NULL");
38             /* broadcastloop declarations */
39             int __brcloopval;
40             register PDL_Indx __tind0,__tind1; /* counters along dim */
41 3           register PDL_Indx __tnpdls = __privtrans->broadcast.npdls;
42             /* dims here are how many steps along those dims */
43 3           register PDL_Indx __tinc0_k0 = PDL_BRC_INC(__privtrans->broadcast.incs,__tnpdls,0,0);
44 3           register PDL_Indx __tinc1_k0 = PDL_BRC_INC(__privtrans->broadcast.incs,__tnpdls,0,1);
45             #define PDL_BROADCASTLOOP_START_convolveND_readdata PDL_BROADCASTLOOP_START( \
46             readdata, \
47             __privtrans->broadcast, \
48             __privtrans->vtable, \
49             k0_datap += __offsp[0]; \
50             , \
51             ( ,k0_datap += __tinc1_k0 - __tinc0_k0 * __tdims0 \
52             ), \
53             ( ,k0_datap += __tinc0_k0 \
54             ) \
55             )
56             #define PDL_BROADCASTLOOP_END_convolveND_readdata PDL_BROADCASTLOOP_END( \
57             __privtrans->broadcast, \
58             k0_datap -= __tinc1_k0 * __tdims1 + __offsp[0]; \
59             )
60             #ifndef PDL_DECLARE_PARAMS_convolveND_1
61             #define PDL_DECLARE_PARAMS_convolveND_1(PDL_TYPE_OP,PDL_PPSYM_OP) \
62             PDL_DECLARE_PARAMETER(PDL_TYPE_OP, k0, (__privtrans->pdls[0]), 1, PDL_PPSYM_OP)
63             #endif
64             #define PDL_IF_BAD(t,f) f
65 3           switch (__privtrans->__datatype) { /* Start generic switch */
66 0           case PDL_SB: {
67 0 0         PDL_DECLARE_PARAMS_convolveND_1(PDL_SByte,A)
    0          
    0          
68 0 0         PDL_BROADCASTLOOP_START_convolveND_readdata {/*
    0          
    0          
    0          
    0          
    0          
    0          
69             * Direct convolution
70             *
71             * Because the kernel is usually the smaller of the two arrays to be convolved,
72             * we broadcast kernel-first to keep it in the processor's cache. The strategy:
73             * work on a padded copy of the original image, so that (even with boundary
74             * conditions) the geometry of the kernel is linearly related to the input
75             * array. Otherwise, follow the path blazed by Karl in convolve(): keep track
76             * of the offsets for each kernel element in a flattened original PDL.
77             *
78             * The first (PP) argument is a dummy that's only used to set the GENERIC()
79             * macro. The other three arguments should all have the same type as the
80             * first arguments, and are all passed in as SVs. They are: the kernel,
81             * the padded copy of the input PDL, and a pre-allocated output PDL. The
82             * input PDL should be padded by the dimensionality of the kernel.
83             *
84             */
85              
86             PDL_Indx i;
87 0           pdl *k = __params->k, *a = __params->a, *aa = __params->aa;
88 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(aa));
89 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(a));
90 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(k));
91              
92 0           PDL_Indx ndims = aa->ndims;
93 0 0         if(ndims != k->ndims || ndims != aa->ndims)
    0          
94 0           return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
95              
96 0           PDL_Indx koffs[k->nvals];
97 0           PDL_SByte kvals[k->nvals];
98 0           PDL_Indx ivec[ndims];
99              
100             /************************************/
101             /* Fill up the koffs & kvals arrays */
102             /* koffs gets relative offsets into aa for each kernel value; */
103             /* kvals gets the kernel values in the same order (flattened) */
104 0           PDL_Indx *koff = koffs;
105 0           PDL_SByte *kval = kvals;
106 0           PDL_SByte *aptr = (PDL_SByte *)k->data + k->nvals - 1;
107              
108 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
109 0           PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
110 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
111 0 0         for (i=0; i < ndims; i++) {
112 0           incs[i*npdls + 0] = k->dimincs[i];
113 0           incs[i*npdls + 1] = aa->dimincs[i];
114             }
115             do {
116 0           *kval++ = aptr[-offs[0]]; /* Copy kernel value into kernel list */
117 0           *koff++ = offs[1]; /* Copy current aa offset into koffs list */
118 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
119             } while (1);
120              
121             /******************************/
122             /* Now do the actual convolution: for each vector in a, */
123             /* accumulate the appropriate aa-sum and stick it into a. */
124 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
125 0           aptr = a->data;
126 0           PDL_SByte *aaptr = aa->data;
127 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
128 0 0         for (i=0; i < ndims; i++) incs[i*npdls + 0] = a->dimincs[i]; /* got aa already */
129 0           do {
130 0           PDL_SByte acc = 0;
131 0           koff = koffs;
132 0           kval = kvals;
133 0 0         for (i=0;invals;i++)
134 0           acc += aaptr[offs[1] + *koff++] * (*kval++);
135 0           aptr[offs[0]] = acc;
136 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
137             } while (1);
138 0           PDL->changed(a, PDL_PARENTDATACHANGED, 0);
139 0 0         }PDL_BROADCASTLOOP_END_convolveND_readdata
    0          
140 0           } break;
141 0           case PDL_B: {
142 0 0         PDL_DECLARE_PARAMS_convolveND_1(PDL_Byte,B)
    0          
    0          
143 0 0         PDL_BROADCASTLOOP_START_convolveND_readdata {/*
    0          
    0          
    0          
    0          
    0          
    0          
144             * Direct convolution
145             *
146             * Because the kernel is usually the smaller of the two arrays to be convolved,
147             * we broadcast kernel-first to keep it in the processor's cache. The strategy:
148             * work on a padded copy of the original image, so that (even with boundary
149             * conditions) the geometry of the kernel is linearly related to the input
150             * array. Otherwise, follow the path blazed by Karl in convolve(): keep track
151             * of the offsets for each kernel element in a flattened original PDL.
152             *
153             * The first (PP) argument is a dummy that's only used to set the GENERIC()
154             * macro. The other three arguments should all have the same type as the
155             * first arguments, and are all passed in as SVs. They are: the kernel,
156             * the padded copy of the input PDL, and a pre-allocated output PDL. The
157             * input PDL should be padded by the dimensionality of the kernel.
158             *
159             */
160              
161             PDL_Indx i;
162 0           pdl *k = __params->k, *a = __params->a, *aa = __params->aa;
163 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(aa));
164 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(a));
165 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(k));
166              
167 0           PDL_Indx ndims = aa->ndims;
168 0 0         if(ndims != k->ndims || ndims != aa->ndims)
    0          
169 0           return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
170              
171 0           PDL_Indx koffs[k->nvals];
172 0           PDL_Byte kvals[k->nvals];
173 0           PDL_Indx ivec[ndims];
174              
175             /************************************/
176             /* Fill up the koffs & kvals arrays */
177             /* koffs gets relative offsets into aa for each kernel value; */
178             /* kvals gets the kernel values in the same order (flattened) */
179 0           PDL_Indx *koff = koffs;
180 0           PDL_Byte *kval = kvals;
181 0           PDL_Byte *aptr = (PDL_Byte *)k->data + k->nvals - 1;
182              
183 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
184 0           PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
185 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
186 0 0         for (i=0; i < ndims; i++) {
187 0           incs[i*npdls + 0] = k->dimincs[i];
188 0           incs[i*npdls + 1] = aa->dimincs[i];
189             }
190             do {
191 0           *kval++ = aptr[-offs[0]]; /* Copy kernel value into kernel list */
192 0           *koff++ = offs[1]; /* Copy current aa offset into koffs list */
193 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
194             } while (1);
195              
196             /******************************/
197             /* Now do the actual convolution: for each vector in a, */
198             /* accumulate the appropriate aa-sum and stick it into a. */
199 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
200 0           aptr = a->data;
201 0           PDL_Byte *aaptr = aa->data;
202 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
203 0 0         for (i=0; i < ndims; i++) incs[i*npdls + 0] = a->dimincs[i]; /* got aa already */
204 0           do {
205 0           PDL_Byte acc = 0;
206 0           koff = koffs;
207 0           kval = kvals;
208 0 0         for (i=0;invals;i++)
209 0           acc += aaptr[offs[1] + *koff++] * (*kval++);
210 0           aptr[offs[0]] = acc;
211 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
212             } while (1);
213 0           PDL->changed(a, PDL_PARENTDATACHANGED, 0);
214 0 0         }PDL_BROADCASTLOOP_END_convolveND_readdata
    0          
215 0           } break;
216 0           case PDL_S: {
217 0 0         PDL_DECLARE_PARAMS_convolveND_1(PDL_Short,S)
    0          
    0          
218 0 0         PDL_BROADCASTLOOP_START_convolveND_readdata {/*
    0          
    0          
    0          
    0          
    0          
    0          
219             * Direct convolution
220             *
221             * Because the kernel is usually the smaller of the two arrays to be convolved,
222             * we broadcast kernel-first to keep it in the processor's cache. The strategy:
223             * work on a padded copy of the original image, so that (even with boundary
224             * conditions) the geometry of the kernel is linearly related to the input
225             * array. Otherwise, follow the path blazed by Karl in convolve(): keep track
226             * of the offsets for each kernel element in a flattened original PDL.
227             *
228             * The first (PP) argument is a dummy that's only used to set the GENERIC()
229             * macro. The other three arguments should all have the same type as the
230             * first arguments, and are all passed in as SVs. They are: the kernel,
231             * the padded copy of the input PDL, and a pre-allocated output PDL. The
232             * input PDL should be padded by the dimensionality of the kernel.
233             *
234             */
235              
236             PDL_Indx i;
237 0           pdl *k = __params->k, *a = __params->a, *aa = __params->aa;
238 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(aa));
239 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(a));
240 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(k));
241              
242 0           PDL_Indx ndims = aa->ndims;
243 0 0         if(ndims != k->ndims || ndims != aa->ndims)
    0          
244 0           return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
245              
246 0           PDL_Indx koffs[k->nvals];
247 0           PDL_Short kvals[k->nvals];
248 0           PDL_Indx ivec[ndims];
249              
250             /************************************/
251             /* Fill up the koffs & kvals arrays */
252             /* koffs gets relative offsets into aa for each kernel value; */
253             /* kvals gets the kernel values in the same order (flattened) */
254 0           PDL_Indx *koff = koffs;
255 0           PDL_Short *kval = kvals;
256 0           PDL_Short *aptr = (PDL_Short *)k->data + k->nvals - 1;
257              
258 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
259 0           PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
260 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
261 0 0         for (i=0; i < ndims; i++) {
262 0           incs[i*npdls + 0] = k->dimincs[i];
263 0           incs[i*npdls + 1] = aa->dimincs[i];
264             }
265             do {
266 0           *kval++ = aptr[-offs[0]]; /* Copy kernel value into kernel list */
267 0           *koff++ = offs[1]; /* Copy current aa offset into koffs list */
268 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
269             } while (1);
270              
271             /******************************/
272             /* Now do the actual convolution: for each vector in a, */
273             /* accumulate the appropriate aa-sum and stick it into a. */
274 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
275 0           aptr = a->data;
276 0           PDL_Short *aaptr = aa->data;
277 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
278 0 0         for (i=0; i < ndims; i++) incs[i*npdls + 0] = a->dimincs[i]; /* got aa already */
279 0           do {
280 0           PDL_Short acc = 0;
281 0           koff = koffs;
282 0           kval = kvals;
283 0 0         for (i=0;invals;i++)
284 0           acc += aaptr[offs[1] + *koff++] * (*kval++);
285 0           aptr[offs[0]] = acc;
286 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
287             } while (1);
288 0           PDL->changed(a, PDL_PARENTDATACHANGED, 0);
289 0 0         }PDL_BROADCASTLOOP_END_convolveND_readdata
    0          
290 0           } break;
291 0           case PDL_US: {
292 0 0         PDL_DECLARE_PARAMS_convolveND_1(PDL_Ushort,U)
    0          
    0          
293 0 0         PDL_BROADCASTLOOP_START_convolveND_readdata {/*
    0          
    0          
    0          
    0          
    0          
    0          
294             * Direct convolution
295             *
296             * Because the kernel is usually the smaller of the two arrays to be convolved,
297             * we broadcast kernel-first to keep it in the processor's cache. The strategy:
298             * work on a padded copy of the original image, so that (even with boundary
299             * conditions) the geometry of the kernel is linearly related to the input
300             * array. Otherwise, follow the path blazed by Karl in convolve(): keep track
301             * of the offsets for each kernel element in a flattened original PDL.
302             *
303             * The first (PP) argument is a dummy that's only used to set the GENERIC()
304             * macro. The other three arguments should all have the same type as the
305             * first arguments, and are all passed in as SVs. They are: the kernel,
306             * the padded copy of the input PDL, and a pre-allocated output PDL. The
307             * input PDL should be padded by the dimensionality of the kernel.
308             *
309             */
310              
311             PDL_Indx i;
312 0           pdl *k = __params->k, *a = __params->a, *aa = __params->aa;
313 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(aa));
314 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(a));
315 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(k));
316              
317 0           PDL_Indx ndims = aa->ndims;
318 0 0         if(ndims != k->ndims || ndims != aa->ndims)
    0          
319 0           return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
320              
321 0           PDL_Indx koffs[k->nvals];
322 0           PDL_Ushort kvals[k->nvals];
323 0           PDL_Indx ivec[ndims];
324              
325             /************************************/
326             /* Fill up the koffs & kvals arrays */
327             /* koffs gets relative offsets into aa for each kernel value; */
328             /* kvals gets the kernel values in the same order (flattened) */
329 0           PDL_Indx *koff = koffs;
330 0           PDL_Ushort *kval = kvals;
331 0           PDL_Ushort *aptr = (PDL_Ushort *)k->data + k->nvals - 1;
332              
333 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
334 0           PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
335 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
336 0 0         for (i=0; i < ndims; i++) {
337 0           incs[i*npdls + 0] = k->dimincs[i];
338 0           incs[i*npdls + 1] = aa->dimincs[i];
339             }
340             do {
341 0           *kval++ = aptr[-offs[0]]; /* Copy kernel value into kernel list */
342 0           *koff++ = offs[1]; /* Copy current aa offset into koffs list */
343 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
344             } while (1);
345              
346             /******************************/
347             /* Now do the actual convolution: for each vector in a, */
348             /* accumulate the appropriate aa-sum and stick it into a. */
349 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
350 0           aptr = a->data;
351 0           PDL_Ushort *aaptr = aa->data;
352 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
353 0 0         for (i=0; i < ndims; i++) incs[i*npdls + 0] = a->dimincs[i]; /* got aa already */
354 0           do {
355 0           PDL_Ushort acc = 0;
356 0           koff = koffs;
357 0           kval = kvals;
358 0 0         for (i=0;invals;i++)
359 0           acc += aaptr[offs[1] + *koff++] * (*kval++);
360 0           aptr[offs[0]] = acc;
361 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
362             } while (1);
363 0           PDL->changed(a, PDL_PARENTDATACHANGED, 0);
364 0 0         }PDL_BROADCASTLOOP_END_convolveND_readdata
    0          
365 0           } break;
366 0           case PDL_L: {
367 0 0         PDL_DECLARE_PARAMS_convolveND_1(PDL_Long,L)
    0          
    0          
368 0 0         PDL_BROADCASTLOOP_START_convolveND_readdata {/*
    0          
    0          
    0          
    0          
    0          
    0          
369             * Direct convolution
370             *
371             * Because the kernel is usually the smaller of the two arrays to be convolved,
372             * we broadcast kernel-first to keep it in the processor's cache. The strategy:
373             * work on a padded copy of the original image, so that (even with boundary
374             * conditions) the geometry of the kernel is linearly related to the input
375             * array. Otherwise, follow the path blazed by Karl in convolve(): keep track
376             * of the offsets for each kernel element in a flattened original PDL.
377             *
378             * The first (PP) argument is a dummy that's only used to set the GENERIC()
379             * macro. The other three arguments should all have the same type as the
380             * first arguments, and are all passed in as SVs. They are: the kernel,
381             * the padded copy of the input PDL, and a pre-allocated output PDL. The
382             * input PDL should be padded by the dimensionality of the kernel.
383             *
384             */
385              
386             PDL_Indx i;
387 0           pdl *k = __params->k, *a = __params->a, *aa = __params->aa;
388 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(aa));
389 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(a));
390 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(k));
391              
392 0           PDL_Indx ndims = aa->ndims;
393 0 0         if(ndims != k->ndims || ndims != aa->ndims)
    0          
394 0           return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
395              
396 0           PDL_Indx koffs[k->nvals];
397 0           PDL_Long kvals[k->nvals];
398 0           PDL_Indx ivec[ndims];
399              
400             /************************************/
401             /* Fill up the koffs & kvals arrays */
402             /* koffs gets relative offsets into aa for each kernel value; */
403             /* kvals gets the kernel values in the same order (flattened) */
404 0           PDL_Indx *koff = koffs;
405 0           PDL_Long *kval = kvals;
406 0           PDL_Long *aptr = (PDL_Long *)k->data + k->nvals - 1;
407              
408 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
409 0           PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
410 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
411 0 0         for (i=0; i < ndims; i++) {
412 0           incs[i*npdls + 0] = k->dimincs[i];
413 0           incs[i*npdls + 1] = aa->dimincs[i];
414             }
415             do {
416 0           *kval++ = aptr[-offs[0]]; /* Copy kernel value into kernel list */
417 0           *koff++ = offs[1]; /* Copy current aa offset into koffs list */
418 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
419             } while (1);
420              
421             /******************************/
422             /* Now do the actual convolution: for each vector in a, */
423             /* accumulate the appropriate aa-sum and stick it into a. */
424 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
425 0           aptr = a->data;
426 0           PDL_Long *aaptr = aa->data;
427 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
428 0 0         for (i=0; i < ndims; i++) incs[i*npdls + 0] = a->dimincs[i]; /* got aa already */
429 0           do {
430 0           PDL_Long acc = 0;
431 0           koff = koffs;
432 0           kval = kvals;
433 0 0         for (i=0;invals;i++)
434 0           acc += aaptr[offs[1] + *koff++] * (*kval++);
435 0           aptr[offs[0]] = acc;
436 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
437             } while (1);
438 0           PDL->changed(a, PDL_PARENTDATACHANGED, 0);
439 0 0         }PDL_BROADCASTLOOP_END_convolveND_readdata
    0          
440 0           } break;
441 0           case PDL_UL: {
442 0 0         PDL_DECLARE_PARAMS_convolveND_1(PDL_ULong,K)
    0          
    0          
443 0 0         PDL_BROADCASTLOOP_START_convolveND_readdata {/*
    0          
    0          
    0          
    0          
    0          
    0          
444             * Direct convolution
445             *
446             * Because the kernel is usually the smaller of the two arrays to be convolved,
447             * we broadcast kernel-first to keep it in the processor's cache. The strategy:
448             * work on a padded copy of the original image, so that (even with boundary
449             * conditions) the geometry of the kernel is linearly related to the input
450             * array. Otherwise, follow the path blazed by Karl in convolve(): keep track
451             * of the offsets for each kernel element in a flattened original PDL.
452             *
453             * The first (PP) argument is a dummy that's only used to set the GENERIC()
454             * macro. The other three arguments should all have the same type as the
455             * first arguments, and are all passed in as SVs. They are: the kernel,
456             * the padded copy of the input PDL, and a pre-allocated output PDL. The
457             * input PDL should be padded by the dimensionality of the kernel.
458             *
459             */
460              
461             PDL_Indx i;
462 0           pdl *k = __params->k, *a = __params->a, *aa = __params->aa;
463 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(aa));
464 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(a));
465 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(k));
466              
467 0           PDL_Indx ndims = aa->ndims;
468 0 0         if(ndims != k->ndims || ndims != aa->ndims)
    0          
469 0           return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
470              
471 0           PDL_Indx koffs[k->nvals];
472 0           PDL_ULong kvals[k->nvals];
473 0           PDL_Indx ivec[ndims];
474              
475             /************************************/
476             /* Fill up the koffs & kvals arrays */
477             /* koffs gets relative offsets into aa for each kernel value; */
478             /* kvals gets the kernel values in the same order (flattened) */
479 0           PDL_Indx *koff = koffs;
480 0           PDL_ULong *kval = kvals;
481 0           PDL_ULong *aptr = (PDL_ULong *)k->data + k->nvals - 1;
482              
483 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
484 0           PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
485 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
486 0 0         for (i=0; i < ndims; i++) {
487 0           incs[i*npdls + 0] = k->dimincs[i];
488 0           incs[i*npdls + 1] = aa->dimincs[i];
489             }
490             do {
491 0           *kval++ = aptr[-offs[0]]; /* Copy kernel value into kernel list */
492 0           *koff++ = offs[1]; /* Copy current aa offset into koffs list */
493 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
494             } while (1);
495              
496             /******************************/
497             /* Now do the actual convolution: for each vector in a, */
498             /* accumulate the appropriate aa-sum and stick it into a. */
499 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
500 0           aptr = a->data;
501 0           PDL_ULong *aaptr = aa->data;
502 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
503 0 0         for (i=0; i < ndims; i++) incs[i*npdls + 0] = a->dimincs[i]; /* got aa already */
504 0           do {
505 0           PDL_ULong acc = 0;
506 0           koff = koffs;
507 0           kval = kvals;
508 0 0         for (i=0;invals;i++)
509 0           acc += aaptr[offs[1] + *koff++] * (*kval++);
510 0           aptr[offs[0]] = acc;
511 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
512             } while (1);
513 0           PDL->changed(a, PDL_PARENTDATACHANGED, 0);
514 0 0         }PDL_BROADCASTLOOP_END_convolveND_readdata
    0          
515 0           } break;
516 0           case PDL_IND: {
517 0 0         PDL_DECLARE_PARAMS_convolveND_1(PDL_Indx,N)
    0          
    0          
518 0 0         PDL_BROADCASTLOOP_START_convolveND_readdata {/*
    0          
    0          
    0          
    0          
    0          
    0          
519             * Direct convolution
520             *
521             * Because the kernel is usually the smaller of the two arrays to be convolved,
522             * we broadcast kernel-first to keep it in the processor's cache. The strategy:
523             * work on a padded copy of the original image, so that (even with boundary
524             * conditions) the geometry of the kernel is linearly related to the input
525             * array. Otherwise, follow the path blazed by Karl in convolve(): keep track
526             * of the offsets for each kernel element in a flattened original PDL.
527             *
528             * The first (PP) argument is a dummy that's only used to set the GENERIC()
529             * macro. The other three arguments should all have the same type as the
530             * first arguments, and are all passed in as SVs. They are: the kernel,
531             * the padded copy of the input PDL, and a pre-allocated output PDL. The
532             * input PDL should be padded by the dimensionality of the kernel.
533             *
534             */
535              
536             PDL_Indx i;
537 0           pdl *k = __params->k, *a = __params->a, *aa = __params->aa;
538 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(aa));
539 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(a));
540 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(k));
541              
542 0           PDL_Indx ndims = aa->ndims;
543 0 0         if(ndims != k->ndims || ndims != aa->ndims)
    0          
544 0           return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
545              
546 0           PDL_Indx koffs[k->nvals];
547 0           PDL_Indx kvals[k->nvals];
548 0           PDL_Indx ivec[ndims];
549              
550             /************************************/
551             /* Fill up the koffs & kvals arrays */
552             /* koffs gets relative offsets into aa for each kernel value; */
553             /* kvals gets the kernel values in the same order (flattened) */
554 0           PDL_Indx *koff = koffs;
555 0           PDL_Indx *kval = kvals;
556 0           PDL_Indx *aptr = (PDL_Indx *)k->data + k->nvals - 1;
557              
558 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
559 0           PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
560 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
561 0 0         for (i=0; i < ndims; i++) {
562 0           incs[i*npdls + 0] = k->dimincs[i];
563 0           incs[i*npdls + 1] = aa->dimincs[i];
564             }
565             do {
566 0           *kval++ = aptr[-offs[0]]; /* Copy kernel value into kernel list */
567 0           *koff++ = offs[1]; /* Copy current aa offset into koffs list */
568 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
569             } while (1);
570              
571             /******************************/
572             /* Now do the actual convolution: for each vector in a, */
573             /* accumulate the appropriate aa-sum and stick it into a. */
574 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
575 0           aptr = a->data;
576 0           PDL_Indx *aaptr = aa->data;
577 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
578 0 0         for (i=0; i < ndims; i++) incs[i*npdls + 0] = a->dimincs[i]; /* got aa already */
579 0           do {
580 0           PDL_Indx acc = 0;
581 0           koff = koffs;
582 0           kval = kvals;
583 0 0         for (i=0;invals;i++)
584 0           acc += aaptr[offs[1] + *koff++] * (*kval++);
585 0           aptr[offs[0]] = acc;
586 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
587             } while (1);
588 0           PDL->changed(a, PDL_PARENTDATACHANGED, 0);
589 0 0         }PDL_BROADCASTLOOP_END_convolveND_readdata
    0          
590 0           } break;
591 0           case PDL_ULL: {
592 0 0         PDL_DECLARE_PARAMS_convolveND_1(PDL_ULongLong,P)
    0          
    0          
593 0 0         PDL_BROADCASTLOOP_START_convolveND_readdata {/*
    0          
    0          
    0          
    0          
    0          
    0          
594             * Direct convolution
595             *
596             * Because the kernel is usually the smaller of the two arrays to be convolved,
597             * we broadcast kernel-first to keep it in the processor's cache. The strategy:
598             * work on a padded copy of the original image, so that (even with boundary
599             * conditions) the geometry of the kernel is linearly related to the input
600             * array. Otherwise, follow the path blazed by Karl in convolve(): keep track
601             * of the offsets for each kernel element in a flattened original PDL.
602             *
603             * The first (PP) argument is a dummy that's only used to set the GENERIC()
604             * macro. The other three arguments should all have the same type as the
605             * first arguments, and are all passed in as SVs. They are: the kernel,
606             * the padded copy of the input PDL, and a pre-allocated output PDL. The
607             * input PDL should be padded by the dimensionality of the kernel.
608             *
609             */
610              
611             PDL_Indx i;
612 0           pdl *k = __params->k, *a = __params->a, *aa = __params->aa;
613 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(aa));
614 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(a));
615 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(k));
616              
617 0           PDL_Indx ndims = aa->ndims;
618 0 0         if(ndims != k->ndims || ndims != aa->ndims)
    0          
619 0           return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
620              
621 0           PDL_Indx koffs[k->nvals];
622 0           PDL_ULongLong kvals[k->nvals];
623 0           PDL_Indx ivec[ndims];
624              
625             /************************************/
626             /* Fill up the koffs & kvals arrays */
627             /* koffs gets relative offsets into aa for each kernel value; */
628             /* kvals gets the kernel values in the same order (flattened) */
629 0           PDL_Indx *koff = koffs;
630 0           PDL_ULongLong *kval = kvals;
631 0           PDL_ULongLong *aptr = (PDL_ULongLong *)k->data + k->nvals - 1;
632              
633 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
634 0           PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
635 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
636 0 0         for (i=0; i < ndims; i++) {
637 0           incs[i*npdls + 0] = k->dimincs[i];
638 0           incs[i*npdls + 1] = aa->dimincs[i];
639             }
640             do {
641 0           *kval++ = aptr[-offs[0]]; /* Copy kernel value into kernel list */
642 0           *koff++ = offs[1]; /* Copy current aa offset into koffs list */
643 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
644             } while (1);
645              
646             /******************************/
647             /* Now do the actual convolution: for each vector in a, */
648             /* accumulate the appropriate aa-sum and stick it into a. */
649 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
650 0           aptr = a->data;
651 0           PDL_ULongLong *aaptr = aa->data;
652 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
653 0 0         for (i=0; i < ndims; i++) incs[i*npdls + 0] = a->dimincs[i]; /* got aa already */
654 0           do {
655 0           PDL_ULongLong acc = 0;
656 0           koff = koffs;
657 0           kval = kvals;
658 0 0         for (i=0;invals;i++)
659 0           acc += aaptr[offs[1] + *koff++] * (*kval++);
660 0           aptr[offs[0]] = acc;
661 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
662             } while (1);
663 0           PDL->changed(a, PDL_PARENTDATACHANGED, 0);
664 0 0         }PDL_BROADCASTLOOP_END_convolveND_readdata
    0          
665 0           } break;
666 0           case PDL_LL: {
667 0 0         PDL_DECLARE_PARAMS_convolveND_1(PDL_LongLong,Q)
    0          
    0          
668 0 0         PDL_BROADCASTLOOP_START_convolveND_readdata {/*
    0          
    0          
    0          
    0          
    0          
    0          
669             * Direct convolution
670             *
671             * Because the kernel is usually the smaller of the two arrays to be convolved,
672             * we broadcast kernel-first to keep it in the processor's cache. The strategy:
673             * work on a padded copy of the original image, so that (even with boundary
674             * conditions) the geometry of the kernel is linearly related to the input
675             * array. Otherwise, follow the path blazed by Karl in convolve(): keep track
676             * of the offsets for each kernel element in a flattened original PDL.
677             *
678             * The first (PP) argument is a dummy that's only used to set the GENERIC()
679             * macro. The other three arguments should all have the same type as the
680             * first arguments, and are all passed in as SVs. They are: the kernel,
681             * the padded copy of the input PDL, and a pre-allocated output PDL. The
682             * input PDL should be padded by the dimensionality of the kernel.
683             *
684             */
685              
686             PDL_Indx i;
687 0           pdl *k = __params->k, *a = __params->a, *aa = __params->aa;
688 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(aa));
689 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(a));
690 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(k));
691              
692 0           PDL_Indx ndims = aa->ndims;
693 0 0         if(ndims != k->ndims || ndims != aa->ndims)
    0          
694 0           return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
695              
696 0           PDL_Indx koffs[k->nvals];
697 0           PDL_LongLong kvals[k->nvals];
698 0           PDL_Indx ivec[ndims];
699              
700             /************************************/
701             /* Fill up the koffs & kvals arrays */
702             /* koffs gets relative offsets into aa for each kernel value; */
703             /* kvals gets the kernel values in the same order (flattened) */
704 0           PDL_Indx *koff = koffs;
705 0           PDL_LongLong *kval = kvals;
706 0           PDL_LongLong *aptr = (PDL_LongLong *)k->data + k->nvals - 1;
707              
708 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
709 0           PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
710 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
711 0 0         for (i=0; i < ndims; i++) {
712 0           incs[i*npdls + 0] = k->dimincs[i];
713 0           incs[i*npdls + 1] = aa->dimincs[i];
714             }
715             do {
716 0           *kval++ = aptr[-offs[0]]; /* Copy kernel value into kernel list */
717 0           *koff++ = offs[1]; /* Copy current aa offset into koffs list */
718 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
719             } while (1);
720              
721             /******************************/
722             /* Now do the actual convolution: for each vector in a, */
723             /* accumulate the appropriate aa-sum and stick it into a. */
724 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
725 0           aptr = a->data;
726 0           PDL_LongLong *aaptr = aa->data;
727 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
728 0 0         for (i=0; i < ndims; i++) incs[i*npdls + 0] = a->dimincs[i]; /* got aa already */
729 0           do {
730 0           PDL_LongLong acc = 0;
731 0           koff = koffs;
732 0           kval = kvals;
733 0 0         for (i=0;invals;i++)
734 0           acc += aaptr[offs[1] + *koff++] * (*kval++);
735 0           aptr[offs[0]] = acc;
736 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
737             } while (1);
738 0           PDL->changed(a, PDL_PARENTDATACHANGED, 0);
739 0 0         }PDL_BROADCASTLOOP_END_convolveND_readdata
    0          
740 0           } break;
741 0           case PDL_F: {
742 0 0         PDL_DECLARE_PARAMS_convolveND_1(PDL_Float,F)
    0          
    0          
743 0 0         PDL_BROADCASTLOOP_START_convolveND_readdata {/*
    0          
    0          
    0          
    0          
    0          
    0          
744             * Direct convolution
745             *
746             * Because the kernel is usually the smaller of the two arrays to be convolved,
747             * we broadcast kernel-first to keep it in the processor's cache. The strategy:
748             * work on a padded copy of the original image, so that (even with boundary
749             * conditions) the geometry of the kernel is linearly related to the input
750             * array. Otherwise, follow the path blazed by Karl in convolve(): keep track
751             * of the offsets for each kernel element in a flattened original PDL.
752             *
753             * The first (PP) argument is a dummy that's only used to set the GENERIC()
754             * macro. The other three arguments should all have the same type as the
755             * first arguments, and are all passed in as SVs. They are: the kernel,
756             * the padded copy of the input PDL, and a pre-allocated output PDL. The
757             * input PDL should be padded by the dimensionality of the kernel.
758             *
759             */
760              
761             PDL_Indx i;
762 0           pdl *k = __params->k, *a = __params->a, *aa = __params->aa;
763 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(aa));
764 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(a));
765 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(k));
766              
767 0           PDL_Indx ndims = aa->ndims;
768 0 0         if(ndims != k->ndims || ndims != aa->ndims)
    0          
769 0           return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
770              
771 0           PDL_Indx koffs[k->nvals];
772 0           PDL_Float kvals[k->nvals];
773 0           PDL_Indx ivec[ndims];
774              
775             /************************************/
776             /* Fill up the koffs & kvals arrays */
777             /* koffs gets relative offsets into aa for each kernel value; */
778             /* kvals gets the kernel values in the same order (flattened) */
779 0           PDL_Indx *koff = koffs;
780 0           PDL_Float *kval = kvals;
781 0           PDL_Float *aptr = (PDL_Float *)k->data + k->nvals - 1;
782              
783 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
784 0           PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
785 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
786 0 0         for (i=0; i < ndims; i++) {
787 0           incs[i*npdls + 0] = k->dimincs[i];
788 0           incs[i*npdls + 1] = aa->dimincs[i];
789             }
790             do {
791 0           *kval++ = aptr[-offs[0]]; /* Copy kernel value into kernel list */
792 0           *koff++ = offs[1]; /* Copy current aa offset into koffs list */
793 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
794             } while (1);
795              
796             /******************************/
797             /* Now do the actual convolution: for each vector in a, */
798             /* accumulate the appropriate aa-sum and stick it into a. */
799 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
800 0           aptr = a->data;
801 0           PDL_Float *aaptr = aa->data;
802 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
803 0 0         for (i=0; i < ndims; i++) incs[i*npdls + 0] = a->dimincs[i]; /* got aa already */
804 0           do {
805 0           PDL_Float acc = 0;
806 0           koff = koffs;
807 0           kval = kvals;
808 0 0         for (i=0;invals;i++)
809 0           acc += aaptr[offs[1] + *koff++] * (*kval++);
810 0           aptr[offs[0]] = acc;
811 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
812             } while (1);
813 0           PDL->changed(a, PDL_PARENTDATACHANGED, 0);
814 0 0         }PDL_BROADCASTLOOP_END_convolveND_readdata
    0          
815 0           } break;
816 3           case PDL_D: {
817 3 50         PDL_DECLARE_PARAMS_convolveND_1(PDL_Double,D)
    50          
    50          
818 12 50         PDL_BROADCASTLOOP_START_convolveND_readdata {/*
    50          
    50          
    50          
    50          
    100          
    100          
819             * Direct convolution
820             *
821             * Because the kernel is usually the smaller of the two arrays to be convolved,
822             * we broadcast kernel-first to keep it in the processor's cache. The strategy:
823             * work on a padded copy of the original image, so that (even with boundary
824             * conditions) the geometry of the kernel is linearly related to the input
825             * array. Otherwise, follow the path blazed by Karl in convolve(): keep track
826             * of the offsets for each kernel element in a flattened original PDL.
827             *
828             * The first (PP) argument is a dummy that's only used to set the GENERIC()
829             * macro. The other three arguments should all have the same type as the
830             * first arguments, and are all passed in as SVs. They are: the kernel,
831             * the padded copy of the input PDL, and a pre-allocated output PDL. The
832             * input PDL should be padded by the dimensionality of the kernel.
833             *
834             */
835              
836             PDL_Indx i;
837 3           pdl *k = __params->k, *a = __params->a, *aa = __params->aa;
838 3 50         PDL_RETERROR(PDL_err, PDL->make_physical(aa));
839 3 50         PDL_RETERROR(PDL_err, PDL->make_physical(a));
840 3 50         PDL_RETERROR(PDL_err, PDL->make_physical(k));
841              
842 3           PDL_Indx ndims = aa->ndims;
843 3 50         if(ndims != k->ndims || ndims != aa->ndims)
    50          
844 0           return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
845              
846 3           PDL_Indx koffs[k->nvals];
847 3           PDL_Double kvals[k->nvals];
848 3           PDL_Indx ivec[ndims];
849              
850             /************************************/
851             /* Fill up the koffs & kvals arrays */
852             /* koffs gets relative offsets into aa for each kernel value; */
853             /* kvals gets the kernel values in the same order (flattened) */
854 3           PDL_Indx *koff = koffs;
855 3           PDL_Double *kval = kvals;
856 3           PDL_Double *aptr = (PDL_Double *)k->data + k->nvals - 1;
857              
858 9 100         for (i=0; i < ndims; i++) ivec[i] = 0;
859 3           PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
860 9 100         for (i=0; i < npdls; i++) offs[i] = 0;
861 9 100         for (i=0; i < ndims; i++) {
862 6           incs[i*npdls + 0] = k->dimincs[i];
863 6           incs[i*npdls + 1] = aa->dimincs[i];
864             }
865             do {
866 12           *kval++ = aptr[-offs[0]]; /* Copy kernel value into kernel list */
867 12           *koff++ = offs[1]; /* Copy current aa offset into koffs list */
868 12 100         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
869             } while (1);
870              
871             /******************************/
872             /* Now do the actual convolution: for each vector in a, */
873             /* accumulate the appropriate aa-sum and stick it into a. */
874 9 100         for (i=0; i < ndims; i++) ivec[i] = 0;
875 3           aptr = a->data;
876 3           PDL_Double *aaptr = aa->data;
877 9 100         for (i=0; i < npdls; i++) offs[i] = 0;
878 9 100         for (i=0; i < ndims; i++) incs[i*npdls + 0] = a->dimincs[i]; /* got aa already */
879 105           do {
880 108           PDL_Double acc = 0;
881 108           koff = koffs;
882 108           kval = kvals;
883 540 100         for (i=0;invals;i++)
884 432           acc += aaptr[offs[1] + *koff++] * (*kval++);
885 108           aptr[offs[0]] = acc;
886 108 100         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
887             } while (1);
888 3           PDL->changed(a, PDL_PARENTDATACHANGED, 0);
889 3 50         }PDL_BROADCASTLOOP_END_convolveND_readdata
    50          
890 3           } break;
891 0           case PDL_LD: {
892 0 0         PDL_DECLARE_PARAMS_convolveND_1(PDL_LDouble,E)
    0          
    0          
893 0 0         PDL_BROADCASTLOOP_START_convolveND_readdata {/*
    0          
    0          
    0          
    0          
    0          
    0          
894             * Direct convolution
895             *
896             * Because the kernel is usually the smaller of the two arrays to be convolved,
897             * we broadcast kernel-first to keep it in the processor's cache. The strategy:
898             * work on a padded copy of the original image, so that (even with boundary
899             * conditions) the geometry of the kernel is linearly related to the input
900             * array. Otherwise, follow the path blazed by Karl in convolve(): keep track
901             * of the offsets for each kernel element in a flattened original PDL.
902             *
903             * The first (PP) argument is a dummy that's only used to set the GENERIC()
904             * macro. The other three arguments should all have the same type as the
905             * first arguments, and are all passed in as SVs. They are: the kernel,
906             * the padded copy of the input PDL, and a pre-allocated output PDL. The
907             * input PDL should be padded by the dimensionality of the kernel.
908             *
909             */
910              
911             PDL_Indx i;
912 0           pdl *k = __params->k, *a = __params->a, *aa = __params->aa;
913 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(aa));
914 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(a));
915 0 0         PDL_RETERROR(PDL_err, PDL->make_physical(k));
916              
917 0           PDL_Indx ndims = aa->ndims;
918 0 0         if(ndims != k->ndims || ndims != aa->ndims)
    0          
919 0           return PDL->make_error(PDL_EUSERERROR, "Error in convolveND:" "convolveND: dims don't agree - should never happen\n");
920              
921 0           PDL_Indx koffs[k->nvals];
922 0           PDL_LDouble kvals[k->nvals];
923 0           PDL_Indx ivec[ndims];
924              
925             /************************************/
926             /* Fill up the koffs & kvals arrays */
927             /* koffs gets relative offsets into aa for each kernel value; */
928             /* kvals gets the kernel values in the same order (flattened) */
929 0           PDL_Indx *koff = koffs;
930 0           PDL_LDouble *kval = kvals;
931 0           PDL_LDouble *aptr = (PDL_LDouble *)k->data + k->nvals - 1;
932              
933 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
934 0           PDL_Indx npdls = 2, incs[npdls*ndims], offs[npdls];
935 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
936 0 0         for (i=0; i < ndims; i++) {
937 0           incs[i*npdls + 0] = k->dimincs[i];
938 0           incs[i*npdls + 1] = aa->dimincs[i];
939             }
940             do {
941 0           *kval++ = aptr[-offs[0]]; /* Copy kernel value into kernel list */
942 0           *koff++ = offs[1]; /* Copy current aa offset into koffs list */
943 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, k->dims, ivec)) break;
944             } while (1);
945              
946             /******************************/
947             /* Now do the actual convolution: for each vector in a, */
948             /* accumulate the appropriate aa-sum and stick it into a. */
949 0 0         for (i=0; i < ndims; i++) ivec[i] = 0;
950 0           aptr = a->data;
951 0           PDL_LDouble *aaptr = aa->data;
952 0 0         for (i=0; i < npdls; i++) offs[i] = 0;
953 0 0         for (i=0; i < ndims; i++) incs[i*npdls + 0] = a->dimincs[i]; /* got aa already */
954 0           do {
955 0           PDL_LDouble acc = 0;
956 0           koff = koffs;
957 0           kval = kvals;
958 0 0         for (i=0;invals;i++)
959 0           acc += aaptr[offs[1] + *koff++] * (*kval++);
960 0           aptr[offs[0]] = acc;
961 0 0         if (!pdl_broadcast_nd_step(npdls, offs, 0, ndims, incs, a->dims, ivec)) break;
962             } while (1);
963 0           PDL->changed(a, PDL_PARENTDATACHANGED, 0);
964 0 0         }PDL_BROADCASTLOOP_END_convolveND_readdata
    0          
965 0           } break;
966 0           default: return PDL->make_error(PDL_EUSERERROR, "PP INTERNAL ERROR in convolveND: unhandled datatype(%d), only handles (ABSULKNPQFDE)! PLEASE MAKE A BUG REPORT\n", __privtrans->__datatype);
967             }
968             #undef PDL_IF_BAD
969 3           return PDL_err;
970             }
971              
972             static pdl_datatypes pdl_convolveND_vtable_gentypes[] = { PDL_SB, PDL_B, PDL_S, PDL_US, PDL_L, PDL_UL, PDL_IND, PDL_ULL, PDL_LL, PDL_F, PDL_D, PDL_LD, -1 };
973             static PDL_Indx pdl_convolveND_vtable_realdims[] = { 0 };
974             static char *pdl_convolveND_vtable_parnames[] = { "k0" };
975             static short pdl_convolveND_vtable_parflags[] = {
976             0
977             };
978             static pdl_datatypes pdl_convolveND_vtable_partypes[] = { -1 };
979             static PDL_Indx pdl_convolveND_vtable_realdims_starts[] = { 0 };
980             static PDL_Indx pdl_convolveND_vtable_realdims_ind_ids[] = { 0 };
981             static char *pdl_convolveND_vtable_indnames[] = { "" };
982             pdl_transvtable pdl_convolveND_vtable = {
983             PDL_TRANS_DO_BROADCAST, 0, pdl_convolveND_vtable_gentypes, 1, 1, NULL /*CORE21*/,
984             pdl_convolveND_vtable_realdims, pdl_convolveND_vtable_parnames,
985             pdl_convolveND_vtable_parflags, pdl_convolveND_vtable_partypes,
986             pdl_convolveND_vtable_realdims_starts, pdl_convolveND_vtable_realdims_ind_ids, 0,
987             0, pdl_convolveND_vtable_indnames,
988             NULL, pdl_convolveND_readdata, NULL,
989             NULL,
990             sizeof(pdl_params_convolveND),"PDL::ImageND::convolveND"
991             };
992              
993              
994 3           pdl_error pdl_run_convolveND(pdl *k0,pdl *k,pdl *aa,pdl *a) {
995 3           pdl_error PDL_err = {0, NULL, 0};
996 3 50         if (!PDL) return (pdl_error){PDL_EFATAL, "PDL core struct is NULL, can't continue",0};
997 3           pdl_trans *__privtrans = PDL->create_trans(&pdl_convolveND_vtable);
998 3 50         if (!__privtrans) return PDL->make_error_simple(PDL_EFATAL, "Couldn't create trans");
999 3           pdl_params_convolveND *__params = __privtrans->params;
1000 3           __privtrans->pdls[0] = k0;
1001 3 50         PDL_RETERROR(PDL_err, PDL->type_coerce(__privtrans));
1002 3           (__params->k) = (k); /* CType.get_copy */
1003 3           (__params->aa) = (aa); /* CType.get_copy */
1004 3           (__params->a) = (a); /* CType.get_copy */
1005 3 50         PDL_RETERROR(PDL_err, PDL->make_trans_mutual(__privtrans));
1006 3           return PDL_err;
1007             }