File Coverage

lib/PDL/Core/pdlbroadcast.c
Criterion Covered Total %
statement 320 339 94.4
branch 295 356 82.8
condition n/a
subroutine n/a
pod n/a
total 615 695 88.4


line stmt bran cond sub pod time code
1             /* XXX NOTE THAT IT IS NOT SAFE TO USE ->pdls MEMBER OUTSIDE
2             INITBROADCASTSTRUCT! */
3              
4             #include "pdl.h" /* Data structure declarations */
5             #include "pdlcore.h" /* Core declarations */
6              
7             #define MAX2(a,b) if ((b)>(a)) a=b;
8              
9             /**** Convenience routines for moving around collections
10             **** of indices and PDL pointers.
11             ****/
12 33708           static pdl **copy_pdl_array (pdl **from, int size) {
13             pdl **to;
14 33708           Newx (to, size, pdl*);
15 33708           return (pdl **) CopyD (from, to, size, pdl*);
16             }
17              
18             /*******
19             * pdl_get_broadcastdims - get the pthread-specific broadcasting dims from a PDL
20             * Input: broadcast structure
21             * Outputs: see above (returned by function)
22             */
23 47189           PDL_Indx *pdl_get_broadcastdims(pdl_broadcast *broadcast)
24             {
25             /* The non-multithreaded case: return just the usual value */
26 47189 100         if (!(broadcast->gflags & PDL_BROADCAST_MAGICKED)) return broadcast->dims;
27 834           int thr = pdl_magic_get_thread(broadcast->pdls[broadcast->mag_nthpdl]);
28 834 50         if (thr < 0) return NULL;
29 834           return broadcast->dims + thr * broadcast->ndims;
30             }
31              
32              
33             /*******
34             * pdl_get_threadoffsp - get the pthread-specific offset arrays from a PDL
35             * Input: broadcast structure
36             * Outputs: Pointer to pthread-specific offset array (returned by function)
37             */
38 47189           PDL_Indx *pdl_get_threadoffsp(pdl_broadcast *broadcast)
39             {
40             /* The non-multithreaded case: return just the usual offsets */
41 47189 100         if (!(broadcast->gflags & PDL_BROADCAST_MAGICKED)) return broadcast->offs;
42 834           int thr = pdl_magic_get_thread(broadcast->pdls[broadcast->mag_nthpdl]);
43 834 50         if (thr < 0) return NULL;
44 834           return broadcast->offs + thr * broadcast->npdls;
45             }
46              
47              
48             /* Function to get the pthread-specific offset, indexes and pthread number for the supplied broadcast structure
49             Input: broadcast structure
50             Outputs: Pointer to pthread-specific offset array (returned by function)
51             Pointer to pthread-specific index array (ind Pointer supplied and modified by function)
52             Pointer to pthread-specific dims array (dims Pointer supplied and modified by function)
53             Pthread index for the current pthread ( thr supplied and modified by function)
54             */
55 86264           static inline PDL_Indx* pdl_get_threadoffsp_int(pdl_broadcast *broadcast, int *pthr, PDL_Indx **inds, PDL_Indx **dims)
56             {
57 86264 100         if (broadcast->gflags & PDL_BROADCAST_MAGICKED) {
58 1405 50         if (broadcast->mag_nthpdl < 0 || broadcast->mag_nthpdl >= broadcast->npdls)
    50          
59 0           return NULL;
60 1405           int thr = pdl_magic_get_thread(broadcast->pdls[broadcast->mag_nthpdl]);
61 1405 50         if (thr < 0) return NULL;
62 1405           *pthr = thr;
63 1405           *inds = broadcast->inds + thr * broadcast->ndims;
64 1405           *dims = broadcast->dims + thr * broadcast->ndims;
65 1405           return broadcast->offs + thr * broadcast->npdls;
66             }
67 84859           *pthr = 0;
68             /* The non-multithreaded case: return just the usual offsets */
69 84859           *dims = broadcast->dims;
70 84859           *inds = broadcast->inds;
71 84859           return broadcast->offs;
72             }
73              
74 33719           void pdl_freebroadcaststruct(pdl_broadcast *broadcast) {
75 33719 50         PDLDEBUG_f(printf("freebroadcaststruct(%p)\n", broadcast));
76 33719 100         if (!broadcast->inds) {return;}
77 33708           Safefree(broadcast->inds);
78 33708           Safefree(broadcast->dims);
79 33708           Safefree(broadcast->offs);
80 33708           Safefree(broadcast->incs);
81 33708           Safefree(broadcast->flags);
82 33708           Safefree(broadcast->pdls);
83 33708           pdl_clearbroadcaststruct(broadcast);
84             }
85              
86 33708           void pdl_clearbroadcaststruct(pdl_broadcast *it) {
87 33708 50         PDLDEBUG_f(printf("clearbroadcaststruct(%p)\n", it));
88 33708           it->transvtable=0; it->pdls=0; it->flags = 0;
89 33708           it->ndims = it->nimpl = it->npdls = 0;
90 33708           it->offs = it->incs = it->realdims = it->inds = it->dims = 0;
91 33708           it->gflags=0; /* unsets PDL_BROADCAST_INITIALIZED among others */
92 33708           PDL_CLRMAGIC(it);
93 33708           }
94              
95 112           pdl_error pdl_find_max_pthread(
96             pdl **pdls, int npdls, PDL_Indx* realdims, PDL_Indx* creating,
97             int target_pthread,
98             int *p_maxPthread, /* Maximum achievable pthread */
99             int *p_maxPthreadDim, /* Threaded dim number that has the max num pthreads */
100             int *p_maxPthreadPDL /* PDL that has the max (or right at the target) num pthreads */
101 112           ) {
102 112           pdl_error PDL_err = {0, NULL, 0};
103             PDL_Indx j, k, t;
104             /* Build int arrays of broadcasted dim numbers and sizes for each pdl */
105 112           PDL_Indx max_remainder = 0;
106 112           PDL_Indx nbroadcastedDims[npdls];
107 112           PDL_Indx *broadcastedDims[npdls];
108 112           PDL_Indx *broadcastedDimSizes[npdls];
109 422 100         for (j=0; j
110 310 100         if (creating[j]) continue;
111 255           broadcastedDims[j] = (PDL_Indx*) malloc(sizeof(PDL_Indx) * pdls[j]->ndims);
112 255 50         if (!broadcastedDims[j]) return pdl_make_error_simple(PDL_EFATAL, "Out of Memory\n");
113 255           broadcastedDimSizes[j] = (PDL_Indx*) malloc(sizeof(PDL_Indx) * pdls[j]->ndims);
114 255 50         if (!broadcastedDimSizes[j]) return pdl_make_error_simple(PDL_EFATAL, "Out of Memory\n");
115             }
116 422 100         for (j=0; j
117 310 100         if (creating[j]) continue;
118 534 100         for ( k=0, t = realdims[j]; t < pdls[j]->ndims; t++, k++ ){
119 279           broadcastedDimSizes[j][k] = pdls[j]->dims[t];
120 279           broadcastedDims[j][k] = t;
121             }
122 255           nbroadcastedDims[j] = pdls[j]->ndims - realdims[j];
123             }
124             /* Go through each broadcasted dim and find best match */
125 112           *p_maxPthread = 0;
126 247 100         for (j=0; j
127 193 100         if (creating[j]) continue;
128 215 100         for ( k=0; k < nbroadcastedDims[j]; k++){
129 102           PDL_Indx this_dim = broadcastedDimSizes[j][k];
130 102           PDL_Indx this_remainder = this_dim % target_pthread;
131 102 100         if ( this_remainder == 0 ){
132 33           *p_maxPthread = target_pthread;
133 33           *p_maxPthreadPDL = j;
134 33           *p_maxPthreadDim = broadcastedDims[j][k];
135 33           break;
136             }
137 69 100         if ( this_dim > *p_maxPthread && this_remainder > max_remainder ){
    100          
138 57           max_remainder = this_remainder;
139 57           *p_maxPthread = PDLMIN(target_pthread, this_dim);
140 57           *p_maxPthreadPDL = j;
141 57           *p_maxPthreadDim = broadcastedDims[j][k];
142             }
143             }
144             /* Don't go any further if target pthread achieved */
145 146 100         if ( *p_maxPthread == target_pthread ) break;
146             }
147 112 50         PDLDEBUG_f(pdl_dump_broadcasting_info(
148             npdls, creating, target_pthread,
149             nbroadcastedDims, broadcastedDims, broadcastedDimSizes,
150             *p_maxPthreadPDL, *p_maxPthreadDim, *p_maxPthread
151             ));
152             /* Free the stuff we allocated */
153 422 100         for (j=0; j
154 310 100         if (creating[j]) continue;
155 255           free(broadcastedDims[j]);
156 255           free(broadcastedDimSizes[j]);
157             }
158 112           return PDL_err;
159             }
160              
161             /* Function to auto-add pthreading magic (i.e. hints for multiple
162             processor threads) to the pdls, based on the target number of
163             pthreads and the pdl-threaded dimensions.
164             noPthreadFlag is a flag indicating that the pdl thread that
165             called this function is not multiple processor threading safe,
166             so no pthreading magic will be added.
167             */
168 40046           pdl_error pdl_autopthreadmagic( pdl **pdls, int npdls, PDL_Indx* realdims, PDL_Indx* creating, int noPthreadFlag ){
169 40046           pdl_error PDL_err = {0, NULL, 0};
170             PDL_Indx j;
171 40046           int maxPthreadPDL = -1; /* PDL that has the max (or right at the target) num pthreads */
172 40046           int maxPthreadDim = -1; /* Threaded dim number that has the max num pthreads */
173 40046           int maxPthread = 0; /* Maximum achievable pthread */
174 40046           int target_pthread = pdl_autopthread_targ;
175 40046           pdl_autopthread_actual = 0; /* Initialize the global variable indicating actual number of pthreads */
176 40046           pdl_autopthread_dim = -1; /* Initialize the global variable indicating actual dim pthreaded on */
177              
178             /* Don't do anything if auto_pthreading is turned off (i.e. equal zero) */
179 40046 100         if ( !target_pthread ) return PDL_err;
180              
181 39826           PDL_Indx largest_nvals = 0;
182             /* Remove any existing threading magic */
183 135954 100         for (j=0; j
184 96128 100         if (creating[j]) continue;
185 68615 100         MAX2(largest_nvals, pdls[j]->nvals); /* Find largest size */
186             /* Remove thread magic, if there is some set for this pdl */
187 68637           if (pdls[j]->magic &&
188 22           (pdl_magic_thread_nthreads(pdls[j], NULL)))
189 22 50         PDL_RETERROR(PDL_err, pdl_add_threading_magic(pdls[j], -1, -1));
190             }
191              
192 39826 100         if ( noPthreadFlag ) return PDL_err; /* Don't go further if the current pdl function isn't thread-safe */
193             /* Don't do anything if we are lower than the threshold */
194 39777 100         if ( (largest_nvals>>20 /* as MBytes */) < pdl_autopthread_size )
195 39665           return PDL_err;
196              
197 112 50         PDL_RETERROR(PDL_err, pdl_find_max_pthread(
198             pdls, npdls, realdims, creating, target_pthread,
199             &maxPthread, &maxPthreadDim, &maxPthreadPDL
200             ));
201              
202             /* Add threading magic */
203 112 100         if ( maxPthread > 1 ) {
204 64 50         PDL_RETERROR(PDL_err, pdl_add_threading_magic(pdls[maxPthreadPDL], maxPthreadDim, maxPthread));
205 64           pdl_autopthread_actual = maxPthread; /* Set the global variable indicating actual number of pthreads */
206 64           pdl_autopthread_dim = maxPthreadDim;
207             }
208 112           return PDL_err;
209             }
210              
211 94257           pdl_error pdl_dim_checks(
212             pdl_transvtable *vtable, pdl **pdls,
213             pdl_broadcast *broadcast, PDL_Indx nimpl, PDL_Indx *creating,
214             PDL_Indx *ind_sizes, char load_only
215             ) {
216 94257           pdl_error PDL_err = {0, NULL, 0};
217             PDL_Indx i, j;
218 94257 50         PDLDEBUG_f(printf("pdl_dim_checks(load_only=%d) %p:\n", load_only, ind_sizes);
219             printf(" ind_sizes: "); pdl_print_iarr(ind_sizes, vtable->ninds);printf("\n"));
220 294137 100         for (i=0; inpdls; i++) {
221 199897           pdl *pdl = pdls[i];
222 199897           PDL_Indx ninds = vtable->par_realdims[i], ndims = pdl->ndims;
223 199897 50         PDLDEBUG_f(printf("pdl_dim_checks pdl %"IND_FLAG" (creating=%"IND_FLAG" ninds=%"IND_FLAG"): ", i, creating[i], ninds));
224 199897 50         PDLDEBUG_f(pdl_dump(pdl));
225 199897           short flags = vtable->par_flags[i];
226 199897 100         if (!load_only && creating[i]) continue;
    100          
227 172276           PDL_Indx *dims = pdl->dims;
228 172276           char isoutput = (i >= vtable->nparents);
229 207756 100         for (j=0; j
230 35497           PDL_Indx ind_id = PDL_IND_ID(vtable, i, j), ind_sz = ind_sizes[ind_id];
231 35497 100         if (j >= ndims && ind_sz == -1)
    100          
232             /* Dimensional promotion when number of dims is less than required: */
233 5929           ind_sz = ind_sizes[ind_id] = 1;
234 35497 100         if (load_only && creating[i]) continue;
    100          
235 34642 100         if (ind_sz == -1 || (!(flags & PDL_PARAM_ISPHYS) && j < ndims && ind_sz == 1)) {
    100          
    100          
    100          
236 14451           ind_sizes[ind_id] = dims[j];
237 14451           continue;
238             }
239 20191 100         if (j >= ndims && isoutput && ind_sz != 1)
    100          
    100          
240 5           return pdl_param_error(vtable,i,
241             pdls, nimpl, creating,
242             "index '%s' size %"IND_FLAG", can't broadcast over output ndarray with size > 1",
243 5           vtable->ind_names[ind_id], ind_sz
244             );
245 20186 100         if (isoutput && ind_sz != 1 && pdl->vafftrans && pdl->vafftrans->incs[j] == 0)
    100          
    100          
    100          
246 2           return pdl_param_error(vtable,i,
247             pdls, nimpl, creating,
248             "index '%s' size %"IND_FLAG", can't broadcast over dummy dim with size > 1",
249 2           vtable->ind_names[ind_id], ind_sz
250             );
251 20184 100         if (j < ndims && ind_sz != dims[j] && (isoutput || dims[j] != 1))
    100          
    100          
    50          
252 8           return pdl_param_error(vtable,i,
253             pdls, nimpl, creating,
254             "index '%s' size %"IND_FLAG", but ndarray dim has size %"IND_FLAG,
255 8           vtable->ind_names[ind_id], ind_sz, dims[j]
256             );
257 20176 100         if (j < ndims && ind_sz != dims[j] &&
    100          
    100          
258 2 50         !load_only && !creating[i] &&
    50          
259 2           ind_sz > 1 &&
260 2 50         (flags & PDL_PARAM_ISPHYS)
261             )
262 2           return pdl_param_error(vtable,i,
263             pdls, nimpl, creating,
264             "index '%s' size %"IND_FLAG", but ndarray dim has size %"IND_FLAG,
265 2           vtable->ind_names[ind_id], ind_sz, dims[j]
266             );
267             }
268             }
269 94240 50         PDLDEBUG_f(printf("pdl_dim_checks after:\n");
270             printf(" ind_sizes: "); pdl_print_iarr(ind_sizes, vtable->ninds);
271             printf("\n"));
272 94240           return PDL_err;
273             }
274              
275 40046           static pdl_error pdl_broadcast_dim_checks(
276             pdl_transvtable *vtable, pdl **pdls,
277             pdl_broadcast *broadcast, PDL_Indx *creating,
278             PDL_Indx nimpl, PDL_Indx *realdims, PDL_Indx npdls, PDL_Indx *nthp
279             ) {
280 40046           pdl_error PDL_err = {0, NULL, 0};
281             PDL_Indx j, nth /* Index to dimensions */;
282 63492 100         for (nth=0; nth
283 73769 100         for (j=0; j
284 50323 100         if (creating[j]) continue; // If jth PDL is null, don't bother trying to match
285 40475 100         char isoutput = (vtable && j >= vtable->nparents);
    100          
286 40475 100         if (nth >= pdls[j]->broadcastids[0]-realdims[j]) { /* off end of current PDL's dimlist */
287 8961 100         if (isoutput && broadcast->dims[nth] != 1)
    100          
288 5           return pdl_param_error(vtable,nth,
289             pdls, nimpl, creating,
290             "implicit dim %"IND_FLAG" size %"IND_FLAG", can't broadcast over output ndarray with size > 1",
291 5           nth, broadcast->dims[nth]
292             );
293 8956           continue;
294             }
295 31514           PDL_Indx cur_pdl_dim = pdls[j]->dims[nth+realdims[j]];
296 31514 100         if (isoutput && cur_pdl_dim == 1 && cur_pdl_dim != broadcast->dims[nth])
    100          
    100          
297 5           return pdl_param_error(vtable,nth,
298             pdls, nimpl, creating,
299             "implicit dim %"IND_FLAG" size %"IND_FLAG", but dim has size %"IND_FLAG,
300 5           nth, broadcast->dims[nth], cur_pdl_dim
301             );
302 31509 100         if (isoutput && cur_pdl_dim != 1 && pdls[j]->vafftrans && pdls[j]->vafftrans->incs[nth+realdims[j]] == 0)
    100          
    100          
    100          
303 2           return pdl_param_error(vtable,nth,
304             pdls, nimpl, creating,
305             "implicit dim %"IND_FLAG" size %"IND_FLAG", but dim is dummy",
306 2           nth, broadcast->dims[nth]
307             );
308 31507 100         if (cur_pdl_dim != 1) { // If the current dim in the current PDL is not 1,
309 25382 100         if (broadcast->dims[nth] != 1) { // ... and the current planned size isn't 1,
310 6968 100         if (broadcast->dims[nth] != cur_pdl_dim) // ... then check to make sure they're the same.
311 1           return pdl_param_error(vtable,j,
312             pdls, nimpl, creating,
313             "Mismatched implicit broadcast dimension %"IND_FLAG": size %"IND_FLAG" vs. %"IND_FLAG,
314 1           nth,broadcast->dims[nth],pdls[j]->dims[nth+realdims[j]]
315             );
316             /* If we're still here, they're the same -- OK! */
317             } else { // current planned size is 1 -- mod it to match this PDL
318 18414           broadcast->dims[nth] = cur_pdl_dim;
319             }
320 25381           PDL_BRC_INC(broadcast->incs, npdls, j, nth) = // Update the corresponding data stride
321 25381 100         PDL_REPRINC(pdls[j],nth+realdims[j]);// from the PDL or from its vafftrans if relevant.
322             }
323             }
324             }
325 40033           *nthp = nth;
326 40033           return PDL_err;
327             }
328              
329             /* The assumptions this function makes:
330             * pdls is dynamic and may go away -> copied
331             * realdims is static and is NOT copied and NOT freed!!!
332             * creating is only used inside this routine.
333             * vtable is assumed static.
334             *
335             * Only the first thread-magicked pdl is taken into account.
336             *
337             * noPthreadFlag is a flag to indicate the trans is not pthreading safe
338             * (i.e. don't attempt to create multiple posix threads to execute)
339             */
340 40046           pdl_error pdl_initbroadcaststruct(int nobl,
341             pdl **pdls,PDL_Indx *realdims,PDL_Indx *creating,PDL_Indx npdls,
342             pdl_transvtable *vtable,pdl_broadcast *broadcast,
343             PDL_Indx *ind_sizes_UNUSED /*CORE21*/, PDL_Indx *inc_sizes_UNUSED,
344             char *flags_UNUSED /*CORE21*/, int noPthreadFlag
345 40046           ) {
346 40046           pdl_error PDL_err = {0, NULL, 0};
347 40046 50         PDLDEBUG_f(printf("initbroadcaststruct(%p)\n", broadcast));
348 46384 100         char already_alloced = (broadcast->magicno == PDL_BRC_MAGICNO &&
349 6338 50         broadcast->gflags & PDL_BROADCAST_INITIALIZED);
350 40046 100         PDL_Indx already_nthr = already_alloced ? broadcast->mag_nthr : -1;
351 40046 100         PDL_Indx already_ndims = already_alloced ? broadcast->ndims : -1;
352 40046           PDL_BRC_SETMAGIC(broadcast);
353 40046           broadcast->gflags = 0;
354 40046           broadcast->npdls = npdls;
355 40046           broadcast->realdims = realdims;
356 40046           broadcast->transvtable = vtable;
357 40046           broadcast->mag_nth = -1;
358 40046           broadcast->mag_nthpdl = -1;
359 40046           broadcast->mag_nthr = -1;
360 40046           broadcast->mag_skip = 0;
361 40046           broadcast->mag_stride = 0;
362              
363             /* Accumulate the maximum number of broadcast dims across the collection of PDLs */
364 40046           PDL_Indx nids = 0, nimpl=0, i, j;
365 136712 100         for (j=0; j
366 96666 100         if (creating[j]) continue;
367 69033 100         MAX2(nids,pdls[j]->nbroadcastids);
368 69033 100         MAX2(nimpl,pdls[j]->broadcastids[0] - realdims[j]);
369             }
370 40046           PDL_Indx ndims = broadcast->nimpl = nimpl;
371              
372 40046 50         PDL_RETERROR(PDL_err, pdl_autopthreadmagic(pdls, npdls, realdims, creating, noPthreadFlag));
373              
374 40046           PDL_Indx nbroadcastids[nids], nthr = 0, nthrd;
375 136712 100         for (j=0; j
376 96666 100         if (creating[j]) continue;
377             /* Check for magical ndarrays (parallelized) */
378 69033 100         if (!nthr &&
379 68829 100         pdls[j]->magic &&
380 111 50         (nthr = pdl_magic_thread_nthreads(pdls[j],&nthrd))
381             ) {
382 111 50         if ((broadcast->mag_nth = nthrd - realdims[j]) < 0)
383 0           return pdl_param_error(vtable,j,
384             pdls, nimpl, creating,
385             "Cannot magick non-broadcasted dims");
386 111           broadcast->mag_nthpdl = j;
387 111           broadcast->mag_nthr = nthr;
388             }
389 138072 100         for (i=0; i
390 69039           ndims += nbroadcastids[i] =
391 69039 50         PDLMAX(0, pdls[j]->nbroadcastids > nids ? 0 :
    100          
    50          
392             pdls[j]->broadcastids[i+1] - pdls[j]->broadcastids[i]);
393             }
394 40046 100         if (nthr)
395 111           broadcast->gflags |= PDL_BROADCAST_MAGICKED;
396 40046           ndims += broadcast->nextra = PDLMAX(0, nobl - ndims); /* If too few, add enough implicit dims */
397              
398 40046           broadcast->ndims = ndims;
399              
400 40046           PDL_Indx nthr1 = PDLMAX(nthr, 1);
401 40046 100         if (!already_alloced || already_nthr != nthr1 || ndims != already_ndims) {
    50          
    0          
402 40046 100         if (already_alloced) {
403 6338           Safefree(broadcast->inds);
404 6338           Safefree(broadcast->dims);
405 6338           Safefree(broadcast->offs);
406             }
407 40046 50         Newxz(broadcast->inds, ndims * nthr1, PDL_Indx); /* Create space for pthread-specific inds (i.e. copy for each pthread)*/
408 40046 50         if (broadcast->inds == NULL) return pdl_make_error_simple(PDL_EFATAL, "Failed to allocate memory for broadcast->inds in pdlbroadcast.c");
409 40046 50         Newxz(broadcast->dims, ndims * nthr1, PDL_Indx);
410 40046 50         if (broadcast->dims == NULL) return pdl_make_error_simple(PDL_EFATAL, "Failed to allocate memory for broadcast->dims in pdlbroadcast.c");
411 40046 50         Newxz(broadcast->offs, npdls * nthr1, PDL_Indx); /* Create space for pthread-specific offs */
412 40046 50         if (broadcast->offs == NULL) return pdl_make_error_simple(PDL_EFATAL, "Failed to allocate memory for broadcast->offs in pdlbroadcast.c");
413             }
414             PDL_Indx nth;
415 120972 100         for (nth=0; nthdims[nth]=1; // all start size 1
416              
417 40046 100         if (!already_alloced) {
418 33708           broadcast->pdls = copy_pdl_array(pdls,npdls);
419 33708 50         Newxz(broadcast->incs, ndims * npdls, PDL_Indx);
420 33708 50         if (broadcast->incs == NULL) return pdl_make_error_simple(PDL_EFATAL, "Failed to allocate memory for broadcast->incs in pdlbroadcast.c");
421 33708           Newxz(broadcast->flags, npdls, char);
422 33708 50         if (broadcast->flags == NULL) return pdl_make_error_simple(PDL_EFATAL, "Failed to allocate memory for broadcast->flags in pdlbroadcast.c");
423             }
424              
425 40046           char *flags = broadcast->flags; /* shortcut for the remainder */
426 136712 100         for (i=0;i
427 96666 100         if (vtable && vtable->par_flags[i] & PDL_PARAM_ISTEMP)
    100          
428 181           flags[i] |= PDL_BROADCAST_TEMP;
429              
430             /* check dims, make implicit inds */
431 40046 100         PDL_RETERROR(PDL_err, pdl_broadcast_dim_checks(
432             vtable, pdls, broadcast, creating, nimpl, realdims, npdls, &nth
433             ));
434              
435             /* Go through everything again and make the real things */
436              
437             PDL_Indx nthid;
438 80059 100         for (nthid=0; nthid
439 40030 100         for (i=0; i
440 12 100         for (j=0; j
441 8           pdl *pdl = pdls[j];
442 8 50         if (PDL_BISTEMP(flags[j]))
443 0           PDL_BRC_INC(broadcast->incs, npdls, j, nth) =
444 0           pdl->dimincs[pdl->ndims-1];
445 8 50         if (creating[j]) continue;
446 8 50         if (pdl->nbroadcastids < nthid) continue;
447 8 50         if (pdl->broadcastids[nthid+1]-pdl->broadcastids[nthid] <= i) continue;
448 8           PDL_Indx mywhichdim = i+pdl->broadcastids[nthid], mydim = pdl->dims[mywhichdim];
449 8 50         if (mydim == 1) continue;
450 8 100         if (broadcast->dims[nth] == 1) {
451 4           broadcast->dims[nth] = mydim;
452             } else {
453 4 50         if (broadcast->dims[nth] != mydim)
454 0           return pdl_param_error(vtable,j,
455             pdls, nimpl, creating,
456             "Mismatched implicit broadcast dimension %"IND_FLAG": should be %"IND_FLAG", is %"IND_FLAG"",
457             i,
458 0           broadcast->dims[nth],
459 0           pdl->dims[i+realdims[j]]);
460             }
461 8           PDL_BRC_INC(broadcast->incs, npdls, j, nth) =
462 8 100         PDL_REPRINC(pdl,mywhichdim);
463             }
464 4           nth++;
465             }
466             }
467              
468             /* If threading, make the true offsets and dims.. */
469 40033 100         if (nthr > 0) {
470 111           PDL_Indx mag_dim = broadcast->dims[broadcast->mag_nth],
471 111           n1 = mag_dim / nthr, n2 = mag_dim % nthr;
472 111           broadcast->mag_stride = n1;
473 111 100         if (n2) {
474 46           n1++;
475 46           broadcast->mag_skip = n2;
476             }
477 111           broadcast->dims[broadcast->mag_nth] = n1;
478 564 100         for (i=1; i
479 1454 100         for (j=0; j
480 1001           broadcast->dims[j + i*ndims] = broadcast->dims[j];
481 111 100         if (n2)
482 354 100         for (i=n2; i
483 308           broadcast->dims[broadcast->mag_nth + i*ndims]--;
484             }
485 40033           broadcast->gflags |= PDL_BROADCAST_INITIALIZED;
486 40033 50         PDLDEBUG_f(pdl_dump_broadcast(broadcast));
487 40033           return PDL_err;
488             }
489              
490 27629           pdl_error pdl_broadcast_create_parameter(pdl_broadcast *broadcast, PDL_Indx j,PDL_Indx *dims,
491             int temp)
492             {
493 27629           pdl_error PDL_err = {0, NULL, 0};
494             PDL_Indx i;
495 27629 100         PDL_Indx td = temp ? 0 : broadcast->nimpl;
496 27629 100         if (!temp && broadcast->nimpl != broadcast->ndims - broadcast->nextra) {
    50          
497 0           return pdl_make_error(PDL_EUSERERROR,
498             "%s: trying to create parameter '%s' while explicitly broadcasting.\n"
499             "See the manual for why this is impossible",
500 0           broadcast->transvtable->name,
501 0           broadcast->transvtable->par_names[j]
502             );
503             }
504 27629 50         if (!broadcast->pdls[j] && !(broadcast->pdls[j] = pdl_pdlnew()))
    0          
505 0           return pdl_make_error_simple(PDL_EFATAL, "Error in pdlnew");
506 27629 100         PDL_RETERROR(PDL_err, pdl_reallocdims(broadcast->pdls[j], broadcast->realdims[j] + td + (temp ? 1 : 0)));
    50          
507 41498 100         for (i=0; irealdims[j] + (temp ? 1 : 0); i++)
    100          
508 13869           broadcast->pdls[j]->dims[i] = dims[i];
509 27629 100         if (!temp)
510 37279 100         for (i=0; inimpl; i++)
511 9831           broadcast->pdls[j]->dims[i+broadcast->realdims[j]] =
512 14 50         (i == broadcast->mag_nth && broadcast->mag_nthr > 0)
513 14           ? PDL_BRC_OFFSET(broadcast->mag_nthr, broadcast)
514 9845 100         : broadcast->dims[i];
515 27629           broadcast->pdls[j]->broadcastids[0] = td + broadcast->realdims[j];
516 27629           pdl_resize_defaultincs(broadcast->pdls[j]);
517 37475 100         for (i=0; inimpl; i++) {
518 9846           PDL_BRC_INC(broadcast->incs, broadcast->npdls, j, i) =
519 9846 100         temp ? 0 :
520 9831 50         PDL_REPRINC(broadcast->pdls[j],i+broadcast->realdims[j]);
521             }
522 27629           return PDL_err;
523             }
524              
525 39132           int pdl_startbroadcastloop(pdl_broadcast *broadcast,pdl_error (*func)(pdl_trans *),
526             pdl_trans *t, pdl_error *error_ret) {
527 39132           PDL_Indx j, npdls = broadcast->npdls;
528 39132 100         if ((broadcast->gflags & (PDL_BROADCAST_MAGICKED | PDL_BROADCAST_MAGICK_BUSY))
529             == PDL_BROADCAST_MAGICKED ) {
530             /* If no function supplied (i.e. being called from PDL::broadcast_over), don't run in parallel */
531 115 50         if (!func)
532 0           broadcast->gflags &= ~PDL_BROADCAST_MAGICKED; /* Cancel thread_magicked */
533             else {
534 115           broadcast->gflags |= PDL_BROADCAST_MAGICK_BUSY;
535             /* Do the broadcastloop magically (i.e. in parallel) */
536 459 100         for (j=0; j
537 344 50         if (!(t->vtable->par_flags[j] & PDL_PARAM_ISTEMP)) continue;
538 0           pdl *it = broadcast->pdls[j];
539 0           it->dims[it->ndims-1] = broadcast->mag_nthr;
540 0           pdl_resize_defaultincs(it);
541 0           pdl_error PDL_err = pdl_make_physical(it);
542 0 0         if (PDL_err.error) {
543 0           *error_ret = PDL_err;
544 0           return 1;
545             }
546             }
547 115           pdl_error PDL_err = pdl_magic_thread_cast(broadcast->pdls[broadcast->mag_nthpdl],
548             func,t, broadcast);
549 115 100         if (PDL_err.error) {
550 1           *error_ret = PDL_err;
551 1           return 1;
552             }
553 114           broadcast->gflags &= ~PDL_BROADCAST_MAGICK_BUSY;
554 114           return 1; /* DON'T DO BROADCASTLOOP AGAIN */
555             }
556             }
557             PDL_Indx *inds, *dims; int thr;
558 39017           PDL_Indx *offsp = pdl_get_threadoffsp_int(broadcast,&thr, &inds, &dims);
559 39017 50         if (!offsp) return -1;
560 117319 100         for (j=0; jndims; j++)
561 78630 100         if (!dims[j]) return 1; /* do nothing if empty */
562 131346 100         for (j=0; j
563 92657 100         offsp[j] = PDL_BRC_THR_OFFSET(broadcast, thr, j);
    100          
    50          
564 38689           return 0;
565             }
566              
567             /* nth is how many dims are done inside the broadcastloop itself */
568             /* inds is how far along each non-broadcastloop dim we are */
569 47247           int pdl_iterbroadcastloop(pdl_broadcast *broadcast,PDL_Indx nth) {
570             int thr;
571 47247           PDL_Indx *inds, *dims, npdls = broadcast->npdls;
572 47247           PDL_Indx *offsp = pdl_get_threadoffsp_int(broadcast, &thr, &inds, &dims);
573 47247 50         if (!offsp) return -1;
574 47247           return pdl_broadcast_nd_step(npdls, offsp, nth, broadcast->ndims, broadcast->incs, dims, inds);
575             }