| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* XXX NOTE THAT IT IS NOT SAFE TO USE ->pdls MEMBER OUTSIDE |
|
2
|
|
|
|
|
|
|
INITBROADCASTSTRUCT! */ |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
#include "pdl.h" /* Data structure declarations */ |
|
5
|
|
|
|
|
|
|
#include "pdlcore.h" /* Core declarations */ |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
#define MAX2(a,b) if ((b)>(a)) a=b; |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
/**** Convenience routines for moving around collections |
|
10
|
|
|
|
|
|
|
**** of indices and PDL pointers. |
|
11
|
|
|
|
|
|
|
****/ |
|
12
|
33708
|
|
|
|
|
|
static pdl **copy_pdl_array (pdl **from, int size) { |
|
13
|
|
|
|
|
|
|
pdl **to; |
|
14
|
33708
|
|
|
|
|
|
Newx (to, size, pdl*); |
|
15
|
33708
|
|
|
|
|
|
return (pdl **) CopyD (from, to, size, pdl*); |
|
16
|
|
|
|
|
|
|
} |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
/******* |
|
19
|
|
|
|
|
|
|
* pdl_get_broadcastdims - get the pthread-specific broadcasting dims from a PDL |
|
20
|
|
|
|
|
|
|
* Input: broadcast structure |
|
21
|
|
|
|
|
|
|
* Outputs: see above (returned by function) |
|
22
|
|
|
|
|
|
|
*/ |
|
23
|
47189
|
|
|
|
|
|
PDL_Indx *pdl_get_broadcastdims(pdl_broadcast *broadcast) |
|
24
|
|
|
|
|
|
|
{ |
|
25
|
|
|
|
|
|
|
/* The non-multithreaded case: return just the usual value */ |
|
26
|
47189
|
100
|
|
|
|
|
if (!(broadcast->gflags & PDL_BROADCAST_MAGICKED)) return broadcast->dims; |
|
27
|
834
|
|
|
|
|
|
int thr = pdl_magic_get_thread(broadcast->pdls[broadcast->mag_nthpdl]); |
|
28
|
834
|
50
|
|
|
|
|
if (thr < 0) return NULL; |
|
29
|
834
|
|
|
|
|
|
return broadcast->dims + thr * broadcast->ndims; |
|
30
|
|
|
|
|
|
|
} |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
/******* |
|
34
|
|
|
|
|
|
|
* pdl_get_threadoffsp - get the pthread-specific offset arrays from a PDL |
|
35
|
|
|
|
|
|
|
* Input: broadcast structure |
|
36
|
|
|
|
|
|
|
* Outputs: Pointer to pthread-specific offset array (returned by function) |
|
37
|
|
|
|
|
|
|
*/ |
|
38
|
47189
|
|
|
|
|
|
PDL_Indx *pdl_get_threadoffsp(pdl_broadcast *broadcast) |
|
39
|
|
|
|
|
|
|
{ |
|
40
|
|
|
|
|
|
|
/* The non-multithreaded case: return just the usual offsets */ |
|
41
|
47189
|
100
|
|
|
|
|
if (!(broadcast->gflags & PDL_BROADCAST_MAGICKED)) return broadcast->offs; |
|
42
|
834
|
|
|
|
|
|
int thr = pdl_magic_get_thread(broadcast->pdls[broadcast->mag_nthpdl]); |
|
43
|
834
|
50
|
|
|
|
|
if (thr < 0) return NULL; |
|
44
|
834
|
|
|
|
|
|
return broadcast->offs + thr * broadcast->npdls; |
|
45
|
|
|
|
|
|
|
} |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
/* Function to get the pthread-specific offset, indexes and pthread number for the supplied broadcast structure |
|
49
|
|
|
|
|
|
|
Input: broadcast structure |
|
50
|
|
|
|
|
|
|
Outputs: Pointer to pthread-specific offset array (returned by function) |
|
51
|
|
|
|
|
|
|
Pointer to pthread-specific index array (ind Pointer supplied and modified by function) |
|
52
|
|
|
|
|
|
|
Pointer to pthread-specific dims array (dims Pointer supplied and modified by function) |
|
53
|
|
|
|
|
|
|
Pthread index for the current pthread ( thr supplied and modified by function) |
|
54
|
|
|
|
|
|
|
*/ |
|
55
|
86264
|
|
|
|
|
|
static inline PDL_Indx* pdl_get_threadoffsp_int(pdl_broadcast *broadcast, int *pthr, PDL_Indx **inds, PDL_Indx **dims) |
|
56
|
|
|
|
|
|
|
{ |
|
57
|
86264
|
100
|
|
|
|
|
if (broadcast->gflags & PDL_BROADCAST_MAGICKED) { |
|
58
|
1405
|
50
|
|
|
|
|
if (broadcast->mag_nthpdl < 0 || broadcast->mag_nthpdl >= broadcast->npdls) |
|
|
|
50
|
|
|
|
|
|
|
59
|
0
|
|
|
|
|
|
return NULL; |
|
60
|
1405
|
|
|
|
|
|
int thr = pdl_magic_get_thread(broadcast->pdls[broadcast->mag_nthpdl]); |
|
61
|
1405
|
50
|
|
|
|
|
if (thr < 0) return NULL; |
|
62
|
1405
|
|
|
|
|
|
*pthr = thr; |
|
63
|
1405
|
|
|
|
|
|
*inds = broadcast->inds + thr * broadcast->ndims; |
|
64
|
1405
|
|
|
|
|
|
*dims = broadcast->dims + thr * broadcast->ndims; |
|
65
|
1405
|
|
|
|
|
|
return broadcast->offs + thr * broadcast->npdls; |
|
66
|
|
|
|
|
|
|
} |
|
67
|
84859
|
|
|
|
|
|
*pthr = 0; |
|
68
|
|
|
|
|
|
|
/* The non-multithreaded case: return just the usual offsets */ |
|
69
|
84859
|
|
|
|
|
|
*dims = broadcast->dims; |
|
70
|
84859
|
|
|
|
|
|
*inds = broadcast->inds; |
|
71
|
84859
|
|
|
|
|
|
return broadcast->offs; |
|
72
|
|
|
|
|
|
|
} |
|
73
|
|
|
|
|
|
|
|
|
74
|
33719
|
|
|
|
|
|
void pdl_freebroadcaststruct(pdl_broadcast *broadcast) { |
|
75
|
33719
|
50
|
|
|
|
|
PDLDEBUG_f(printf("freebroadcaststruct(%p)\n", broadcast)); |
|
76
|
33719
|
100
|
|
|
|
|
if (!broadcast->inds) {return;} |
|
77
|
33708
|
|
|
|
|
|
Safefree(broadcast->inds); |
|
78
|
33708
|
|
|
|
|
|
Safefree(broadcast->dims); |
|
79
|
33708
|
|
|
|
|
|
Safefree(broadcast->offs); |
|
80
|
33708
|
|
|
|
|
|
Safefree(broadcast->incs); |
|
81
|
33708
|
|
|
|
|
|
Safefree(broadcast->flags); |
|
82
|
33708
|
|
|
|
|
|
Safefree(broadcast->pdls); |
|
83
|
33708
|
|
|
|
|
|
pdl_clearbroadcaststruct(broadcast); |
|
84
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
|
|
86
|
33708
|
|
|
|
|
|
void pdl_clearbroadcaststruct(pdl_broadcast *it) { |
|
87
|
33708
|
50
|
|
|
|
|
PDLDEBUG_f(printf("clearbroadcaststruct(%p)\n", it)); |
|
88
|
33708
|
|
|
|
|
|
it->transvtable=0; it->pdls=0; it->flags = 0; |
|
89
|
33708
|
|
|
|
|
|
it->ndims = it->nimpl = it->npdls = 0; |
|
90
|
33708
|
|
|
|
|
|
it->offs = it->incs = it->realdims = it->inds = it->dims = 0; |
|
91
|
33708
|
|
|
|
|
|
it->gflags=0; /* unsets PDL_BROADCAST_INITIALIZED among others */ |
|
92
|
33708
|
|
|
|
|
|
PDL_CLRMAGIC(it); |
|
93
|
33708
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
|
|
95
|
112
|
|
|
|
|
|
pdl_error pdl_find_max_pthread( |
|
96
|
|
|
|
|
|
|
pdl **pdls, int npdls, PDL_Indx* realdims, PDL_Indx* creating, |
|
97
|
|
|
|
|
|
|
int target_pthread, |
|
98
|
|
|
|
|
|
|
int *p_maxPthread, /* Maximum achievable pthread */ |
|
99
|
|
|
|
|
|
|
int *p_maxPthreadDim, /* Threaded dim number that has the max num pthreads */ |
|
100
|
|
|
|
|
|
|
int *p_maxPthreadPDL /* PDL that has the max (or right at the target) num pthreads */ |
|
101
|
112
|
|
|
|
|
|
) { |
|
102
|
112
|
|
|
|
|
|
pdl_error PDL_err = {0, NULL, 0}; |
|
103
|
|
|
|
|
|
|
PDL_Indx j, k, t; |
|
104
|
|
|
|
|
|
|
/* Build int arrays of broadcasted dim numbers and sizes for each pdl */ |
|
105
|
112
|
|
|
|
|
|
PDL_Indx max_remainder = 0; |
|
106
|
112
|
|
|
|
|
|
PDL_Indx nbroadcastedDims[npdls]; |
|
107
|
112
|
|
|
|
|
|
PDL_Indx *broadcastedDims[npdls]; |
|
108
|
112
|
|
|
|
|
|
PDL_Indx *broadcastedDimSizes[npdls]; |
|
109
|
422
|
100
|
|
|
|
|
for (j=0; j
|
|
110
|
310
|
100
|
|
|
|
|
if (creating[j]) continue; |
|
111
|
255
|
|
|
|
|
|
broadcastedDims[j] = (PDL_Indx*) malloc(sizeof(PDL_Indx) * pdls[j]->ndims); |
|
112
|
255
|
50
|
|
|
|
|
if (!broadcastedDims[j]) return pdl_make_error_simple(PDL_EFATAL, "Out of Memory\n"); |
|
113
|
255
|
|
|
|
|
|
broadcastedDimSizes[j] = (PDL_Indx*) malloc(sizeof(PDL_Indx) * pdls[j]->ndims); |
|
114
|
255
|
50
|
|
|
|
|
if (!broadcastedDimSizes[j]) return pdl_make_error_simple(PDL_EFATAL, "Out of Memory\n"); |
|
115
|
|
|
|
|
|
|
} |
|
116
|
422
|
100
|
|
|
|
|
for (j=0; j
|
|
117
|
310
|
100
|
|
|
|
|
if (creating[j]) continue; |
|
118
|
534
|
100
|
|
|
|
|
for ( k=0, t = realdims[j]; t < pdls[j]->ndims; t++, k++ ){ |
|
119
|
279
|
|
|
|
|
|
broadcastedDimSizes[j][k] = pdls[j]->dims[t]; |
|
120
|
279
|
|
|
|
|
|
broadcastedDims[j][k] = t; |
|
121
|
|
|
|
|
|
|
} |
|
122
|
255
|
|
|
|
|
|
nbroadcastedDims[j] = pdls[j]->ndims - realdims[j]; |
|
123
|
|
|
|
|
|
|
} |
|
124
|
|
|
|
|
|
|
/* Go through each broadcasted dim and find best match */ |
|
125
|
112
|
|
|
|
|
|
*p_maxPthread = 0; |
|
126
|
247
|
100
|
|
|
|
|
for (j=0; j
|
|
127
|
193
|
100
|
|
|
|
|
if (creating[j]) continue; |
|
128
|
215
|
100
|
|
|
|
|
for ( k=0; k < nbroadcastedDims[j]; k++){ |
|
129
|
102
|
|
|
|
|
|
PDL_Indx this_dim = broadcastedDimSizes[j][k]; |
|
130
|
102
|
|
|
|
|
|
PDL_Indx this_remainder = this_dim % target_pthread; |
|
131
|
102
|
100
|
|
|
|
|
if ( this_remainder == 0 ){ |
|
132
|
33
|
|
|
|
|
|
*p_maxPthread = target_pthread; |
|
133
|
33
|
|
|
|
|
|
*p_maxPthreadPDL = j; |
|
134
|
33
|
|
|
|
|
|
*p_maxPthreadDim = broadcastedDims[j][k]; |
|
135
|
33
|
|
|
|
|
|
break; |
|
136
|
|
|
|
|
|
|
} |
|
137
|
69
|
100
|
|
|
|
|
if ( this_dim > *p_maxPthread && this_remainder > max_remainder ){ |
|
|
|
100
|
|
|
|
|
|
|
138
|
57
|
|
|
|
|
|
max_remainder = this_remainder; |
|
139
|
57
|
|
|
|
|
|
*p_maxPthread = PDLMIN(target_pthread, this_dim); |
|
140
|
57
|
|
|
|
|
|
*p_maxPthreadPDL = j; |
|
141
|
57
|
|
|
|
|
|
*p_maxPthreadDim = broadcastedDims[j][k]; |
|
142
|
|
|
|
|
|
|
} |
|
143
|
|
|
|
|
|
|
} |
|
144
|
|
|
|
|
|
|
/* Don't go any further if target pthread achieved */ |
|
145
|
146
|
100
|
|
|
|
|
if ( *p_maxPthread == target_pthread ) break; |
|
146
|
|
|
|
|
|
|
} |
|
147
|
112
|
50
|
|
|
|
|
PDLDEBUG_f(pdl_dump_broadcasting_info( |
|
148
|
|
|
|
|
|
|
npdls, creating, target_pthread, |
|
149
|
|
|
|
|
|
|
nbroadcastedDims, broadcastedDims, broadcastedDimSizes, |
|
150
|
|
|
|
|
|
|
*p_maxPthreadPDL, *p_maxPthreadDim, *p_maxPthread |
|
151
|
|
|
|
|
|
|
)); |
|
152
|
|
|
|
|
|
|
/* Free the stuff we allocated */ |
|
153
|
422
|
100
|
|
|
|
|
for (j=0; j
|
|
154
|
310
|
100
|
|
|
|
|
if (creating[j]) continue; |
|
155
|
255
|
|
|
|
|
|
free(broadcastedDims[j]); |
|
156
|
255
|
|
|
|
|
|
free(broadcastedDimSizes[j]); |
|
157
|
|
|
|
|
|
|
} |
|
158
|
112
|
|
|
|
|
|
return PDL_err; |
|
159
|
|
|
|
|
|
|
} |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
/* Function to auto-add pthreading magic (i.e. hints for multiple |
|
162
|
|
|
|
|
|
|
processor threads) to the pdls, based on the target number of |
|
163
|
|
|
|
|
|
|
pthreads and the pdl-threaded dimensions. |
|
164
|
|
|
|
|
|
|
noPthreadFlag is a flag indicating that the pdl thread that |
|
165
|
|
|
|
|
|
|
called this function is not multiple processor threading safe, |
|
166
|
|
|
|
|
|
|
so no pthreading magic will be added. |
|
167
|
|
|
|
|
|
|
*/ |
|
168
|
40046
|
|
|
|
|
|
pdl_error pdl_autopthreadmagic( pdl **pdls, int npdls, PDL_Indx* realdims, PDL_Indx* creating, int noPthreadFlag ){ |
|
169
|
40046
|
|
|
|
|
|
pdl_error PDL_err = {0, NULL, 0}; |
|
170
|
|
|
|
|
|
|
PDL_Indx j; |
|
171
|
40046
|
|
|
|
|
|
int maxPthreadPDL = -1; /* PDL that has the max (or right at the target) num pthreads */ |
|
172
|
40046
|
|
|
|
|
|
int maxPthreadDim = -1; /* Threaded dim number that has the max num pthreads */ |
|
173
|
40046
|
|
|
|
|
|
int maxPthread = 0; /* Maximum achievable pthread */ |
|
174
|
40046
|
|
|
|
|
|
int target_pthread = pdl_autopthread_targ; |
|
175
|
40046
|
|
|
|
|
|
pdl_autopthread_actual = 0; /* Initialize the global variable indicating actual number of pthreads */ |
|
176
|
40046
|
|
|
|
|
|
pdl_autopthread_dim = -1; /* Initialize the global variable indicating actual dim pthreaded on */ |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
/* Don't do anything if auto_pthreading is turned off (i.e. equal zero) */ |
|
179
|
40046
|
100
|
|
|
|
|
if ( !target_pthread ) return PDL_err; |
|
180
|
|
|
|
|
|
|
|
|
181
|
39826
|
|
|
|
|
|
PDL_Indx largest_nvals = 0; |
|
182
|
|
|
|
|
|
|
/* Remove any existing threading magic */ |
|
183
|
135954
|
100
|
|
|
|
|
for (j=0; j
|
|
184
|
96128
|
100
|
|
|
|
|
if (creating[j]) continue; |
|
185
|
68615
|
100
|
|
|
|
|
MAX2(largest_nvals, pdls[j]->nvals); /* Find largest size */ |
|
186
|
|
|
|
|
|
|
/* Remove thread magic, if there is some set for this pdl */ |
|
187
|
68637
|
|
|
|
|
|
if (pdls[j]->magic && |
|
188
|
22
|
|
|
|
|
|
(pdl_magic_thread_nthreads(pdls[j], NULL))) |
|
189
|
22
|
50
|
|
|
|
|
PDL_RETERROR(PDL_err, pdl_add_threading_magic(pdls[j], -1, -1)); |
|
190
|
|
|
|
|
|
|
} |
|
191
|
|
|
|
|
|
|
|
|
192
|
39826
|
100
|
|
|
|
|
if ( noPthreadFlag ) return PDL_err; /* Don't go further if the current pdl function isn't thread-safe */ |
|
193
|
|
|
|
|
|
|
/* Don't do anything if we are lower than the threshold */ |
|
194
|
39777
|
100
|
|
|
|
|
if ( (largest_nvals>>20 /* as MBytes */) < pdl_autopthread_size ) |
|
195
|
39665
|
|
|
|
|
|
return PDL_err; |
|
196
|
|
|
|
|
|
|
|
|
197
|
112
|
50
|
|
|
|
|
PDL_RETERROR(PDL_err, pdl_find_max_pthread( |
|
198
|
|
|
|
|
|
|
pdls, npdls, realdims, creating, target_pthread, |
|
199
|
|
|
|
|
|
|
&maxPthread, &maxPthreadDim, &maxPthreadPDL |
|
200
|
|
|
|
|
|
|
)); |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
/* Add threading magic */ |
|
203
|
112
|
100
|
|
|
|
|
if ( maxPthread > 1 ) { |
|
204
|
64
|
50
|
|
|
|
|
PDL_RETERROR(PDL_err, pdl_add_threading_magic(pdls[maxPthreadPDL], maxPthreadDim, maxPthread)); |
|
205
|
64
|
|
|
|
|
|
pdl_autopthread_actual = maxPthread; /* Set the global variable indicating actual number of pthreads */ |
|
206
|
64
|
|
|
|
|
|
pdl_autopthread_dim = maxPthreadDim; |
|
207
|
|
|
|
|
|
|
} |
|
208
|
112
|
|
|
|
|
|
return PDL_err; |
|
209
|
|
|
|
|
|
|
} |
|
210
|
|
|
|
|
|
|
|
|
211
|
94257
|
|
|
|
|
|
pdl_error pdl_dim_checks( |
|
212
|
|
|
|
|
|
|
pdl_transvtable *vtable, pdl **pdls, |
|
213
|
|
|
|
|
|
|
pdl_broadcast *broadcast, PDL_Indx nimpl, PDL_Indx *creating, |
|
214
|
|
|
|
|
|
|
PDL_Indx *ind_sizes, char load_only |
|
215
|
|
|
|
|
|
|
) { |
|
216
|
94257
|
|
|
|
|
|
pdl_error PDL_err = {0, NULL, 0}; |
|
217
|
|
|
|
|
|
|
PDL_Indx i, j; |
|
218
|
94257
|
50
|
|
|
|
|
PDLDEBUG_f(printf("pdl_dim_checks(load_only=%d) %p:\n", load_only, ind_sizes); |
|
219
|
|
|
|
|
|
|
printf(" ind_sizes: "); pdl_print_iarr(ind_sizes, vtable->ninds);printf("\n")); |
|
220
|
294137
|
100
|
|
|
|
|
for (i=0; inpdls; i++) { |
|
221
|
199897
|
|
|
|
|
|
pdl *pdl = pdls[i]; |
|
222
|
199897
|
|
|
|
|
|
PDL_Indx ninds = vtable->par_realdims[i], ndims = pdl->ndims; |
|
223
|
199897
|
50
|
|
|
|
|
PDLDEBUG_f(printf("pdl_dim_checks pdl %"IND_FLAG" (creating=%"IND_FLAG" ninds=%"IND_FLAG"): ", i, creating[i], ninds)); |
|
224
|
199897
|
50
|
|
|
|
|
PDLDEBUG_f(pdl_dump(pdl)); |
|
225
|
199897
|
|
|
|
|
|
short flags = vtable->par_flags[i]; |
|
226
|
199897
|
100
|
|
|
|
|
if (!load_only && creating[i]) continue; |
|
|
|
100
|
|
|
|
|
|
|
227
|
172276
|
|
|
|
|
|
PDL_Indx *dims = pdl->dims; |
|
228
|
172276
|
|
|
|
|
|
char isoutput = (i >= vtable->nparents); |
|
229
|
207756
|
100
|
|
|
|
|
for (j=0; j
|
|
230
|
35497
|
|
|
|
|
|
PDL_Indx ind_id = PDL_IND_ID(vtable, i, j), ind_sz = ind_sizes[ind_id]; |
|
231
|
35497
|
100
|
|
|
|
|
if (j >= ndims && ind_sz == -1) |
|
|
|
100
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
/* Dimensional promotion when number of dims is less than required: */ |
|
233
|
5929
|
|
|
|
|
|
ind_sz = ind_sizes[ind_id] = 1; |
|
234
|
35497
|
100
|
|
|
|
|
if (load_only && creating[i]) continue; |
|
|
|
100
|
|
|
|
|
|
|
235
|
34642
|
100
|
|
|
|
|
if (ind_sz == -1 || (!(flags & PDL_PARAM_ISPHYS) && j < ndims && ind_sz == 1)) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
236
|
14451
|
|
|
|
|
|
ind_sizes[ind_id] = dims[j]; |
|
237
|
14451
|
|
|
|
|
|
continue; |
|
238
|
|
|
|
|
|
|
} |
|
239
|
20191
|
100
|
|
|
|
|
if (j >= ndims && isoutput && ind_sz != 1) |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
240
|
5
|
|
|
|
|
|
return pdl_param_error(vtable,i, |
|
241
|
|
|
|
|
|
|
pdls, nimpl, creating, |
|
242
|
|
|
|
|
|
|
"index '%s' size %"IND_FLAG", can't broadcast over output ndarray with size > 1", |
|
243
|
5
|
|
|
|
|
|
vtable->ind_names[ind_id], ind_sz |
|
244
|
|
|
|
|
|
|
); |
|
245
|
20186
|
100
|
|
|
|
|
if (isoutput && ind_sz != 1 && pdl->vafftrans && pdl->vafftrans->incs[j] == 0) |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
246
|
2
|
|
|
|
|
|
return pdl_param_error(vtable,i, |
|
247
|
|
|
|
|
|
|
pdls, nimpl, creating, |
|
248
|
|
|
|
|
|
|
"index '%s' size %"IND_FLAG", can't broadcast over dummy dim with size > 1", |
|
249
|
2
|
|
|
|
|
|
vtable->ind_names[ind_id], ind_sz |
|
250
|
|
|
|
|
|
|
); |
|
251
|
20184
|
100
|
|
|
|
|
if (j < ndims && ind_sz != dims[j] && (isoutput || dims[j] != 1)) |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
252
|
8
|
|
|
|
|
|
return pdl_param_error(vtable,i, |
|
253
|
|
|
|
|
|
|
pdls, nimpl, creating, |
|
254
|
|
|
|
|
|
|
"index '%s' size %"IND_FLAG", but ndarray dim has size %"IND_FLAG, |
|
255
|
8
|
|
|
|
|
|
vtable->ind_names[ind_id], ind_sz, dims[j] |
|
256
|
|
|
|
|
|
|
); |
|
257
|
20176
|
100
|
|
|
|
|
if (j < ndims && ind_sz != dims[j] && |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
258
|
2
|
50
|
|
|
|
|
!load_only && !creating[i] && |
|
|
|
50
|
|
|
|
|
|
|
259
|
2
|
|
|
|
|
|
ind_sz > 1 && |
|
260
|
2
|
50
|
|
|
|
|
(flags & PDL_PARAM_ISPHYS) |
|
261
|
|
|
|
|
|
|
) |
|
262
|
2
|
|
|
|
|
|
return pdl_param_error(vtable,i, |
|
263
|
|
|
|
|
|
|
pdls, nimpl, creating, |
|
264
|
|
|
|
|
|
|
"index '%s' size %"IND_FLAG", but ndarray dim has size %"IND_FLAG, |
|
265
|
2
|
|
|
|
|
|
vtable->ind_names[ind_id], ind_sz, dims[j] |
|
266
|
|
|
|
|
|
|
); |
|
267
|
|
|
|
|
|
|
} |
|
268
|
|
|
|
|
|
|
} |
|
269
|
94240
|
50
|
|
|
|
|
PDLDEBUG_f(printf("pdl_dim_checks after:\n"); |
|
270
|
|
|
|
|
|
|
printf(" ind_sizes: "); pdl_print_iarr(ind_sizes, vtable->ninds); |
|
271
|
|
|
|
|
|
|
printf("\n")); |
|
272
|
94240
|
|
|
|
|
|
return PDL_err; |
|
273
|
|
|
|
|
|
|
} |
|
274
|
|
|
|
|
|
|
|
|
275
|
40046
|
|
|
|
|
|
static pdl_error pdl_broadcast_dim_checks( |
|
276
|
|
|
|
|
|
|
pdl_transvtable *vtable, pdl **pdls, |
|
277
|
|
|
|
|
|
|
pdl_broadcast *broadcast, PDL_Indx *creating, |
|
278
|
|
|
|
|
|
|
PDL_Indx nimpl, PDL_Indx *realdims, PDL_Indx npdls, PDL_Indx *nthp |
|
279
|
|
|
|
|
|
|
) { |
|
280
|
40046
|
|
|
|
|
|
pdl_error PDL_err = {0, NULL, 0}; |
|
281
|
|
|
|
|
|
|
PDL_Indx j, nth /* Index to dimensions */; |
|
282
|
63492
|
100
|
|
|
|
|
for (nth=0; nth
|
|
283
|
73769
|
100
|
|
|
|
|
for (j=0; j
|
|
284
|
50323
|
100
|
|
|
|
|
if (creating[j]) continue; // If jth PDL is null, don't bother trying to match |
|
285
|
40475
|
100
|
|
|
|
|
char isoutput = (vtable && j >= vtable->nparents); |
|
|
|
100
|
|
|
|
|
|
|
286
|
40475
|
100
|
|
|
|
|
if (nth >= pdls[j]->broadcastids[0]-realdims[j]) { /* off end of current PDL's dimlist */ |
|
287
|
8961
|
100
|
|
|
|
|
if (isoutput && broadcast->dims[nth] != 1) |
|
|
|
100
|
|
|
|
|
|
|
288
|
5
|
|
|
|
|
|
return pdl_param_error(vtable,nth, |
|
289
|
|
|
|
|
|
|
pdls, nimpl, creating, |
|
290
|
|
|
|
|
|
|
"implicit dim %"IND_FLAG" size %"IND_FLAG", can't broadcast over output ndarray with size > 1", |
|
291
|
5
|
|
|
|
|
|
nth, broadcast->dims[nth] |
|
292
|
|
|
|
|
|
|
); |
|
293
|
8956
|
|
|
|
|
|
continue; |
|
294
|
|
|
|
|
|
|
} |
|
295
|
31514
|
|
|
|
|
|
PDL_Indx cur_pdl_dim = pdls[j]->dims[nth+realdims[j]]; |
|
296
|
31514
|
100
|
|
|
|
|
if (isoutput && cur_pdl_dim == 1 && cur_pdl_dim != broadcast->dims[nth]) |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
297
|
5
|
|
|
|
|
|
return pdl_param_error(vtable,nth, |
|
298
|
|
|
|
|
|
|
pdls, nimpl, creating, |
|
299
|
|
|
|
|
|
|
"implicit dim %"IND_FLAG" size %"IND_FLAG", but dim has size %"IND_FLAG, |
|
300
|
5
|
|
|
|
|
|
nth, broadcast->dims[nth], cur_pdl_dim |
|
301
|
|
|
|
|
|
|
); |
|
302
|
31509
|
100
|
|
|
|
|
if (isoutput && cur_pdl_dim != 1 && pdls[j]->vafftrans && pdls[j]->vafftrans->incs[nth+realdims[j]] == 0) |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
303
|
2
|
|
|
|
|
|
return pdl_param_error(vtable,nth, |
|
304
|
|
|
|
|
|
|
pdls, nimpl, creating, |
|
305
|
|
|
|
|
|
|
"implicit dim %"IND_FLAG" size %"IND_FLAG", but dim is dummy", |
|
306
|
2
|
|
|
|
|
|
nth, broadcast->dims[nth] |
|
307
|
|
|
|
|
|
|
); |
|
308
|
31507
|
100
|
|
|
|
|
if (cur_pdl_dim != 1) { // If the current dim in the current PDL is not 1, |
|
309
|
25382
|
100
|
|
|
|
|
if (broadcast->dims[nth] != 1) { // ... and the current planned size isn't 1, |
|
310
|
6968
|
100
|
|
|
|
|
if (broadcast->dims[nth] != cur_pdl_dim) // ... then check to make sure they're the same. |
|
311
|
1
|
|
|
|
|
|
return pdl_param_error(vtable,j, |
|
312
|
|
|
|
|
|
|
pdls, nimpl, creating, |
|
313
|
|
|
|
|
|
|
"Mismatched implicit broadcast dimension %"IND_FLAG": size %"IND_FLAG" vs. %"IND_FLAG, |
|
314
|
1
|
|
|
|
|
|
nth,broadcast->dims[nth],pdls[j]->dims[nth+realdims[j]] |
|
315
|
|
|
|
|
|
|
); |
|
316
|
|
|
|
|
|
|
/* If we're still here, they're the same -- OK! */ |
|
317
|
|
|
|
|
|
|
} else { // current planned size is 1 -- mod it to match this PDL |
|
318
|
18414
|
|
|
|
|
|
broadcast->dims[nth] = cur_pdl_dim; |
|
319
|
|
|
|
|
|
|
} |
|
320
|
25381
|
|
|
|
|
|
PDL_BRC_INC(broadcast->incs, npdls, j, nth) = // Update the corresponding data stride |
|
321
|
25381
|
100
|
|
|
|
|
PDL_REPRINC(pdls[j],nth+realdims[j]);// from the PDL or from its vafftrans if relevant. |
|
322
|
|
|
|
|
|
|
} |
|
323
|
|
|
|
|
|
|
} |
|
324
|
|
|
|
|
|
|
} |
|
325
|
40033
|
|
|
|
|
|
*nthp = nth; |
|
326
|
40033
|
|
|
|
|
|
return PDL_err; |
|
327
|
|
|
|
|
|
|
} |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
/* The assumptions this function makes: |
|
330
|
|
|
|
|
|
|
* pdls is dynamic and may go away -> copied |
|
331
|
|
|
|
|
|
|
* realdims is static and is NOT copied and NOT freed!!! |
|
332
|
|
|
|
|
|
|
* creating is only used inside this routine. |
|
333
|
|
|
|
|
|
|
* vtable is assumed static. |
|
334
|
|
|
|
|
|
|
* |
|
335
|
|
|
|
|
|
|
* Only the first thread-magicked pdl is taken into account. |
|
336
|
|
|
|
|
|
|
* |
|
337
|
|
|
|
|
|
|
* noPthreadFlag is a flag to indicate the trans is not pthreading safe |
|
338
|
|
|
|
|
|
|
* (i.e. don't attempt to create multiple posix threads to execute) |
|
339
|
|
|
|
|
|
|
*/ |
|
340
|
40046
|
|
|
|
|
|
pdl_error pdl_initbroadcaststruct(int nobl, |
|
341
|
|
|
|
|
|
|
pdl **pdls,PDL_Indx *realdims,PDL_Indx *creating,PDL_Indx npdls, |
|
342
|
|
|
|
|
|
|
pdl_transvtable *vtable,pdl_broadcast *broadcast, |
|
343
|
|
|
|
|
|
|
PDL_Indx *ind_sizes_UNUSED /*CORE21*/, PDL_Indx *inc_sizes_UNUSED, |
|
344
|
|
|
|
|
|
|
char *flags_UNUSED /*CORE21*/, int noPthreadFlag |
|
345
|
40046
|
|
|
|
|
|
) { |
|
346
|
40046
|
|
|
|
|
|
pdl_error PDL_err = {0, NULL, 0}; |
|
347
|
40046
|
50
|
|
|
|
|
PDLDEBUG_f(printf("initbroadcaststruct(%p)\n", broadcast)); |
|
348
|
46384
|
100
|
|
|
|
|
char already_alloced = (broadcast->magicno == PDL_BRC_MAGICNO && |
|
349
|
6338
|
50
|
|
|
|
|
broadcast->gflags & PDL_BROADCAST_INITIALIZED); |
|
350
|
40046
|
100
|
|
|
|
|
PDL_Indx already_nthr = already_alloced ? broadcast->mag_nthr : -1; |
|
351
|
40046
|
100
|
|
|
|
|
PDL_Indx already_ndims = already_alloced ? broadcast->ndims : -1; |
|
352
|
40046
|
|
|
|
|
|
PDL_BRC_SETMAGIC(broadcast); |
|
353
|
40046
|
|
|
|
|
|
broadcast->gflags = 0; |
|
354
|
40046
|
|
|
|
|
|
broadcast->npdls = npdls; |
|
355
|
40046
|
|
|
|
|
|
broadcast->realdims = realdims; |
|
356
|
40046
|
|
|
|
|
|
broadcast->transvtable = vtable; |
|
357
|
40046
|
|
|
|
|
|
broadcast->mag_nth = -1; |
|
358
|
40046
|
|
|
|
|
|
broadcast->mag_nthpdl = -1; |
|
359
|
40046
|
|
|
|
|
|
broadcast->mag_nthr = -1; |
|
360
|
40046
|
|
|
|
|
|
broadcast->mag_skip = 0; |
|
361
|
40046
|
|
|
|
|
|
broadcast->mag_stride = 0; |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
/* Accumulate the maximum number of broadcast dims across the collection of PDLs */ |
|
364
|
40046
|
|
|
|
|
|
PDL_Indx nids = 0, nimpl=0, i, j; |
|
365
|
136712
|
100
|
|
|
|
|
for (j=0; j
|
|
366
|
96666
|
100
|
|
|
|
|
if (creating[j]) continue; |
|
367
|
69033
|
100
|
|
|
|
|
MAX2(nids,pdls[j]->nbroadcastids); |
|
368
|
69033
|
100
|
|
|
|
|
MAX2(nimpl,pdls[j]->broadcastids[0] - realdims[j]); |
|
369
|
|
|
|
|
|
|
} |
|
370
|
40046
|
|
|
|
|
|
PDL_Indx ndims = broadcast->nimpl = nimpl; |
|
371
|
|
|
|
|
|
|
|
|
372
|
40046
|
50
|
|
|
|
|
PDL_RETERROR(PDL_err, pdl_autopthreadmagic(pdls, npdls, realdims, creating, noPthreadFlag)); |
|
373
|
|
|
|
|
|
|
|
|
374
|
40046
|
|
|
|
|
|
PDL_Indx nbroadcastids[nids], nthr = 0, nthrd; |
|
375
|
136712
|
100
|
|
|
|
|
for (j=0; j
|
|
376
|
96666
|
100
|
|
|
|
|
if (creating[j]) continue; |
|
377
|
|
|
|
|
|
|
/* Check for magical ndarrays (parallelized) */ |
|
378
|
69033
|
100
|
|
|
|
|
if (!nthr && |
|
379
|
68829
|
100
|
|
|
|
|
pdls[j]->magic && |
|
380
|
111
|
50
|
|
|
|
|
(nthr = pdl_magic_thread_nthreads(pdls[j],&nthrd)) |
|
381
|
|
|
|
|
|
|
) { |
|
382
|
111
|
50
|
|
|
|
|
if ((broadcast->mag_nth = nthrd - realdims[j]) < 0) |
|
383
|
0
|
|
|
|
|
|
return pdl_param_error(vtable,j, |
|
384
|
|
|
|
|
|
|
pdls, nimpl, creating, |
|
385
|
|
|
|
|
|
|
"Cannot magick non-broadcasted dims"); |
|
386
|
111
|
|
|
|
|
|
broadcast->mag_nthpdl = j; |
|
387
|
111
|
|
|
|
|
|
broadcast->mag_nthr = nthr; |
|
388
|
|
|
|
|
|
|
} |
|
389
|
138072
|
100
|
|
|
|
|
for (i=0; i
|
|
390
|
69039
|
|
|
|
|
|
ndims += nbroadcastids[i] = |
|
391
|
69039
|
50
|
|
|
|
|
PDLMAX(0, pdls[j]->nbroadcastids > nids ? 0 : |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
pdls[j]->broadcastids[i+1] - pdls[j]->broadcastids[i]); |
|
393
|
|
|
|
|
|
|
} |
|
394
|
40046
|
100
|
|
|
|
|
if (nthr) |
|
395
|
111
|
|
|
|
|
|
broadcast->gflags |= PDL_BROADCAST_MAGICKED; |
|
396
|
40046
|
|
|
|
|
|
ndims += broadcast->nextra = PDLMAX(0, nobl - ndims); /* If too few, add enough implicit dims */ |
|
397
|
|
|
|
|
|
|
|
|
398
|
40046
|
|
|
|
|
|
broadcast->ndims = ndims; |
|
399
|
|
|
|
|
|
|
|
|
400
|
40046
|
|
|
|
|
|
PDL_Indx nthr1 = PDLMAX(nthr, 1); |
|
401
|
40046
|
100
|
|
|
|
|
if (!already_alloced || already_nthr != nthr1 || ndims != already_ndims) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
402
|
40046
|
100
|
|
|
|
|
if (already_alloced) { |
|
403
|
6338
|
|
|
|
|
|
Safefree(broadcast->inds); |
|
404
|
6338
|
|
|
|
|
|
Safefree(broadcast->dims); |
|
405
|
6338
|
|
|
|
|
|
Safefree(broadcast->offs); |
|
406
|
|
|
|
|
|
|
} |
|
407
|
40046
|
50
|
|
|
|
|
Newxz(broadcast->inds, ndims * nthr1, PDL_Indx); /* Create space for pthread-specific inds (i.e. copy for each pthread)*/ |
|
408
|
40046
|
50
|
|
|
|
|
if (broadcast->inds == NULL) return pdl_make_error_simple(PDL_EFATAL, "Failed to allocate memory for broadcast->inds in pdlbroadcast.c"); |
|
409
|
40046
|
50
|
|
|
|
|
Newxz(broadcast->dims, ndims * nthr1, PDL_Indx); |
|
410
|
40046
|
50
|
|
|
|
|
if (broadcast->dims == NULL) return pdl_make_error_simple(PDL_EFATAL, "Failed to allocate memory for broadcast->dims in pdlbroadcast.c"); |
|
411
|
40046
|
50
|
|
|
|
|
Newxz(broadcast->offs, npdls * nthr1, PDL_Indx); /* Create space for pthread-specific offs */ |
|
412
|
40046
|
50
|
|
|
|
|
if (broadcast->offs == NULL) return pdl_make_error_simple(PDL_EFATAL, "Failed to allocate memory for broadcast->offs in pdlbroadcast.c"); |
|
413
|
|
|
|
|
|
|
} |
|
414
|
|
|
|
|
|
|
PDL_Indx nth; |
|
415
|
120972
|
100
|
|
|
|
|
for (nth=0; nthdims[nth]=1; // all start size 1 |
|
416
|
|
|
|
|
|
|
|
|
417
|
40046
|
100
|
|
|
|
|
if (!already_alloced) { |
|
418
|
33708
|
|
|
|
|
|
broadcast->pdls = copy_pdl_array(pdls,npdls); |
|
419
|
33708
|
50
|
|
|
|
|
Newxz(broadcast->incs, ndims * npdls, PDL_Indx); |
|
420
|
33708
|
50
|
|
|
|
|
if (broadcast->incs == NULL) return pdl_make_error_simple(PDL_EFATAL, "Failed to allocate memory for broadcast->incs in pdlbroadcast.c"); |
|
421
|
33708
|
|
|
|
|
|
Newxz(broadcast->flags, npdls, char); |
|
422
|
33708
|
50
|
|
|
|
|
if (broadcast->flags == NULL) return pdl_make_error_simple(PDL_EFATAL, "Failed to allocate memory for broadcast->flags in pdlbroadcast.c"); |
|
423
|
|
|
|
|
|
|
} |
|
424
|
|
|
|
|
|
|
|
|
425
|
40046
|
|
|
|
|
|
char *flags = broadcast->flags; /* shortcut for the remainder */ |
|
426
|
136712
|
100
|
|
|
|
|
for (i=0;i
|
|
427
|
96666
|
100
|
|
|
|
|
if (vtable && vtable->par_flags[i] & PDL_PARAM_ISTEMP) |
|
|
|
100
|
|
|
|
|
|
|
428
|
181
|
|
|
|
|
|
flags[i] |= PDL_BROADCAST_TEMP; |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
/* check dims, make implicit inds */ |
|
431
|
40046
|
100
|
|
|
|
|
PDL_RETERROR(PDL_err, pdl_broadcast_dim_checks( |
|
432
|
|
|
|
|
|
|
vtable, pdls, broadcast, creating, nimpl, realdims, npdls, &nth |
|
433
|
|
|
|
|
|
|
)); |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
/* Go through everything again and make the real things */ |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
PDL_Indx nthid; |
|
438
|
80059
|
100
|
|
|
|
|
for (nthid=0; nthid
|
|
439
|
40030
|
100
|
|
|
|
|
for (i=0; i
|
|
440
|
12
|
100
|
|
|
|
|
for (j=0; j
|
|
441
|
8
|
|
|
|
|
|
pdl *pdl = pdls[j]; |
|
442
|
8
|
50
|
|
|
|
|
if (PDL_BISTEMP(flags[j])) |
|
443
|
0
|
|
|
|
|
|
PDL_BRC_INC(broadcast->incs, npdls, j, nth) = |
|
444
|
0
|
|
|
|
|
|
pdl->dimincs[pdl->ndims-1]; |
|
445
|
8
|
50
|
|
|
|
|
if (creating[j]) continue; |
|
446
|
8
|
50
|
|
|
|
|
if (pdl->nbroadcastids < nthid) continue; |
|
447
|
8
|
50
|
|
|
|
|
if (pdl->broadcastids[nthid+1]-pdl->broadcastids[nthid] <= i) continue; |
|
448
|
8
|
|
|
|
|
|
PDL_Indx mywhichdim = i+pdl->broadcastids[nthid], mydim = pdl->dims[mywhichdim]; |
|
449
|
8
|
50
|
|
|
|
|
if (mydim == 1) continue; |
|
450
|
8
|
100
|
|
|
|
|
if (broadcast->dims[nth] == 1) { |
|
451
|
4
|
|
|
|
|
|
broadcast->dims[nth] = mydim; |
|
452
|
|
|
|
|
|
|
} else { |
|
453
|
4
|
50
|
|
|
|
|
if (broadcast->dims[nth] != mydim) |
|
454
|
0
|
|
|
|
|
|
return pdl_param_error(vtable,j, |
|
455
|
|
|
|
|
|
|
pdls, nimpl, creating, |
|
456
|
|
|
|
|
|
|
"Mismatched implicit broadcast dimension %"IND_FLAG": should be %"IND_FLAG", is %"IND_FLAG"", |
|
457
|
|
|
|
|
|
|
i, |
|
458
|
0
|
|
|
|
|
|
broadcast->dims[nth], |
|
459
|
0
|
|
|
|
|
|
pdl->dims[i+realdims[j]]); |
|
460
|
|
|
|
|
|
|
} |
|
461
|
8
|
|
|
|
|
|
PDL_BRC_INC(broadcast->incs, npdls, j, nth) = |
|
462
|
8
|
100
|
|
|
|
|
PDL_REPRINC(pdl,mywhichdim); |
|
463
|
|
|
|
|
|
|
} |
|
464
|
4
|
|
|
|
|
|
nth++; |
|
465
|
|
|
|
|
|
|
} |
|
466
|
|
|
|
|
|
|
} |
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
/* If threading, make the true offsets and dims.. */ |
|
469
|
40033
|
100
|
|
|
|
|
if (nthr > 0) { |
|
470
|
111
|
|
|
|
|
|
PDL_Indx mag_dim = broadcast->dims[broadcast->mag_nth], |
|
471
|
111
|
|
|
|
|
|
n1 = mag_dim / nthr, n2 = mag_dim % nthr; |
|
472
|
111
|
|
|
|
|
|
broadcast->mag_stride = n1; |
|
473
|
111
|
100
|
|
|
|
|
if (n2) { |
|
474
|
46
|
|
|
|
|
|
n1++; |
|
475
|
46
|
|
|
|
|
|
broadcast->mag_skip = n2; |
|
476
|
|
|
|
|
|
|
} |
|
477
|
111
|
|
|
|
|
|
broadcast->dims[broadcast->mag_nth] = n1; |
|
478
|
564
|
100
|
|
|
|
|
for (i=1; i
|
|
479
|
1454
|
100
|
|
|
|
|
for (j=0; j
|
|
480
|
1001
|
|
|
|
|
|
broadcast->dims[j + i*ndims] = broadcast->dims[j]; |
|
481
|
111
|
100
|
|
|
|
|
if (n2) |
|
482
|
354
|
100
|
|
|
|
|
for (i=n2; i
|
|
483
|
308
|
|
|
|
|
|
broadcast->dims[broadcast->mag_nth + i*ndims]--; |
|
484
|
|
|
|
|
|
|
} |
|
485
|
40033
|
|
|
|
|
|
broadcast->gflags |= PDL_BROADCAST_INITIALIZED; |
|
486
|
40033
|
50
|
|
|
|
|
PDLDEBUG_f(pdl_dump_broadcast(broadcast)); |
|
487
|
40033
|
|
|
|
|
|
return PDL_err; |
|
488
|
|
|
|
|
|
|
} |
|
489
|
|
|
|
|
|
|
|
|
490
|
27629
|
|
|
|
|
|
pdl_error pdl_broadcast_create_parameter(pdl_broadcast *broadcast, PDL_Indx j,PDL_Indx *dims, |
|
491
|
|
|
|
|
|
|
int temp) |
|
492
|
|
|
|
|
|
|
{ |
|
493
|
27629
|
|
|
|
|
|
pdl_error PDL_err = {0, NULL, 0}; |
|
494
|
|
|
|
|
|
|
PDL_Indx i; |
|
495
|
27629
|
100
|
|
|
|
|
PDL_Indx td = temp ? 0 : broadcast->nimpl; |
|
496
|
27629
|
100
|
|
|
|
|
if (!temp && broadcast->nimpl != broadcast->ndims - broadcast->nextra) { |
|
|
|
50
|
|
|
|
|
|
|
497
|
0
|
|
|
|
|
|
return pdl_make_error(PDL_EUSERERROR, |
|
498
|
|
|
|
|
|
|
"%s: trying to create parameter '%s' while explicitly broadcasting.\n" |
|
499
|
|
|
|
|
|
|
"See the manual for why this is impossible", |
|
500
|
0
|
|
|
|
|
|
broadcast->transvtable->name, |
|
501
|
0
|
|
|
|
|
|
broadcast->transvtable->par_names[j] |
|
502
|
|
|
|
|
|
|
); |
|
503
|
|
|
|
|
|
|
} |
|
504
|
27629
|
50
|
|
|
|
|
if (!broadcast->pdls[j] && !(broadcast->pdls[j] = pdl_pdlnew())) |
|
|
|
0
|
|
|
|
|
|
|
505
|
0
|
|
|
|
|
|
return pdl_make_error_simple(PDL_EFATAL, "Error in pdlnew"); |
|
506
|
27629
|
100
|
|
|
|
|
PDL_RETERROR(PDL_err, pdl_reallocdims(broadcast->pdls[j], broadcast->realdims[j] + td + (temp ? 1 : 0))); |
|
|
|
50
|
|
|
|
|
|
|
507
|
41498
|
100
|
|
|
|
|
for (i=0; irealdims[j] + (temp ? 1 : 0); i++) |
|
|
|
100
|
|
|
|
|
|
|
508
|
13869
|
|
|
|
|
|
broadcast->pdls[j]->dims[i] = dims[i]; |
|
509
|
27629
|
100
|
|
|
|
|
if (!temp) |
|
510
|
37279
|
100
|
|
|
|
|
for (i=0; inimpl; i++) |
|
511
|
9831
|
|
|
|
|
|
broadcast->pdls[j]->dims[i+broadcast->realdims[j]] = |
|
512
|
14
|
50
|
|
|
|
|
(i == broadcast->mag_nth && broadcast->mag_nthr > 0) |
|
513
|
14
|
|
|
|
|
|
? PDL_BRC_OFFSET(broadcast->mag_nthr, broadcast) |
|
514
|
9845
|
100
|
|
|
|
|
: broadcast->dims[i]; |
|
515
|
27629
|
|
|
|
|
|
broadcast->pdls[j]->broadcastids[0] = td + broadcast->realdims[j]; |
|
516
|
27629
|
|
|
|
|
|
pdl_resize_defaultincs(broadcast->pdls[j]); |
|
517
|
37475
|
100
|
|
|
|
|
for (i=0; inimpl; i++) { |
|
518
|
9846
|
|
|
|
|
|
PDL_BRC_INC(broadcast->incs, broadcast->npdls, j, i) = |
|
519
|
9846
|
100
|
|
|
|
|
temp ? 0 : |
|
520
|
9831
|
50
|
|
|
|
|
PDL_REPRINC(broadcast->pdls[j],i+broadcast->realdims[j]); |
|
521
|
|
|
|
|
|
|
} |
|
522
|
27629
|
|
|
|
|
|
return PDL_err; |
|
523
|
|
|
|
|
|
|
} |
|
524
|
|
|
|
|
|
|
|
|
525
|
39132
|
|
|
|
|
|
int pdl_startbroadcastloop(pdl_broadcast *broadcast,pdl_error (*func)(pdl_trans *), |
|
526
|
|
|
|
|
|
|
pdl_trans *t, pdl_error *error_ret) { |
|
527
|
39132
|
|
|
|
|
|
PDL_Indx j, npdls = broadcast->npdls; |
|
528
|
39132
|
100
|
|
|
|
|
if ((broadcast->gflags & (PDL_BROADCAST_MAGICKED | PDL_BROADCAST_MAGICK_BUSY)) |
|
529
|
|
|
|
|
|
|
== PDL_BROADCAST_MAGICKED ) { |
|
530
|
|
|
|
|
|
|
/* If no function supplied (i.e. being called from PDL::broadcast_over), don't run in parallel */ |
|
531
|
115
|
50
|
|
|
|
|
if (!func) |
|
532
|
0
|
|
|
|
|
|
broadcast->gflags &= ~PDL_BROADCAST_MAGICKED; /* Cancel thread_magicked */ |
|
533
|
|
|
|
|
|
|
else { |
|
534
|
115
|
|
|
|
|
|
broadcast->gflags |= PDL_BROADCAST_MAGICK_BUSY; |
|
535
|
|
|
|
|
|
|
/* Do the broadcastloop magically (i.e. in parallel) */ |
|
536
|
459
|
100
|
|
|
|
|
for (j=0; j
|
|
537
|
344
|
50
|
|
|
|
|
if (!(t->vtable->par_flags[j] & PDL_PARAM_ISTEMP)) continue; |
|
538
|
0
|
|
|
|
|
|
pdl *it = broadcast->pdls[j]; |
|
539
|
0
|
|
|
|
|
|
it->dims[it->ndims-1] = broadcast->mag_nthr; |
|
540
|
0
|
|
|
|
|
|
pdl_resize_defaultincs(it); |
|
541
|
0
|
|
|
|
|
|
pdl_error PDL_err = pdl_make_physical(it); |
|
542
|
0
|
0
|
|
|
|
|
if (PDL_err.error) { |
|
543
|
0
|
|
|
|
|
|
*error_ret = PDL_err; |
|
544
|
0
|
|
|
|
|
|
return 1; |
|
545
|
|
|
|
|
|
|
} |
|
546
|
|
|
|
|
|
|
} |
|
547
|
115
|
|
|
|
|
|
pdl_error PDL_err = pdl_magic_thread_cast(broadcast->pdls[broadcast->mag_nthpdl], |
|
548
|
|
|
|
|
|
|
func,t, broadcast); |
|
549
|
115
|
100
|
|
|
|
|
if (PDL_err.error) { |
|
550
|
1
|
|
|
|
|
|
*error_ret = PDL_err; |
|
551
|
1
|
|
|
|
|
|
return 1; |
|
552
|
|
|
|
|
|
|
} |
|
553
|
114
|
|
|
|
|
|
broadcast->gflags &= ~PDL_BROADCAST_MAGICK_BUSY; |
|
554
|
114
|
|
|
|
|
|
return 1; /* DON'T DO BROADCASTLOOP AGAIN */ |
|
555
|
|
|
|
|
|
|
} |
|
556
|
|
|
|
|
|
|
} |
|
557
|
|
|
|
|
|
|
PDL_Indx *inds, *dims; int thr; |
|
558
|
39017
|
|
|
|
|
|
PDL_Indx *offsp = pdl_get_threadoffsp_int(broadcast,&thr, &inds, &dims); |
|
559
|
39017
|
50
|
|
|
|
|
if (!offsp) return -1; |
|
560
|
117319
|
100
|
|
|
|
|
for (j=0; jndims; j++) |
|
561
|
78630
|
100
|
|
|
|
|
if (!dims[j]) return 1; /* do nothing if empty */ |
|
562
|
131346
|
100
|
|
|
|
|
for (j=0; j
|
|
563
|
92657
|
100
|
|
|
|
|
offsp[j] = PDL_BRC_THR_OFFSET(broadcast, thr, j); |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
564
|
38689
|
|
|
|
|
|
return 0; |
|
565
|
|
|
|
|
|
|
} |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
/* nth is how many dims are done inside the broadcastloop itself */ |
|
568
|
|
|
|
|
|
|
/* inds is how far along each non-broadcastloop dim we are */ |
|
569
|
47247
|
|
|
|
|
|
int pdl_iterbroadcastloop(pdl_broadcast *broadcast,PDL_Indx nth) { |
|
570
|
|
|
|
|
|
|
int thr; |
|
571
|
47247
|
|
|
|
|
|
PDL_Indx *inds, *dims, npdls = broadcast->npdls; |
|
572
|
47247
|
|
|
|
|
|
PDL_Indx *offsp = pdl_get_threadoffsp_int(broadcast, &thr, &inds, &dims); |
|
573
|
47247
|
50
|
|
|
|
|
if (!offsp) return -1; |
|
574
|
47247
|
|
|
|
|
|
return pdl_broadcast_nd_step(npdls, offsp, nth, broadcast->ndims, broadcast->incs, dims, inds); |
|
575
|
|
|
|
|
|
|
} |