File Coverage

AM.xs
Criterion Covered Total %
statement 369 394 93.6
branch 129 140 92.1
condition n/a
subroutine n/a
pod n/a
total 498 534 93.2


line stmt bran cond sub pod time code
1             #define PERL_NO_GET_CONTEXT
2             #define NO_XSLOCKS
3             #include "EXTERN.h"
4             #include "perl.h"
5             #include "XSUB.h"
6              
7             #include "ppport.h"
8              
9             #include
10              
11             #define NUM_LATTICES 4
12              
13             /*
14             * This program must deal with integers that are too big to be
15             * represented by 32 bits.
16             *
17             * They are represented by AM_BIG_INT, which is typedef'd to
18             *
19             * unsigned long a[8]
20             *
21             * where each a[i] < 2*16. Such an array represents the integer
22             *
23             * a[0] + a[1] * 2^16 + ... + a[7] * 2^(7*16).
24             *
25             * We only use 16 bits of the unsigned long instead of 32, so that
26             * when we add or multiply two large integers, we have room for overflow.
27             * After any addition or multiplication, the result is carried so that
28             * each element of the array is again < 2*16.
29             *
30             * Someday I may rewrite this in assembler.
31             *
32             */
33             typedef unsigned short AM_SHORT;
34             typedef unsigned long AM_LONG;
35             typedef AM_LONG AM_BIG_INT[8];
36              
37             #define high_bits(x) x >> 16
38             #define low_bits(x) x & 0xffff
39              
40             #define carry(var, ind) \
41             var[ind + 1] += high_bits(var[ind]); \
42             var[ind] = low_bits(var[ind])
43              
44             /* carry macros for math using AM_BIG_INT */
45             #define carry_pointer(p) \
46             *(p + 1) += high_bits(*(p)); \
47             *(p) = low_bits(*(p))
48              
49             #define carry_replace(var, ind) \
50             var[ind + 1] = high_bits(var[ind]); \
51             var[ind] = low_bits(var[ind])
52              
53             #define hash_pointer_from_stack(ind) \
54             (HV *) SvRV(ST(ind))
55              
56             #define array_pointer_from_stack(ind) \
57             AvARRAY((AV *)SvRV(ST(ind)))
58              
59             #define unsigned_int_from_stack(ind) \
60             SvUVX(ST(ind))
61              
62             /* AM_SUPRAs form a linked list; using for(iter_supra(x, supra)) loops over the list members using the temp variable x */
63             #define iter_supras(loop_var, supra_ptr) \
64             loop_var = supra_ptr + supra_ptr->next; loop_var != supra_ptr; loop_var = supra_ptr + loop_var->next
65              
66             #define sublist_top(supra) \
67             supra->data + supra->data[0] + 1
68              
69             /*
70             * structure for the supracontexts
71             */
72              
73             typedef struct AM_supra {
74             /* list of subcontexts in this supracontext
75             *
76             * data[0] is the number of subcontexts in
77             * the array;
78             *
79             * data[1] is always 0 (useful for finding
80             * intersections; see below)
81             *
82             * data[i] is not an actually subcontext
83             * label; instead, all the subcontext labels
84             * are kept in an array called subcontext
85             * (bad choice of name?) created in
86             * function _fillandcount(). Thus, the
87             * actual subcontexts in the supracontext
88             * are subcontext[data[2]], ...
89             *
90             * data[i] < data[i+1] if i > 1 and
91             * i < data[0].
92             *
93             * Using an array of increasing positive
94             * integers makes it easy to take
95             * intersections (see lattice.pod).
96             */
97             AM_SHORT *data;
98              
99             /* number of supracontexts that contain
100             * precisely these subcontexts;
101             *
102             * According to the AM algorithm, we're
103             * supposed to look at all the homogeneous
104             * supracontexts to compute the analogical
105             * set. Instead of traversing the
106             * supracontextual lattice to find them, we
107             * can instead traverse the list of AM_SUPRA
108             * with count > 0 and use the value of count
109             * to do our computing.
110             *
111             * Since we're actually traversing four
112             * small lattices and taking intersections,
113             * we'll be multiplying the four values of
114             * count to get what we want.
115             *
116             */
117             AM_SHORT count;
118              
119             /*
120             * used to implement two linked lists
121             *
122             * One linked list contains all the nonempty
123             * supracontexts (i.e., data[0] is not 0).
124             * This linked list is in fact circular.
125             *
126             * One linked list contains all the unused
127             * memory that can be used for new
128             * supracontexts.
129             */
130             AM_SHORT next;
131              
132             /*
133             * used during the filling of the
134             * supracontextual lattice (see below)
135             */
136             unsigned char touched;
137             } AM_SUPRA;
138              
139             /*
140             * There is quite a bit of data that must pass between AM.pm and
141             * AM.xs. Instead of repeatedly passing it back and forth on
142             * the argument stack, AM.pm sends references to the variables
143             * holding this shared data, by calling _xs_initialize() (defined later
144             * on). These pointers are then stored in the following structure,
145             * which is put into the magic part of $self (since $self is an HV,
146             * it is perforce an SvPVMG as well).
147             *
148             * Note that for arrays, we store a pointer to the array data itself,
149             * not the AV*. That means that in AM.pm, we have to be careful
150             * how we make assignments to array variables; a reassignment such as
151             *
152             * @sum = (pack "L!8", 0, 0, 0, 0, 0, 0, 0, 0) x @sum;
153             *
154             * breaks everything because the pointer stored here then won't point
155             * to the actual data anymore. That's why the appropriate line in
156             * AM.pm is
157             *
158             * foreach (@sum) {
159             * $_ = pack "L!8", 0, 0, 0, 0, 0, 0, 0, 0;
160             * }
161             *
162             * Most of the identifiers in the struct have the same names as the
163             * variables created in AM.pm and are documented there. Those
164             * that don't are documented below.
165             *
166             * This trick of storing pointers like this is borrowed from the
167             * source code of Perl/Tk. Thanks, Nick!
168             *
169             */
170              
171             typedef struct AM_guts {
172              
173             /*
174             * Let i be an integer from 0 to 3; this represents which of the
175             * four sublattices we are considering.
176             *
177             * Let lattice = lattice_list[i] and supralist = supra_list[i]; then lattice and
178             * supralist taken together tell us which subcontexts are in a
179             * particular supracontext. If s is the label of a supracontext,
180             * then it contains the subcontexts listed in
181             * supralist[lattice[s]].data[].
182             *
183             */
184              
185             AM_SHORT *lattice_list[NUM_LATTICES];
186             AM_SUPRA *supra_list[NUM_LATTICES];
187              
188             /* array ref containing number of active features in
189             * each lattice (currently we us four lattices)
190             */
191             SV **lattice_sizes;
192             /* array ref containing class labels for whole data set;
193             * array index is data item index in data set.
194             */
195             SV **classes;
196             /* TODO: ??? */
197             SV **itemcontextchain;
198             /* TODO: ??? */
199             HV *itemcontextchainhead;
200             /* Maps subcontext binary labels to class indices (or 0 if subcontext is heterogeneous) */
201             HV *context_to_class;
202             /* Maps subcontext binary labels to the number of training items
203             * contained in that subcontext
204             */
205             HV *context_size;
206             /* Maps binary context labels to the number of pointers to each,
207             * or to the number of pointers to each class label if heterogenous.
208             * The key "grand_total" maps to the total number of pointers.
209             */
210             HV *pointers;
211             /* Maps binary context labels to the size of the gang effect of
212             * that subcontext. A gang effect is the number of pointers in
213             * the given subcontext multiplied by the number of training items
214             * contained in the context.
215             */
216             HV *raw_gang;
217             /* number of pointers to each class label;
218             * keys are class indices and values are numbers
219             * of pointers (AM_BIG_INT).
220             */
221             SV **sum;
222             /*
223             * contains the total number of possible class labels;
224             * used for computing gang effects.
225             */
226             IV num_classes;
227             } AM_GUTS;
228              
229             /*
230             * A function and a vtable necessary for the use of Perl magic
231             * TODO: explain the necessity
232             */
233              
234 192           static int AMguts_mgFree(pTHX_ SV *sv, MAGIC *magic) {
235             int i;
236 192           AM_GUTS *guts = (AM_GUTS *) SvPVX(magic->mg_obj);
237 960 100         for (i = 0; i < NUM_LATTICES; ++i) {
238 768           Safefree(guts->lattice_list[i]);
239 768           Safefree(guts->supra_list[i][0].data);
240 768           Safefree(guts->supra_list[i]);
241             }
242 192           return 0;
243             }
244              
245             MGVTBL AMguts_vtab = {
246             NULL,
247             NULL,
248             NULL,
249             NULL,
250             AMguts_mgFree
251             };
252              
253             /*
254             * arrays used in the change-of-base portion of normalize(SV *s)
255             * they are initialized in BOOT
256             */
257              
258             AM_LONG tens[16]; /* 10, 10*2, 10*4, ... */
259             AM_LONG ones[16]; /* 1, 1*2, 1*4, ... */
260              
261             /*
262             * function: normalize(SV *s)
263             *
264             * s is an SvPV whose PV* is an unsigned long array representing a very
265             * large integer
266             *
267             * this function modifies s so that its NV is the floating point
268             * representation of the very large integer value, while its PV* is
269             * the decimal representation of the very large integer value in ASCII
270             * (cool, a double-valued scalar)
271             *
272             * computing the NV is straightforward
273             *
274             * computing the PV is done using the old change-of-base algorithm:
275             * repeatedly divide by 10, and use the remainders to construct the
276             * ASCII digits from least to most significant
277             *
278             */
279             const unsigned int ASCII_0 = 0x30;
280             const unsigned int DIVIDE_SPACE = 10;
281             const int OUTSPACE_SIZE = 55;
282 6243           void normalize(pTHX_ SV *s) {
283 6243           AM_LONG *p = (AM_LONG *)SvPVX(s);
284              
285 6243           AM_LONG dspace[DIVIDE_SPACE];
286 6243           AM_LONG qspace[DIVIDE_SPACE];
287             AM_LONG *dividend, *quotient, *dptr, *qptr;
288              
289 6243           STRLEN length = SvCUR(s) / sizeof(AM_LONG);
290             /* length indexes into dspace and qspace */
291             assert(length <= DIVIDE_SPACE);
292              
293             /*
294             * outptr iterates outspace from end to beginning, and an ASCII digit is inserted at each location.
295             * No need to 0-terminate, since we track the final string length in outlength and pass it to sv_setpvn.
296             */
297 6243           char outspace[OUTSPACE_SIZE];
298             char *outptr;
299 6243           outptr = outspace + (OUTSPACE_SIZE - 1);
300 6243           unsigned int outlength = 0;
301              
302             /* TODO: is this required to be a certain number of bits? */
303 6243           long double nn = 0;
304              
305             /* nn will be assigned to the NV */
306 56187 100         for (int j = 8; j; --j) {
307             /* 2^16 * nn + p[j-1] */
308 49944           nn = 65536.0 * nn + (double) *(p + j - 1);
309             }
310              
311 6243           dividend = &dspace[0];
312 6243           quotient = &qspace[0];
313 6243 50         Copy(p, dividend, length, AM_LONG);
314              
315 14404           while (1) {
316 70591 100         while (length && (*(dividend + length - 1) == 0)) {
    100          
317 49944           --length;
318             }
319 20647 100         if (length == 0) {
320 6243           sv_setpvn(s, outptr, outlength);
321 6243           break;
322             }
323 14404           dptr = dividend + length - 1;
324 14404           qptr = quotient + length - 1;
325 14404           AM_LONG carry = 0;
326 28812 100         while (dptr >= dividend) {
327             unsigned int i;
328 14408           *dptr += carry << 16;
329 14408           *qptr = 0;
330 244936 100         for (i = 16; i; ) {
331 230528           --i;
332 230528 100         if (tens[i] <= *dptr) {
333 17580           *dptr -= tens[i];
334 17580           *qptr += ones[i];
335             }
336             }
337 14408           carry = *dptr;
338 14408           --dptr;
339 14408           --qptr;
340             }
341 14404           --outptr;
342 14404           *outptr = (char)(ASCII_0 + *dividend) & 0x00ff;
343 14404           ++outlength;
344 14404           AM_LONG *temp = dividend;
345 14404           dividend = quotient;
346 14404           quotient = temp;
347             }
348              
349 6243           SvNVX(s) = nn;
350 6243           SvNOK_on(s);
351 6243           }
352              
353             /* Given 2 lists of training item indices sorted in descending order,
354             * fill a third list with the intersection of items in these lists.
355             * This is a simple intersection, and no check for heterogeneity is
356             * performed.
357             * Return the next empty (available) index address in the third list.
358             * If the two lists have no intersection, then the return value is
359             * just the same as the third input.
360             */
361 9227           unsigned short *intersect_supras(
362             AM_SHORT *intersection_list_top, AM_SHORT *subcontext_list_top, AM_SHORT *k){
363             while (1) {
364 342985 100         while (*intersection_list_top > *subcontext_list_top) {
365 220213           --intersection_list_top;
366             }
367 122772 100         if (*intersection_list_top == 0) {
368 9227           break;
369             }
370 113545 100         if (*intersection_list_top < *subcontext_list_top) {
371 33962           AM_SHORT *temp = intersection_list_top;
372 33962           intersection_list_top = subcontext_list_top;
373 33962           subcontext_list_top = temp;
374 33962           continue;
375             }
376 79583           *k = *intersection_list_top;
377 79583           --intersection_list_top;
378 79583           --subcontext_list_top;
379 79583           --k;
380             }
381 9227           return k;
382             }
383             /* The first three inputs are the same as for intersect_supra above,
384             * and the fourth paramater should be a list containing the class
385             * index for all of the training items. In addition to combining
386             * the first two lists into the third via intersection, the final
387             * list is checked for heterogeneity and the non-deterministic
388             * heterogeneous supracontexts are removed.
389             * The return value is the number of items contained in the resulting
390             * list.
391             */
392 33728           AM_SHORT intersect_supras_final(
393             AM_SHORT *intersection_list_top, AM_SHORT *subcontext_list_top,
394             AM_SHORT *intersect, AM_SHORT *subcontext_class){
395 33728           AM_SHORT class = 0;
396 33728           AM_SHORT length = 0;
397             while (1) {
398 691082 100         while (*intersection_list_top > *subcontext_list_top) {
399 506781           --intersection_list_top;
400             }
401 184301 100         if (*intersection_list_top == 0) {
402 21261           break;
403             }
404 163040 100         if (*intersection_list_top < *subcontext_list_top) {
405 92778           AM_SHORT *temp = intersection_list_top;
406 92778           intersection_list_top = subcontext_list_top;
407 92778           subcontext_list_top = temp;
408 92778           continue;
409             }
410 70262           *intersect = *intersection_list_top;
411 70262           ++intersect;
412 70262           ++length;
413              
414             /* is it heterogeneous? */
415 70262 100         if (class == 0) {
416             /* is it not deterministic? */
417 29519 100         if (length > 1) {
418 1431           length = 0;
419 1431           break;
420             } else {
421 28088           class = subcontext_class[*intersection_list_top];
422             }
423             } else {
424             /* Do the classes not match? */
425 40743 100         if (class != subcontext_class[*intersection_list_top]) {
426 11036           length = 0;
427 11036           break;
428             }
429             }
430 57795           --intersection_list_top;
431 57795           --subcontext_list_top;
432             }
433 33728           return length;
434             }
435              
436             /* clear out the supracontexts */
437 194           void clear_supras(AM_SUPRA **supra_list, int supras_length)
438             {
439             AM_SUPRA *p;
440 970 100         for (int i = 0; i < supras_length; i++)
441             {
442 3687 100         for (iter_supras(p, supra_list[i]))
443             {
444 2911           Safefree(p->data);
445             }
446             }
447 194           }
448              
449             MODULE = Algorithm::AM PACKAGE = Algorithm::AM
450              
451             PROTOTYPES: DISABLE
452              
453             BOOT:
454             {
455 10           AM_LONG ten = 10;
456 10           AM_LONG one = 1;
457 10           AM_LONG *tensptr = &tens[0];
458 10           AM_LONG *onesptr = &ones[0];
459             unsigned int i;
460 170 100         for (i = 16; i; i--) {
461 160           *tensptr = ten;
462 160           *onesptr = one;
463 160           ++tensptr;
464 160           ++onesptr;
465 160           ten <<= 1;
466 160           one <<= 1;
467             }
468             }
469              
470             /*
471             * This function is called by from AM.pm right after creating
472             * a blessed reference to Algorithm::AM. It stores the necessary
473             * pointers in the AM_GUTS structure and attaches it to the magic
474             * part of the reference.
475             *
476             */
477              
478             void
479             _xs_initialize(...)
480             PPCODE:
481             /* NOT A POINTER THIS TIME! (let memory allocate automatically) */
482             AM_GUTS guts;
483             /* 9 arguments are passed to the _xs_initialize method: */
484             /* $self, the AM object */
485 192           HV *self = hash_pointer_from_stack(0);
486             /* For explanations on these, see the comments on AM_guts */
487 192           SV **lattice_sizes = array_pointer_from_stack(1);
488 192           guts.classes = array_pointer_from_stack(2);
489 192           guts.itemcontextchain = array_pointer_from_stack(3);
490 192           guts.itemcontextchainhead = hash_pointer_from_stack(4);
491 192           guts.context_to_class = hash_pointer_from_stack(5);
492 192           guts.context_size = hash_pointer_from_stack(6);
493 192           guts.pointers = hash_pointer_from_stack(7);
494 192           guts.raw_gang = hash_pointer_from_stack(8);
495 192           guts.sum = array_pointer_from_stack(9);
496             /* Length of guts.sum */
497 192           guts.num_classes = av_len((AV *) SvRV(ST(9)));
498              
499             /*
500             * Since the sublattices are small, we just take a chunk of memory
501             * here that will be large enough for our purposes and do the actual
502             * memory allocation within the code; this reduces the overhead of
503             * repeated system calls.
504             *
505             */
506              
507 960 100         for (int i = 0; i < NUM_LATTICES; ++i) {
508 768           UV v = SvUVX(lattice_sizes[i]);
509 768           Newxz(guts.lattice_list[i], 1 << v, AM_SHORT);
510 768           Newxz(guts.supra_list[i], 1 << (v + 1), AM_SUPRA); /* CHANGED */ /* TODO: what changed? */
511 768           Newxz(guts.supra_list[i][0].data, 2, AM_SHORT);
512             }
513              
514             /* Perl magic invoked here */
515              
516 192           SV *svguts = newSVpv((char *)&guts, sizeof(AM_GUTS));
517 192           sv_magic((SV *) self, svguts, PERL_MAGIC_ext, NULL, 0);
518 192           SvRMAGICAL_off((SV *) self);
519 192           MAGIC *magic = mg_find((SV *)self, PERL_MAGIC_ext);
520 192           magic->mg_virtual = &AMguts_vtab;
521 192           mg_magical((SV *) self);
522              
523             void
524             _fillandcount(...)
525             PPCODE:
526             /* Input args are the AM object ($self), number of features in each
527             * lattice, and a flag to indicate whether to count occurrences
528             * (true) or pointers (false), also known as linear/quadratic.
529             */
530 194           HV *self = hash_pointer_from_stack(0);
531 194           SV **lattice_sizes_input = array_pointer_from_stack(1);
532 194           UV linear_flag = unsigned_int_from_stack(2);
533 194           MAGIC *magic = mg_find((SV *)self, PERL_MAGIC_ext);
534 194           AM_GUTS *guts = (AM_GUTS *)SvPVX(magic->mg_obj);
535              
536             /*
537             * We initialize the memory for the sublattices, including setting up the
538             * linked lists.
539             */
540              
541 194           AM_SHORT **lattice_list = guts->lattice_list;
542 194           AM_SUPRA **supra_list = guts->supra_list;
543             /* this helps us manage the free list in supra_list[i] */
544             AM_SHORT nptr[NUM_LATTICES];
545             AM_SHORT lattice_sizes[NUM_LATTICES];
546 970 100         for (int sublattice_index = 0; sublattice_index < NUM_LATTICES; ++sublattice_index) {
547             /* Extract numeric values for the specified lattice_sizes */
548 776           lattice_sizes[sublattice_index] = (AM_SHORT) SvUVX(lattice_sizes_input[sublattice_index]);
549             /* TODO: explain the lines below */
550 776           Zero(lattice_list[sublattice_index], 1 << lattice_sizes[sublattice_index], AM_SHORT);
551 776           supra_list[sublattice_index][0].next = 0;
552 776           nptr[sublattice_index] = 1;
553 7048 100         for (int i = 1; i < 1 << (lattice_sizes[sublattice_index] + 1); ++i) {/* CHANGED (TODO: changed what?) */
554 6272           supra_list[sublattice_index][i].next = (AM_SHORT) i + 1;
555             }
556             }
557              
558             /*
559             * Instead of adding subcontext labels directly to the supracontexts,
560             * we store all of these labels in an array called subcontext. We
561             * then store the array indices of the subcontext labels in the
562             * supracontexts. That means the list of subcontexts in the
563             * supracontexts is an increasing sequence of positive integers, handy
564             * for taking intersections (see lattice.pod).
565             *
566             * The index into the array is called subcontextnumber.
567             *
568             * The array of matching classes is called subcontext_class.
569             *
570             */
571              
572 194           HV *context_to_class = guts->context_to_class;
573 194 50         AM_SHORT subcontextnumber = (AM_SHORT)HvUSEDKEYS(context_to_class);
574             AM_SHORT *subcontext;
575 194           Newxz(subcontext, NUM_LATTICES *(subcontextnumber + 1), AM_SHORT);
576 194           subcontext += NUM_LATTICES * subcontextnumber;
577             AM_SHORT *subcontext_class;
578 194           Newxz(subcontext_class, subcontextnumber + 1, AM_SHORT);
579 194           subcontext_class += subcontextnumber;
580              
581             AM_SHORT *intersectlist, *intersectlist2, *intersectlist3;
582             AM_SHORT *ilist2top, *ilist3top;
583 194           Newxz(intersectlist, subcontextnumber + 1, AM_SHORT);
584 194           Newxz(intersectlist2, subcontextnumber + 1, AM_SHORT);
585 194           ilist2top = intersectlist2 + subcontextnumber;
586 194           Newxz(intersectlist3, subcontextnumber + 1, AM_SHORT);
587 194           ilist3top = intersectlist3 + subcontextnumber;
588              
589 194           hv_iterinit(context_to_class);
590             HE *context_to_class_entry;
591 8382 100         while ((context_to_class_entry = hv_iternext(context_to_class))) {
592 8188           AM_SHORT *contextptr = (AM_SHORT *) HeKEY(context_to_class_entry);
593 8188           AM_SHORT class = (AM_SHORT) SvUVX(HeVAL(context_to_class_entry));
594 40940 100         for (int sublattice_index = 0; sublattice_index < NUM_LATTICES; ++sublattice_index, ++contextptr) {
595 32752           AM_SHORT active = lattice_sizes[sublattice_index];
596 32752           AM_SHORT *lattice = lattice_list[sublattice_index];
597 32752           AM_SUPRA *supralist = supra_list[sublattice_index];
598 32752           AM_SHORT nextsupra = nptr[sublattice_index];
599 32752           AM_SHORT context = *contextptr;
600              
601             /* We want to add subcontextnumber to the appropriate
602             * supracontexts in the four smaller lattices.
603             *
604             * Suppose we want to add subcontextnumber to the supracontext
605             * labeled by d. supralist[lattice[d]] is an AM_SUPRA which
606             * reflects the current state of the supracontext. Suppose this
607             * state is
608             *
609             * data: 2 0 x y (i.e., currently contains two subcontexts)
610             * count: 5
611             * next: 7
612             * touched: 0
613             *
614             * Then we pluck an unused AM_SUPRA off of the free list;
615             * suppose that it's located at supralist[9] (the variable
616             * nextsupra tells us where). Then supralist[lattice[d]] will
617             * change to
618             *
619             * data: 2 0 x y
620             * count: 4 (decrease by 1)
621             * next: 9
622             * touched: 1
623             *
624             * and supralist[9] will become
625             *
626             * data: 3 0 subcontextnumber x y (now has three subcontexts)
627             * count: 1
628             * next: 7
629             * touched: 0
630             *
631             * (note: the entries in data[] are added in decreasing order)
632             *
633             *
634             * If, on the other hand, if supralist[lattice[d]] looks like
635             *
636             * data: 2 0 x y
637             * count: 8
638             * next: 11
639             * touched: 1
640             *
641             * that means that supralist[11] must look something like
642             *
643             * data: 3 0 subcontextnumber x y
644             * count: 4
645             * next: 2
646             * touched: 0
647             *
648             * There already exists a supracontext with subcontextnumber
649             * added in! So we change supralist[lattice[d]] to
650             *
651             * data: 2 0 x y
652             * count: 7 (decrease by 1)
653             * next: 11
654             * touched: 1
655             *
656             * change supralist[11] to
657             *
658             * data: 3 0 subcontextnumber x y
659             * count: 5 (increase by 1)
660             * next: 2
661             * touched: 0
662             *
663             * and set lattice[d] = 11.
664             */
665              
666 32752           subcontext[sublattice_index] = context;
667             AM_SHORT gaps[16];
668 32752 100         if (context == 0) {
669             AM_SUPRA *p;
670 28805 100         for (iter_supras(p, supralist)) {
671             AM_SHORT *data;
672 22441           Newxz(data, p->data[0] + 3, AM_SHORT);
673 22441           Copy(p->data + 2, data + 3, p->data[0], AM_SHORT);
674 22441           data[2] = subcontextnumber;
675 22441           data[0] = p->data[0] + 1;
676 22441           Safefree(p->data);
677 22441           p->data = data;
678             }
679 6364 100         if (lattice[context] == 0) {
680              
681             /* in this case, the subcontext will be
682             * added to all supracontexts, so there's
683             * no need to hassle with a Gray code and
684             * move pointers
685             */
686              
687 732           AM_SHORT count = 0;
688 732           AM_SHORT ci = nptr[sublattice_index];
689 732           nptr[sublattice_index] = supralist[ci].next;
690 732           AM_SUPRA *c = supralist + ci;
691 732           c->next = supralist->next;
692 732           supralist->next = ci;
693 732           Newxz(c->data, 3, AM_SHORT);
694 732           c->data[2] = subcontextnumber;
695 732           c->data[0] = 1;
696 3723 100         for (int i = 0; i < (1 << active); ++i) {
697 2991 100         if (lattice[i] == 0) {
698 1682           lattice[i] = ci;
699 1682           ++count;
700             }
701             }
702 732           c->count = count;
703             }
704 6364           continue;
705             }
706              
707             /* set up traversal using Gray code */
708 26388           AM_SHORT d = context;
709 26388           AM_SHORT numgaps = 0;
710 81785 100         for (int i = 1 << (active - 1); i; i >>= 1) {
711 55397 100         if (!(i & context)) {
712 15490           gaps[numgaps++] = i;
713             }
714             }
715 26388           AM_SHORT t = 1 << numgaps;
716              
717 26388           AM_SHORT pi = lattice[context];
718 26388           AM_SUPRA *p = supralist + pi;
719 26388 100         if (pi) {
720 25373           --(p->count);
721             }
722 26388           AM_SHORT ci = nextsupra;
723 26388           nextsupra = supralist[ci].next;
724 26388           p->touched = 1;
725 26388           AM_SUPRA *c = supralist + ci;
726 26388           c->touched = 0;
727 26388           c->next = p->next;
728 26388           p->next = ci;
729 26388           c->count = 1;
730 26388           Newxz(c->data, p->data[0] + 3, AM_SHORT);
731 26388           Copy(p->data + 2, c->data + 3, p->data[0], AM_SHORT);
732 26388           c->data[2] = subcontextnumber;
733 26388           c->data[0] = p->data[0] + 1;
734 26388           lattice[context] = ci;
735              
736             /* traverse */
737 43317 100         while (--t) {
738             AM_SHORT tt;
739             int i;
740             /* find the rightmost 1 in t; from HAKMEM, I believe */
741 18368 100         for (i = 0, tt = ~t & (t - 1); tt; tt >>= 1, ++i) {
742             ;
743             }
744 16929           d ^= gaps[i];
745              
746 16929           p = supralist + (pi = lattice[d]);
747 16929 100         if (pi) {
748 16439           --(p->count);
749             }
750 16929           switch (p->touched) {
751 1141           case 1:
752 1141           ++supralist[lattice[d] = p->next].count;
753 1141           break;
754 15788           case 0:
755 15788           ci = nextsupra;
756 15788           nextsupra = supralist[ci].next;
757 15788           p->touched = 1;
758 15788           c = supralist + ci;
759 15788           c->touched = 0;
760 15788           c->next = p->next;
761 15788           p->next = ci;
762 15788           c->count = 1;
763 15788           Newxz(c->data, p->data[0] + 3, AM_SHORT);
764 15788           Copy(p->data + 2, c->data + 3, p->data[0], AM_SHORT);
765 15788           c->data[2] = subcontextnumber;
766 15788           c->data[0] = p->data[0] + 1;
767 15788           lattice[d] = ci;
768             }
769             }
770              
771             /* Here we return all AM_SUPRA with count 0 back to the free
772             * list and set touched = 0 for all remaining.
773             */
774              
775 26388           p = supralist;
776 26388           p->touched = 0;
777             int i;
778             do {
779 136980 100         if (supralist[i = p->next].count == 0) {
780 39997           Safefree(supralist[i].data);
781 39997           p->next = supralist[i].next;
782 39997           supralist[i].next = nextsupra;
783 39997           nextsupra = (AM_SHORT) i;
784             } else {
785 96983           p = supralist + p->next;
786 96983           p->touched = 0;
787             }
788 136980 100         } while (p->next);
789 26388           nptr[sublattice_index] = nextsupra;
790             } /*end for(sublattice_index = 0...*/
791 8188           subcontext -= NUM_LATTICES;
792 8188           *subcontext_class = class;
793 8188           --subcontext_class;
794 8188           --subcontextnumber;
795             } /*end while (context_to_class_entry = hv_iternext(...*/
796              
797 194           HV *context_size = guts->context_size;
798 194           HV *pointers = guts->pointers;
799              
800             /*
801             * The code is in three parts:
802             *
803             * 1. We successively take one nonempty supracontext from each of the
804             * four small lattices and take their intersection to find a
805             * supracontext of the big lattice. If at any point we get the
806             * empty set, we move on.
807             *
808             * 2. We determine if the supracontext so found is heterogeneous; if
809             * so, we skip it.
810             *
811             * 3. Otherwise, we count the pointers or occurrences.
812             *
813             */
814             {
815             /* find intersections */
816             AM_SUPRA * p0;
817 824 100         for (iter_supras(p0, supra_list[0])) {
818             AM_SUPRA *p1;
819 2751 100         for (iter_supras(p1, supra_list[1])) {
820             /* Find intersection between p0 and p1 */
821 2121           AM_SHORT *k = intersect_supras(
822 2121           sublist_top(p0),
823 2121           sublist_top(p1),
824             ilist2top
825             );
826             /* If k has not been increased then intersection was empty */
827 2121 100         if (k == ilist2top) {
828 154           continue;
829             }
830 1967           *k = 0;
831              
832             AM_SUPRA *p2;
833 9073 100         for (iter_supras(p2, supra_list[2])) {
834              
835             /*Find intersection between previous intersection and p2*/
836 7106           k = intersect_supras(
837             ilist2top,
838 7106           sublist_top(p2),
839             ilist3top
840             );
841             /* If k has not been increased then intersection was empty */
842 7106 100         if (k == ilist3top) {
843 694           continue;
844             }
845 6412           *k = 0;
846              
847             AM_SUPRA *p3;
848 40140 100         for (iter_supras(p3, supra_list[3])) {
849              
850             /* Find intersection between previous intersection and p3;
851             * check for disqualified supras this time.
852             */
853 33728           AM_SHORT length = intersect_supras_final(
854             ilist3top,
855 33728           sublist_top(p3),
856             intersectlist,
857             subcontext_class
858             );
859              
860             /* count occurrences */
861 33728 100         if (length) {
862 15621           AM_BIG_INT count = {0, 0, 0, 0, 0, 0, 0, 0};
863              
864 15621           count[0] = p0->count;
865              
866 15621           count[0] *= p1->count;
867 15621           carry(count, 0);
868              
869 15621           count[0] *= p2->count;
870 15621           count[1] *= p2->count;
871 15621           carry(count, 0);
872 15621           carry(count, 1);
873              
874 15621           count[0] *= p3->count;
875 15621           count[1] *= p3->count;
876 15621           count[2] *= p3->count;
877 15621           carry(count, 0);
878 15621           carry(count, 1);
879 15621           carry(count, 2);
880 15621 100         if(!linear_flag){
881             /* If scoring is pointers (quadratic) instead of linear*/
882 15601           AM_LONG pointercount = 0;
883 50558 100         for (int i = 0; i < length; ++i) {
884 34957           pointercount += (AM_LONG) SvUV(*hv_fetch(context_size,
885             (char *) (subcontext + (NUM_LATTICES * intersectlist[i])), 8, 0));
886             }
887 15601 50         if (pointercount & 0xffff0000) {
888 0           AM_SHORT pchi = (AM_SHORT) (high_bits(pointercount));
889 0           AM_SHORT pclo = (AM_SHORT) (low_bits(pointercount));
890             AM_LONG hiprod[6];
891 0           hiprod[1] = pchi * count[0];
892 0           hiprod[2] = pchi * count[1];
893 0           hiprod[3] = pchi * count[2];
894 0           hiprod[4] = pchi * count[3];
895 0           count[0] *= pclo;
896 0           count[1] *= pclo;
897 0           count[2] *= pclo;
898 0           count[3] *= pclo;
899 0           carry(count, 0);
900 0           carry(count, 1);
901 0           carry(count, 2);
902 0           carry(count, 3);
903              
904 0           count[1] += hiprod[1];
905 0           count[2] += hiprod[2];
906 0           count[3] += hiprod[3];
907 0           count[4] += hiprod[4];
908 0           carry(count, 1);
909 0           carry(count, 2);
910 0           carry(count, 3);
911 0           carry(count, 4);
912             } else {
913 15601           count[0] *= pointercount;
914 15601           count[1] *= pointercount;
915 15601           count[2] *= pointercount;
916 15601           count[3] *= pointercount;
917 15601           carry(count, 0);
918 15601           carry(count, 1);
919 15601           carry(count, 2);
920 15601           carry(count, 3);
921             }
922             }
923 50611 100         for (int i = 0; i < length; ++i) {
924 34990           SV *final_pointers_sv = *hv_fetch(pointers,
925             (char *) (subcontext + (NUM_LATTICES * intersectlist[i])), 8, 1);
926 34990 100         if (!SvPOK(final_pointers_sv)) {
927 2745 50         SvUPGRADE(final_pointers_sv, SVt_PVNV);
928 2745 50         SvGROW(final_pointers_sv, 8 * sizeof(AM_LONG) + 1);
    50          
929 2745           Zero(SvPVX(final_pointers_sv), 8, AM_LONG);
930 2745           SvCUR_set(final_pointers_sv, 8 * sizeof(AM_LONG));
931 2745           SvPOK_on(final_pointers_sv);
932             }
933 34990           AM_LONG *final_pointers = (AM_LONG *) SvPVX(final_pointers_sv);
934 279920 100         for (int j = 0; j < 7; ++j) {
935 244930           *(final_pointers + j) += count[j];
936 244930           carry_pointer(final_pointers + j);
937             }
938             } /* end for (i = 0;... */
939             } /* end if (length) */
940             } /* end for (iter_supras(p3... */
941             } /* end for (iter_supras(p2... */
942             } /* end for (iter_supras(p1... */
943             } /* end for (iter_supras(p0... */
944              
945 194           clear_supras(supra_list, 4);
946              
947             /*
948             * compute analogical set and raw gang effects
949             *
950             * Technically, we don't compute the analogical set; instead, we
951             * compute how many pointers/occurrences there are for each of the
952             * data items in a particular subcontext, and associate that number
953             * with the subcontext label, not directly with the data item. We can
954             * do this because if two data items are in the same subcontext, they
955             * will have the same number of pointers/occurrences.
956             *
957             * If the user wants the detailed analogical set, it will be created
958             * in Result.pm.
959             *
960             */
961              
962 194           HV *raw_gang = guts->raw_gang;
963 194           SV **classes = guts->classes;
964 194           SV **itemcontextchain = guts->itemcontextchain;
965 194           HV *itemcontextchainhead = guts->itemcontextchainhead;
966 194           SV **sum = guts->sum;
967 194           IV num_classes = guts->num_classes;
968 194           AM_BIG_INT grand_total = {0, 0, 0, 0, 0, 0, 0, 0};
969 194           hv_iterinit(pointers);
970             HE * pointers_entry;
971 2939 100         while ((pointers_entry = hv_iternext(pointers))) {
972             AM_BIG_INT p;
973 2745           Copy(SvPVX(HeVAL(pointers_entry)), p, 8, AM_LONG);
974              
975 2745           SV *num_examplars = *hv_fetch(context_size, HeKEY(pointers_entry), NUM_LATTICES * sizeof(AM_SHORT), 0);
976 2745           AM_LONG count = (AM_LONG)SvUVX(num_examplars);
977 2745           AM_SHORT counthi = (AM_SHORT)(high_bits(count));
978 2745           AM_SHORT countlo = (AM_SHORT)(low_bits(count));
979              
980             /* initialize 0 because it won't be overwritten */
981             /*
982             * TODO: multiply through p[7] into gangcount[7]
983             * and warn if there's potential overflow
984             */
985             AM_BIG_INT gangcount;
986 2745           gangcount[0] = 0;
987 21960 100         for (int i = 0; i < 7; ++i) {
988 19215           gangcount[i] += countlo * p[i];
989 19215           carry_replace(gangcount, i);
990             }
991 2745           gangcount[7] += countlo * p[7];
992              
993             /* TODO: why is element 0 not considered here? */
994 2745 50         if (counthi) {
995 0 0         for (int i = 0; i < 6; ++i) {
996 0           gangcount[i + 1] += counthi * p[i];
997 0           carry(gangcount, i + 1);
998             }
999             }
1000 21960 100         for (int i = 0; i < 7; ++i) {
1001 19215           grand_total[i] += gangcount[i];
1002 19215           carry(grand_total, i);
1003             }
1004 2745           grand_total[7] += gangcount[7];
1005              
1006 2745           normalize(aTHX_ HeVAL(pointers_entry));
1007              
1008 2745           SV* gang_pointers = *hv_fetch(raw_gang, HeKEY(pointers_entry), NUM_LATTICES * sizeof(AM_SHORT), 1);
1009 2745 50         SvUPGRADE(gang_pointers, SVt_PVNV);
1010 2745           sv_setpvn(gang_pointers, (char *) gangcount, 8 * sizeof(AM_LONG));
1011 2745           normalize(aTHX_ gang_pointers);
1012              
1013 2745           SV* this_class_sv = *hv_fetch(context_to_class, HeKEY(pointers_entry), NUM_LATTICES * sizeof(AM_SHORT), 0);
1014 2745           AM_SHORT this_class = (AM_SHORT) SvUVX(this_class_sv);
1015 2745 100         if (this_class) {
1016 2702 100         SV_CHECK_THINKFIRST(sum[this_class]);
1017 2702           AM_LONG *s = (AM_LONG *) SvPVX(sum[this_class]);
1018 21616 100         for (int i = 0; i < 7; ++i) {
1019 18914           *(s + i) += gangcount[i];
1020 18914           carry_pointer(s + i);
1021             }
1022             } else {
1023 43           SV *exemplar = *hv_fetch(itemcontextchainhead, HeKEY(pointers_entry), NUM_LATTICES * sizeof(AM_SHORT), 0);
1024 155 100         while (SvIOK(exemplar)) {
1025 112           IV datanum = SvIVX(exemplar);
1026 112           IV ocnum = SvIVX(classes[datanum]);
1027 112 100         SV_CHECK_THINKFIRST(sum[ocnum]);
1028 112           AM_LONG *s = (AM_LONG *) SvPVX(sum[ocnum]);
1029 896 100         for (int i = 0; i < 7; ++i) {
1030 784           *(s + i) += p[i];
1031 784           carry_pointer(s + i);
1032 784           exemplar = itemcontextchain[datanum];
1033             }
1034             }
1035             }
1036             }
1037 753 100         for (int i = 1; i <= num_classes; ++i) {
1038 559           normalize(aTHX_ sum[i]);
1039             }
1040              
1041 194           SV *grand_total_entry = *hv_fetch(pointers, "grand_total", 11, 1);
1042 194 50         SvUPGRADE(grand_total_entry, SVt_PVNV);
1043 194           sv_setpvn(grand_total_entry, (char *) grand_total, 8 * sizeof(AM_LONG));
1044 194           normalize(aTHX_ grand_total_entry);
1045              
1046 194           Safefree(subcontext);
1047 194           Safefree(subcontext_class);
1048 194           Safefree(intersectlist);
1049 194           Safefree(intersectlist2);
1050 194           Safefree(intersectlist3);
1051             }