File Coverage

AM.xs
Criterion Covered Total %
statement 369 394 93.6
branch 141 164 85.9
condition n/a
subroutine n/a
pod n/a
total 510 558 91.4


line stmt bran cond sub pod time code
1             #define PERL_NO_GET_CONTEXT
2             #define NO_XSLOCKS
3             #include "EXTERN.h"
4             #include "perl.h"
5             #include "XSUB.h"
6              
7             #include "ppport.h"
8              
9             #include
10              
11             #define NUM_LATTICES 4
12              
13             /*
14             * This program must deal with integers that are too big to be
15             * represented by 32 bits.
16             *
17             * They are represented by AM_BIG_INT, which is typedef'd to
18             *
19             * unsigned long a[8]
20             *
21             * where each a[i] < 2*16. Such an array represents the integer
22             *
23             * a[0] + a[1] * 2^16 + ... + a[7] * 2^(7*16).
24             *
25             * We only use 16 bits of the unsigned long instead of 32, so that
26             * when we add or multiply two large integers, we have room for overflow.
27             * After any addition or multiplication, the result is carried so that
28             * each element of the array is again < 2*16.
29             *
30             * Someday I may rewrite this in assembler.
31             *
32             */
33             typedef unsigned short AM_SHORT;
34             typedef unsigned long AM_LONG;
35             typedef AM_LONG AM_BIG_INT[8];
36              
37             #define high_bits(x) x >> 16
38             #define low_bits(x) x & 0xffff
39              
40             #define carry(var, ind) \
41             var[ind + 1] += high_bits(var[ind]); \
42             var[ind] = low_bits(var[ind])
43              
44             /* carry macros for math using AM_BIG_INT */
45             #define carry_pointer(p) \
46             *(p + 1) += high_bits(*(p)); \
47             *(p) = low_bits(*(p))
48              
49             #define carry_replace(var, ind) \
50             var[ind + 1] = high_bits(var[ind]); \
51             var[ind] = low_bits(var[ind])
52              
53             #define hash_pointer_from_stack(ind) \
54             (HV *) SvRV(ST(ind))
55              
56             #define array_pointer_from_stack(ind) \
57             AvARRAY((AV *)SvRV(ST(ind)))
58              
59             #define unsigned_int_from_stack(ind) \
60             SvUVX(ST(ind))
61              
62             /* AM_SUPRAs form a linked list; using for(iter_supra(x, supra)) loops over the list members using the temp variable x */
63             #define iter_supras(loop_var, supra_ptr) \
64             loop_var = supra_ptr + supra_ptr->next; loop_var != supra_ptr; loop_var = supra_ptr + loop_var->next
65              
66             #define sublist_top(supra) \
67             supra->data + supra->data[0] + 1
68              
69             /*
70             * structure for the supracontexts
71             */
72              
73             typedef struct AM_supra {
74             /* list of subcontexts in this supracontext
75             *
76             * data[0] is the number of subcontexts in
77             * the array;
78             *
79             * data[1] is always 0 (useful for finding
80             * intersections; see below)
81             *
82             * data[i] is not an actually subcontext
83             * label; instead, all the subcontext labels
84             * are kept in an array called subcontext
85             * (bad choice of name?) created in
86             * function _fillandcount(). Thus, the
87             * actual subcontexts in the supracontext
88             * are subcontext[data[2]], ...
89             *
90             * data[i] < data[i+1] if i > 1 and
91             * i < data[0].
92             *
93             * Using an array of increasing positive
94             * integers makes it easy to take
95             * intersections (see lattice.pod).
96             */
97             AM_SHORT *data;
98              
99             /* number of supracontexts that contain
100             * precisely these subcontexts;
101             *
102             * According to the AM algorithm, we're
103             * supposed to look at all the homogeneous
104             * supracontexts to compute the analogical
105             * set. Instead of traversing the
106             * supracontextual lattice to find them, we
107             * can instead traverse the list of AM_SUPRA
108             * with count > 0 and use the value of count
109             * to do our computing.
110             *
111             * Since we're actually traversing four
112             * small lattices and taking intersections,
113             * we'll be multiplying the four values of
114             * count to get what we want.
115             *
116             */
117             AM_SHORT count;
118              
119             /*
120             * used to implement two linked lists
121             *
122             * One linked list contains all the nonempty
123             * supracontexts (i.e., data[0] is not 0).
124             * This linked list is in fact circular.
125             *
126             * One linked list contains all the unused
127             * memory that can be used for new
128             * supracontexts.
129             */
130             AM_SHORT next;
131              
132             /*
133             * used during the filling of the
134             * supracontextual lattice (see below)
135             */
136             unsigned char touched;
137             } AM_SUPRA;
138              
139             /*
140             * There is quite a bit of data that must pass between AM.pm and
141             * AM.xs. Instead of repeatedly passing it back and forth on
142             * the argument stack, AM.pm sends references to the variables
143             * holding this shared data, by calling _xs_initialize() (defined later
144             * on). These pointers are then stored in the following structure,
145             * which is put into the magic part of $self (since $self is an HV,
146             * it is perforce an SvPVMG as well).
147             *
148             * Note that for arrays, we store a pointer to the array data itself,
149             * not the AV*. That means that in AM.pm, we have to be careful
150             * how we make assignments to array variables; a reassignment such as
151             *
152             * @sum = (pack "L!8", 0, 0, 0, 0, 0, 0, 0, 0) x @sum;
153             *
154             * breaks everything because the pointer stored here then won't point
155             * to the actual data anymore. That's why the appropriate line in
156             * AM.pm is
157             *
158             * foreach (@sum) {
159             * $_ = pack "L!8", 0, 0, 0, 0, 0, 0, 0, 0;
160             * }
161             *
162             * Most of the identifiers in the struct have the same names as the
163             * variables created in AM.pm and are documented there. Those
164             * that don't are documented below.
165             *
166             * This trick of storing pointers like this is borrowed from the
167             * source code of Perl/Tk. Thanks, Nick!
168             *
169             */
170              
171             typedef struct AM_guts {
172              
173             /*
174             * Let i be an integer from 0 to 3; this represents which of the
175             * four sublattices we are considering.
176             *
177             * Let lattice = lptr[i] and supralist = sptr[i]; then lattice and
178             * supralist taken together tell us which subcontexts are in a
179             * particular supracontext. If s is the label of a supracontext,
180             * then it contains the subcontexts listed in
181             * supralist[lattice[s]].data[].
182             *
183             */
184              
185             AM_SHORT *lptr[NUM_LATTICES];
186             AM_SUPRA *sptr[NUM_LATTICES];
187              
188             /* array ref containing number of active features in
189             * each lattice (currently we us four lattices)
190             */
191             SV **lattice_sizes;
192             /* array ref containing class labels for whole data set;
193             * array index is data item index in data set.
194             */
195             SV **classes;
196             /* TODO: ??? */
197             SV **itemcontextchain;
198             /* TODO: ??? */
199             HV *itemcontextchainhead;
200             /* Maps subcontext binary labels to class indices */
201             HV *context_to_class;
202             /* Maps binary context labels to the number of training items
203             * contained in that subcontext
204             */
205             HV *contextsize;
206             /* Maps binary context labels to the number of pointers to each,
207             * or to the number of pointers to class label if heterogenous.
208             * The key 'grandtotal' maps to the total number of pointers.
209             */
210             HV *pointers;
211             /* Maps binary context labels to the size of the gang effect of
212             * that context. A gang effect is the number of pointers in
213             * the given context multiplied by the number of training items
214             * contained in the context.
215             */
216             HV *gang;
217             /* number of pointers to each class label;
218             * keys are class indices and values are numbers
219             * of pointers (AM_BIG_INT).
220             */
221             SV **sum;
222             /*
223             * contains the total number of possible class labels;
224             * used for computing gang effects.
225             */
226             IV num_classes;
227             } AM_GUTS;
228              
229             /*
230             * A function and a vtable necessary for the use of Perl magic
231             * TODO: explain the necessity
232             */
233              
234 192           static int AMguts_mgFree(pTHX_ SV *sv, MAGIC *mg) {
235             int i;
236 192           AM_GUTS *guts = (AM_GUTS *) SvPVX(mg->mg_obj);
237 960 100         for (i = 0; i < NUM_LATTICES; ++i) {
238 768           Safefree(guts->lptr[i]);
239 768           Safefree(guts->sptr[i][0].data);
240 768           Safefree(guts->sptr[i]);
241             }
242 192           return 0;
243             }
244              
245             MGVTBL AMguts_vtab = {
246             NULL,
247             NULL,
248             NULL,
249             NULL,
250             AMguts_mgFree
251             };
252              
253             /*
254             * arrays used in the change-of-base portion of normalize(SV *s)
255             * they are initialized in BOOT
256             */
257              
258             AM_LONG tens[16]; /* 10, 10*2, 10*4, ... */
259             AM_LONG ones[16]; /* 1, 1*2, 1*4, ... */
260              
261             /*
262             * function: normalize(SV *s)
263             *
264             * s is an SvPV whose PV* is an unsigned long array representing a very
265             * large integer
266             *
267             * this function modifies s so that its NV is the floating point
268             * representation of the very large integer value, while its PV* is
269             * the decimal representation of the very large integer value in ASCII
270             * (cool, a double-valued scalar)
271             *
272             * computing the NV is straightforward
273             *
274             * computing the PV is done using the old change-of-base algorithm:
275             * repeatedly divide by 10, and use the remainders to construct the
276             * ASCII digits from least to most significant
277             *
278             */
279             const unsigned int ASCII_0 = 0x30;
280             const unsigned int DIVIDE_SPACE = 10;
281             const int OUTSPACE_SIZE = 55;
282 6243           void normalize(pTHX_ SV *s) {
283 6243           AM_LONG *p = (AM_LONG *)SvPVX(s);
284              
285 6243           AM_LONG dspace[DIVIDE_SPACE];
286 6243           AM_LONG qspace[DIVIDE_SPACE];
287             AM_LONG *dividend, *quotient, *dptr, *qptr;
288              
289 6243           STRLEN length = SvCUR(s) / sizeof(AM_LONG);
290             /* length indexes into dspace and qspace */
291             assert(length <= DIVIDE_SPACE);
292              
293             /*
294             * outptr iterates outspace from end to beginning, and an ASCII digit is inserted at each location.
295             * No need to 0-terminate since we track the final string length in outlength and pass it to sv_setpvn.
296             */
297 6243           char outspace[OUTSPACE_SIZE];
298             char *outptr;
299 6243           outptr = outspace + (OUTSPACE_SIZE - 1);
300 6243           unsigned int outlength = 0;
301              
302             /* TODO: is this required to be a certain number of bits? */
303 6243           long double nn = 0;
304             int j;
305              
306             /* nn will be assigned to the NV */
307 56187 100         for (j = 8; j; --j) {
308             /* 2^16 * nn + p[j-1] */
309 49944           nn = 65536.0 * nn + (double) *(p + j - 1);
310             }
311              
312 6243           dividend = &dspace[0];
313 6243           quotient = &qspace[0];
314 6243 50         Copy(p, dividend, length, AM_LONG);
315              
316             while (1) {
317 20647           AM_LONG *temp, carry = 0;
318 70591 100         while (length && (*(dividend + length - 1) == 0)) {
    100          
319 49944           --length;
320             }
321 20647 100         if (length == 0) {
322 6243           sv_setpvn(s, outptr, outlength);
323 6243           break;
324             }
325 14404           dptr = dividend + length - 1;
326 14404           qptr = quotient + length - 1;
327 28812 100         while (dptr >= dividend) {
328             unsigned int i;
329 14408           *dptr += carry << 16;
330 14408           *qptr = 0;
331 244936 100         for (i = 16; i; ) {
332 230528           --i;
333 230528 100         if (tens[i] <= *dptr) {
334 17580           *dptr -= tens[i];
335 17580           *qptr += ones[i];
336             }
337             }
338 14408           carry = *dptr;
339 14408           --dptr;
340 14408           --qptr;
341             }
342 14404           --outptr;
343 14404           *outptr = (char)(ASCII_0 + *dividend) & 0x00ff;
344 14404           ++outlength;
345 14404           temp = dividend;
346 14404           dividend = quotient;
347 14404           quotient = temp;
348 14404           }
349              
350 6243           SvNVX(s) = nn;
351 6243           SvNOK_on(s);
352 6243           }
353              
354             /* Given 2 lists of training item indices sorted in descending order,
355             * fill a third list with the intersection of items in these lists.
356             * This is a simple intersection, and no check for heterogeneity is
357             * performed.
358             * Return the next empty (available) index address in the third list.
359             * If the two lists have no intersection, then the return value is
360             * just the same as the third input.
361             */
362 9227           unsigned short *intersect_supras(
363             AM_SHORT *intersection_list_top, AM_SHORT *subcontext_list_top, AM_SHORT *k){
364             AM_SHORT *temp;
365             while (1) {
366 343909 100         while (*intersection_list_top > *subcontext_list_top) {
367 222091           --intersection_list_top;
368             }
369 121818 100         if (*intersection_list_top == 0) {
370 9227           break;
371             }
372 112591 100         if (*intersection_list_top < *subcontext_list_top) {
373 33008           temp = intersection_list_top;
374 33008           intersection_list_top = subcontext_list_top;
375 33008           subcontext_list_top = temp;
376 33008           continue;
377             }
378 79583           *k = *intersection_list_top;
379 79583           --intersection_list_top;
380 79583           --subcontext_list_top;
381 79583           --k;
382 112591           }
383 9227           return k;
384             }
385             /* The first three inputs are the same as for intersect_supra above,
386             * and the fourth paramater should be a list containing the class
387             * index for all of the training items. In addition to combining
388             * the first two lists into the third via intersection, the final
389             * list is checked for heterogeneity and the non-deterministic
390             * heterogeneous supracontexts are removed.
391             * The return value is the number of items contained in the resulting
392             * list.
393             */
394 33728           AM_SHORT intersect_supras_final(
395             AM_SHORT *intersection_list_top, AM_SHORT *subcontext_list_top,
396             AM_SHORT *intersect, AM_SHORT *subcontext_class){
397 33728           AM_SHORT class = 0;
398 33728           AM_SHORT length = 0;
399             AM_SHORT *temp;
400             while (1) {
401 698351 100         while (*intersection_list_top > *subcontext_list_top) {
402 513960           --intersection_list_top;
403             }
404 184391 100         if (*intersection_list_top == 0) {
405 21261           break;
406             }
407 163130 100         if (*intersection_list_top < *subcontext_list_top) {
408 94156           temp = intersection_list_top;
409 94156           intersection_list_top = subcontext_list_top;
410 94156           subcontext_list_top = temp;
411 94156           continue;
412             }
413 68974           *intersect = *intersection_list_top;
414 68974           ++intersect;
415 68974           ++length;
416              
417             /* is it heterogeneous? */
418 68974 100         if (class == 0) {
419             /* is it not deterministic? */
420 29835 100         if (length > 1) {
421 1747           length = 0;
422 1747           break;
423             } else {
424 28088           class = subcontext_class[*intersection_list_top];
425             }
426             } else {
427             /* Do the classes not match? */
428 39139 100         if (class != subcontext_class[*intersection_list_top]) {
429 10720           length = 0;
430 10720           break;
431             }
432             }
433 56507           --intersection_list_top;
434 56507           --subcontext_list_top;
435 150663           }
436 33728           return length;
437             }
438              
439             MODULE = Algorithm::AM PACKAGE = Algorithm::AM
440              
441             PROTOTYPES: DISABLE
442              
443             BOOT:
444             {
445 10           AM_LONG ten = 10;
446 10           AM_LONG one = 1;
447 10           AM_LONG *tensptr = &tens[0];
448 10           AM_LONG *onesptr = &ones[0];
449             unsigned int i;
450 170 100         for (i = 16; i; i--) {
451 160           *tensptr = ten;
452 160           *onesptr = one;
453 160           ++tensptr;
454 160           ++onesptr;
455 160           ten <<= 1;
456 160           one <<= 1;
457             }
458             }
459              
460             /*
461             * This function is called by from AM.pm right after creating
462             * a blessed reference to Algorithm::AM. It stores the necessary
463             * pointers in the AM_GUTS structure and attaches it to the magic
464             * part of the reference.
465             *
466             */
467              
468             void
469             _xs_initialize(...)
470             PREINIT:
471             HV *project;
472             AM_GUTS guts; /* NOT A POINTER THIS TIME! (let memory allocate automatically) */
473             SV **lattice_sizes;
474             SV *svguts;
475             MAGIC *mg;
476             int i;
477             PPCODE:
478             /* 9 arguments are passed to the _xs_initialize method: */
479             /* $self, the AM object */
480 192           project = hash_pointer_from_stack(0);
481             /* For explanations on these, see the comments on AM_guts */
482 192           lattice_sizes = array_pointer_from_stack(1);
483 192           guts.classes = array_pointer_from_stack(2);
484 192           guts.itemcontextchain = array_pointer_from_stack(3);
485 192           guts.itemcontextchainhead = hash_pointer_from_stack(4);
486 192           guts.context_to_class = hash_pointer_from_stack(5);
487 192           guts.contextsize = hash_pointer_from_stack(6);
488 192           guts.pointers = hash_pointer_from_stack(7);
489 192           guts.gang = hash_pointer_from_stack(8);
490 192           guts.sum = array_pointer_from_stack(9);
491             /* Length of guts.sum */
492 192           guts.num_classes = av_len((AV *) SvRV(ST(9)));
493              
494             /*
495             * Since the sublattices are small, we just take a chunk of memory
496             * here that will be large enough for our purposes and do the actual
497             * memory allocation within the code; this reduces the overhead of
498             * repeated system calls.
499             *
500             */
501              
502 960 100         for (i = 0; i < NUM_LATTICES; ++i) {
503 768           UV v = SvUVX(lattice_sizes[i]);
504 768 50         Newxz(guts.lptr[i], 1 << v, AM_SHORT);
505 768 50         Newxz(guts.sptr[i], 1 << (v + 1), AM_SUPRA); /* CHANGED */ /* TODO: what changed? */
506 768           Newxz(guts.sptr[i][0].data, 2, AM_SHORT);
507             }
508              
509             /* Perl magic invoked here */
510              
511 192           svguts = newSVpv((char *) &guts, sizeof(AM_GUTS));
512 192           sv_magic((SV *) project, svguts, PERL_MAGIC_ext, NULL, 0);
513 192           SvRMAGICAL_off((SV *) project);
514 192           mg = mg_find((SV *) project, PERL_MAGIC_ext);
515 192           mg->mg_virtual = &AMguts_vtab;
516 192           mg_magical((SV *) project);
517              
518             void
519             _fillandcount(...)
520             PREINIT:
521             HV *project;
522             UV linear_flag;
523             AM_GUTS *guts;
524             MAGIC *mg;
525             SV **lattice_sizes_input;
526             AM_SHORT lattice_sizes[NUM_LATTICES];
527             AM_SHORT **lptr;
528             AM_SUPRA **sptr;
529             AM_SHORT nptr[NUM_LATTICES];/* this helps us manage the free list in sptr[i] */
530             AM_SHORT subcontextnumber;
531             AM_SHORT *subcontext;
532             AM_SHORT *subcontext_class;
533             SV **classes, **itemcontextchain, **sum;
534             HV *itemcontextchainhead, *context_to_class, *contextsize, *pointers, *gang;
535             IV num_classes;
536             HE *he;
537 194           AM_BIG_INT grandtotal = {0, 0, 0, 0, 0, 0, 0, 0};
538             SV *tempsv;
539             int chunk, i;
540             AM_SHORT gaps[16];
541             AM_SHORT *intersect, *intersectlist;
542             AM_SHORT *intersectlist2, *intersectlist3, *ilist2top, *ilist3top;
543             PPCODE:
544             /* Input args are the AM object ($self), number of features
545             * perl lattice, and a flag to indicate whether to count occurrences
546             * (true) or pointers (false), also known as linear/quadratic.
547             */
548 194           project = hash_pointer_from_stack(0);
549 194           lattice_sizes_input = array_pointer_from_stack(1);
550 194           linear_flag = unsigned_int_from_stack(2);
551 194           mg = mg_find((SV *) project, PERL_MAGIC_ext);
552 194           guts = (AM_GUTS *) SvPVX(mg->mg_obj);
553              
554             /*
555             * We initialize the memory for the sublattices, including setting up the
556             * linked lists.
557             *
558             */
559              
560 194           lptr = guts->lptr;
561 194           sptr = guts->sptr;
562 970 100         for (chunk = 0; chunk < NUM_LATTICES; ++chunk) {
563             /* Extract numeric values for the specified lattice_sizes */
564 776           lattice_sizes[chunk] = (AM_SHORT) SvUVX(lattice_sizes_input[chunk]);
565             /* TODO: explain the lines below */
566 776 50         Zero(lptr[chunk], 1 << lattice_sizes[chunk], AM_SHORT);
567 776           sptr[chunk][0].next = 0;
568 776           nptr[chunk] = 1;
569 7048 100         for (i = 1; i < 1 << (lattice_sizes[chunk] + 1); ++i) {/* CHANGED (TODO: changed what?) */
570 6272           sptr[chunk][i].next = (AM_SHORT) i + 1;
571             }
572             }
573              
574             /*
575             * Instead of adding subcontext labels directly to the supracontexts,
576             * we store all of these labels in an array called subcontext. We
577             * then store the array indices of the subcontext labels in the
578             * supracontexts. That means the list of subcontexts in the
579             * supracontexts is an increasing sequence of positive integers, handy
580             * for taking intersections (see lattice.pod).
581             *
582             * The index into the array is called subcontextnumber.
583             *
584             * The array of matching classes is called subcontext_class.
585             *
586             */
587              
588 194           context_to_class = guts->context_to_class;
589 194 50         subcontextnumber = (AM_SHORT) HvUSEDKEYS(context_to_class);
590 194 50         Newxz(subcontext, NUM_LATTICES * (subcontextnumber + 1), AM_SHORT);
591 194           subcontext += NUM_LATTICES * subcontextnumber;
592 194 50         Newxz(subcontext_class, subcontextnumber + 1, AM_SHORT);
593 194           subcontext_class += subcontextnumber;
594 194 50         Newxz(intersectlist, subcontextnumber + 1, AM_SHORT);
595 194 50         Newxz(intersectlist2, subcontextnumber + 1, AM_SHORT);
596 194           ilist2top = intersectlist2 + subcontextnumber;
597 194 50         Newxz(intersectlist3, subcontextnumber + 1, AM_SHORT);
598 194           ilist3top = intersectlist3 + subcontextnumber;
599              
600 194           hv_iterinit(context_to_class);
601 8382 100         while ((he = hv_iternext(context_to_class))) {
602 8188           AM_SHORT *contextptr = (AM_SHORT *) HeKEY(he);
603 8188           AM_SHORT class = (AM_SHORT) SvUVX(HeVAL(he));
604 40940 100         for (chunk = 0; chunk < NUM_LATTICES; ++chunk, ++contextptr) {
605 32752           AM_SHORT active = lattice_sizes[chunk];
606 32752           AM_SHORT *lattice = lptr[chunk];
607 32752           AM_SUPRA *supralist = sptr[chunk];
608 32752           AM_SHORT nextsupra = nptr[chunk];
609 32752           AM_SHORT context = *contextptr;
610             AM_SUPRA *p, *c;
611             AM_SHORT pi, ci;
612 32752           AM_SHORT d, t, tt, numgaps = 0;
613              
614             /* We want to add subcontextnumber to the appropriate
615             * supracontexts in the four smaller lattices.
616             *
617             * Suppose we want to add subcontextnumber to the supracontext
618             * labeled by d. supralist[lattice[d]] is an AM_SUPRA which
619             * reflects the current state of the supracontext. Suppose this
620             * state is
621             *
622             * data: 2 0 x y (i.e., currently contains two subcontexts)
623             * count: 5
624             * next: 7
625             * touched: 0
626             *
627             * Then we pluck an unused AM_SUPRA off of the free list;
628             * suppose that it's located at supralist[9] (the variable
629             * nextsupra tells us where). Then supralist[lattice[d]] will
630             * change to
631             *
632             * data: 2 0 x y
633             * count: 4 (decrease by 1)
634             * next: 9
635             * touched: 1
636             *
637             * and supralist[9] will become
638             *
639             * data: 3 0 subcontextnumber x y (now has three subcontexts)
640             * count: 1
641             * next: 7
642             * touched: 0
643             *
644             * (note: the entries in data[] are added in decreasing order)
645             *
646             *
647             * If, on the other hand, if supralist[lattice[d]] looks like
648             *
649             * data: 2 0 x y
650             * count: 8
651             * next: 11
652             * touched: 1
653             *
654             * that means that supralist[11] must look something like
655             *
656             * data: 3 0 subcontextnumber x y
657             * count: 4
658             * next: 2
659             * touched: 0
660             *
661             * There already exists a supracontext with subcontextnumber
662             * added in! So we change supralist[lattice[d]] to
663             *
664             * data: 2 0 x y
665             * count: 7 (decrease by 1)
666             * next: 11
667             * touched: 1
668             *
669             * change supralist[11] to
670             *
671             * data: 3 0 subcontextnumber x y
672             * count: 5 (increase by 1)
673             * next: 2
674             * touched: 0
675             *
676             * and set lattice[d] = 11.
677             */
678              
679 32752           subcontext[chunk] = context;
680              
681 32752 100         if (context == 0) {
682 28576 100         for (iter_supras(p, supralist)) {
683             AM_SHORT *data;
684 22212 50         Newxz(data, p->data[0] + 3, AM_SHORT);
685 22212           Copy(p->data + 2, data + 3, p->data[0], AM_SHORT);
686 22212           data[2] = subcontextnumber;
687 22212           data[0] = p->data[0] + 1;
688 22212           Safefree(p->data);
689 22212           p->data = data;
690             }
691 6364 100         if (lattice[context] == 0) {
692              
693             /* in this case, the subcontext will be
694             * added to all supracontexts, so there's
695             * no need to hassle with a Gray code and
696             * move pointers
697             */
698              
699 732           AM_SHORT count = 0;
700 732           ci = nptr[chunk];
701 732           nptr[chunk] = supralist[ci].next;
702 732           c = supralist + ci;
703 732           c->next = supralist->next;
704 732           supralist->next = ci;
705 732           Newxz(c->data, 3, AM_SHORT);
706 732           c->data[2] = subcontextnumber;
707 732           c->data[0] = 1;
708 3723 100         for (i = 0; i < (1 << active); ++i) {
709 2991 100         if (lattice[i] == 0) {
710 1641           lattice[i] = ci;
711 1641           ++count;
712             }
713             }
714 732           c->count = count;
715             }
716 6364           continue;
717             }
718              
719             /* set up traversal using Gray code */
720 26388           d = context;
721 81785 100         for (i = 1 << (active - 1); i; i >>= 1) {
722 55397 100         if (!(i & context)) {
723 15490           gaps[numgaps++] = i;
724             }
725             }
726 26388           t = 1 << numgaps;
727              
728 26388           p = supralist + (pi = lattice[context]);
729 26388 100         if (pi) {
730 25331           --(p->count);
731             }
732 26388           ci = nextsupra;
733 26388           nextsupra = supralist[ci].next;
734 26388           p->touched = 1;
735 26388           c = supralist + ci;
736 26388           c->touched = 0;
737 26388           c->next = p->next;
738 26388           p->next = ci;
739 26388           c->count = 1;
740 26388 50         Newxz(c->data, p->data[0] + 3, AM_SHORT);
741 26388           Copy(p->data + 2, c->data + 3, p->data[0], AM_SHORT);
742 26388           c->data[2] = subcontextnumber;
743 26388           c->data[0] = p->data[0] + 1;
744 26388           lattice[context] = ci;
745              
746             /* traverse */
747 43317 100         while (--t) {
748             /* find the rightmost 1 in t; from HAKMEM, I believe */
749 18368 100         for (i = 0, tt = ~t & (t - 1); tt; tt >>= 1, ++i) {
750             ;
751             }
752 16929           d ^= gaps[i];
753              
754 16929           p = supralist + (pi = lattice[d]);
755 16929 100         if (pi) {
756 16440           --(p->count);
757             }
758 16929           switch (p->touched) {
759             case 1:
760 1144           ++supralist[lattice[d] = p->next].count;
761 1144           break;
762             case 0:
763 15785           ci = nextsupra;
764 15785           nextsupra = supralist[ci].next;
765 15785           p->touched = 1;
766 15785           c = supralist + ci;
767 15785           c->touched = 0;
768 15785           c->next = p->next;
769 15785           p->next = ci;
770 15785           c->count = 1;
771 15785 50         Newxz(c->data, p->data[0] + 3, AM_SHORT);
772 15785           Copy(p->data + 2, c->data + 3, p->data[0], AM_SHORT);
773 15785           c->data[2] = subcontextnumber;
774 15785           c->data[0] = p->data[0] + 1;
775 15785           lattice[d] = ci;
776             }
777             }
778              
779             /* Here we return all AM_SUPRA with count 0 back to the free
780             * list and set touched = 0 for all remaining.
781             */
782              
783 26388           p = supralist;
784 26388           p->touched = 0;
785             do {
786 135952 100         if (supralist[i = p->next].count == 0) {
787 39994           Safefree(supralist[i].data);
788 39994           p->next = supralist[i].next;
789 39994           supralist[i].next = nextsupra;
790 39994           nextsupra = (AM_SHORT) i;
791             } else {
792 95958           p = supralist + p->next;
793 95958           p->touched = 0;
794             }
795 135952 100         } while (p->next);
796 26388           nptr[chunk] = nextsupra;
797             } /*end for(chunk = 0...*/
798 8188           subcontext -= NUM_LATTICES;
799 8188           *subcontext_class = class;
800 8188           --subcontext_class;
801 8188           --subcontextnumber;
802             } /*end while (he = hv_iternext(...*/
803              
804 194           contextsize = guts->contextsize;
805 194           pointers = guts->pointers;
806              
807             /*
808             * The code is in three parts:
809             *
810             * 1. We successively take one nonempty supracontext from each of the
811             * four small lattices and take their intersection to find a
812             * supracontext of the big lattice. If at any point we get the
813             * empty set, we move on.
814             *
815             * 2. We determine if the supracontext so found is heterogeneous; if
816             * so, we skip it.
817             *
818             * 3. Otherwise, we count the pointers or occurrences.
819             *
820             */
821             {
822             AM_SUPRA *p0, *p1, *p2, *p3;
823             AM_SHORT length;
824             AM_SHORT *k;
825              
826             /* find intersections */
827 824 100         for (iter_supras(p0, sptr[0])) {
828 2751 100         for (iter_supras(p1, sptr[1])) {
829             /*Find intersection between p0 and p2*/
830 2121           k = intersect_supras(
831 2121           sublist_top(p0),
832 2121           sublist_top(p1),
833             ilist2top
834             );
835             /* If k has not been increased then intersection was empty */
836 2121 100         if (k == ilist2top) {
837 154           continue;
838             }
839 1967           *k = 0;
840              
841 9073 100         for (iter_supras(p2, sptr[2])) {
842              
843             /*Find intersection between previous intersection and p2*/
844 7106           k = intersect_supras(
845             ilist2top,
846 7106           sublist_top(p2),
847             ilist3top
848             );
849             /* If k has not been increased then intersection was empty */
850 7106 100         if (k == ilist3top) {
851 694           continue;
852             }
853 6412           *k = 0;
854              
855 40140 100         for (iter_supras(p3, sptr[3])) {
856              
857             /* Find intersection between previous intersection and p3;
858             * check for disqualified supras this time.
859             */
860 33728           length = intersect_supras_final(
861             ilist3top,
862 33728           sublist_top(p3),
863             intersectlist,
864             subcontext_class
865             );
866              
867             /* count occurrences */
868 33728 100         if (length) {
869             AM_SHORT i;
870 15621           AM_BIG_INT count = {0, 0, 0, 0, 0, 0, 0, 0};
871 15621           AM_LONG mask = 0xffff;
872              
873 15621           count[0] = p0->count;
874              
875 15621           count[0] *= p1->count;
876 15621           carry(count, 0);
877              
878 15621           count[0] *= p2->count;
879 15621           count[1] *= p2->count;
880 15621           carry(count, 0);
881 15621           carry(count, 1);
882              
883 15621           count[0] *= p3->count;
884 15621           count[1] *= p3->count;
885 15621           count[2] *= p3->count;
886 15621           carry(count, 0);
887 15621           carry(count, 1);
888 15621           carry(count, 2);
889 15621 100         if(!linear_flag){
890             /* If scoring is pointers (quadratic) instead of linear*/
891 15601           AM_LONG pointercount = 0;
892 50558 100         for (i = 0; i < length; ++i) {
893 34957 50         pointercount += (AM_LONG) SvUV(*hv_fetch(contextsize,
894             (char *) (subcontext + (NUM_LATTICES * intersectlist[i])), 8, 0));
895             }
896 15601 50         if (pointercount & 0xffff0000) {
897 0           AM_SHORT pchi = (AM_SHORT) (high_bits(pointercount));
898 0           AM_SHORT pclo = (AM_SHORT) (low_bits(pointercount));
899             AM_LONG hiprod[6];
900 0           hiprod[1] = pchi * count[0];
901 0           hiprod[2] = pchi * count[1];
902 0           hiprod[3] = pchi * count[2];
903 0           hiprod[4] = pchi * count[3];
904 0           count[0] *= pclo;
905 0           count[1] *= pclo;
906 0           count[2] *= pclo;
907 0           count[3] *= pclo;
908 0           carry(count, 0);
909 0           carry(count, 1);
910 0           carry(count, 2);
911 0           carry(count, 3);
912              
913 0           count[1] += hiprod[1];
914 0           count[2] += hiprod[2];
915 0           count[3] += hiprod[3];
916 0           count[4] += hiprod[4];
917 0           carry(count, 1);
918 0           carry(count, 2);
919 0           carry(count, 3);
920 0           carry(count, 4);
921             } else {
922 15601           count[0] *= pointercount;
923 15601           count[1] *= pointercount;
924 15601           count[2] *= pointercount;
925 15601           count[3] *= pointercount;
926 15601           carry(count, 0);
927 15601           carry(count, 1);
928 15601           carry(count, 2);
929 15601           carry(count, 3);
930             }
931             }
932 50611 100         for (i = 0; i < length; ++i) {
933             int j;
934             SV *tempsv;
935             AM_LONG *p;
936 34990           tempsv = *hv_fetch(pointers,
937             (char *) (subcontext + (NUM_LATTICES * intersectlist[i])), 8, 1);
938 34990 100         if (!SvPOK(tempsv)) {
939 2745 50         SvUPGRADE(tempsv, SVt_PVNV);
940 2745 50         SvGROW(tempsv, 8 * sizeof(AM_LONG) + 1);
    50          
941 2745           Zero(SvPVX(tempsv), 8, AM_LONG);
942 2745           SvCUR_set(tempsv, 8 * sizeof(AM_LONG));
943 2745           SvPOK_on(tempsv);
944             }
945 34990           p = (AM_LONG *) SvPVX(tempsv);
946 279920 100         for (j = 0; j < 7; ++j) {
947 244930           *(p + j) += count[j];
948 244930           carry_pointer(p + j);
949             }
950             } /* end for (i = 0;... */
951             } /* end if (length) */
952             } /* end for (iter_supras(p3... */
953             } /* end for (iter_supras(p2... */
954             } /* end for (iter_supras(p1... */
955             } /* end for (iter_supras(p0... */
956              
957             /* clear out the supracontexts */
958 824 100         for (iter_supras(p0, sptr[0])) {
959 630           Safefree(p0->data);
960             }
961 833 100         for (iter_supras(p1, sptr[1])) {
962 639           Safefree(p1->data);
963             }
964 876 100         for (iter_supras(p2, sptr[2])) {
965 682           Safefree(p2->data);
966             }
967 1154 100         for (iter_supras(p3, sptr[3])) {
968 960           Safefree(p3->data);
969             }
970              
971             /*
972             * compute analogical set and gang effects
973             *
974             * Technically, we don't compute the analogical set; instead, we
975             * compute how many pointers/occurrences there are for each of the
976             * data items in a particular subcontext, and associate that number
977             * with the subcontext label, not directly with the data item. We can
978             * do this because if two data items are in the same subcontext, they
979             * will have the same number of pointers/occurrences.
980             *
981             * If the user wants the detailed analogical set, it will be created
982             * in Result.pm.
983             *
984             */
985              
986 194           gang = guts->gang;
987 194           classes = guts->classes;
988 194           itemcontextchain = guts->itemcontextchain;
989 194           itemcontextchainhead = guts->itemcontextchainhead;
990 194           sum = guts->sum;
991 194           num_classes = guts->num_classes;
992 194           hv_iterinit(pointers);
993 2939 100         while ((he = hv_iternext(pointers))) {
994             AM_LONG count;
995             AM_SHORT counthi, countlo;
996             AM_BIG_INT p;
997             AM_BIG_INT gangcount;
998             AM_SHORT this_class;
999             SV *dataitem;
1000 2745           Copy(SvPVX(HeVAL(he)), p, 8, AM_LONG);
1001              
1002 2745           tempsv = *hv_fetch(contextsize, HeKEY(he), NUM_LATTICES * sizeof(AM_SHORT), 0);
1003 2745           count = (AM_LONG) SvUVX(tempsv);
1004 2745           counthi = (AM_SHORT) (high_bits(count));
1005 2745           countlo = (AM_SHORT) (low_bits(count));
1006              
1007             /* initialize 0 because it won't be overwritten */
1008             /*
1009             * TODO: multiply through p[7] into gangcount[7]
1010             * and warn if there's potential overflow
1011             */
1012 2745           gangcount[0] = 0;
1013 21960 100         for (i = 0; i < 7; ++i) {
1014 19215           gangcount[i] += countlo * p[i];
1015 19215           carry_replace(gangcount, i);
1016             }
1017 2745           gangcount[7] += countlo * p[7];
1018              
1019             /* TODO: why is element 0 not considered here? */
1020 2745 50         if (counthi) {
1021 0 0         for (i = 0; i < 6; ++i) {
1022 0           gangcount[i + 1] += counthi * p[i];
1023 0           carry(gangcount, i + 1);
1024             }
1025             }
1026 21960 100         for (i = 0; i < 7; ++i) {
1027 19215           grandtotal[i] += gangcount[i];
1028 19215           carry(grandtotal, i);
1029             }
1030 2745           grandtotal[7] += gangcount[7];
1031              
1032 2745           tempsv = *hv_fetch(gang, HeKEY(he), NUM_LATTICES * sizeof(AM_SHORT), 1);
1033 2745 50         SvUPGRADE(tempsv, SVt_PVNV);
1034 2745           sv_setpvn(tempsv, (char *) gangcount, 8 * sizeof(AM_LONG));
1035 2745           normalize(aTHX_ tempsv);
1036 2745           normalize(aTHX_ HeVAL(he));
1037              
1038 2745           tempsv = *hv_fetch(context_to_class, HeKEY(he), NUM_LATTICES * sizeof(AM_SHORT), 0);
1039 2745           this_class = (AM_SHORT) SvUVX(tempsv);
1040 2745 100         if (this_class) {
1041 2702           AM_LONG *s = (AM_LONG *) SvPVX(sum[this_class]);
1042 21616 100         for (i = 0; i < 7; ++i) {
1043 18914           *(s + i) += gangcount[i];
1044 18914           carry_pointer(s + i);
1045             }
1046             } else {
1047 43           dataitem = *hv_fetch(itemcontextchainhead, HeKEY(he), NUM_LATTICES * sizeof(AM_SHORT), 0);
1048 2857 100         while (SvIOK(dataitem)) {
1049 112           IV datanum = SvIVX(dataitem);
1050 112           IV ocnum = SvIVX(classes[datanum]);
1051 112           AM_LONG *s = (AM_LONG *) SvPVX(sum[ocnum]);
1052 896 100         for (i = 0; i < 7; ++i) {
1053 784           *(s + i) += p[i];
1054 784           carry_pointer(s + i);
1055 784           dataitem = itemcontextchain[datanum];
1056             }
1057             }
1058             }
1059             }
1060 753 100         for (i = 1; i <= num_classes; ++i) {
1061 559           normalize(aTHX_ sum[i]);
1062             }
1063 194           tempsv = *hv_fetch(pointers, "grandtotal", 10, 1);
1064 194 50         SvUPGRADE(tempsv, SVt_PVNV);
1065 194           sv_setpvn(tempsv, (char *) grandtotal, 8 * sizeof(AM_LONG));
1066 194           normalize(aTHX_ tempsv);
1067              
1068 194           Safefree(subcontext);
1069 194           Safefree(subcontext_class);
1070 194           Safefree(intersectlist);
1071 194           Safefree(intersectlist2);
1072 194           Safefree(intersectlist3);
1073             }