File Coverage

AM.xs
Criterion Covered Total %
statement 364 389 93.5
branch 141 164 85.9
condition n/a
subroutine n/a
pod n/a
total 505 553 91.3


line stmt bran cond sub pod time code
1             #define PERL_NO_GET_CONTEXT
2             #define NO_XSLOCKS
3             #include "EXTERN.h"
4             #include "perl.h"
5             #include "XSUB.h"
6              
7             #include "ppport.h"
8              
9             #define NUM_LATTICES 4
10              
11             /*
12             * This program must deal with integers that are too big to be
13             * represented by 32 bits.
14             *
15             * They are represented by AM_BIG_INT, which is typedef'd to
16             *
17             * unsigned long a[8]
18             *
19             * where each a[i] < 2*16. Such an array represents the integer
20             *
21             * a[0] + a[1] * 2^16 + ... + a[7] * 2^(7*16).
22             *
23             * We only use 16 bits of the unsigned long instead of 32, so that
24             * when we add or multiply two large integers, we have room for overflow.
25             * After any addition or multiplication, the result is carried so that
26             * each element of the array is again < 2*16.
27             *
28             * Someday I may rewrite this in assembler.
29             *
30             */
31             typedef unsigned short AM_SHORT;
32             typedef unsigned long AM_LONG;
33             typedef AM_LONG AM_BIG_INT[8];
34              
35             #define high_bits(x) x >> 16
36             #define low_bits(x) x & 0xffff
37              
38             #define carry(var, ind) \
39             var[ind + 1] += high_bits(var[ind]); \
40             var[ind] = low_bits(var[ind])
41              
42             /* carry macro for AM_BIG_INT pointers */
43             #define carry_pointer(p) \
44             *(p + 1) += high_bits(*(p)); \
45             *(p) = low_bits(*(p))
46              
47             #define carry_replace(var, ind) \
48             var[ind + 1] = high_bits(var[ind]); \
49             var[ind] = low_bits(var[ind])
50              
51             /*
52             * structure for the supracontexts
53             *
54             */
55              
56             typedef struct AM_supra {
57             /* list of subcontexts in this supracontext
58             *
59             * data[0] is the number of subcontexts in
60             * the array;
61             *
62             * data[1] is always 0 (useful for finding
63             * intersections; see below)
64             *
65             * data[i] is not an actually subcontext
66             * label; instead, all the subcontext labels
67             * are kept in an array called subcontext
68             * (bad choice of name?) created in
69             * function _fillandcount(). Thus, the
70             * actual subcontexts in the supracontext
71             * are subcontext[data[2]], ...
72             *
73             * data[i] < data[i+1] if i > 1 and
74             * i < data[0].
75             *
76             * Using an array of increasing positive
77             * integers makes it easy to take
78             * intersections (see lattice.pod).
79             */
80             AM_SHORT *data;
81              
82             /* number of supracontexts that contain
83             * precisely these subcontexts;
84             *
85             * According to the AM algorithm, we're
86             * supposed to look at all the homogeneous
87             * supracontexts to compute the analogical
88             * set. Instead of traversing the
89             * supracontextual lattice to find them, we
90             * can instead traverse the list of AM_SUPRA
91             * with count > 0 and use the value of count
92             * to do our computing.
93             *
94             * Since we're actually traversing four
95             * small lattices and taking intersections,
96             * we'll be multiplying the four values of
97             * count to get what we want.
98             *
99             */
100             AM_SHORT count;
101              
102             /*
103             * used to implement two linked lists
104             *
105             * One linked list contains all the nonempty
106             * supracontexts (i.e., data[0] is not 0).
107             * This linked list is in fact circular.
108             *
109             * One linked list contains all the unused
110             * memory that can be used for new
111             * supracontexts.
112             */
113             AM_SHORT next;
114              
115             /*
116             * used during the filling of the
117             * supracontextual lattice (see below)
118             */
119             unsigned char touched;
120             } AM_SUPRA;
121              
122             /*
123             * There is quite a bit of data that must pass between AM.pm and
124             * AM.xs. Instead of repeatedly passing it back and forth on
125             * the argument stack, AM.pm sends references to the variables
126             * holding this shared data, by calling _xs_initialize() (defined later
127             * on). These pointers are then stored in the following structure,
128             * which is put into the magic part of $self (since $self is an HV,
129             * it is perforce an SvPVMG as well).
130             *
131             * Note that for arrays, we store a pointer to the array data itself,
132             * not the AV*. That means that in AM.pm, we have to be careful
133             * how we make assignments to array variables; a reassignment such as
134             *
135             * @sum = (pack "L!8", 0, 0, 0, 0, 0, 0, 0, 0) x @sum;
136             *
137             * breaks everything because the pointer stored here then won't point
138             * to the actual data anymore. That's why the appropriate line in
139             * AM.pm is
140             *
141             * foreach (@sum) {
142             * $_ = pack "L!8", 0, 0, 0, 0, 0, 0, 0, 0;
143             * }
144             *
145             * Most of the identifiers in the struct have the same names as the
146             * variables created in AM.pm and are documented there. Those
147             * that don't are documented below.
148             *
149             * This trick of storing pointers like this is borrowed from the
150             * source code of Perl/Tk. Thanks, Nick!
151             *
152             */
153              
154             typedef struct AM_guts {
155              
156             /*
157             * Let i be an integer from 0 to 3; this represents which of the
158             * four sublattices we are considering.
159             *
160             * Let lattice = lptr[i] and supralist = sptr[i]; then lattice and
161             * supralist taken together tell us which subcontexts are in a
162             * particular supracontext. If s is the label of a supracontext,
163             * then it contains the subcontexts listed in
164             * supralist[lattice[s]].data[].
165             *
166             */
167              
168             AM_SHORT *lptr[NUM_LATTICES];
169             AM_SUPRA *sptr[NUM_LATTICES];
170              
171             /* array ref containing number of active features in
172             * each lattice (currently we us four lattices)
173             */
174             SV **lattice_sizes;
175             /* array ref containing class labels for whole data set;
176             * array index is data item index in data set.
177             */
178             SV **classes;
179             /* ??? */
180             SV **itemcontextchain;
181             /* ??? */
182             HV *itemcontextchainhead;
183             /* Maps subcontext binary labels to class indices */
184             HV *context_to_class;
185             /* Maps binary context labels to the number of training items
186             * contained in that subcontext
187             */
188             HV *contextsize;
189             /* Maps binary context labels to the number of pointers to each,
190             * or to the number of pointers to class label if heterogenous.
191             * The key 'grandtotal' maps to the total number of pointers.
192             */
193             HV *pointers;
194             /* Maps binary context labels to the size of the gang effect of
195             * that context. A gang effect is the number of pointers in
196             * the given context multiplied by the number training items
197             * contained in the context.
198             */
199             HV *gang;
200             /* number of pointers to each class label;
201             * keys are class indices and values are numbers
202             * of pointers (AM_BIG_INT).
203             */
204             SV **sum;
205             /*
206             * contains the total number of possible class labels;
207             * used for computing gang effects.
208             */
209             IV num_classes;
210             } AM_GUTS;
211              
212             /*
213             * A function and a vtable necessary for the use of Perl magic
214             */
215              
216 192           static int AMguts_mgFree(pTHX_ SV *sv, MAGIC *mg) {
217             int i;
218 192           AM_GUTS *guts = (AM_GUTS *) SvPVX(mg->mg_obj);
219 960 100         for (i = 0; i < NUM_LATTICES; ++i) {
220 768           Safefree(guts->lptr[i]);
221 768           Safefree(guts->sptr[i][0].data);
222 768           Safefree(guts->sptr[i]);
223             }
224 192           return 0;
225             }
226              
227             MGVTBL AMguts_vtab = {
228             NULL,
229             NULL,
230             NULL,
231             NULL,
232             AMguts_mgFree
233             };
234              
235             /*
236             * arrays used in the change-of-base portion of normalize(SV *s)
237             * they are initialized in BOOT
238             *
239             */
240              
241             AM_LONG tens[16]; /* 10, 10*2, 10*4, ... */
242             AM_LONG ones[16]; /* 1, 1*2, 1*4, ... */
243              
244             /*
245             * function: normalize(SV *s)
246             *
247             * s is an SvPV whose PV* is a unsigned long array representing a very
248             * large integer
249             *
250             * this function modifies s so that its NV is the floating point
251             * representation of the very large integer value, while its PV* is
252             * the decimal representation of the very large integer value in ASCII
253             * (cool, a double-valued scalar)
254             *
255             * computing the NV is straightforward
256             *
257             * computing the PV is done using the old change-of-base algorithm:
258             * repeatedly divide by 10, and use the remainders to construct the
259             * ASCII digits from least to most significant
260             *
261             */
262              
263 6243           normalize(pTHX_ SV *s) {
264             AM_LONG dspace[10];
265             AM_LONG qspace[10];
266             char outspace[55];
267             AM_LONG *dividend, *quotient, *dptr, *qptr;
268             char *outptr;
269 6243           unsigned int outlength = 0;
270 6243           AM_LONG *p = (AM_LONG *) SvPVX(s);
271 6243           STRLEN length = SvCUR(s) / sizeof(AM_LONG);
272             /* TODO: is this required to be a certain number of bits?*/
273 6243           long double nn = 0;
274             int j;
275              
276             /* you can't put the for block in {}, or it doesn't work
277             * ask me for details some time
278             * TODO: is this still necessary? (Nate)
279             */
280 56187 100         for (j = 8; j; --j){
281             /* 2^16 * nn + p[j-1] */
282 49944           nn = 65536.0 * nn + (double) *(p + j - 1);
283             }
284              
285 6243           dividend = &dspace[0];
286 6243           quotient = &qspace[0];
287 6243 50         Copy(p, dividend, length, sizeof(AM_LONG));
288             /* Magic number here... */
289 6243           outptr = outspace + 54;
290              
291             while (1) {
292 20647           AM_LONG *temp, carry = 0;
293 70591 100         while (length && (*(dividend + length - 1) == 0)) --length;
    100          
294 20647 100         if (length == 0) {
295 6243           sv_setpvn(s, outptr, outlength);
296 6243           break;
297             }
298 14404           dptr = dividend + length - 1;
299 14404           qptr = quotient + length - 1;
300 28812 100         while (dptr >= dividend) {
301             unsigned int i;
302 14408           *dptr += carry << 16;
303 14408           *qptr = 0;
304 244936 100         for (i = 16; i; ) {
305 230528           --i;
306 230528 100         if (tens[i] <= *dptr) {
307 17580           *dptr -= tens[i];
308 17580           *qptr += ones[i];
309             }
310             }
311 14408           carry = *dptr;
312 14408           --dptr;
313 14408           --qptr;
314             }
315 14404           --outptr;
316 14404           *outptr = (char) (0x30 + *dividend) & 0x00ff;
317 14404           ++outlength;
318 14404           temp = dividend;
319 14404           dividend = quotient;
320 14404           quotient = temp;
321 14404           }
322              
323 6243           SvNVX(s) = nn;
324 6243           SvNOK_on(s);
325 6243           }
326              
327             /* Given 2 lists of training item indices sorted in descending order,
328             * fill a third list with the intersection of items in these lists.
329             * This is a simple intersection, and no check for heterogeneity is
330             * performed.
331             * Return the next empty (available) index address in the third list.
332             * If the two lists have no intersection, then the return value is
333             * just the same as the third input.
334             */
335 9227           unsigned short *intersect_supras(
336             AM_SHORT *i, AM_SHORT *j, AM_SHORT *k){
337             AM_SHORT *temp;
338             while (1) {
339 343093 100         while (*i > *j)
340 221153           --i;
341 121940 100         if (*i == 0) break;
342 112713 100         if (*i < *j) {
343 33130           temp = i;
344 33130           i = j;
345 33130           j = temp;
346 33130           continue;
347             }
348 79583           *k = *i;
349 79583           --i;
350 79583           --j;
351 79583           --k;
352 112713           }
353 9227           return k;
354             }
355             /* The first three inputs are the same as for intersect_supra above,
356             * and the fourth paramater should be a list containing the class
357             * index for all of the training items. In addition to combining
358             * the first two lists into the third via intersection, the final
359             * list is checked for heterogeneity and the non-deterministic
360             * heterogeneous supracontexts are removed.
361             * The return value is the number of items contained in the resulting
362             * list.
363             */
364 33728           AM_SHORT intersect_supras_final(
365             AM_SHORT *i, AM_SHORT *j,
366             AM_SHORT *intersect, AM_SHORT *subcontext_class){
367 33728           AM_SHORT class = 0;
368 33728           AM_SHORT length = 0;
369             AM_SHORT *temp;
370             while (1) {
371 699168 100         while (*i > *j)
372 516500           --i;
373 182668 100         if (*i == 0)
374 21261           break;
375 161407 100         if (*i < *j) {
376 91372           temp = i;
377 91372           i = j;
378 91372           j = temp;
379 91372           continue;
380             }
381 70035           *intersect = *i;
382 70035           ++intersect;
383 70035           ++length;
384              
385             /* is it heterogeneous? */
386 70035 100         if (class == 0) {
387             /* is it not deterministic? */
388 29851 100         if (length > 1) {
389 1763           length = 0;
390 1763           break;
391             } else {
392 28088           class = subcontext_class[*i];
393             }
394             } else {
395             /* Do the classes not match? */
396 40184 100         if (class != subcontext_class[*i]) {
397 10704           length = 0;
398 10704           break;
399             }
400             }
401 57568           --i;
402 57568           --j;
403 148940           }
404 33728           return length;
405             }
406              
407             MODULE = Algorithm::AM PACKAGE = Algorithm::AM
408              
409             BOOT:
410             {
411 10           AM_LONG ten = 10;
412 10           AM_LONG one = 1;
413 10           AM_LONG *tensptr = &tens[0];
414 10           AM_LONG *onesptr = &ones[0];
415             unsigned int i;
416 170 100         for (i = 16; i; i--) {
417 160           *tensptr = ten;
418 160           *onesptr = one;
419 160           ++tensptr;
420 160           ++onesptr;
421 160           ten <<= 1;
422 160           one <<= 1;
423             }
424             }
425              
426             /*
427             * This function is called by from AM.pm right after creating
428             * a blessed reference to Algorithm::AM. It stores the necessary
429             * pointers in the AM_GUTS structure and attaches it to the magic
430             * part of thre reference.
431             *
432             */
433              
434             void
435             _xs_initialize(...)
436             PREINIT:
437             HV *project;
438             AM_GUTS guts; /* NOT A POINTER THIS TIME! (let memory allocate automatically) */
439             SV **lattice_sizes;
440             SV *svguts;
441             MAGIC *mg;
442             int i;
443             PPCODE:
444             /* 9 arguments are passed to the _xs_initialize method: */
445             /* $self, the AM object */
446 192           project = (HV *) SvRV(ST(0));
447             /* For explanations on these, see the comments on AM_guts */
448 192           lattice_sizes = AvARRAY((AV *) SvRV(ST(1)));
449 192           guts.classes = AvARRAY((AV *) SvRV(ST(2)));
450 192           guts.itemcontextchain = AvARRAY((AV *) SvRV(ST(3)));
451 192           guts.itemcontextchainhead = (HV *) SvRV(ST(4));
452 192           guts.context_to_class = (HV *) SvRV(ST(5));
453 192           guts.contextsize = (HV *) SvRV(ST(6));
454 192           guts.pointers = (HV *) SvRV(ST(7));
455 192           guts.gang = (HV *) SvRV(ST(8));
456 192           guts.sum = AvARRAY((AV *) SvRV(ST(9)));
457 192           guts.num_classes = av_len((AV *) SvRV(ST(9)));
458              
459             /*
460             * Since the sublattices are small, we just take a chunk of memory
461             * here that will be large enough for our purposes and do the actual
462             * memory allocation within the code; this reduces the overhead of
463             * repeated system calls.
464             *
465             */
466              
467 960 100         for (i = 0; i < NUM_LATTICES; ++i) {
468 768           UV v = SvUVX(lattice_sizes[i]);
469 768 50         Newz(0, guts.lptr[i], 1 << v, AM_SHORT);
470 768 50         Newz(0, guts.sptr[i], 1 << (v + 1), AM_SUPRA); /* CHANGED */
471 768           Newz(0, guts.sptr[i][0].data, 2, AM_SHORT);
472             }
473              
474             /* Perl magic invoked here */
475              
476 192           svguts = newSVpv((char *) &guts, sizeof(AM_GUTS));
477 192           sv_magic((SV *) project, svguts, PERL_MAGIC_ext, NULL, 0);
478 192           SvRMAGICAL_off((SV *) project);
479 192           mg = mg_find((SV *) project, PERL_MAGIC_ext);
480 192           mg->mg_virtual = &AMguts_vtab;
481 192           mg_magical((SV *) project);
482              
483             void
484             _fillandcount(...)
485             PREINIT:
486             HV *project;
487             UV linear_flag;
488             AM_GUTS *guts;
489             MAGIC *mg;
490             SV **lattice_sizes_input;
491             AM_SHORT lattice_sizes[NUM_LATTICES];
492             AM_SHORT **lptr;
493             AM_SUPRA **sptr;
494             AM_SHORT nptr[NUM_LATTICES];/* this helps us manage the free list in sptr[i] */
495             AM_SHORT subcontextnumber;
496             AM_SHORT *subcontext;
497             AM_SHORT *subcontext_class;
498             SV **classes, **itemcontextchain, **sum;
499             HV *itemcontextchainhead, *context_to_class, *contextsize, *pointers, *gang;
500             IV num_classes;
501             HE *he;
502 194           AM_BIG_INT grandtotal = {0, 0, 0, 0, 0, 0, 0, 0};
503             SV *tempsv;
504             int chunk, i;
505             AM_SHORT gaps[16];
506             AM_SHORT *intersect, *intersectlist;
507             AM_SHORT *intersectlist2, *intersectlist3, *ilist2top, *ilist3top;
508             PPCODE:
509             /* Input args are the AM object ($self), number of features
510             * perl lattice, and a flag to indicate whether to count occurrences
511             * (true) or pointers (false), also known as linear/quadratic.
512             */
513 194           project = (HV *) SvRV(ST(0));
514 194           lattice_sizes_input = AvARRAY((AV *) SvRV(ST(1)));
515 194           linear_flag = SvUVX(ST(2));
516 194           mg = mg_find((SV *) project, PERL_MAGIC_ext);
517 194           guts = (AM_GUTS *) SvPVX(mg->mg_obj);
518              
519             /*
520             * We initialize the memory for the sublattices, including setting up the
521             * linked lists.
522             *
523             */
524              
525 194           lptr = guts->lptr;
526 194           sptr = guts->sptr;
527 970 100         for (chunk = 0; chunk < NUM_LATTICES; ++chunk) {
528             /* Extract numeric values for the specified lattice_sizes */
529 776           lattice_sizes[chunk] = (AM_SHORT) SvUVX(lattice_sizes_input[chunk]);
530             /* TODO: explain the lines below */
531 776 50         Zero(lptr[chunk], 1 << lattice_sizes[chunk], AM_SHORT);
532 776           sptr[chunk][0].next = 0;
533 776           nptr[chunk] = 1;
534 7048 100         for (i = 1; i < 1 << (lattice_sizes[chunk] + 1); ++i) /* CHANGED */
535 6272           sptr[chunk][i].next = (AM_SHORT) i + 1;
536             }
537              
538             /*
539             * Instead of adding subcontext labels directly to the supracontexts,
540             * we store all of these labels in an array called subcontext. We
541             * then store the array indices of the subcontext labels in the
542             * supracontexts. That means the list of subcontexts in the
543             * supracontexts is an increasing sequence of positive integers, handy
544             * for taking intersections (see lattice.pod).
545             *
546             * The index into the array is called subcontextnumber.
547             *
548             * The array of matching classes is called subcontext_class.
549             *
550             */
551              
552 194           context_to_class = guts->context_to_class;
553 194 50         subcontextnumber = (AM_SHORT) HvUSEDKEYS(context_to_class);
554 194 50         Newz(0, subcontext, NUM_LATTICES * (subcontextnumber + 1), AM_SHORT);
555 194           subcontext += NUM_LATTICES * subcontextnumber;
556 194 50         Newz(0, subcontext_class, subcontextnumber + 1, AM_SHORT);
557 194           subcontext_class += subcontextnumber;
558 194 50         Newz(0, intersectlist, subcontextnumber + 1, AM_SHORT);
559 194 50         Newz(0, intersectlist2, subcontextnumber + 1, AM_SHORT);
560 194           ilist2top = intersectlist2 + subcontextnumber;
561 194 50         Newz(0, intersectlist3, subcontextnumber + 1, AM_SHORT);
562 194           ilist3top = intersectlist3 + subcontextnumber;
563              
564 194           hv_iterinit(context_to_class);
565 8382 100         while (he = hv_iternext(context_to_class)) {
566 8188           AM_SHORT *contextptr = (AM_SHORT *) HeKEY(he);
567 8188           AM_SHORT class = (AM_SHORT) SvUVX(HeVAL(he));
568 40940 100         for (chunk = 0; chunk < NUM_LATTICES; ++chunk, ++contextptr) {
569 32752           AM_SHORT active = lattice_sizes[chunk];
570 32752           AM_SHORT *lattice = lptr[chunk];
571 32752           AM_SUPRA *supralist = sptr[chunk];
572 32752           AM_SHORT nextsupra = nptr[chunk];
573 32752           AM_SHORT context = *contextptr;
574             AM_SUPRA *p, *c;
575             AM_SHORT pi, ci;
576 32752           AM_SHORT d, t, tt, numgaps = 0;
577              
578             /* We want to add subcontextnumber to the appropriate
579             * supracontexts in the four smaller lattices.
580             *
581             * Suppose we want to add subcontextnumber to the supracontext
582             * labeled by d. supralist[lattice[d]] is an AM_SUPRA which
583             * reflects the current state of the supracontext. Suppose this
584             * state is
585             *
586             * data: 2 0 x y (i.e., currently contains two subcontexts)
587             * count: 5
588             * next: 7
589             * touched: 0
590             *
591             * Then we pluck an unused AM_SUPRA off of the free list;
592             * suppose that it's located at supralist[9] (the variable
593             * nextsupra tells us where). Then supralist[lattice[d]] will
594             * change to
595             *
596             * data: 2 0 x y
597             * count: 4 (decrease by 1)
598             * next: 9
599             * touched: 1
600             *
601             * and supralist[9] will become
602             *
603             * data: 3 0 subcontextnumber x y (now has three subcontexts)
604             * count: 1
605             * next: 7
606             * touched: 0
607             *
608             * (note: the entries in data[] are added in decreasing order)
609             *
610             *
611             * If, on the other hand, if supralist[lattice[d]] looks like
612             *
613             * data: 2 0 x y
614             * count: 8
615             * next: 11
616             * touched: 1
617             *
618             * that means that supralist[11] must look something like
619             *
620             * data: 3 0 subcontextnumber x y
621             * count: 4
622             * next: 2
623             * touched: 0
624             *
625             * There already exists a supracontext with subcontextnumber
626             * added in! So we change supralist[lattice[d]] to
627             *
628             * data: 2 0 x y
629             * count: 7 (decrease by 1)
630             * next: 11
631             * touched: 1
632             *
633             * change supralist[11] to
634             *
635             * data: 3 0 subcontextnumber x y
636             * count: 5 (increase by 1)
637             * next: 2
638             * touched: 0
639             *
640             * and set lattice[d] = 11.
641             */
642              
643 32752           subcontext[chunk] = context;
644              
645 32752 100         if (context == 0) {
646 28745 100         for (p = supralist + supralist->next;
647 22381           p != supralist; p = supralist + p->next) {
648             AM_SHORT *data;
649 22381 50         Newz(0, data, p->data[0] + 3, AM_SHORT);
650 22381           Copy(p->data + 2, data + 3, p->data[0], AM_SHORT);
651 22381           data[2] = subcontextnumber;
652 22381           data[0] = p->data[0] + 1;
653 22381           Safefree(p->data);
654 22381           p->data = data;
655             }
656 6364 100         if (lattice[context] == 0) {
657              
658             /* in this case, the subcontext will be
659             * added to all supracontexts, so there's
660             * no need to hassle with a Gray code and
661             * move pointers
662             */
663              
664 732           AM_SHORT count = 0;
665 732           ci = nptr[chunk];
666 732           nptr[chunk] = supralist[ci].next;
667 732           c = supralist + ci;
668 732           c->next = supralist->next;
669 732           supralist->next = ci;
670 732           Newz(0, c->data, 3, AM_SHORT);
671 732           c->data[2] = subcontextnumber;
672 732           c->data[0] = 1;
673 3723 100         for (i = 0; i < (1 << active); ++i) {
674 2991 100         if (lattice[i] == 0) {
675 1616           lattice[i] = ci;
676 1616           ++count;
677             }
678             }
679 732           c->count = count;
680             }
681 6364           continue;
682             }
683              
684             /* set up traversal using Gray code */
685 26388           d = context;
686 81785 100         for (i = 1 << (active - 1); i; i >>= 1)
687 55397 100         if (!(i & context))
688 15490           gaps[numgaps++] = i;
689 26388           t = 1 << numgaps;
690              
691 26388           p = supralist + (pi = lattice[context]);
692 26388 100         if (pi)
693 25342           --(p->count);
694 26388           ci = nextsupra;
695 26388           nextsupra = supralist[ci].next;
696 26388           p->touched = 1;
697 26388           c = supralist + ci;
698 26388           c->touched = 0;
699 26388           c->next = p->next;
700 26388           p->next = ci;
701 26388           c->count = 1;
702 26388 50         Newz(0, c->data, p->data[0] + 3, AM_SHORT);
703 26388           Copy(p->data + 2, c->data + 3, p->data[0], AM_SHORT);
704 26388           c->data[2] = subcontextnumber;
705 26388           c->data[0] = p->data[0] + 1;
706 26388           lattice[context] = ci;
707              
708             /* traverse */
709 43317 100         while (--t) {
710             /* find the rightmost 1 in t; from HAKMEM, I believe */
711 18368 100         for (i = 0, tt = ~t & (t - 1); tt; tt >>= 1, ++i)
712             ;
713 16929           d ^= gaps[i];
714              
715 16929           p = supralist + (pi = lattice[d]);
716 16929 100         if (pi)
717 16404           --(p->count);
718 16929           switch (p->touched) {
719             case 1:
720 1158           ++supralist[lattice[d] = p->next].count;
721 1158           break;
722             case 0:
723 15771           ci = nextsupra;
724 15771           nextsupra = supralist[ci].next;
725 15771           p->touched = 1;
726 15771           c = supralist + ci;
727 15771           c->touched = 0;
728 15771           c->next = p->next;
729 15771           p->next = ci;
730 15771           c->count = 1;
731 15771 50         Newz(0, c->data, p->data[0] + 3, AM_SHORT);
732 15771           Copy(p->data + 2, c->data + 3, p->data[0], AM_SHORT);
733 15771           c->data[2] = subcontextnumber;
734 15771           c->data[0] = p->data[0] + 1;
735 15771           lattice[d] = ci;
736             }
737             }
738              
739             /* Here we return all AM_SUPRA with count 0 back to the free
740             * list and set touched = 0 for all remaining.
741             */
742              
743 26388           p = supralist;
744 26388           p->touched = 0;
745             do {
746 136061 100         if (supralist[i = p->next].count == 0) {
747 39980           Safefree(supralist[i].data);
748 39980           p->next = supralist[i].next;
749 39980           supralist[i].next = nextsupra;
750 39980           nextsupra = (AM_SHORT) i;
751             } else {
752 96081           p = supralist + p->next;
753 96081           p->touched = 0;
754             }
755 136061 100         } while (p->next);
756 26388           nptr[chunk] = nextsupra;
757             }/*end for(chunk = 0...*/
758 8188           subcontext -= NUM_LATTICES;
759 8188           *subcontext_class = class;
760 8188           --subcontext_class;
761 8188           --subcontextnumber;
762             }/*end while (he = hv_iternext(...*/
763              
764 194           contextsize = guts->contextsize;
765 194           pointers = guts->pointers;
766              
767             /*
768             * The code is in three parts:
769             *
770             * 1. We successively take one nonempty supracontext from each of the
771             * four small lattices and take their intersection to find a
772             * supracontext of the big lattice. If at any point we get the
773             * empty set, we move on.
774             *
775             * 2. We determine if the supracontext so found is heterogeneous; if
776             * so, we skip it.
777             *
778             * 3. Otherwise, we count the pointers or occurrences.
779             *
780             */
781             {
782             AM_SUPRA *p0, *p1, *p2, *p3;
783             AM_SHORT length;
784             AM_SHORT *k;
785              
786             /* find intersections */
787 824 100         for (p0 = sptr[0] + sptr[0]->next; p0 != sptr[0]; p0 = sptr[0] + p0->next) {
788 2751 100         for (p1 = sptr[1] + sptr[1]->next; p1 != sptr[1]; p1 = sptr[1] + p1->next) {
789             /*Find intersection between p0 and p2*/
790 2121           k = intersect_supras(
791 2121           p0->data + p0->data[0] + 1,
792 2121           p1->data + p1->data[0] + 1,
793             ilist2top
794             );
795             /* If k has not been increased then intersection was empty */
796 2121 100         if (k == ilist2top)
797 154           continue;
798 1967           *k = 0;
799              
800 9073 100         for (p2 = sptr[2] + sptr[2]->next; p2 != sptr[2]; p2 = sptr[2] + p2->next) {
801              
802             /*Find intersection between previous intersection and p2*/
803 7106           k = intersect_supras(
804             ilist2top,
805 7106           p2->data + p2->data[0] + 1,
806             ilist3top
807             );
808             /* If k has not been increased then intersection was empty */
809 7106 100         if (k == ilist3top)
810 694           continue;
811 6412           *k = 0;
812              
813 40140 100         for (p3 = sptr[3] + sptr[3]->next; p3 != sptr[3]; p3 = sptr[3] + p3->next) {
814              
815             /* Find intersection between previous intersection and p3;
816             * check for disqualified supras this time.
817             */
818 33728           length = intersect_supras_final(
819             ilist3top,
820 33728           p3->data + p3->data[0] + 1,
821             intersectlist,
822             subcontext_class
823             );
824              
825             /* count occurrences */
826 33728 100         if (length) {
827             AM_SHORT i;
828 15621           AM_BIG_INT count = {0, 0, 0, 0, 0, 0, 0, 0};
829 15621           AM_LONG mask = 0xffff;
830              
831 15621           count[0] = p0->count;
832              
833 15621           count[0] *= p1->count;
834 15621           carry(count, 0);
835              
836 15621           count[0] *= p2->count;
837 15621           count[1] *= p2->count;
838 15621           carry(count, 0);
839 15621           carry(count, 1);
840              
841 15621           count[0] *= p3->count;
842 15621           count[1] *= p3->count;
843 15621           count[2] *= p3->count;
844 15621           carry(count, 0);
845 15621           carry(count, 1);
846 15621           carry(count, 2);
847 15621 100         if(!linear_flag){
848             /* If scoring is pointers (quadratic) instead of linear*/
849 15601           AM_LONG pointercount = 0;
850 50558 100         for (i = 0; i < length; ++i)
851 34957 50         pointercount += (AM_LONG) SvUV(*hv_fetch(contextsize,
852             (char *) (subcontext + (NUM_LATTICES * intersectlist[i])), 8, 0));
853 15601 50         if (pointercount & 0xffff0000) {
854 0           AM_SHORT pchi = (AM_SHORT) (high_bits(pointercount));
855 0           AM_SHORT pclo = (AM_SHORT) (low_bits(pointercount));
856             AM_LONG hiprod[6];
857 0           hiprod[1] = pchi * count[0];
858 0           hiprod[2] = pchi * count[1];
859 0           hiprod[3] = pchi * count[2];
860 0           hiprod[4] = pchi * count[3];
861 0           count[0] *= pclo;
862 0           count[1] *= pclo;
863 0           count[2] *= pclo;
864 0           count[3] *= pclo;
865 0           carry(count, 0);
866 0           carry(count, 1);
867 0           carry(count, 2);
868 0           carry(count, 3);
869              
870 0           count[1] += hiprod[1];
871 0           count[2] += hiprod[2];
872 0           count[3] += hiprod[3];
873 0           count[4] += hiprod[4];
874 0           carry(count, 1);
875 0           carry(count, 2);
876 0           carry(count, 3);
877 0           carry(count, 4);
878             } else {
879 15601           count[0] *= pointercount;
880 15601           count[1] *= pointercount;
881 15601           count[2] *= pointercount;
882 15601           count[3] *= pointercount;
883 15601           carry(count, 0);
884 15601           carry(count, 1);
885 15601           carry(count, 2);
886 15601           carry(count, 3);
887             }
888             }
889 50611 100         for (i = 0; i < length; ++i) {
890             int j;
891             SV *tempsv;
892             AM_LONG *p;
893 34990           tempsv = *hv_fetch(pointers,
894             (char *) (subcontext + (NUM_LATTICES * intersectlist[i])), 8, 1);
895 34990 100         if (!SvPOK(tempsv)) {
896 2745 50         SvUPGRADE(tempsv, SVt_PVNV);
897 2745 50         SvGROW(tempsv, 8 * sizeof(AM_LONG) + 1);
    50          
898 2745           Zero(SvPVX(tempsv), 8, AM_LONG);
899 2745           SvCUR_set(tempsv, 8 * sizeof(AM_LONG));
900 2745           SvPOK_on(tempsv);
901             }
902 34990           p = (AM_LONG *) SvPVX(tempsv);
903 279920 100         for (j = 0; j < 7; ++j) {
904 244930           *(p + j) += count[j];
905 244930           carry_pointer(p + j);
906             }
907             }/* end for (i = 0;... */
908             }/* end if (length) */
909             }/* end for (p3 = sptr[3]... */
910             }/* end for (p2 = sptr[2]... */
911             }/* end for (p1 = sptr[1]... */
912             }/* end for (p0 = sptr[0]... */
913             /* clear out the supracontexts */
914 824 100         for (p0 = sptr[0] + sptr[0]->next; p0 != sptr[0]; p0 = sptr[0] + p0->next)
915 630           Safefree(p0->data);
916 833 100         for (p1 = sptr[1] + sptr[1]->next; p1 != sptr[1]; p1 = sptr[1] + p1->next)
917 639           Safefree(p1->data);
918 876 100         for (p2 = sptr[2] + sptr[2]->next; p2 != sptr[2]; p2 = sptr[2] + p2->next)
919 682           Safefree(p2->data);
920 1154 100         for (p3 = sptr[3] + sptr[3]->next; p3 != sptr[3]; p3 = sptr[3] + p3->next)
921 960           Safefree(p3->data);
922              
923             /*
924             * compute analogical set and gang effects
925             *
926             * Technically, we don't compute the analogical set; instead, we
927             * compute how many pointers/occurrences there are for each of the
928             * data items in a particular subcontext, and associate that number
929             * with the subcontext label, not directly with the data item. We can
930             * do this because if two data items are in the same subcontext, they
931             * will have the same number of pointers/occurrences.
932             *
933             * If the user wants the detailed analogical set, it will be created
934             * in Result.pm.
935             *
936             */
937              
938 194           gang = guts->gang;
939 194           classes = guts->classes;
940 194           itemcontextchain = guts->itemcontextchain;
941 194           itemcontextchainhead = guts->itemcontextchainhead;
942 194           sum = guts->sum;
943 194           num_classes = guts->num_classes;
944 194           hv_iterinit(pointers);
945 2939 100         while (he = hv_iternext(pointers)) {
946             AM_LONG count;
947             AM_SHORT counthi, countlo;
948             AM_BIG_INT p;
949             AM_BIG_INT gangcount;
950             AM_SHORT this_class;
951             SV *dataitem;
952 2745           Copy(SvPVX(HeVAL(he)), p, 8, AM_LONG);
953              
954 2745           tempsv = *hv_fetch(contextsize, HeKEY(he), NUM_LATTICES * sizeof(AM_SHORT), 0);
955 2745           count = (AM_LONG) SvUVX(tempsv);
956 2745           counthi = (AM_SHORT) (high_bits(count));
957 2745           countlo = (AM_SHORT) (low_bits(count));
958              
959             /* initialize 0 because it won't be overwritten */
960             /*
961             * TODO: multiply through p[7] into gangcount[7]
962             * and warn if there's potential overflow
963             */
964 2745           gangcount[0] = 0;
965 21960 100         for (i = 0; i < 7; ++i) {
966 19215           gangcount[i] += countlo * p[i];
967 19215           carry_replace(gangcount, i);
968             }
969 2745           gangcount[7] += countlo * p[7];
970              
971             /* TODO: why is element 0 not considered here? */
972 2745 50         if (counthi) {
973 0 0         for (i = 0; i < 6; ++i) {
974 0           gangcount[i + 1] += counthi * p[i];
975 0           carry(gangcount, i + 1);
976             }
977             }
978 21960 100         for (i = 0; i < 7; ++i) {
979 19215           grandtotal[i] += gangcount[i];
980 19215           carry(grandtotal, i);
981             }
982 2745           grandtotal[7] += gangcount[7];
983              
984 2745           tempsv = *hv_fetch(gang, HeKEY(he), NUM_LATTICES * sizeof(AM_SHORT), 1);
985 2745 50         SvUPGRADE(tempsv, SVt_PVNV);
986 2745           sv_setpvn(tempsv, (char *) gangcount, 8 * sizeof(AM_LONG));
987 2745           normalize(aTHX_ tempsv);
988 2745           normalize(aTHX_ HeVAL(he));
989              
990 2745           tempsv = *hv_fetch(context_to_class, HeKEY(he), NUM_LATTICES * sizeof(AM_SHORT), 0);
991 2745           this_class = (AM_SHORT) SvUVX(tempsv);
992 2745 100         if (this_class) {
993 2702           AM_LONG *s = (AM_LONG *) SvPVX(sum[this_class]);
994 21616 100         for (i = 0; i < 7; ++i) {
995 18914           *(s + i) += gangcount[i];
996 18914           carry_pointer(s + i);
997             }
998             } else {
999 43           dataitem = *hv_fetch(itemcontextchainhead, HeKEY(he), NUM_LATTICES * sizeof(AM_SHORT), 0);
1000 2857 100         while (SvIOK(dataitem)) {
1001 112           IV datanum = SvIVX(dataitem);
1002 112           IV ocnum = SvIVX(classes[datanum]);
1003 112           AM_LONG *s = (AM_LONG *) SvPVX(sum[ocnum]);
1004 896 100         for (i = 0; i < 7; ++i) {
1005 784           *(s + i) += p[i];
1006 784           carry_pointer(s + i);
1007 784           dataitem = itemcontextchain[datanum];
1008             }
1009             }
1010             }
1011             }
1012 753 100         for (i = 1; i <= num_classes; ++i) normalize(aTHX_ sum[i])
1013             ;
1014 194           tempsv = *hv_fetch(pointers, "grandtotal", 10, 1);
1015 194 50         SvUPGRADE(tempsv, SVt_PVNV);
1016 194           sv_setpvn(tempsv, (char *) grandtotal, 8 * sizeof(AM_LONG));
1017 194           normalize(aTHX_ tempsv);
1018              
1019 194           Safefree(subcontext);
1020 194           Safefree(subcontext_class);
1021 194           Safefree(intersectlist);
1022 194           Safefree(intersectlist2);
1023 194           Safefree(intersectlist3);
1024             }