line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# /=====================================================================\ # |
2
|
|
|
|
|
|
|
# | NNexus Autolinker | # |
3
|
|
|
|
|
|
|
# | Named Entity Classification Module | # |
4
|
|
|
|
|
|
|
# |=====================================================================| # |
5
|
|
|
|
|
|
|
# | Part of the Planetary project: http://trac.mathweb.org/planetary | # |
6
|
|
|
|
|
|
|
# | Research software, produced as part of work done by: | # |
7
|
|
|
|
|
|
|
# | the KWARC group at Jacobs University | # |
8
|
|
|
|
|
|
|
# | Copyright (c) 2012 | # |
9
|
|
|
|
|
|
|
# | Released under the MIT License (MIT) | # |
10
|
|
|
|
|
|
|
# |---------------------------------------------------------------------| # |
11
|
|
|
|
|
|
|
# | Adapted from the original NNexus code by | # |
12
|
|
|
|
|
|
|
# | James Gardner and Aaron Krowne | # |
13
|
|
|
|
|
|
|
# |---------------------------------------------------------------------| # |
14
|
|
|
|
|
|
|
# | Deyan Ginev #_# | # |
15
|
|
|
|
|
|
|
# | http://kwarc.info/people/dginev (o o) | # |
16
|
|
|
|
|
|
|
# \=========================================================ooo==U==ooo=/ # |
17
|
|
|
|
|
|
|
package NNexus::Classification; |
18
|
6
|
|
|
6
|
|
21346
|
use strict; |
|
6
|
|
|
|
|
8
|
|
|
6
|
|
|
|
|
206
|
|
19
|
6
|
|
|
6
|
|
24
|
use warnings; |
|
6
|
|
|
|
|
6
|
|
|
6
|
|
|
|
|
147
|
|
20
|
6
|
|
|
6
|
|
21
|
use Exporter; |
|
6
|
|
|
|
|
37
|
|
|
6
|
|
|
|
|
333
|
|
21
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
22
|
|
|
|
|
|
|
our @EXPORT_OK = qw(msc_similarity disambiguate); |
23
|
6
|
|
|
6
|
|
34
|
use List::Util qw(max min); |
|
6
|
|
|
|
|
8
|
|
|
6
|
|
|
|
|
19610
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Let's do things differently here. |
26
|
|
|
|
|
|
|
# We will use Jan Wilken Doerrie's MSC similarity metric: |
27
|
|
|
|
|
|
|
# "we defined the similarity between two categories as the size of the |
28
|
|
|
|
|
|
|
# intersections divided by the size of the union over the ZBL dataset" |
29
|
|
|
|
|
|
|
our $msc_similarities = [ # 63x63 matrix, top-level MSC 2000 categories |
30
|
|
|
|
|
|
|
[1.00000000,0.08157364,0.05611695,0.01857567,0.00526125,0.00493453,0.02162828,0.01738086,0.01488942,0.01995192,0.01850128,0.01315144,0.01112859,0.00875912,0.00258325,0.01958233,0.01059040,0.02694878,0.00814224,0.01508530,0.00391065,0.01222424,0.00659643,0.01744963,0.02033044,0.01715241,0.00549930,0.00368565,0.00844784,0.01205280,0.00590242,0.00640019,0.00470555,0.01450557,0.01117073,0.01519435,0.01915299,0.00949692,0.02112821,0.01075672,0.00831446,0.01284788,0.02376349,0.01846751,0.02192419,0.02827359,0.06235569,0.01871924,0.02326626,0.02549734,0.01149905,0.00904571,0.02978857,0.01858157,0.01913664,0.00986617,0.00980736,0.02622097,0.02015345,0.02370473,0.02309777,0.02525110,0.06049483], |
31
|
|
|
|
|
|
|
[0.08157364,1.00000000,0.04843149,0.00532875,0.00205666,0.00135896,0.01867335,0.01162555,0.00454580,0.00914432,0.00561143,0.00339066,0.00233557,0.00318598,0.00112062,0.00596528,0.00474509,0.01721411,0.00592865,0.00682061,0.00180499,0.00344266,0.00441784,0.00357584,0.00276735,0.00367641,0.00180428,0.00557237,0.00315267,0.00397678,0.00230947,0.00158525,0.00194050,0.00436769,0.00245270,0.00323992,0.03266539,0.00462011,0.00786873,0.00592230,0.00625668,0.00554406,0.00367927,0.00894575,0.00961623,0.00339433,0.00442661,0.02332687,0.00304362,0.00367554,0.00783214,0.00359950,0.01357759,0.00583133,0.01961088,0.03410275,0.00370477,0.00360240,0.00517674,0.00367842,0.00215416,0.00470674,0.02079996], |
32
|
|
|
|
|
|
|
[0.05611695,0.04843149,1.00000000,0.01860533,0.09942421,0.05170597,0.01116272,0.02366804,0.01088601,0.00678883,0.00619870,0.01188854,0.00194541,0.03826328,0.00037826,0.03378654,0.00411333,0.01952391,0.02813212,0.00144777,0.00048211,0.00200346,0.00019596,0.00120727,0.00040013,0.00283891,0.00242860,0.00174449,0.00086392,0.00117239,0.00139450,0.00026806,0.00036410,0.01135132,0.00259196,0.00098411,0.00869706,0.00278418,0.00065182,0.04870472,0.00162055,0.00190805,0.00122017,0.01033098,0.00408488,0.00158937,0.11329349,0.00131901,0.00031955,0.00016804,0.00014428,0.00025776,0.01361661,0.00071465,0.00278636,0.00058838,0.00033706,0.01095038,0.02561634,0.00333059,0.00669844,0.02532072,0.00174317], |
33
|
|
|
|
|
|
|
[0.01857567,0.00532875,0.01860533,1.00000000,0.03854465,0.00695286,0.06147985,0.00381552,0.01111890,0.01036229,0.04460750,0.01316603,0.01265964,0.00515676,0.00079911,0.07192152,0.00451860,0.00357714,0.00334221,0.00286208,0.00143900,0.00147393,0.02179999,0.00186239,0.00088413,0.00484911,0.00313808,0.00180966,0.00263198,0.00221683,0.00284620,0.00101324,0.00022575,0.00341334,0.00269952,0.00114372,0.05704468,0.04862097,0.00177969,0.00785647,0.00544478,0.01902742,0.00199649,0.02516203,0.01832947,0.00795814,0.07579728,0.00124556,0.00085965,0.00014810,0.00034212,0.00039567,0.00564823,0.01156505,0.00036484,0.00003067,0.00026446,0.04216157,0.01448784,0.02353333,0.00356946,0.04458390,0.00179613], |
34
|
|
|
|
|
|
|
[0.00526125,0.00205666,0.09942421,0.03854465,1.00000000,0.13498574,0.00433243,0.01252955,0.02119390,0.00273427,0.01431677,0.03376084,0.00576282,0.05441052,0.00177224,0.05178723,0.00922929,0.00392307,0.01936738,0.00051527,0.00099050,0.00025145,0.00058066,0.00029417,0.00013702,0.00106777,0.00244524,0.00110532,0.00055186,0.00040533,0.00124399,0.00035144,0.00010310,0.01638482,0.00445366,0.00120617,0.01113947,0.01531835,0.00039070,0.06202833,0.00706150,0.00336538,0.00045634,0.00300478,0.00139167,0.00075193,0.01245292,0.00033423,0.00002379,0.00001954,0.00003722,0.00006753,0.00970181,0.00050743,0.00086061,0.00000000,0.00009151,0.00290705,0.00745701,0.00204881,0.00119390,0.02242817,0.00025653], |
35
|
|
|
|
|
|
|
[0.00493453,0.00135896,0.05170597,0.00695286,0.13498574,1.00000000,0.00108207,0.00591517,0.00605827,0.00141397,0.00741604,0.02646703,0.01336303,0.06039531,0.00035010,0.06902687,0.00574293,0.00052350,0.00053657,0.00002714,0.00000000,0.00000000,0.00005061,0.00000000,0.00003237,0.00004214,0.00101535,0.00008925,0.00014061,0.00003739,0.00007630,0.00016497,0.00004694,0.00062654,0.00021500,0.00000000,0.00378012,0.00184760,0.00017622,0.00733195,0.00199509,0.00039905,0.00009479,0.00026701,0.00007847,0.00065581,0.00679404,0.00022507,0.00002622,0.00001058,0.00000000,0.00009166,0.00075771,0.00011908,0.00010628,0.00000000,0.00000000,0.00028208,0.00056563,0.00035722,0.00077455,0.00193900,0.00064115], |
36
|
|
|
|
|
|
|
[0.02162828,0.01867335,0.01116272,0.06147985,0.00433243,0.00108207,1.00000000,0.07510125,0.02964458,0.12328037,0.02564406,0.01861066,0.01072190,0.00610098,0.01736422,0.04876552,0.03999814,0.01013660,0.01648549,0.02365574,0.00114353,0.01283800,0.04351610,0.00310510,0.00250883,0.01927488,0.00578289,0.01215249,0.00733156,0.01013116,0.00939150,0.00237590,0.00047886,0.00438082,0.00267569,0.00027081,0.01065916,0.02057247,0.00373513,0.00293046,0.00387222,0.00807042,0.00871024,0.00918703,0.00147008,0.01090241,0.01622746,0.00087207,0.00010859,0.00011596,0.00047994,0.00016857,0.00977301,0.00545278,0.00160478,0.00010979,0.00000000,0.00213201,0.00103991,0.00105356,0.00090291,0.04688816,0.00337673], |
37
|
|
|
|
|
|
|
[0.01738086,0.01162555,0.02366804,0.00381552,0.01252955,0.00591517,0.07510125,1.00000000,0.10327601,0.05552831,0.02090534,0.04187867,0.00738352,0.01105315,0.01170594,0.02435063,0.00429508,0.03557033,0.00178563,0.02655667,0.00033904,0.00973898,0.00672954,0.00693442,0.00078453,0.00241796,0.00796306,0.00207254,0.00332360,0.00160225,0.00290728,0.00200535,0.00017666,0.00755212,0.00168273,0.00028101,0.01917842,0.00421842,0.00106842,0.00377618,0.00492809,0.00231786,0.00275841,0.00050158,0.00015461,0.00975019,0.00727802,0.00089392,0.00012882,0.00002086,0.00009797,0.00021588,0.00095943,0.00042942,0.00054114,0.00007965,0.00000000,0.00062483,0.00020720,0.00027694,0.00276635,0.00486968,0.00339206], |
38
|
|
|
|
|
|
|
[0.01488942,0.00454580,0.01088601,0.01111890,0.02119390,0.00605827,0.02964458,0.10327601,1.00000000,0.14900523,0.02532176,0.09917895,0.01360819,0.05210757,0.01503290,0.03515368,0.00379712,0.00287966,0.00067947,0.00302452,0.00014375,0.02226528,0.00235095,0.00174760,0.00080893,0.00196637,0.00355291,0.00048213,0.00217216,0.00078930,0.00063203,0.00112325,0.00006953,0.00426375,0.00128080,0.00022091,0.00456440,0.00935890,0.00140085,0.00611161,0.01957090,0.00565262,0.00378400,0.00031160,0.00065168,0.00249695,0.00858768,0.00057049,0.00008359,0.00002941,0.00011305,0.00010245,0.00105292,0.00026795,0.00014010,0.00000000,0.00027875,0.00106974,0.00004685,0.00060664,0.00289897,0.00612092,0.00117948], |
39
|
|
|
|
|
|
|
[0.01995192,0.00914432,0.00678883,0.01036229,0.00273427,0.00141397,0.12328037,0.05552831,0.14900523,1.00000000,0.01372342,0.04089925,0.03216888,0.03963411,0.03271138,0.04915837,0.02431038,0.00392283,0.00104910,0.02161542,0.00081625,0.17399932,0.01041594,0.00381929,0.00396809,0.01526674,0.00204542,0.00011577,0.00177630,0.00079854,0.00312736,0.00160699,0.00015678,0.00310573,0.00173009,0.00057235,0.02585302,0.01645332,0.03430529,0.00272510,0.03361024,0.04910434,0.03047494,0.00050122,0.00031984,0.00272203,0.00552471,0.00299371,0.00008030,0.00011027,0.00051862,0.00008856,0.02816400,0.00223054,0.00878367,0.00005787,0.00002673,0.00080236,0.00028778,0.00047446,0.00134026,0.01790618,0.00051373], |
40
|
|
|
|
|
|
|
[0.01850128,0.00561143,0.00619870,0.04460750,0.01431677,0.00741604,0.02564406,0.02090534,0.02532176,0.01372342,1.00000000,0.04495687,0.01809097,0.00825840,0.00188975,0.03007862,0.00764406,0.01730886,0.00282589,0.01172287,0.00164933,0.00389351,0.01197920,0.01455147,0.00347447,0.00669667,0.01056136,0.00352644,0.00909528,0.01061630,0.00248232,0.00446782,0.00385592,0.01283188,0.03121173,0.00698622,0.02200911,0.01214765,0.01022627,0.00180644,0.00246790,0.00323702,0.00523446,0.01917991,0.01575161,0.06158560,0.00760292,0.00694049,0.00478471,0.00082096,0.00367486,0.00057709,0.01518055,0.01160495,0.00572667,0.00021805,0.00098801,0.01278296,0.00415772,0.00773684,0.04709159,0.01208866,0.00336093], |
41
|
|
|
|
|
|
|
[0.01315144,0.00339066,0.01188854,0.01316603,0.03376084,0.02646703,0.01861066,0.04187867,0.09917895,0.04089925,0.04495687,1.00000000,0.13736403,0.11612482,0.02659091,0.12470886,0.01323026,0.00047464,0.00044968,0.00112657,0.00009768,0.00573489,0.00318360,0.00051594,0.00070273,0.00211532,0.00196784,0.00059630,0.00037158,0.00027242,0.00282797,0.00033981,0.00015541,0.01651029,0.00546494,0.00018146,0.00558436,0.00267307,0.00489924,0.00526033,0.01933054,0.01241364,0.00974384,0.00055166,0.00006572,0.00038290,0.00337312,0.00055779,0.00002137,0.00000000,0.00000000,0.00000000,0.02064372,0.00222026,0.00081263,0.00003506,0.00000000,0.00023440,0.00009475,0.00013492,0.00134141,0.00240426,0.00027885], |
42
|
|
|
|
|
|
|
[0.01112859,0.00233557,0.00194541,0.01265964,0.00576282,0.01336303,0.01072190,0.00738352,0.01360819,0.03216888,0.01809097,0.13736403,1.00000000,0.03104971,0.00777443,0.05533815,0.10059134,0.00042804,0.00058611,0.00250784,0.00021171,0.01568708,0.02072003,0.00317848,0.00615162,0.02370322,0.00575064,0.00032687,0.00036216,0.00062237,0.00832029,0.00094027,0.00022213,0.02323206,0.00493813,0.00064563,0.01001453,0.00277862,0.03036682,0.00078355,0.01349284,0.02104511,0.03578595,0.00150099,0.00051297,0.00038859,0.00126309,0.00544643,0.00013874,0.00025775,0.00085371,0.00031220,0.10424722,0.01947444,0.00894971,0.00018670,0.00000000,0.00040926,0.00020963,0.00277772,0.00225085,0.00098062,0.00005553], |
43
|
|
|
|
|
|
|
[0.00875912,0.00318598,0.03826328,0.00515676,0.05441052,0.06039531,0.00610098,0.01105315,0.05210757,0.03963411,0.00825840,0.11612482,0.03104971,1.00000000,0.05955507,0.03995468,0.01494897,0.00099281,0.00313536,0.00031539,0.00028115,0.00767646,0.00029001,0.00020468,0.00041422,0.00140823,0.00028409,0.00044925,0.00042652,0.00006426,0.00205914,0.00012121,0.00003894,0.01414366,0.00140343,0.00026078,0.00422643,0.00332764,0.00447246,0.04956246,0.12995965,0.03171323,0.01188014,0.00057622,0.00013891,0.00014967,0.01075779,0.00056373,0.00007438,0.00001011,0.00004264,0.00003817,0.00792679,0.00036874,0.00104283,0.00000000,0.00000000,0.00023474,0.00106642,0.00086267,0.00166201,0.00145186,0.00008213], |
44
|
|
|
|
|
|
|
[0.00258325,0.00112062,0.00037826,0.00079911,0.00177224,0.00035010,0.01736422,0.01170594,0.01503290,0.03271138,0.00188975,0.02659091,0.00777443,0.05955507,1.00000000,0.01562602,0.00983284,0.00014027,0.00041045,0.00035932,0.00000000,0.00281889,0.00183886,0.00012794,0.00019978,0.00122685,0.00022561,0.00000000,0.00000000,0.00012876,0.00093168,0.00000000,0.00011200,0.01759005,0.00173629,0.00005821,0.00064098,0.00152619,0.00245656,0.00099108,0.05874367,0.01961069,0.01879199,0.00010133,0.00001166,0.00000000,0.00002908,0.00028928,0.00000000,0.00000000,0.00000000,0.00000000,0.00347836,0.00033174,0.00080709,0.00000000,0.00000000,0.00001225,0.00001882,0.00002594,0.00015389,0.00002476,0.00022975], |
45
|
|
|
|
|
|
|
[0.01958233,0.00596528,0.03378654,0.07192152,0.05178723,0.06902687,0.04876552,0.02435063,0.03515368,0.04915837,0.03007862,0.12470886,0.05533815,0.03995468,0.01562602,1.00000000,0.06595953,0.00136487,0.00350335,0.01802493,0.00096360,0.00909453,0.00515694,0.00092596,0.00073283,0.00797362,0.00525956,0.00057556,0.00034906,0.00149558,0.01390702,0.00054471,0.00018725,0.00700920,0.00276790,0.00021263,0.05220301,0.01430250,0.01125201,0.01183416,0.03213102,0.06774395,0.00526822,0.00410551,0.00065532,0.00117392,0.01695153,0.00125465,0.00081774,0.00016668,0.00056620,0.00011811,0.01195643,0.00888301,0.00146184,0.00014386,0.00001942,0.00067740,0.00087217,0.00157362,0.00068008,0.01158942,0.00059678], |
46
|
|
|
|
|
|
|
[0.01059040,0.00474509,0.00411333,0.00451860,0.00922929,0.00574293,0.03999814,0.00429508,0.00379712,0.02431038,0.00764406,0.01323026,0.10059134,0.01494897,0.00983284,0.06595953,1.00000000,0.00328817,0.02482425,0.00616447,0.00591990,0.03196992,0.02214633,0.00303539,0.00808711,0.02152074,0.00508249,0.00264278,0.00161911,0.01106370,0.21195908,0.00576058,0.00091710,0.03571950,0.01124942,0.00191355,0.01234284,0.00343306,0.04837319,0.04712816,0.01933463,0.04609447,0.04692010,0.00572997,0.00019301,0.00129479,0.00076547,0.00962075,0.00104078,0.00109856,0.00264999,0.00041869,0.05004606,0.00436722,0.00915766,0.00048918,0.00030055,0.00011663,0.00039512,0.00076144,0.00225356,0.00133797,0.00017619], |
47
|
|
|
|
|
|
|
[0.02694878,0.01721411,0.01952391,0.00357714,0.00392307,0.00052350,0.01013660,0.03557033,0.00287966,0.00392283,0.01730886,0.00047464,0.00042804,0.00099281,0.00014027,0.00136487,0.00328817,1.00000000,0.08067590,0.04178413,0.01288847,0.00814343,0.05918013,0.02287265,0.01192139,0.01255827,0.05727140,0.02851493,0.04112494,0.05247327,0.00817775,0.03127318,0.03346962,0.05308678,0.02299337,0.04035755,0.00753177,0.02359920,0.00502414,0.04179025,0.00149856,0.00215435,0.01544949,0.01176310,0.00279332,0.01809550,0.00277590,0.00414399,0.00252518,0.00136921,0.00191274,0.00121252,0.00204549,0.00226252,0.00068219,0.00029128,0.00048363,0.01441252,0.00342977,0.00180629,0.00600656,0.00517165,0.00897340], |
48
|
|
|
|
|
|
|
[0.00814224,0.00592865,0.02813212,0.00334221,0.01936738,0.00053657,0.01648549,0.00178563,0.00067947,0.00104910,0.00282589,0.00044968,0.00058611,0.00313536,0.00041045,0.00350335,0.02482425,0.08067590,1.00000000,0.01136176,0.02128583,0.00313779,0.00177016,0.00395005,0.00439948,0.06575925,0.00852211,0.00751651,0.00731073,0.02724912,0.03242017,0.00630792,0.00369276,0.07529430,0.02643964,0.01881616,0.00519386,0.01723189,0.00673282,0.06325264,0.00181273,0.00496519,0.01507412,0.04863530,0.00300932,0.00262918,0.00308379,0.00202089,0.00218238,0.00155296,0.00134257,0.00131987,0.01042747,0.00960167,0.00138919,0.00065992,0.00177403,0.00228709,0.00542576,0.00281322,0.00247036,0.00861962,0.00110538], |
49
|
|
|
|
|
|
|
[0.01508530,0.00682061,0.00144777,0.00286208,0.00051527,0.00002714,0.02365574,0.02655667,0.00302452,0.02161542,0.01172287,0.00112657,0.00250784,0.00031539,0.00035932,0.01802493,0.00616447,0.04178413,0.01136176,1.00000000,0.06483758,0.08307318,0.03566022,0.01725834,0.01750644,0.02539841,0.01688718,0.02514467,0.05486780,0.03688048,0.00521973,0.01688746,0.02445290,0.04768928,0.03471644,0.00333698,0.00610840,0.00442747,0.01372850,0.00426017,0.00223644,0.02063272,0.01163316,0.00536271,0.00062260,0.01746793,0.00129649,0.00160921,0.00404504,0.00413838,0.00337126,0.00066456,0.00454784,0.00171182,0.00098014,0.00013873,0.00077143,0.00063688,0.00013513,0.00050885,0.00794608,0.00238844,0.00064367], |
50
|
|
|
|
|
|
|
[0.00391065,0.00180499,0.00048211,0.00143900,0.00099050,0.00000000,0.00114353,0.00033904,0.00014375,0.00081625,0.00164933,0.00009768,0.00021171,0.00028115,0.00000000,0.00096360,0.00591990,0.01288847,0.02128583,0.06483758,1.00000000,0.04072639,0.00850963,0.00148968,0.03424890,0.00161710,0.00330852,0.00214345,0.01283577,0.02128869,0.01600221,0.01043247,0.02031329,0.01823453,0.01338593,0.00553621,0.00141886,0.00394744,0.00880579,0.00196628,0.00030094,0.00124320,0.01546540,0.01752062,0.00012141,0.00815143,0.00007725,0.00166190,0.00659144,0.00314795,0.01158046,0.00120625,0.00136223,0.00162939,0.00054020,0.00071395,0.00815323,0.00026597,0.00018976,0.00073762,0.00035807,0.00090650,0.00010881], |
51
|
|
|
|
|
|
|
[0.01222424,0.00344266,0.00200346,0.00147393,0.00025145,0.00000000,0.01283800,0.00973898,0.02226528,0.17399932,0.00389351,0.00573489,0.01568708,0.00767646,0.00281889,0.00909453,0.03196992,0.00814343,0.00313779,0.08307318,0.04072639,1.00000000,0.01126956,0.00503505,0.01361538,0.01960735,0.00316527,0.00331739,0.00744073,0.01068346,0.01134893,0.00646498,0.00267195,0.03957653,0.01929061,0.00228865,0.00421853,0.00776764,0.06510780,0.00242980,0.02196256,0.05102092,0.06929174,0.00146184,0.00017783,0.00105556,0.00039866,0.00099751,0.00020387,0.00014078,0.00093308,0.00008869,0.01896512,0.00131588,0.00768026,0.00008613,0.00007672,0.00024719,0.00005833,0.00012964,0.00082056,0.00037605,0.00015148], |
52
|
|
|
|
|
|
|
[0.00659643,0.00441784,0.00019596,0.02179999,0.00058066,0.00005061,0.04351610,0.00672954,0.00235095,0.01041594,0.01197920,0.00318360,0.02072003,0.00029001,0.00183886,0.00515694,0.02214633,0.05918013,0.00177016,0.03566022,0.00850963,0.01126956,1.00000000,0.02045685,0.00826220,0.00633983,0.03213753,0.02469657,0.04730327,0.08274742,0.02489030,0.08616438,0.01621454,0.00693314,0.00677325,0.00158068,0.00223943,0.00308515,0.00227818,0.00029899,0.00053250,0.00147263,0.00375682,0.00824269,0.00760135,0.01816660,0.00190250,0.00329502,0.00193978,0.00143710,0.00700996,0.00206318,0.02359644,0.00999129,0.00189036,0.00279543,0.00183423,0.00047399,0.00054415,0.00170902,0.00370215,0.00409053,0.00128667], |
53
|
|
|
|
|
|
|
[0.01744963,0.00357584,0.00120727,0.00186239,0.00029417,0.00000000,0.00310510,0.00693442,0.00174760,0.00381929,0.01455147,0.00051594,0.00317848,0.00020468,0.00012794,0.00092596,0.00303539,0.02287265,0.00395005,0.01725834,0.00148968,0.00503505,0.02045685,1.00000000,0.05145871,0.12779436,0.03636016,0.00283634,0.01066241,0.00982807,0.00257735,0.01096068,0.02880374,0.00972467,0.10046547,0.03624167,0.00056914,0.00067807,0.00583958,0.00657362,0.00220362,0.00263033,0.02207083,0.02274367,0.00194055,0.09990713,0.00528151,0.05241284,0.01205260,0.00876405,0.00796532,0.00761985,0.01903268,0.00936378,0.00189993,0.00146593,0.00266354,0.00441917,0.00655674,0.10888766,0.08290100,0.00726229,0.00045993], |
54
|
|
|
|
|
|
|
[0.02033044,0.00276735,0.00040013,0.00088413,0.00013702,0.00003237,0.00250883,0.00078453,0.00080893,0.00396809,0.00347447,0.00070273,0.00615162,0.00041422,0.00019978,0.00073283,0.00808711,0.01192139,0.00439948,0.01750644,0.03424890,0.01361538,0.00826220,0.05145871,1.00000000,0.08066205,0.00847989,0.00086344,0.00613541,0.01587066,0.00450803,0.00917661,0.03289855,0.03249862,0.09045805,0.06328122,0.00048236,0.00143716,0.02809991,0.00126278,0.00192920,0.00161724,0.07735493,0.02717634,0.00155401,0.19320418,0.00435925,0.00996765,0.06535708,0.13841556,0.06412573,0.04164011,0.05750162,0.04546989,0.01269074,0.00447161,0.01606400,0.00400217,0.00554911,0.05259490,0.02729086,0.00299233,0.00016756], |
55
|
|
|
|
|
|
|
[0.01715241,0.00367641,0.00283891,0.00484911,0.00106777,0.00004214,0.01927488,0.00241796,0.00196637,0.01526674,0.00669667,0.00211532,0.02370322,0.00140823,0.00122685,0.00797362,0.02152074,0.01255827,0.06575925,0.02539841,0.00161710,0.01960735,0.00633983,0.12779436,0.08066205,1.00000000,0.03859051,0.00113611,0.00203467,0.00471129,0.00409870,0.00081985,0.00343621,0.00824681,0.01813158,0.01059378,0.00205531,0.00545895,0.05316721,0.04592582,0.00994601,0.03556494,0.07198694,0.02189391,0.00529453,0.02358360,0.01043343,0.11826228,0.00809985,0.02347113,0.01625107,0.00605939,0.04742529,0.04541076,0.01082100,0.00415774,0.00848348,0.00436409,0.01243201,0.07245937,0.04159908,0.01913399,0.00060123], |
56
|
|
|
|
|
|
|
[0.00549930,0.00180428,0.00242860,0.00313808,0.00244524,0.00101535,0.00578289,0.00796306,0.00355291,0.00204542,0.01056136,0.00196784,0.00575064,0.00028409,0.00022561,0.00525956,0.00508249,0.05727140,0.00852211,0.01688718,0.00330852,0.00316527,0.03213753,0.03636016,0.00847989,0.03859051,1.00000000,0.00972854,0.00657131,0.00892829,0.00628014,0.00991010,0.01018037,0.01401221,0.02144435,0.00420855,0.00420610,0.00367435,0.00175623,0.00667293,0.00038671,0.00074318,0.00382104,0.00550945,0.00170323,0.00992406,0.00108417,0.00348879,0.00098750,0.00099727,0.00218474,0.00048967,0.00714997,0.00495844,0.00084966,0.00043909,0.00015966,0.00121366,0.00499164,0.02679881,0.01159190,0.00893648,0.00031932], |
57
|
|
|
|
|
|
|
[0.00368565,0.00557237,0.00174449,0.00180966,0.00110532,0.00008925,0.01215249,0.00207254,0.00048213,0.00011577,0.00352644,0.00059630,0.00032687,0.00044925,0.00000000,0.00057556,0.00264278,0.02851493,0.00751651,0.02514467,0.00214345,0.00331739,0.02469657,0.00283634,0.00086344,0.00113611,0.00972854,1.00000000,0.02446085,0.06612813,0.00528649,0.01944665,0.00185438,0.02321463,0.00769444,0.00070282,0.00087736,0.00123164,0.00031833,0.00472461,0.00011277,0.00018688,0.00042154,0.00321139,0.00040724,0.00479677,0.00035664,0.00062726,0.00030474,0.00013869,0.00043804,0.00009519,0.00096055,0.00042709,0.00007295,0.00000000,0.00022639,0.00023733,0.00023313,0.00033997,0.00068031,0.00083669,0.00214654], |
58
|
|
|
|
|
|
|
[0.00844784,0.00315267,0.00086392,0.00263198,0.00055186,0.00014061,0.00733156,0.00332360,0.00217216,0.00177630,0.00909528,0.00037158,0.00036216,0.00042652,0.00000000,0.00034906,0.00161911,0.04112494,0.00731073,0.05486780,0.01283577,0.00744073,0.04730327,0.01066241,0.00613541,0.00203467,0.00657131,0.02446085,1.00000000,0.11138003,0.00710961,0.01516372,0.00903694,0.04806795,0.02464383,0.01008555,0.00222670,0.00667785,0.00197022,0.00722700,0.00074335,0.00100007,0.00245419,0.00719443,0.00560381,0.08068350,0.00451608,0.00176721,0.00455153,0.00254111,0.00270384,0.00094113,0.00199997,0.00147935,0.00026871,0.00047024,0.00173808,0.00682661,0.00119168,0.00224231,0.00897971,0.00852850,0.00045398], |
59
|
|
|
|
|
|
|
[0.01205280,0.00397678,0.00117239,0.00221683,0.00040533,0.00003739,0.01013116,0.00160225,0.00078930,0.00079854,0.01061630,0.00027242,0.00062237,0.00006426,0.00012876,0.00149558,0.01106370,0.05247327,0.02724912,0.03688048,0.02128869,0.01068346,0.08274742,0.00982807,0.01587066,0.00471129,0.00892829,0.06612813,0.11138003,1.00000000,0.07763328,0.07034465,0.01753972,0.06965379,0.04399287,0.00363571,0.00118424,0.00530821,0.00161086,0.00142676,0.00021036,0.00037829,0.00491597,0.01279794,0.00798530,0.02766847,0.00439205,0.00178918,0.00290064,0.00250747,0.00538406,0.00131325,0.00517861,0.00293871,0.00032591,0.00150324,0.00420256,0.00080150,0.00089756,0.00500701,0.00921155,0.03720525,0.00026633], |
60
|
|
|
|
|
|
|
[0.00590242,0.00230947,0.00139450,0.00284620,0.00124399,0.00007630,0.00939150,0.00290728,0.00063203,0.00312736,0.00248232,0.00282797,0.00832029,0.00205914,0.00093168,0.01390702,0.21195908,0.00817775,0.03242017,0.00521973,0.01600221,0.01134893,0.02489030,0.00257735,0.00450803,0.00409870,0.00628014,0.00528649,0.00710961,0.07763328,1.00000000,0.03420485,0.00317056,0.04405387,0.01900739,0.00074559,0.00213527,0.00279654,0.00937419,0.00591989,0.00142592,0.00335003,0.01447748,0.01179839,0.00050940,0.00142761,0.00031669,0.00068340,0.00005170,0.00005228,0.00133899,0.00013094,0.00446244,0.00121102,0.00054575,0.00032507,0.00085792,0.00003481,0.00005202,0.00053387,0.00066170,0.00349448,0.00011249], |
61
|
|
|
|
|
|
|
[0.00640019,0.00158525,0.00026806,0.00101324,0.00035144,0.00016497,0.00237590,0.00200535,0.00112325,0.00160699,0.00446782,0.00033981,0.00094027,0.00012121,0.00000000,0.00054471,0.00576058,0.03127318,0.00630792,0.01688746,0.01043247,0.00646498,0.08616438,0.01096068,0.00917661,0.00081985,0.00991010,0.01944665,0.01516372,0.07034465,0.03420485,1.00000000,0.04656485,0.03144514,0.01743365,0.00247936,0.00125290,0.00289379,0.00578328,0.00050220,0.00026804,0.00014454,0.00440834,0.00535735,0.00195926,0.01604359,0.00073699,0.00124962,0.00579896,0.00314838,0.00823689,0.00647544,0.00259548,0.00237396,0.00028234,0.00317951,0.00642179,0.00061039,0.00109380,0.01472719,0.00625958,0.00628051,0.00050588], |
62
|
|
|
|
|
|
|
[0.00470555,0.00194050,0.00036410,0.00022575,0.00010310,0.00004694,0.00047886,0.00017666,0.00006953,0.00015678,0.00385592,0.00015541,0.00022213,0.00003894,0.00011200,0.00018725,0.00091710,0.03346962,0.00369276,0.02445290,0.02031329,0.00267195,0.01621454,0.02880374,0.03289855,0.00343621,0.01018037,0.00185438,0.00903694,0.01753972,0.00317056,0.04656485,1.00000000,0.01346135,0.07012036,0.01053257,0.00003529,0.00033151,0.00139163,0.00164366,0.00086229,0.00016281,0.00327332,0.00782047,0.00218277,0.07406401,0.00063875,0.00304542,0.02846726,0.01254924,0.03420190,0.01311560,0.00436480,0.03379355,0.00047982,0.02170573,0.00668109,0.00078414,0.00283927,0.02753618,0.00716731,0.00222120,0.00000000], |
63
|
|
|
|
|
|
|
[0.01450557,0.00436769,0.01135132,0.00341334,0.01638482,0.00062654,0.00438082,0.00755212,0.00426375,0.00310573,0.01283188,0.01651029,0.02323206,0.01414366,0.01759005,0.00700920,0.03571950,0.05308678,0.07529430,0.04768928,0.01823453,0.03957653,0.00693314,0.00972467,0.03249862,0.00824681,0.01401221,0.02321463,0.04806795,0.06965379,0.04405387,0.03144514,0.01346135,1.00000000,0.22332256,0.03047097,0.00362515,0.02587995,0.00860720,0.04333949,0.00895941,0.00767373,0.04035949,0.02896817,0.00429136,0.00798823,0.00091424,0.00197470,0.00328806,0.00244236,0.00150984,0.00056344,0.05075724,0.01291364,0.00323281,0.00021794,0.00041444,0.00805088,0.00339569,0.00165429,0.00734813,0.00457324,0.00019363], |
64
|
|
|
|
|
|
|
[0.01117073,0.00245270,0.00259196,0.00269952,0.00445366,0.00021500,0.00267569,0.00168273,0.00128080,0.00173009,0.03121173,0.00546494,0.00493813,0.00140343,0.00173629,0.00276790,0.01124942,0.02299337,0.02643964,0.03471644,0.01338593,0.01929061,0.00677325,0.10046547,0.09045805,0.01813158,0.02144435,0.00769444,0.02464383,0.04399287,0.01900739,0.01743365,0.07012036,0.22332256,1.00000000,0.06098303,0.00077042,0.00716332,0.00413977,0.06062982,0.01079193,0.00174967,0.04156133,0.02700926,0.00180154,0.04599695,0.00104510,0.00314268,0.00849010,0.00488137,0.00490207,0.00185002,0.04563527,0.01657738,0.00096511,0.00110621,0.00110381,0.01077644,0.00514171,0.00978555,0.01815607,0.00403095,0.00012445], |
65
|
|
|
|
|
|
|
[0.01519435,0.00323992,0.00098411,0.00114372,0.00120617,0.00000000,0.00027081,0.00028101,0.00022091,0.00057235,0.00698622,0.00018146,0.00064563,0.00026078,0.00005821,0.00021263,0.00191355,0.04035755,0.01881616,0.00333698,0.00553621,0.00228865,0.00158068,0.03624167,0.06328122,0.01059378,0.00420855,0.00070282,0.01008555,0.00363571,0.00074559,0.00247936,0.01053257,0.03047097,0.06098303,1.00000000,0.00165044,0.01853358,0.02860934,0.01705040,0.00252622,0.00299878,0.04582252,0.01576011,0.00392172,0.06257874,0.00413081,0.02740352,0.04932696,0.01498342,0.00862626,0.00987188,0.00586699,0.00936342,0.00236967,0.00108098,0.00381416,0.12164423,0.04318634,0.02021245,0.13947737,0.00631943,0.00011909], |
66
|
|
|
|
|
|
|
[0.01915299,0.03266539,0.00869706,0.05704468,0.01113947,0.00378012,0.01065916,0.01917842,0.00456440,0.02585302,0.02200911,0.00558436,0.01001453,0.00422643,0.00064098,0.05220301,0.01234284,0.00753177,0.00519386,0.00610840,0.00141886,0.00421853,0.00223943,0.00056914,0.00048236,0.00205531,0.00420610,0.00087736,0.00222670,0.00118424,0.00213527,0.00125290,0.00003529,0.00362515,0.00077042,0.00165044,1.00000000,0.09649299,0.02691656,0.00795024,0.00473072,0.01771804,0.00439487,0.00116296,0.00113255,0.00426294,0.00557289,0.00516613,0.00072015,0.00011814,0.00168547,0.00017330,0.00208905,0.00386485,0.00598437,0.00169900,0.00109036,0.00130272,0.00055026,0.00199908,0.00033131,0.01332835,0.01469944], |
67
|
|
|
|
|
|
|
[0.00949692,0.00462011,0.00278418,0.04862097,0.01531835,0.00184760,0.02057247,0.00421842,0.00935890,0.01645332,0.01214765,0.00267307,0.00277862,0.00332764,0.00152619,0.01430250,0.00343306,0.02359920,0.01723189,0.00442747,0.00394744,0.00776764,0.00308515,0.00067807,0.00143716,0.00545895,0.00367435,0.00123164,0.00667785,0.00530821,0.00279654,0.00289379,0.00033151,0.02587995,0.00716332,0.01853358,0.09649299,1.00000000,0.02542187,0.01524772,0.01455425,0.02331533,0.00530730,0.01223921,0.00147280,0.00553666,0.01314248,0.00205700,0.00079142,0.00013946,0.00072233,0.00010843,0.00125165,0.01166432,0.00085113,0.00035170,0.00055254,0.02302896,0.00358856,0.00321930,0.00172338,0.00418654,0.00271172], |
68
|
|
|
|
|
|
|
[0.02112821,0.00786873,0.00065182,0.00177969,0.00039070,0.00017622,0.00373513,0.00106842,0.00140085,0.03430529,0.01022627,0.00489924,0.03036682,0.00447246,0.00245656,0.01125201,0.04837319,0.00502414,0.00673282,0.01372850,0.00880579,0.06510780,0.00227818,0.00583958,0.02809991,0.05316721,0.00175623,0.00031833,0.00197022,0.00161086,0.00937419,0.00578328,0.00139163,0.00860720,0.00413977,0.02860934,0.02691656,0.02542187,1.00000000,0.00624394,0.02793209,0.09534102,0.18153533,0.00475902,0.00232323,0.00477584,0.00256019,0.02937592,0.00495045,0.00336073,0.00602078,0.00156047,0.06711762,0.00411568,0.14722449,0.00328211,0.00092243,0.00065248,0.00072151,0.00266505,0.00213053,0.00120634,0.00078964], |
69
|
|
|
|
|
|
|
[0.01075672,0.00592230,0.04870472,0.00785647,0.06202833,0.00733195,0.00293046,0.00377618,0.00611161,0.00272510,0.00180644,0.00526033,0.00078355,0.04956246,0.00099108,0.01183416,0.04712816,0.04179025,0.06325264,0.00426017,0.00196628,0.00242980,0.00029899,0.00657362,0.00126278,0.04592582,0.00667293,0.00472461,0.00722700,0.00142676,0.00591989,0.00050220,0.00164366,0.04333949,0.06062982,0.01705040,0.00795024,0.01524772,0.00624394,1.00000000,0.07036638,0.04675877,0.01179371,0.00527734,0.00064642,0.00217038,0.00347852,0.00118570,0.00030999,0.00014375,0.00023369,0.00013175,0.00105149,0.00066307,0.00179034,0.00008588,0.00005294,0.00526645,0.00864329,0.00143054,0.00235497,0.00149506,0.00034901], |
70
|
|
|
|
|
|
|
[0.00831446,0.00625668,0.00162055,0.00544478,0.00706150,0.00199509,0.00387222,0.00492809,0.01957090,0.03361024,0.00246790,0.01933054,0.01349284,0.12995965,0.05874367,0.03213102,0.01933463,0.00149856,0.00181273,0.00223644,0.00030094,0.02196256,0.00053250,0.00220362,0.00192920,0.00994601,0.00038671,0.00011277,0.00074335,0.00021036,0.00142592,0.00026804,0.00086229,0.00895941,0.01079193,0.00252622,0.00473072,0.01455425,0.02793209,0.07036638,1.00000000,0.23565814,0.05010477,0.00050604,0.00016712,0.00125798,0.00180972,0.00289651,0.00037433,0.00011872,0.00050773,0.00007054,0.00756966,0.00102191,0.00258941,0.00000000,0.00000000,0.00062218,0.00101418,0.00051524,0.00104060,0.00035737,0.00013958], |
71
|
|
|
|
|
|
|
[0.01284788,0.00554406,0.00190805,0.01902742,0.00336538,0.00039905,0.00807042,0.00231786,0.00565262,0.04910434,0.00323702,0.01241364,0.02104511,0.03171323,0.01961069,0.06774395,0.04609447,0.00215435,0.00496519,0.02063272,0.00124320,0.05102092,0.00147263,0.00263033,0.00161724,0.03556494,0.00074318,0.00018688,0.00100007,0.00037829,0.00335003,0.00014454,0.00016281,0.00767373,0.00174967,0.00299878,0.01771804,0.02331533,0.09534102,0.04675877,0.23565814,1.00000000,0.10544492,0.00120475,0.00021955,0.00065430,0.00211425,0.00428711,0.00067512,0.00070849,0.00112639,0.00037462,0.01987232,0.00510775,0.00778948,0.00067253,0.00016885,0.00047698,0.00042981,0.00396239,0.00254832,0.00057251,0.00059960], |
72
|
|
|
|
|
|
|
[0.02376349,0.00367927,0.00122017,0.00199649,0.00045634,0.00009479,0.00871024,0.00275841,0.00378400,0.03047494,0.00523446,0.00974384,0.03578595,0.01188014,0.01879199,0.00526822,0.04692010,0.01544949,0.01507412,0.01163316,0.01546540,0.06929174,0.00375682,0.02207083,0.07735493,0.07198694,0.00382104,0.00042154,0.00245419,0.00491597,0.01447748,0.00440834,0.00327332,0.04035949,0.04156133,0.04582252,0.00439487,0.00530730,0.18153533,0.01179371,0.05010477,0.10544492,1.00000000,0.01800460,0.00088198,0.00487004,0.00120767,0.02345498,0.00500422,0.00548267,0.00906789,0.00289471,0.08020179,0.01362142,0.03209294,0.00152357,0.00142448,0.00434502,0.00288680,0.00374848,0.00593906,0.00111861,0.00021871], |
73
|
|
|
|
|
|
|
[0.01846751,0.00894575,0.01033098,0.02516203,0.00300478,0.00026701,0.00918703,0.00050158,0.00031160,0.00050122,0.01917991,0.00055166,0.00150099,0.00057622,0.00010133,0.00410551,0.00572997,0.01176310,0.04863530,0.00536271,0.01752062,0.00146184,0.00824269,0.02274367,0.02717634,0.02189391,0.00550945,0.00321139,0.00719443,0.01279794,0.01179839,0.00535735,0.00782047,0.02896817,0.02700926,0.01576011,0.00116296,0.01223921,0.00475902,0.00527734,0.00050604,0.00120475,0.01800460,1.00000000,0.14325369,0.03572809,0.01769275,0.00618835,0.00404324,0.00713058,0.00249292,0.00271148,0.02514607,0.07519441,0.00092286,0.00068632,0.00376590,0.08263703,0.05462587,0.04486943,0.06067439,0.02265862,0.00113842], |
74
|
|
|
|
|
|
|
[0.02192419,0.00961623,0.00408488,0.01832947,0.00139167,0.00007847,0.00147008,0.00015461,0.00065168,0.00031984,0.01575161,0.00006572,0.00051297,0.00013891,0.00001166,0.00065532,0.00019301,0.00279332,0.00300932,0.00062260,0.00012141,0.00017783,0.00760135,0.00194055,0.00155401,0.00529453,0.00170323,0.00040724,0.00560381,0.00798530,0.00050940,0.00195926,0.00218277,0.00429136,0.00180154,0.00392172,0.00113255,0.00147280,0.00232323,0.00064642,0.00016712,0.00021955,0.00088198,0.14325369,1.00000000,0.10514595,0.02354699,0.00141278,0.00299712,0.00090253,0.00119456,0.00085844,0.00269840,0.00333976,0.00134696,0.00347246,0.01236056,0.03587032,0.07669852,0.05432662,0.03723326,0.02739749,0.00155304], |
75
|
|
|
|
|
|
|
[0.02827359,0.00339433,0.00158937,0.00795814,0.00075193,0.00065581,0.01090241,0.00975019,0.00249695,0.00272203,0.06158560,0.00038290,0.00038859,0.00014967,0.00000000,0.00117392,0.00129479,0.01809550,0.00262918,0.01746793,0.00815143,0.00105556,0.01816660,0.09990713,0.19320418,0.02358360,0.00992406,0.00479677,0.08068350,0.02766847,0.00142761,0.01604359,0.07406401,0.00798823,0.04599695,0.06257874,0.00426294,0.00553666,0.00477584,0.00217038,0.00125798,0.00065430,0.00487004,0.03572809,0.10514595,1.00000000,0.04705589,0.01459540,0.05663378,0.08734657,0.04325459,0.02502914,0.01335659,0.02266056,0.00137775,0.00532641,0.01872218,0.10620462,0.01714580,0.04982741,0.03309934,0.02584118,0.00083280], |
76
|
|
|
|
|
|
|
[0.06235569,0.00442661,0.11329349,0.07579728,0.01245292,0.00679404,0.01622746,0.00727802,0.00858768,0.00552471,0.00760292,0.00337312,0.00126309,0.01075779,0.00002908,0.01695153,0.00076547,0.00277590,0.00308379,0.00129649,0.00007725,0.00039866,0.00190250,0.00528151,0.00435925,0.01043343,0.00108417,0.00035664,0.00451608,0.00439205,0.00031669,0.00073699,0.00063875,0.00091424,0.00104510,0.00413081,0.00557289,0.01314248,0.00256019,0.00347852,0.00180972,0.00211425,0.00120767,0.01769275,0.02354699,0.04705589,1.00000000,0.00499088,0.00324442,0.00276915,0.00230305,0.00104103,0.00991123,0.00605850,0.00047069,0.00057773,0.00218800,0.07395106,0.02744583,0.04985920,0.03208429,0.09567894,0.00264640], |
77
|
|
|
|
|
|
|
[0.01871924,0.02332687,0.00131901,0.00124556,0.00033423,0.00022507,0.00087207,0.00089392,0.00057049,0.00299371,0.00694049,0.00055779,0.00544643,0.00056373,0.00028928,0.00125465,0.00962075,0.00414399,0.00202089,0.00160921,0.00166190,0.00099751,0.00329502,0.05241284,0.00996765,0.11826228,0.00348879,0.00062726,0.00176721,0.00178918,0.00068340,0.00124962,0.00304542,0.00197470,0.00314268,0.02740352,0.00516613,0.00205700,0.02937592,0.00118570,0.00289651,0.00428711,0.02345498,0.00618835,0.00141278,0.01459540,0.00499088,1.00000000,0.04780304,0.01753946,0.02230113,0.00498698,0.03513683,0.01497822,0.02916777,0.03924092,0.00497768,0.00279354,0.00132169,0.00759703,0.05759177,0.00232030,0.00078112], |
78
|
|
|
|
|
|
|
[0.02326626,0.00304362,0.00031955,0.00085965,0.00002379,0.00002622,0.00010859,0.00012882,0.00008359,0.00008030,0.00478471,0.00002137,0.00013874,0.00007438,0.00000000,0.00081774,0.00104078,0.00252518,0.00218238,0.00404504,0.00659144,0.00020387,0.00193978,0.01205260,0.06535708,0.00809985,0.00098750,0.00030474,0.00455153,0.00290064,0.00005170,0.00579896,0.02846726,0.00328806,0.00849010,0.04932696,0.00072015,0.00079142,0.00495045,0.00030999,0.00037433,0.00067512,0.00500422,0.00404324,0.00299712,0.05663378,0.00324442,0.04780304,1.00000000,0.06433982,0.01949774,0.04861192,0.00223968,0.02276722,0.00192441,0.00080812,0.02324735,0.00682948,0.00084406,0.01766422,0.01920178,0.00094351,0.00012488], |
79
|
|
|
|
|
|
|
[0.02549734,0.00367554,0.00016804,0.00014810,0.00001954,0.00001058,0.00011596,0.00002086,0.00002941,0.00011027,0.00082096,0.00000000,0.00025775,0.00001011,0.00000000,0.00016668,0.00109856,0.00136921,0.00155296,0.00413838,0.00314795,0.00014078,0.00143710,0.00876405,0.13841556,0.02347113,0.00099727,0.00013869,0.00254111,0.00250747,0.00005228,0.00314838,0.01254924,0.00244236,0.00488137,0.01498342,0.00011814,0.00013946,0.00336073,0.00014375,0.00011872,0.00070849,0.00548267,0.00713058,0.00090253,0.08734657,0.00276915,0.01753946,0.06433982,1.00000000,0.02163343,0.16374857,0.00520633,0.06743083,0.01195431,0.02952770,0.08726531,0.00222538,0.00088424,0.02395618,0.00662449,0.00103668,0.00018821], |
80
|
|
|
|
|
|
|
[0.01149905,0.00783214,0.00014428,0.00034212,0.00003722,0.00000000,0.00047994,0.00009797,0.00011305,0.00051862,0.00367486,0.00000000,0.00085371,0.00004264,0.00000000,0.00056620,0.00264999,0.00191274,0.00134257,0.00337126,0.01158046,0.00093308,0.00700996,0.00796532,0.06412573,0.01625107,0.00218474,0.00043804,0.00270384,0.00538406,0.00133899,0.00823689,0.03420190,0.00150984,0.00490207,0.00862626,0.00168547,0.00072233,0.00602078,0.00023369,0.00050773,0.00112639,0.00906789,0.00249292,0.00119456,0.04325459,0.00230305,0.02230113,0.01949774,0.02163343,1.00000000,0.03909415,0.02983356,0.04637260,0.01669265,0.01548775,0.01718709,0.00285431,0.00095862,0.01814512,0.00698152,0.01188710,0.00161540], |
81
|
|
|
|
|
|
|
[0.00904571,0.00359950,0.00025776,0.00039567,0.00006753,0.00009166,0.00016857,0.00021588,0.00010245,0.00008856,0.00057709,0.00000000,0.00031220,0.00003817,0.00000000,0.00011811,0.00041869,0.00121252,0.00131987,0.00066456,0.00120625,0.00008869,0.00206318,0.00761985,0.04164011,0.00605939,0.00048967,0.00009519,0.00094113,0.00131325,0.00013094,0.00647544,0.01311560,0.00056344,0.00185002,0.00987188,0.00017330,0.00010843,0.00156047,0.00013175,0.00007054,0.00037462,0.00289471,0.00271148,0.00085844,0.02502914,0.00104103,0.00498698,0.04861192,0.16374857,0.03909415,1.00000000,0.00615838,0.03567149,0.02437421,0.01141797,0.01910548,0.00159134,0.00073817,0.02505985,0.00600777,0.00288950,0.00068166], |
82
|
|
|
|
|
|
|
[0.02978857,0.01357759,0.01361661,0.00564823,0.00970181,0.00075771,0.00977301,0.00095943,0.00105292,0.02816400,0.01518055,0.02064372,0.10424722,0.00792679,0.00347836,0.01195643,0.05004606,0.00204549,0.01042747,0.00454784,0.00136223,0.01896512,0.02359644,0.01903268,0.05750162,0.04742529,0.00714997,0.00096055,0.00199997,0.00517861,0.00446244,0.00259548,0.00436480,0.05075724,0.04563527,0.00586699,0.00208905,0.00125165,0.06711762,0.00105149,0.00756966,0.01987232,0.08020179,0.02514607,0.00269840,0.01335659,0.00991123,0.03513683,0.00223968,0.00520633,0.02983356,0.00615838,1.00000000,0.11796095,0.15722729,0.00953404,0.00063211,0.00130397,0.00314726,0.00615594,0.00281269,0.02691022,0.00068706], |
83
|
|
|
|
|
|
|
[0.01858157,0.00583133,0.00071465,0.01156505,0.00050743,0.00011908,0.00545278,0.00042942,0.00026795,0.00223054,0.01160495,0.00222026,0.01947444,0.00036874,0.00033174,0.00888301,0.00436722,0.00226252,0.00960167,0.00171182,0.00162939,0.00131588,0.00999129,0.00936378,0.04546989,0.04541076,0.00495844,0.00042709,0.00147935,0.00293871,0.00121102,0.00237396,0.03379355,0.01291364,0.01657738,0.00936342,0.00386485,0.01166432,0.00411568,0.00066307,0.00102191,0.00510775,0.01362142,0.07519441,0.00333976,0.02266056,0.00605850,0.01497822,0.02276722,0.06743083,0.04637260,0.03567149,0.11796095,1.00000000,0.01279065,0.01390689,0.00468608,0.00411594,0.00532310,0.02350313,0.00355548,0.01051787,0.00054470], |
84
|
|
|
|
|
|
|
[0.01913664,0.01961088,0.00278636,0.00036484,0.00086061,0.00010628,0.00160478,0.00054114,0.00014010,0.00878367,0.00572667,0.00081263,0.00894971,0.00104283,0.00080709,0.00146184,0.00915766,0.00068219,0.00138919,0.00098014,0.00054020,0.00768026,0.00189036,0.00189993,0.01269074,0.01082100,0.00084966,0.00007295,0.00026871,0.00032591,0.00054575,0.00028234,0.00047982,0.00323281,0.00096511,0.00236967,0.00598437,0.00085113,0.14722449,0.00179034,0.00258941,0.00778948,0.03209294,0.00092286,0.00134696,0.00137775,0.00047069,0.02916777,0.00192441,0.01195431,0.01669265,0.02437421,0.15722729,0.01279065,1.00000000,0.12141985,0.00128826,0.00002962,0.00019234,0.00044672,0.00046079,0.00436314,0.00308205], |
85
|
|
|
|
|
|
|
[0.00986617,0.03410275,0.00058838,0.00003067,0.00000000,0.00000000,0.00010979,0.00007965,0.00000000,0.00005787,0.00021805,0.00003506,0.00018670,0.00000000,0.00000000,0.00014386,0.00048918,0.00029128,0.00065992,0.00013873,0.00071395,0.00008613,0.00279543,0.00146593,0.00447161,0.00415774,0.00043909,0.00000000,0.00047024,0.00150324,0.00032507,0.00317951,0.02170573,0.00021794,0.00110621,0.00108098,0.00169900,0.00035170,0.00328211,0.00008588,0.00000000,0.00067253,0.00152357,0.00068632,0.00347246,0.00532641,0.00057773,0.03924092,0.00080812,0.02952770,0.01548775,0.01141797,0.00953404,0.01390689,0.12141985,1.00000000,0.01628836,0.00016611,0.00007172,0.00084973,0.00050277,0.00202156,0.00600214], |
86
|
|
|
|
|
|
|
[0.00980736,0.00370477,0.00033706,0.00026446,0.00009151,0.00000000,0.00000000,0.00000000,0.00027875,0.00002673,0.00098801,0.00000000,0.00000000,0.00000000,0.00000000,0.00001942,0.00030055,0.00048363,0.00177403,0.00077143,0.00815323,0.00007672,0.00183423,0.00266354,0.01606400,0.00848348,0.00015966,0.00022639,0.00173808,0.00420256,0.00085792,0.00642179,0.00668109,0.00041444,0.00110381,0.00381416,0.00109036,0.00055254,0.00092243,0.00005294,0.00000000,0.00016885,0.00142448,0.00376590,0.01236056,0.01872218,0.00218800,0.00497768,0.02324735,0.08726531,0.01718709,0.01910548,0.00063211,0.00468608,0.00128826,0.01628836,1.00000000,0.00189372,0.00133057,0.00722112,0.00391637,0.00331270,0.00091412], |
87
|
|
|
|
|
|
|
[0.02622097,0.00360240,0.01095038,0.04216157,0.00290705,0.00028208,0.00213201,0.00062483,0.00106974,0.00080236,0.01278296,0.00023440,0.00040926,0.00023474,0.00001225,0.00067740,0.00011663,0.01441252,0.00228709,0.00063688,0.00026597,0.00024719,0.00047399,0.00441917,0.00400217,0.00436409,0.00121366,0.00023733,0.00682661,0.00080150,0.00003481,0.00061039,0.00078414,0.00805088,0.01077644,0.12164423,0.00130272,0.02302896,0.00065248,0.00526645,0.00062218,0.00047698,0.00434502,0.08263703,0.03587032,0.10620462,0.07395106,0.00279354,0.00682948,0.00222538,0.00285431,0.00159134,0.00130397,0.00411594,0.00002962,0.00016611,0.00189372,1.00000000,0.11510114,0.01939883,0.05791038,0.02146187,0.00081616], |
88
|
|
|
|
|
|
|
[0.02015345,0.00517674,0.02561634,0.01448784,0.00745701,0.00056563,0.00103991,0.00020720,0.00004685,0.00028778,0.00415772,0.00009475,0.00020963,0.00106642,0.00001882,0.00087217,0.00039512,0.00342977,0.00542576,0.00013513,0.00018976,0.00005833,0.00054415,0.00655674,0.00554911,0.01243201,0.00499164,0.00023313,0.00119168,0.00089756,0.00005202,0.00109380,0.00283927,0.00339569,0.00514171,0.04318634,0.00055026,0.00358856,0.00072151,0.00864329,0.00101418,0.00042981,0.00288680,0.05462587,0.07669852,0.01714580,0.02744583,0.00132169,0.00084406,0.00088424,0.00095862,0.00073817,0.00314726,0.00532310,0.00019234,0.00007172,0.00133057,0.11510114,1.00000000,0.03310225,0.04122635,0.01095066,0.00273120], |
89
|
|
|
|
|
|
|
[0.02370473,0.00367842,0.00333059,0.02353333,0.00204881,0.00035722,0.00105356,0.00027694,0.00060664,0.00047446,0.00773684,0.00013492,0.00277772,0.00086267,0.00002594,0.00157362,0.00076144,0.00180629,0.00281322,0.00050885,0.00073762,0.00012964,0.00170902,0.10888766,0.05259490,0.07245937,0.02679881,0.00033997,0.00224231,0.00500701,0.00053387,0.01472719,0.02753618,0.00165429,0.00978555,0.02021245,0.00199908,0.00321930,0.00266505,0.00143054,0.00051524,0.00396239,0.00374848,0.04486943,0.05432662,0.04982741,0.04985920,0.00759703,0.01766422,0.02395618,0.01814512,0.02505985,0.00615594,0.02350313,0.00044672,0.00084973,0.00722112,0.01939883,0.03310225,1.00000000,0.05860653,0.02368163,0.00127072], |
90
|
|
|
|
|
|
|
[0.02309777,0.00215416,0.00669844,0.00356946,0.00119390,0.00077455,0.00090291,0.00276635,0.00289897,0.00134026,0.04709159,0.00134141,0.00225085,0.00166201,0.00015389,0.00068008,0.00225356,0.00600656,0.00247036,0.00794608,0.00035807,0.00082056,0.00370215,0.08290100,0.02729086,0.04159908,0.01159190,0.00068031,0.00897971,0.00921155,0.00066170,0.00625958,0.00716731,0.00734813,0.01815607,0.13947737,0.00033131,0.00172338,0.00213053,0.00235497,0.00104060,0.00254832,0.00593906,0.06067439,0.03723326,0.03309934,0.03208429,0.05759177,0.01920178,0.00662449,0.00698152,0.00600777,0.00281269,0.00355548,0.00046079,0.00050277,0.00391637,0.05791038,0.04122635,0.05860653,1.00000000,0.05373646,0.00045164], |
91
|
|
|
|
|
|
|
[0.02525110,0.00470674,0.02532072,0.04458390,0.02242817,0.00193900,0.04688816,0.00486968,0.00612092,0.01790618,0.01208866,0.00240426,0.00098062,0.00145186,0.00002476,0.01158942,0.00133797,0.00517165,0.00861962,0.00238844,0.00090650,0.00037605,0.00409053,0.00726229,0.00299233,0.01913399,0.00893648,0.00083669,0.00852850,0.03720525,0.00349448,0.00628051,0.00222120,0.00457324,0.00403095,0.00631943,0.01332835,0.00418654,0.00120634,0.00149506,0.00035737,0.00057251,0.00111861,0.02265862,0.02739749,0.02584118,0.09567894,0.00232030,0.00094351,0.00103668,0.01188710,0.00288950,0.02691022,0.01051787,0.00436314,0.00202156,0.00331270,0.02146187,0.01095066,0.02368163,0.05373646,1.00000000,0.00093386], |
92
|
|
|
|
|
|
|
[0.06049483,0.02079996,0.00174317,0.00179613,0.00025653,0.00064115,0.00337673,0.00339206,0.00117948,0.00051373,0.00336093,0.00027885,0.00005553,0.00008213,0.00022975,0.00059678,0.00017619,0.00897340,0.00110538,0.00064367,0.00010881,0.00015148,0.00128667,0.00045993,0.00016756,0.00060123,0.00031932,0.00214654,0.00045398,0.00026633,0.00011249,0.00050588,0.00000000,0.00019363,0.00012445,0.00011909,0.01469944,0.00271172,0.00078964,0.00034901,0.00013958,0.00059960,0.00021871,0.00113842,0.00155304,0.00083280,0.00264640,0.00078112,0.00012488,0.00018821,0.00161540,0.00068166,0.00068706,0.00054470,0.00308205,0.00600214,0.00091412,0.00081616,0.00273120,0.00127072,0.00045164,0.00093386,1.00000000] |
93
|
|
|
|
|
|
|
]; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# Precompute Logs, make a finite penalty for 0 entries, where logs would be undefined |
96
|
0
|
|
|
0
|
0
|
0
|
# Base-10 logarithm of the single numeric argument, computed from Perl's
# natural log() via the change-of-base rule.
sub log10 {
  my ($value) = @_;
  return log($value) / log(10);
}
97
|
|
|
|
|
|
|
# Natural-log view of the similarity matrix. log(0) is undefined, so every
# zero entry receives a finite penalty instead: one less than the smallest
# logarithm found among the non-zero entries.
# This runs once at load time, so clarity wins over speed.
our $underflow_penalty
  = min(map { log($_) } grep { $_ } map { @$_ } @$msc_similarities) - 1;
our $msc_log_similarities = [
  map {
    my $similarity_row = $_;
    [ map { $_ ? log($_) : $underflow_penalty } @$similarity_row ];
  } @$msc_similarities
];
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# Lookup table: two-digit top-level MSC code => position of that code in the
# rows/columns of the similarity matrices. The codes are listed in matrix
# order, so each code's position in the list is its index.
our $msc_to_array_index = do {
  my @top_level_codes = qw(
    00 01 03 05 06 08 11 12 13 14 15
    16 17 18 19 20 22 26 28 30 31
    32 33 34 35 37 39 40 41 42 43
    44 45 46 47 49 51 52 53 54 55
    57 58 60 62 65 68 70 74 76 78
    80 81 82 83 85 86 90 91 92 93
    94 97
  );
  my %position_of;
  @position_of{@top_level_codes} = (0 .. $#top_level_codes);
  \%position_of;
};
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# Until we have some metric that determines the term-likelihood of a given word, |
112
|
|
|
|
|
|
|
# we will use a simple threshold on the number of characters in a concept, |
113
|
|
|
|
|
|
|
# bailing out on words that are not long enough, as they are most likely to |
114
|
|
|
|
|
|
|
# have informal uses. For the moment, 7 seems a good value. |
115
|
|
|
|
|
|
|
# Minimum concept length, in characters. disambiguate() drops concepts
# shorter than this, and weigh_category() subtracts it from each concept
# length when summing a category's weight.
our $word_length_threshold = 7; |
116
|
19
|
100
|
|
19
|
0
|
76
|
# Map an MSC category string to its matrix position, using only the first two
# characters (the top-level class). Yields undef for an undefined argument or
# for a prefix missing from the lookup table.
sub msc_to_array_index {
  my ($category) = @_;
  # Explicit "undef" (not a bare return) so that list-context callers still
  # receive exactly one element.
  return undef unless defined $category;
  my $top_level_code = "" . substr($category, 0, 2);
  return $msc_to_array_index->{$top_level_code};
}
117
|
|
|
|
|
|
|
# Similarity of two MSC categories, read from the precomputed matrix.
# Only the top-level (two-digit) class of each category is considered.
# Returns 0 when either category has no matrix position.
sub msc_similarity {
  my ($category1, $category2) = @_;
  my $row_index    = msc_to_array_index($category1);
  my $column_index = msc_to_array_index($category2);
  # Ill-defined means no similarity
  return 0 unless (defined $row_index) && (defined $column_index);
  # Well-defined, lookup in matrix
  return $msc_similarities->[$row_index]->[$column_index];
}
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Discover the most similar cluster of concepts |
130
|
|
|
|
|
|
|
# Pick the cluster of candidate concepts whose top-level MSC categories fit
# together best.
#
# Arguments:
#   $candidates - array reference of candidate hashes; the keys read here are
#                 scheme, category, concept and link.
#   %options    - include_all => true : return $candidates untouched
#                 verbosity   => true : print progress counts to STDERR
#
# Returns an array reference: $candidates itself when include_all is set,
# otherwise a fresh array holding the selected candidates in input order.
#
# NOTE: unless include_all is set, @$candidates is filtered in place (step 0).
sub disambiguate {
  my ($candidates, %options) = @_;
  return $candidates if $options{include_all};

  # 0. Drop everything we cannot classify:
  #    - anything outside the MSC scheme (TODO: Map everything into MSC!)
  #    - placeholder "XX" categories (TODO: Can we do something with
  #      uncategorized concepts?)
  #    - short concepts (TEMPORARY: We really need term-likelihood here)
  @$candidates = grep {
    (($_->{scheme} // '') eq 'msc')
      && (($_->{category} // '') !~ /^XX/)
      && (length($_->{concept} // '') >= $word_length_threshold)
  } @$candidates;
  print STDERR "[NNexus::Classification] Eligible concepts: ",scalar(@$candidates),"\n" if $options{verbosity};

  # 1. Group by top-level MSC category; each group stores positions in the
  #    (filtered) $candidates array.
  my %category_view;
  foreach my $index (0 .. $#$candidates) {
    my $candidate = $candidates->[$index];
    my $msc_index = msc_to_array_index($candidate->{category});
    # A category without a matrix position cannot be scored. Previously it was
    # grouped under an undef hash key and later used as array index '' (row 0).
    next unless defined $msc_index;
    my $link = $candidate->{link};
    my $members = ($category_view{$msc_index} //= []);
    # 1.0. Skip fine-grained distinctions on the MSC class from the same URL:
    #      45H07 and 45H05 are both just "45" with the current metric, so one
    #      entry per link and category is enough.
    next if $link && grep { ($candidates->[$_]->{link} || '') eq $link } @$members;
    # 1.1. Keyed by similarity index, for faster lookups
    push @$members, $index;
  }

  # 2. Greedy search through the ordered %category_view:
  # 2.1. Precompute category weights
  my @category_keys = keys %category_view;
  my %category_weights
    = map { ($_ => weigh_category($category_view{$_}, $candidates)) } @category_keys;
  # 2.2. Order by weight, heaviest first. The comparison must be numeric: a
  #      string comparison ranks 9 above 10. Ties are broken on the category
  #      index so the outcome does not depend on hash ordering.
  my @ordered_categories = sort {
    $category_weights{$b} <=> $category_weights{$a} or $a <=> $b
  } @category_keys;
  # 2.3. Precompute the number of concepts in each category (greedy cutoff)
  my %category_sizes = map { ($_ => scalar(@{ $category_view{$_} })) } @category_keys;

  my $max_clique = maximize_clique(
    weights => \%category_weights,
    sizes   => \%category_sizes,
    queue   => \@ordered_categories,
  );

  # Collect the candidates of every category in the winning clique, restoring
  # their original relative order.
  my $winning_categories = ($max_clique && $max_clique->{clique}) || [];
  my @final_candidates_indexes = map { @{ $category_view{$_} } } @$winning_categories;
  my @final_candidates = map { $candidates->[$_] } sort { $a <=> $b } @final_candidates_indexes;
  print STDERR "[NNexus::Classification] Disambiguated concepts: ",scalar(@final_candidates),"\n" if $options{verbosity};
  return \@final_candidates;
}
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# Weight of one category: for every listed candidate, the length of its
# concept minus $word_length_threshold, summed up.
#
# Arguments:
#   $concept_indexes - array reference of positions into @$candidates
#   $candidates      - array reference of candidate hashes (key: concept)
#
# Possible alternative (not in use): a geometric weight, dividing each length
# by the threshold instead of subtracting it.
sub weigh_category {
  my ($concept_indexes, $candidates) = @_;
  my $total = 0;
  $total += length($candidates->[$_]->{concept}) - $word_length_threshold
    for @$concept_indexes;
  return $total;
}
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# Recursively search for the best-scoring clique of categories.
#
# Named arguments:
#   weights => hash ref, category index => weight
#   sizes   => hash ref, category index => number of concepts
#   queue   => array ref of category indexes still available, in visiting order
#   score, size, clique => state of the clique built so far (optional;
#                          default to 0, 0 and [])
#
# Scoring works in log space: adding a category contributes its weight plus
# the log-similarity between it and every category already in the clique.
#
# Returns a hash ref {score, size, clique} for the best clique found; among
# equal scores the one found first wins.
sub maximize_clique {
  my (%options) = @_;
  my ($weights, $sizes, $queue, $score, $size, $clique)
    = @options{qw(weights sizes queue score size clique)};
  $score  //= 0;
  $size   //= 0;
  $clique //= [];
  my @traversal_queue = @{ $queue // [] };
  my $current = { score => $score, size => $size, clique => $clique };
  return $current unless @traversal_queue;    # Base case

  # The clique built so far is itself a candidate. Test the clique, not the
  # score: a legitimate clique can have a score of exactly 0.
  my @candidate_cliques = @$clique ? ($current) : ();
  my $greedy_bound = 0;
  while (@traversal_queue) {
    # Next extension index:
    my $next_index = shift @traversal_queue;
    last if $sizes->{$next_index} < $greedy_bound;    # Greedy, don't go beyond the bound
    my $similarity_score = 0;
    my $well_defined     = 1;
    # Fetch the row first, so a missing row is not autovivified into the
    # shared matrix.
    my $similarity_row = $msc_log_similarities->[$next_index];
    foreach my $category_index (@$clique) {
      my $similarity = $similarity_row ? $similarity_row->[$category_index] : undef;
      # Only a missing entry is ill-defined; a log-similarity of 0 is valid.
      unless (defined $similarity) {
        $well_defined = 0;
        last;
      }
      $similarity_score += $similarity;
    }
    next unless $well_defined;    # Ill-defined, skip the $next_index
    my $extended_score = $score + $weights->{$next_index} + $similarity_score;
    next if $extended_score < $score;    # No improvement, next
    # Improvement! Explore everything reachable from the extended clique.
    push @candidate_cliques, maximize_clique(
      weights => $weights,
      sizes   => $sizes,
      queue   => \@traversal_queue,
      score   => $extended_score,
      size    => $size + $sizes->{$next_index},
      clique  => [@$clique, $next_index],
    );
    # Heuristic: be greedy to save time. Once a category of size N has been
    # added, do not try categories smaller than N in this merge pass (they are
    # still considered as further additions to the extended clique).
    $greedy_bound = $sizes->{$next_index};
  }

  # Return the best-scoring candidate; the first one wins on ties.
  my $best;
  foreach my $candidate (@candidate_cliques) {
    next unless $candidate;
    $best = $candidate if !$best || $candidate->{score} > $best->{score};
  }
  # Nothing could be added: hand back the (possibly empty) current clique.
  return $best // $current;
}
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
1; |
250
|
|
|
|
|
|
|
__END__ |