Heldout perplexity (Table 1) {'total_loss_ema': 1.256824457350846, 'nsp_loss_ema': 0.006769012687692929, 'lm_loss_ema': 1.250055449443854, 'total_loss': 1.2903043007153214, 'nsp_loss': 0.03745921022317875, 'lm_loss': 1.2528450911970248, 'lm_loss_wgt': 1.2559472863953876, 'mrr': 0.0, 'nsp_accuracy': 0.9831589661049044, 'wiki_el_precision': 0.0, 'wiki_el_recall': 0.0, 'wiki_el_f1': 0.0, 'wiki_span_precision': 0.0, 'wiki_span_recall': 0.0, 'wiki_span_f1': 0.0, 'wordnet_el_precision': 0.0, 'wordnet_el_recall': 0.0, 'wordnet_el_f1': 0.0, 'wordnet_span_precision': 0.0, 'wordnet_span_recall': 0.0, 'wordnet_span_f1': 0.0}
Wikidata KG probe (Table 1) {'total_loss_ema': 4.418370670230346, 'nsp_loss_ema': 0.0, 'lm_loss_ema': 4.418370670230346, 'total_loss': 4.193105020479525, 'nsp_loss': 0, 'lm_loss': 4.193105020479525, 'lm_loss_wgt': 4.152787456908796, 'mrr': 0.3108375402762271, 'nsp_accuracy': 0.0, 'wiki_el_precision': 0.0, 'wiki_el_recall': 0.0, 'wiki_el_f1': 0.0, 'wiki_span_precision': 0.0, 'wiki_span_recall': 0.0, 'wiki_span_f1': 0.0, 'wordnet_el_precision': 0.0, 'wordnet_el_recall': 0.0, 'wordnet_el_f1': 0.0, 'wordnet_span_precision': 0.0, 'wordnet_span_recall': 0.0, 'wordnet_span_f1': 0.0, 'loss': 4.193105020479525}
Word-sense disambiguation 75.1
AIDA Entity linking {'total_loss_ema': 0.0, 'nsp_loss_ema': 0.0, 'lm_loss_ema': 0.0, 'total_loss': 0, 'nsp_loss': 0, 'lm_loss': 0, 'lm_loss_wgt': 0, 'mrr': 0.0, 'nsp_accuracy': 0.0, 'wiki_el_precision': 0.9059656218402427, 'wiki_el_recall': 0.7508904253090299, 'wiki_el_f1': 0.8211708099438157, 'wiki_span_precision': 0.942366026289181, 'wiki_span_recall': 0.7810601298973392, 'wiki_span_f1': 0.8541642799862033, 'wordnet_el_precision': 0.0, 'wordnet_el_recall': 0.0, 'wordnet_el_f1': 0.0, 'wordnet_span_precision': 0.0, 'wordnet_span_recall': 0.0, 'wordnet_span_f1': 0.0}
{'tokens': {'tokens': tensor([[ 101, 4511, 2038, 13794, 2112, 103, 102]], device='cuda:0')}, 'segment_ids': tensor([[0, 0, 0, 0, 0, 0, 0]], device='cuda:0'), 'candidates': {'wiki': {'candidate_entity_priors': tensor([[[4.5594e-01, 5.8171e-02, 5.6861e-02, 5.6386e-02, 5.6310e-02, 5.6272e-02, 5.6158e-02, 5.6063e-02, 5.6044e-02, 5.6044e-02, 1.0482e-02, 5.3170e-03, 4.5575e-03, 1.9369e-03, 1.3103e-03, 1.2533e-03, 1.2343e-03, 1.2153e-03, 1.2153e-03, 1.1014e-03, 8.1654e-04, 7.5958e-04, 7.5958e-04, 6.8362e-04, 6.2665e-04, 5.8867e-04, 5.6968e-04, 4.7473e-04, 4.5575e-04, 3.9878e-04], [4.6187e-01, 1.5058e-01, 1.4475e-01, 1.0097e-01, 1.0000e-01, 2.0428e-02, 1.9455e-02, 9.7276e-04, 9.7276e-04, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], [1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]]], device='cuda:0'), 'candidate_entities': {'ids': tensor([[[348718, 358963, 311145, 238554, 87406, 211272, 464923, 350470, 460812, 175601, 131457, 319720, 170829, 157074, 228099, 453367, 29413, 51600, 299674, 119141, 370319, 230055, 411970, 128164, 136743, 54476, 447727, 331522, 103750, 179035], [329630, 460566, 436144, 11831, 177862, 8160, 224746, 12394, 303062, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [470115, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]], device='cuda:0')}, 'candidate_spans': tensor([[[1, 1], [3, 3], [5, 5]]], device='cuda:0'), 'candidate_segment_ids': tensor([[0, 0, 0]], device='cuda:0')}, 'wordnet': {'candidate_entity_priors': tensor([[[2.0000e-01, 6.6667e-01, 6.6667e-02, 6.6667e-02, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], [4.4385e-04, 1.7754e-03, 5.3262e-03, 2.6631e-03, 2.8850e-02, 1.1540e-02, 3.9947e-03, 1.9086e-02, 1.3316e-03, 1.3759e-02, 9.8091e-02, 5.3395e-01, 6.3915e-02, 1.2872e-02, 3.5508e-03, 3.5508e-03, 1.7754e-03, 1.4203e-02, 1.6778e-01, 1.1540e-02], [6.6667e-02, 5.3333e-01, 6.6667e-02, 6.6667e-02, 2.0000e-01, 6.6667e-02, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], [4.4776e-02, 5.2239e-02, 1.1194e-02, 9.3284e-02, 1.4925e-02, 7.4627e-02, 2.9851e-02, 3.7313e-02, 1.1194e-02, 5.9701e-02, 7.4627e-02, 3.3582e-02, 4.2537e-01, 3.7313e-03, 3.7313e-03, 3.7313e-03, 1.8657e-02, 7.4627e-03, 0.0000e+00, 0.0000e+00], [1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]]], device='cuda:0'), 'candidate_entities': {'ids': tensor([[[ 49298, 64949, 109616, 109688, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 78805, 104174, 104219, 104221, 104479, 104480, 107662, 109526, 110980, 112521, 114418, 114886, 114891, 114893, 114915, 115038, 115642, 115765, 116938, 117489], [ 4939, 38462, 53950, 55891, 73413, 95709, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 18196, 25455, 25779, 43251, 50809, 52958, 53945, 54221, 59833, 68572, 72180, 92718, 95709, 111629, 111633, 113925, 114008, 116002, 0, 0], [117661, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]], device='cuda:0')}, 'candidate_spans': tensor([[[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]], device='cuda:0'), 'candidate_segment_ids': tensor([[0, 0, 0, 0, 0]], device='cuda:0')}}, 'lm_label_ids': {'lm_labels': tensor([[ 101, 4511, 2038, 13794, 2112, 14722, 102]], device='cuda:0')}, 'mask_indicator': tensor([[0, 0, 0, 0, 0, 1, 0]], device='cuda:0', dtype=torch.uint8)}