import os
import glob
import json

import numpy as np
from sklearn import metrics

from logger import logger


# Returns the paths of all files in the directory (dir) with the
# extension (extension), skipping files whose names start with an
# underscore and the file named (exclude).
def get_files(dir, exclude='', extension='csv'):
    logger.info("Getting all files from (%s) with the extension (%s) excluding (%s)" % (
        dir, extension, exclude))
    files = glob.glob(os.path.join(dir, '[!_]*.%s' % extension))
    return [f for f in files if os.path.basename(f) != exclude]
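
# Hypothetical usage (directory and file names are made up for illustration):
#   get_files('data', exclude='all.csv', extension='csv')
#   -> ['data/train.csv', 'data/test.csv']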


def compare_lists(l1, l2):
    # NOT IMPLEMENTED
    return 0


def load_config(config):
    # Load a JSON configuration file and return it as a dict.
    with open(config, "r") as config_file:
        return json.load(config_file)
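
# Hypothetical usage ("config.json" and the "batch_size" key are assumed
# for illustration, not defined by this module):
#   config = load_config("config.json")
#   batch_size = config["batch_size"]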


def compute_MAP(labels, scores, top_k=-1):
    if len(labels) != len(scores):
        logger.error(
            "Failed computing MAP because length of labels (%d) and scores (%d) are different" % (len(labels), len(scores)))
        return -1
    if top_k < 0 or top_k > labels.shape[1]:
        top_k = labels.shape[1]
    average_precision_scores = []
    for i, (label, score) in enumerate(zip(labels, scores)):
        # Rank candidates by descending score, then zero out scores beyond
        # the top_k cutoff so they cannot contribute to average precision.
        sorted_indices = score.argsort()[::-1]
        score = score[sorted_indices]
        label = label[sorted_indices]
        score[top_k:] = 0
        if sum(label) == 0:
            logger.error("Sample %d has no positive labels when computing MAP; its AP is counted as 0" % i)
            average_precision_score = 0
        else:
            average_precision_score = metrics.average_precision_score(label, score)
        average_precision_scores.append(average_precision_score)
    return np.mean(average_precision_scores), average_precision_scores
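
# A toy sanity check (numbers invented for illustration): with
#   labels = np.array([[1, 0, 1]]) and scores = np.array([[0.8, 0.7, 0.2]])
# the positives sit at ranks 1 and 3, so the average precision is
# (1/1 + 2/3) / 2 = 5/6, and compute_MAP returns (0.8333..., [0.8333...]).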


def compute_recall(labels, scores, top_k=1):
    if len(labels) != len(scores):
        logger.error(
            "Failed computing RECALL because length of labels and scores are different")
        return -1
    if top_k < 0 or top_k > labels.shape[1]:
        top_k = labels.shape[1]
    recalls = []
    for i, (label, score) in enumerate(zip(labels, scores)):
        # Recall@k: fraction of this sample's positives found in the top_k results.
        sorted_indices = score.argsort()[::-1][:top_k]
        if sum(label) == 0:
            logger.error("Sample %d has no positive labels when computing RECALL; its recall is counted as 0" % i)
            recall = 0
        else:
            max_score = np.sum(label)
            recall = label[sorted_indices].sum() / max_score
        recalls.append(recall)
    return np.mean(recalls), recalls
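
# Toy example (invented numbers): with labels = np.array([[1, 0, 1]]),
# scores = np.array([[0.8, 0.7, 0.2]]) and top_k=2, the top-2 results
# contain 1 of the 2 positives, so compute_recall returns (0.5, [0.5]).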


def compute_precision(labels, scores, top_k=1):
    if len(labels) != len(scores):
        logger.error(
            "Failed computing PRECISION because length of labels and scores are different")
        return -1
    if top_k < 0 or top_k > labels.shape[1]:
        top_k = labels.shape[1]
    precisions = []
    for label, score in zip(labels, scores):
        # Precision@k: fraction of the top_k results that are positives.
        sorted_indices = score.argsort()[::-1][:top_k]
        precision = label[sorted_indices].sum() / top_k
        precisions.append(precision)
    return np.mean(precisions), precisions
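
# Toy example (invented numbers): the same labels/scores as above with
# top_k=2 give 1 positive among the 2 returned items, so
# compute_precision returns (0.5, [0.5]).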


def compute_MRR(labels, scores, top_k=1):
    # top_k is accepted for interface consistency with the other metrics
    # but is not used: MRR always looks at the full ranking.
    if len(labels) != len(scores):
        logger.error(
            "Failed computing MRR because length of labels and scores are different")
        return -1
    MRRs = []
    for i, (label, score) in enumerate(zip(labels, scores)):
        # Reciprocal rank of the highest-ranked positive (ranks are 1-based).
        sorted_indices = score.argsort()[::-1]
        sorted_label = label[sorted_indices]
        true_positions = np.where(sorted_label)[0]
        if len(true_positions) == 0:
            logger.error("Sample %d has no positive labels when computing MRR; its reciprocal rank is counted as 0" % i)
            MRR = 0
        else:
            MRR = 1.0 / (true_positions[0] + 1)
        MRRs.append(MRR)
    return np.mean(MRRs), MRRs
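
# Toy example (invented numbers): with labels = np.array([[0, 0, 1]]) and
# scores = np.array([[0.8, 0.7, 0.2]]), the first positive is ranked 3rd,
# so compute_MRR returns (0.3333..., [0.3333...]).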


def compute_top_positives(labels, scores, top_k=1):
    if len(labels) != len(scores):
        logger.error(
            "Failed computing top_positive because length of labels and scores are different")
        return -1
    if top_k < 0 or top_k > labels.shape[1]:
        top_k = labels.shape[1]
    top_positives = []
    for i, (label, score) in enumerate(zip(labels, scores)):
        # Hit@k: 1 if at least one positive appears in the top_k results, else 0.
        sorted_indices = score.argsort()[::-1][:top_k]
        if sum(label) == 0:
            logger.error("Sample %d has no positive labels when computing top_positive; it is counted as 0" % i)
            top_positive = 0
        else:
            top_positive = 1 if label[sorted_indices].sum() else 0
        top_positives.append(top_positive)
    return np.mean(top_positives), top_positives
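

if __name__ == "__main__":
    # Minimal smoke test, a hedged sketch rather than part of the original
    # module; the label/score values below are invented toy data
    # (two queries, three candidates each).
    labels = np.array([[0, 1, 0],
                       [1, 0, 1]])
    scores = np.array([[0.1, 0.9, 0.3],
                       [0.8, 0.7, 0.2]])
    print("MAP:", compute_MAP(labels, scores))                             # ~0.9167
    print("Recall@2:", compute_recall(labels, scores, top_k=2))            # 0.75
    print("Precision@2:", compute_precision(labels, scores, top_k=2))      # 0.5
    print("MRR:", compute_MRR(labels, scores))                             # 1.0
    print("Hit@1:", compute_top_positives(labels, scores, top_k=1))        # 1.0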