-
Notifications
You must be signed in to change notification settings - Fork 1
/
metrics.py
67 lines (55 loc) · 2.09 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
@File : metrics.py
@Time : 2024/01/27 15:49:48
@Author : Weihao Xia
@Version : 1.0
@Desc : modified from [CLIPScore](https://arxiv.org/abs/2104.08718) (EMNLP'21)
'''
from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
from pycocoevalcap.spice.spice import Spice
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.spice.spice import Spice
def get_all_metrics(refs, cands, return_per_cap=False):
    '''
    Score candidate captions against references with every captioning metric.

    refs is a list of lists of strings (the references for each sample)
    cands is a list of predictions, aligned by position with refs
    return_per_cap selects which half of each scorer's result is kept:
        the per-caption scores when true, the overall score otherwise.

    Returns a dict mapping metric name ('bleu', 'meteor', 'rouge', 'cider',
    'spice') to the selected score object, exactly as returned by the scorer.
    '''
    pycoco_eval_cap_scorers = [(Bleu(4), 'bleu'),
                               (Meteor(), 'meteor'),
                               (Rouge(), 'rouge'),
                               (Cider(), 'cider'),
                               (Spice(), 'spice')]

    # Tokenize once up front: the tokenized inputs are the same for every
    # scorer, so repeating the tokenization per metric is wasted work.
    # NOTE(review): assumes compute_score does not modify its inputs, which
    # is how the scorers are shared in pycocoevalcap's own evaluator - confirm.
    tokenized_refs, tokenized_cands = tokenize(refs, cands)

    metrics = {}
    for scorer, name in pycoco_eval_cap_scorers:
        overall, per_cap = scorer.compute_score(tokenized_refs, tokenized_cands)
        metrics[name] = per_cap if return_per_cap else overall
    return metrics
def tokenize(refs, cands, no_op=False):
    '''
    Build the index-keyed reference and candidate dicts handed to the scorers.

    refs is a list of lists of strings (the references for each sample)
    cands is a list of predictions
    Both are keyed by enumeration index, so the i-th group of references
    lines up with the i-th candidate.

    no_op is a debug option to see how significantly not using the PTB
    tokenizer affects things: when true the raw strings are returned as
    {idx: [str, ...]} without touching the tokenizer. Otherwise each caption
    is wrapped as {'caption': str} and both dicts are passed through
    PTBTokenizer.tokenize.

    Returns a (refs, cands) tuple of dicts.
    '''
    if no_op:
        # Debug path: the tokenizer is never used here, so it is not built.
        raw_refs = {idx: list(c_refs) for idx, c_refs in enumerate(refs)}
        raw_cands = {idx: [c] for idx, c in enumerate(cands)}
        return raw_refs, raw_cands

    # The tokenizer expects every caption wrapped in a {'caption': ...} record.
    wrapped_refs = {idx: [{'caption': r} for r in c_refs]
                    for idx, c_refs in enumerate(refs)}
    wrapped_cands = {idx: [{'caption': c}] for idx, c in enumerate(cands)}

    tokenizer = PTBTokenizer()
    tokenized_refs = tokenizer.tokenize(wrapped_refs)
    tokenized_cands = tokenizer.tokenize(wrapped_cands)
    return tokenized_refs, tokenized_cands
def pycoco_eval(scorer, refs, cands):
    '''
    Run one scorer on the tokenized references and candidates.

    scorer must provide compute_score(refs, cands), yielding a pair
    refs is a list of lists of strings
    cands is a list of predictions

    Returns the scorer's (average score, per-item scores) pair as a tuple.
    '''
    tokenized_refs, tokenized_cands = tokenize(refs, cands)
    corpus_score, item_scores = scorer.compute_score(tokenized_refs, tokenized_cands)
    return corpus_score, item_scores