Merge pull request #334 from mdekstrand/feature/rbp
Add RBP top-N metric
mdekstrand authored Nov 3, 2023
2 parents c6299fa + f2205fc commit 3179988
Showing 2 changed files with 242 additions and 1 deletion.
101 changes: 100 additions & 1 deletion lenskit/metrics/topn.py
@@ -3,6 +3,7 @@
"""

import logging
import warnings
import numpy as np
import pandas as pd

@@ -269,7 +270,7 @@ def dcg(recs, truth, discount=np.log2):

def ndcg(recs, truth, discount=np.log2, k=None):
    """
-    Compute the normalized discounted cumulative gain :cite:p:`Jarvelin2002-xf`.
+    Compute the normalized discounted cumulative gain :cite:p:`ndcg`.

    Discounted cumulative gain is computed as:
@@ -357,3 +358,101 @@ def _bulk_ndcg(recs, truth, discount=np.log2, k=None):
    dcg['ndcg'] = dcg['dcg'].fillna(0) / dcg['ideal']

    return dcg['ndcg']


def rbp(recs, truth, k=None, patience=0.5, normalize=False):
    """
    Evaluate recommendations with rank-biased precision :cite:p:`rbp` with a
    patience parameter :math:`\\gamma`.

    If :math:`r_{ui} \\in \\{0, 1\\}` is the binary relevance of item :math:`i`
    for user :math:`u`, this is computed by:

    .. math::
        \\begin{align*}
        \\operatorname{RBP}_\\gamma(L, u) & = (1 - \\gamma) \\sum_i r_{ui} \\gamma^{i-1}
        \\end{align*}

    The original RBP metric depends on the idea that the rank-biased sum of
    binary relevance scores in an infinitely-long, perfectly-precise list is
    :math:`1/(1 - \\gamma)`. However, in recommender evaluation, we usually have
    a small test set, so the maximum achievable RBP is significantly less, and
    is a function of the number of test items. With ``normalize=True``, the RBP
    metric will be normalized by the maximum achievable with the provided test
    data.

    Parameters:
        recs: the recommendation list.
        truth: the user's truth data.
        k(int): the maximum recommendation list length.
        patience(float): the patience parameter :math:`\\gamma`, the probability
            that the user continues browsing at each point.
        normalize(bool): whether to normalize the RBP scores; if ``True``,
            divides the RBP score by the maximum achievable with the test data.
    """
    if k is not None and k <= len(recs):
        recs = recs.iloc[:k]
    else:
        k = len(recs)

    if 'rank' not in recs.columns:
        recs = recs.assign(rank=np.arange(1, len(recs) + 1))

    if np.min(recs['rank']) != 1:
        warnings.warn('rank should start with 1')

    nrel = len(truth)
    if nrel == 0:
        return None

    good = recs['item'].isin(truth.index)
    ranks = recs['rank'][good]
    disc = patience ** (ranks - 1)
    rbp = np.sum(disc)
    if normalize:
        # normalize by the maximum RBP achievable with this test set
        max_rbp = np.sum(patience ** np.arange(min(nrel, k)))
        # _log.info('rbp=%e, nrel=%d, eff=%d, max=%e', rbp, nrel, min(nrel, k), max_rbp)
        return rbp / max_rbp
    else:
        # standard RBP normalization
        return rbp * (1 - patience)


@bulk_impl(rbp)
def _bulk_rbp(recs, truth, k=None, patience=0.5, normalize=False):
    if k is not None:
        recs = recs[recs['rank'] <= k]

    good = recs.join(truth, on=['LKTruthID', 'item'], how='inner')
    good['rbp_disc'] = patience ** (good['rank'] - 1)
    scores = good.groupby('LKRecID')['rbp_disc'].sum()

    if normalize:
        tns = truth.reset_index().groupby('LKTruthID')['item'].count()
        if k is not None:
            tns[tns > k] = k
        max_nrel = np.max(tns)
        # compute the exponents 0...max_nrel-1 (the discount powers for ranks 1...max_nrel)
        kseq = np.arange(max_nrel)
        # compute the discount at each rank
        nd = patience ** kseq
        # convert to a series of cumulative sums: the max RBP sum for each list length
        max_rbps = pd.Series(np.cumsum(nd), index=kseq + 1)

        # get a rec/truth mapping
        rt_map = recs[['LKRecID', 'LKTruthID']].drop_duplicates()
        rt_map.set_index('LKRecID', inplace=True)
        rt_map = rt_map.reindex(scores.index)
        # map to nrel, and then to the max RBPs
        rt_map = rt_map.join(tns.to_frame('nrel'), on='LKTruthID', how='left')
        rt_map = rt_map.join(max_rbps.to_frame('rbp_max'), on='nrel', how='left')

        # divide each score by its maximum achievable RBP
        scores /= rt_map['rbp_max']
    else:
        scores *= (1 - patience)

    scores = scores.reindex(recs['LKRecID'].unique(), fill_value=0)
    return scores
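
As a quick reference, here is a minimal sketch of how the new metric behaves on a tiny list (it mirrors test_rbp_missing in the test file below; the expected values follow directly from the formula in the docstring):

import pandas as pd

from lenskit.metrics.topn import rbp

# two of the three relevant test items are recommended, at ranks 1 and 2
recs = pd.DataFrame({'item': [1, 2]})
truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]}).set_index('item')

# unnormalized: (1 - 0.5) * (0.5**0 + 0.5**1) = 0.75
print(rbp(recs, truth, patience=0.5))

# normalized: the best achievable discount sum over min(nrel, k) = 2 ranks is
# 0.5**0 + 0.5**1 = 1.5, so the score is 1.5 / 1.5 = 1.0
print(rbp(recs, truth, patience=0.5, normalize=True))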
142 changes: 142 additions & 0 deletions tests/test_topn_rbp.py
@@ -0,0 +1,142 @@
import logging
import numpy as np
import pandas as pd

from pytest import approx, mark
from hypothesis import given
import hypothesis.strategies as st
import hypothesis.extra.numpy as nph

from lenskit.metrics.topn import rbp, _bulk_rbp
from lenskit.topn import RecListAnalysis
from lenskit.util.test import demo_recs

_log = logging.getLogger(__name__)


def test_rbp_empty():
    recs = pd.DataFrame({'item': []})
    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
    truth = truth.set_index('item')
    assert rbp(recs, truth) == approx(0.0)


def test_rbp_no_match():
    recs = pd.DataFrame({'item': [4]})
    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
    truth = truth.set_index('item')
    assert rbp(recs, truth) == approx(0.0)


def test_rbp_one_match():
    recs = pd.DataFrame({'item': [1]})
    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
    truth = truth.set_index('item')
    assert rbp(recs, truth) == approx(0.5)


@given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True), st.floats(0.05, 0.95))
def test_rbp_perfect(items, p):
    n = len(items)
    recs = pd.DataFrame({'item': items})
    truth = pd.DataFrame({'item': items, 'rating': 1})
    truth = truth.set_index('item').sort_index()
    assert rbp(recs, truth, patience=p) == approx(np.sum(p ** np.arange(n)) * (1 - p))


@given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True), st.floats(0.05, 0.95))
def test_rbp_perfect_norm(items, p):
    recs = pd.DataFrame({'item': items})
    truth = pd.DataFrame({'item': items, 'rating': 1})
    truth = truth.set_index('item').sort_index()
    assert rbp(recs, truth, patience=p, normalize=True) == approx(1.0)


@given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True),
       st.integers(1, 100), st.floats(0.05, 0.95))
def test_rbp_perfect_k(items, k, p):
    n = len(items)
    eff_n = min(n, k)
    recs = pd.DataFrame({'item': items})
    truth = pd.DataFrame({'item': items, 'rating': 1})
    truth = truth.set_index('item').sort_index()
    assert rbp(recs, truth, k=k, patience=p) == approx(np.sum(p ** np.arange(eff_n)) * (1 - p))


@given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True),
       st.integers(1, 100), st.floats(0.05, 0.95))
def test_rbp_perfect_k_norm(items, k, p):
    recs = pd.DataFrame({'item': items})
    truth = pd.DataFrame({'item': items, 'rating': 1})
    truth = truth.set_index('item').sort_index()
    assert rbp(recs, truth, k=k, patience=p, normalize=True) == approx(1.0)


def test_rbp_missing():
    recs = pd.DataFrame({'item': [1, 2]})
    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
    truth = truth.set_index('item').sort_index()
    # (1 + 0.5) * 0.5
    assert rbp(recs, truth) == approx(0.75)


def test_rbp_bulk_at_top():
    truth = pd.DataFrame.from_records([
        (1, 50, 3.5),
        (1, 30, 3.5)
    ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item'])

    recs = pd.DataFrame.from_records([
        (1, 1, 50, 1),
        (1, 1, 30, 2),
        (1, 1, 72, 3)
    ], columns=['LKRecID', 'LKTruthID', 'item', 'rank'])

    scores = _bulk_rbp(recs, truth)
    assert len(scores) == 1
    assert scores.index.tolist() == [1]
    assert scores.iloc[0] == approx(0.75)


def test_rbp_bulk_not_at_top():
    truth = pd.DataFrame.from_records([
        (1, 50, 3.5),
        (1, 30, 3.5)
    ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item'])

    recs = pd.DataFrame.from_records([
        (1, 1, 50, 1),
        (1, 1, 72, 2),
        (1, 1, 30, 3)
    ], columns=['LKRecID', 'LKTruthID', 'item', 'rank'])

    scores = _bulk_rbp(recs, truth)
    assert len(scores) == 1
    assert scores.index.tolist() == [1]
    assert scores.iloc[0] == approx((1 + 0.25) * 0.5)


@mark.parametrize('normalize', [False, True])
def test_rbp_bulk_match(demo_recs, normalize):
    "bulk and normal match"
    train, test, recs = demo_recs

    rla = RecListAnalysis()
    rla.add_metric(rbp, normalize=normalize)
    rla.add_metric(rbp, name='rbp_k', k=5, normalize=normalize)
    # metrics without the bulk capability, for comparison
    rla.add_metric(lambda *a: rbp(*a, normalize=normalize), name='ind_rbp')
    rla.add_metric(lambda *a, **k: rbp(*a, normalize=normalize, **k), name='ind_rbp_k', k=5)
    res = rla.compute(recs, test)

    res['diff'] = np.abs(res.rbp - res.ind_rbp)
    rl = res.nlargest(5, 'diff')
    _log.info('res:\n%s', rl)
    user = rl.index[0]
    _log.info('user: %s\n%s', user, rl.iloc[0])
    _log.info('test:\n%s', test[test['user'] == user])
    urecs = recs[recs['user'] == user].join(test.set_index(['user', 'item'])['rating'], on=['user', 'item'], how='left')
    _log.info('recs:\n%s', urecs[urecs['rating'].notnull()])

    assert res.rbp.values == approx(res.ind_rbp.values)
    assert res.rbp_k.values == approx(res.ind_rbp_k.values)
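And a hedged sketch of driving the metric through RecListAnalysis, the way test_rbp_bulk_match does; the small hand-built user/item frames here are illustrative assumptions rather than repository fixtures:

import pandas as pd

from lenskit.metrics.topn import rbp
from lenskit.topn import RecListAnalysis

# one recommendation list per user (columns: user, item, rank)
recs = pd.DataFrame({
    'user': [1, 1, 1, 2, 2],
    'item': [10, 11, 12, 10, 13],
    'rank': [1, 2, 3, 1, 2],
})
# held-out relevant items per user
test = pd.DataFrame({
    'user': [1, 1, 2],
    'item': [10, 12, 13],
    'rating': [4.0, 3.0, 5.0],
})

rla = RecListAnalysis()
rla.add_metric(rbp)
rla.add_metric(rbp, name='rbp_norm', normalize=True)
scores = rla.compute(recs, test)
# user 1 hits at ranks 1 and 3: rbp = 0.5 * (1 + 0.25) = 0.625,
# rbp_norm = (1 + 0.25) / (1 + 0.5) ~= 0.833
print(scores)

Because _bulk_rbp is registered via @bulk_impl, RecListAnalysis can score all lists in one vectorized pass rather than looping over users.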