feat: Add TimeBoundedPopScore for time-bounded popularity #493

Open · wants to merge 1 commit into base: main
65 changes: 59 additions & 6 deletions lenskit/lenskit/algorithms/basic.py
@@ -1,4 +1,4 @@
# This file is part of LensKit.

[CI: GitHub Actions / Check pre-commit hooks failed at line 1: pre-commit would modify this file]
# Copyright (C) 2018-2023 Boise State University
# Copyright (C) 2023-2024 Drexel University
# Licensed under the MIT license, see LICENSE.md for details.
@@ -8,23 +8,24 @@
Basic utility algorithms and combiners.
"""

from datetime import datetime, timedelta
import logging
from collections.abc import Iterable, Sequence
from typing import overload

import numpy as np
import pandas as pd
from typing_extensions import override

from lenskit.algorithms import CandidateSelector, Predictor, Recommender
from lenskit.algorithms.bias import Bias # noqa: F401
from lenskit.algorithms.ranking import TopN # noqa: F401
from lenskit.data import Dataset
from lenskit.data.matrix import CSRStructure
from lenskit.data.vocab import Vocabulary
from lenskit.util import derivable_rng

_logger = logging.getLogger(__name__)

[CI: GitHub Actions / Annotate with lint failures, Ruff I001 at lenskit/lenskit/algorithms/basic.py:11:1: Import block is un-sorted or un-formatted]


class PopScore(Predictor):
@@ -41,7 +42,7 @@
- ``'count'``

Attributes:
item_pop_(pandas.Series):
item_scores_(pandas.Series):
Item popularity scores.
"""

@@ -51,8 +52,13 @@
@override
def fit(self, data: Dataset, **kwargs):
_logger.info("counting item popularity")
stats = data.item_stats()
scores = stats["count"]

counts = data.item_stats()["count"]
self.item_scores_ = self._fit_internal(counts, **kwargs)

return self

def _fit_internal(self, scores: pd.Series, **kwargs):
if self.score_method == "rank":
_logger.info("ranking %d items", len(scores))
scores = scores.rank().sort_index()
@@ -68,9 +74,7 @@
else:
raise ValueError("invalid scoring method " + repr(self.score_method))

self.item_scores_ = scores

return self
return scores

@override
def predict_for_user(self, user, items, ratings=None):
@@ -80,6 +84,55 @@
return "PopScore({})".format(self.score_method)


class TimeBoundedPopScore(PopScore):
"""
Score items by their time-bounded popularity, i.e., the popularity in the
most recent `time_window` period. Use with :py:class:`TopN` to get a
most-popular-recent-items recommender.

Args:
time_window(datetime.timedelta):
The time window for computing popularity scores.
score_method(str):
The method for computing popularity scores. Can be one of the following:

- ``'quantile'`` (the default)
- ``'rank'``
- ``'count'``

Attributes:
item_scores_(pandas.Series):
Time-bounded item popularity scores.
"""

def __init__(self, time_window: timedelta, score_method="quantile"):
super().__init__(score_method)

self.time_window = time_window
self.score_method = score_method

@override
def fit(self, data: Dataset, **kwargs):
_logger.info("counting time-bounded item popularity")

log = data.interaction_log("numpy")
Contributor (Author) commented:
I was thinking about adding a function to time-bound a Dataset, but realized that might be too much since we are just going to use the counts. I assume that would still be useful for some things (a validation set?), but doing the calculation here seems cheaper. LMK if this seems right!
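
For reference, a rough sketch of the heavier alternative mentioned in the comment: pre-filter the interactions to the window and build a fresh dataset, so the unmodified PopScore.fit could be reused. This is illustrative only; the `time_bounded_dataset` helper is hypothetical, and it assumes the interactions are available as a pandas DataFrame with a `timestamp` column, using the same `from_interactions_df` helper the tests below import.

from datetime import datetime, timedelta

import pandas as pd

from lenskit.data.convert import from_interactions_df


def time_bounded_dataset(interactions: pd.DataFrame, window: timedelta):
    # Hypothetical helper: keep only interactions newer than the window cutoff.
    cutoff = (datetime.now() - window).timestamp()
    recent = interactions[interactions["timestamp"] > cutoff]
    return from_interactions_df(recent)
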


counts = np.zeros(data.item_count, dtype=np.int32)
start_timestamp = (datetime.now() - self.time_window).timestamp()
# Count only interactions inside the window; keep everything if the log has no timestamps.
item_nums = log.item_nums
if log.timestamps is not None:
    item_nums = item_nums[log.timestamps > start_timestamp]
np.add.at(counts, item_nums, 1)

self.item_scores_ = super()._fit_internal(
pd.Series(counts, index=data.items.index), **kwargs
)

return self

@override
def __str__(self):
return "TimeBoundedPopScore({}, {})".format(self.time_window, self.score_method)


class Memorized(Predictor):
"""
The memorized algorithm memorizes scores provided at construction time
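
For context on how the new class is meant to be used, here is a minimal usage sketch wrapping it in TopN, as the docstring suggests. The exact TopN constructor and recommend() signature shown here are assumptions about the surrounding API, not part of this diff, and `dataset` stands in for a lenskit.data.Dataset built elsewhere.

from datetime import timedelta

from lenskit.algorithms import basic
from lenskit.algorithms.ranking import TopN

# Score items by popularity within the last 30 days, then recommend the top N.
scorer = basic.TimeBoundedPopScore(timedelta(days=30))
rec = TopN(scorer)
rec.fit(dataset)  # `dataset` is a lenskit.data.Dataset prepared elsewhere
recs = rec.recommend(42, n=10)  # top-10 recently-popular items for user 42
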
61 changes: 61 additions & 0 deletions lenskit/tests/algorithms/test_time_bounded_popular.py
@@ -0,0 +1,61 @@
# This file is part of LensKit.

[CI: GitHub Actions / Check pre-commit hooks failed at line 1: pre-commit would modify this file]
# Copyright (C) 2018-2023 Boise State University
# Copyright (C) 2023-2024 Drexel University
# Licensed under the MIT license, see LICENSE.md for details.
# SPDX-License-Identifier: MIT

from datetime import datetime, timedelta
import pickle

import numpy as np
import pandas as pd

from lenskit.data.convert import from_interactions_df
from lenskit.algorithms import basic

day = timedelta(days=1)

[CI: GitHub Actions / Annotate with lint failures, Ruff I001 at lenskit/tests/algorithms/test_time_bounded_popular.py:7:1: Import block is un-sorted or un-formatted]
now = int(datetime.now().timestamp())
one_day_ago = now - day.total_seconds()
simple_df = pd.DataFrame(
{
"item": [1, 2, 2, 3],
"user": [10, 12, 10, 13],
"rating": [4.0, 3.0, 5.0, 2.0],
"timestamp": [now, one_day_ago, one_day_ago, one_day_ago],
}
)
simple_ds = from_interactions_df(simple_df)


def test_time_bounded_pop_score_quantile_one_day_window():
algo = basic.TimeBoundedPopScore(day)
algo.fit(simple_ds)
assert algo.item_scores_.equals(pd.Series([1.0, 0.0, 0.0], index=[1, 2, 3]))


def test_time_bounded_pop_score_quantile_two_day_window():
algo = basic.TimeBoundedPopScore(2 * day)
algo.fit(simple_ds)
assert algo.item_scores_.equals(pd.Series([0.25, 1.0, 0.5], index=[1, 2, 3]))


def test_time_bounded_pop_score_rank():
algo = basic.TimeBoundedPopScore(2 * day, "rank")
algo.fit(simple_ds)
assert algo.item_scores_.equals(pd.Series([1.5, 3.0, 1.5], index=[1, 2, 3]))
@ZiyaoWei (Contributor, Author) commented on Oct 18, 2024:
These tests seem very low level, but I think that's OK since the internals are important and likely to be stable over time?
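
As a sanity check on the expected values above, here is a small worked computation for the two-day-window quantile case. It assumes the 'quantile' method scores each item by its cumulative share of counts (sorted ascending), which is what the expected numbers imply; the tie between items 1 and 3 is resolved in index order here.

import pandas as pd

# Counts within the 2-day window for simple_df: item 1 -> 1, item 2 -> 2, item 3 -> 1.
counts = pd.Series([1, 2, 1], index=[1, 2, 3])

# Cumulative count mass over items sorted by count, divided by the total count.
quantiles = counts.sort_values(kind="stable").cumsum() / counts.sum()
print(quantiles.sort_index())  # item 1 -> 0.25, item 2 -> 1.0, item 3 -> 0.5
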



def test_time_bounded_pop_score_counts(rng):
algo = basic.TimeBoundedPopScore(2 * day, "count")
algo.fit(simple_ds)
assert algo.item_scores_.equals(pd.Series([1, 2, 1], index=[1, 2, 3], dtype=np.int32))


def test_time_bounded_pop_score_save_load():
original = basic.TimeBoundedPopScore(day)
original.fit(simple_ds)

mod = pickle.dumps(original)
algo = pickle.loads(mod)

assert all(algo.item_scores_ == original.item_scores_)