Skip to content

ndcg

logger = logging.getLogger(__name__) module-attribute

NDCGK

Bases: ListwiseMetricK

Computes the normalized sum of gains of all items in a recommendation list.

The normalized Discounted Cumulative Gain (nDCG) is the DCG of a recommendation list divided by the ideal DCG (IDCG), i.e. the best possible discounted cumulative gain for a list of recommendations of length K for a user with history length N.

Scores are always in the interval [0, 1]

.. math::

\text{NormalizedDiscountedCumulativeGain}(u) = \frac{\text{DCG}(u)}{\text{IDCG}(u)}

where IDCG stands for Ideal Discounted Cumulative Gain, computed as:

.. math::

\text{IDCG}(u) = \sum\limits_{j=1}^{\text{min}(K, |y^{true}_u|)} \frac{1}{\log_2 (j + 1)}

:param K: Size of the recommendation list consisting of the Top-K item predictions.
:type K: int

This code is adapted from RecPack :cite:recpack

Source code in src/streamsight/metrics/ranking/ndcg.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
class NDCGK(ListwiseMetricK):
    r"""Normalized Discounted Cumulative Gain over the Top-K recommendations.

    For every user, the discounted cumulative gain (DCG) collected by the
    recommended items that also occur in the ground truth is divided by the
    ideal DCG (IDCG): the best possible discounted cumulative gain for a
    list of recommendations of length K for a user with history length N.

    Scores are always in the interval [0, 1].

    .. math::

        \text{NormalizedDiscountedCumulativeGain}(u) = \frac{\text{DCG}(u)}{\text{IDCG}(u)}

    where IDCG stands for Ideal Discounted Cumulative Gain, computed as:

    .. math::

        \text{IDCG}(u) = \sum\limits_{j=1}^{\text{min}(K, |y^{true}_u|)} \frac{1}{\log_2 (j + 1)}

    :param K: Size of the recommendation list consisting of the Top-K item predictions.
    :type K: int

    This code is adapted from RecPack :cite:`recpack`
    """

    IS_BASE: bool = False

    def _calculate(self, y_true: csr_matrix, y_pred: csr_matrix) -> None:
        """Compute the per-user nDCG and store it in ``self._scores``.

        Also (re)builds ``self.discount_template`` and ``self.IDCG_cache``
        from ``self.K`` on every call.

        :param y_true: Ground-truth interaction matrix; its entries are used
            directly as the gains (binary relevance).
        :type y_true: csr_matrix
        :param y_pred: Prediction matrix. NOTE(review): the values are treated
            as 1-based ranks of the Top-K items - confirm against the caller
            that prepares the matrices.
        :type y_pred: csr_matrix
        """
        logger.debug(f"NDCGK compute started - {self.name}")
        logger.debug(f"Number of users: {y_true.shape[0]}")
        logger.debug(f"Number of ground truth interactions: {y_true.nnz}")

        # Discount 1 / log2(j + 1) for every list position j = 1..K.
        positions = np.arange(2, self.K + 2)
        self.discount_template = 1.0 / np.log2(positions)
        # Lookup table of IDCG values: entry n (n >= 1) is the sum of the
        # first n discounts. Entry 0 is set to 1, presumably so that a user
        # without relevant items never yields a zero divisor.
        self.IDCG_cache = np.concatenate([[1], np.cumsum(self.discount_template)])

        # Keep only the predictions that are also present in the ground truth
        # and turn each stored value v into the discount divisor log2(v + 1).
        hit_discounts = y_pred.multiply(y_true)
        hit_discounts.data = np.log2(hit_discounts.data + 1)

        # Binary relevance: the gain of an item is its entry in y_true.
        discounted_gains = sparse_divide_nonzero(y_true, hit_discounts)
        user_dcg = discounted_gains.sum(axis=1)

        # The ideal list holds at most K relevant items, so cap the count at K
        # before using it as an index into the IDCG table.
        relevant_counts = y_true.sum(axis=1).astype(np.int32)
        relevant_counts[relevant_counts > self.K] = self.K
        user_idcg = self.IDCG_cache[relevant_counts]

        self._scores = sparse_divide_nonzero(csr_matrix(user_dcg), csr_matrix(user_idcg))

        logger.debug(f"NDCGK compute complete - {self.name}")
IS_BASE = False class-attribute instance-attribute

name property

Name of the metric.

params property

Parameters of the metric.

identifier property

Identifier of the object.

Identifier is made by combining the class name with the parameters passed at construction time.

Constructed by recreating the initialisation call. Example: Algorithm(param_1=value)

:return: Identifier of the object

micro_result property

User level results for the metric.

Contains an entry for every user.

:return: The results DataFrame with columns: user_id, score
:rtype: pd.DataFrame

macro_result property

Global metric value obtained by taking the average over all users.

:raises ValueError: If the metric has not been calculated yet.
:return: The global metric value.
:rtype: float, optional

is_time_aware property

Whether the metric is time-aware.

timestamp_limit property

The timestamp limit for the metric.

num_items property

Dimension of the item-space in both y_true and y_pred

num_users property

Dimension of the user-space in both y_true and y_pred after elimination of users without interactions in y_true.

K = K instance-attribute

col_names property

The names of the columns in the results DataFrame.

get_params()

Get the parameters of the metric.

Source code in src/streamsight/metrics/core/base.py
53
54
55
56
57
def get_params(self) -> dict[str, int | None]:
    """Get the parameters of the metric."""
    if not self.is_time_aware:
        return {}
    return {"timestamp_limit": self._timestamp_limit}

calculate(y_true, y_pred)

Calculates this metric for all nonzero users in y_true, given true labels and predicted scores.

Source code in src/streamsight/metrics/core/base.py
116
117
118
119
120
121
def calculate(self, y_true: csr_matrix, y_pred: csr_matrix) -> None:
    """Prepare both matrices and compute the metric on the prepared pair.

    ``y_true`` and ``y_pred`` are first passed through
    ``self._prepare_matrix``; the two matrices it returns are then handed to
    ``self._calculate``. Nothing is returned.
    """
    prepared_true, prepared_pred = self._prepare_matrix(y_true, y_pred)
    self._calculate(prepared_true, prepared_pred)

prepare_matrix(y_true, y_pred)

Source code in src/streamsight/metrics/core/top_k.py
57
58
59
60
def prepare_matrix(
    self, y_true: csr_matrix, y_pred: csr_matrix
) -> tuple[csr_matrix, csr_matrix]:
    """Run the parent preparation, then pass the predictions through ``get_top_K_ranks``.

    Returns the prepared ground truth together with the result of
    ``get_top_K_ranks(prepared_predictions, self.K)``.

    NOTE(review): this method is named ``prepare_matrix`` but delegates to
    ``super()._prepare_matrix`` (leading underscore) - confirm that callers
    which invoke ``_prepare_matrix`` actually reach this override.
    """
    prepared_true, prepared_pred = super()._prepare_matrix(y_true, y_pred)
    return prepared_true, get_top_K_ranks(prepared_pred, self.K)