ndcg

`logger = logging.getLogger(__name__)` `module-attribute` ¶

`NDCGK` ¶

Bases: ListwiseMetricK

Computes the normalized sum of gains of all items in a recommendation list.

The normalized Discounted Cumulative Gain (nDCG) is similar to DCG, but normalizes by dividing the resulting sum of cumulative gains by the best possible discounted cumulative gain for a list of recommendations of length K for a user with history length N.

Scores are always in the interval [0, 1]

.. math::

\text{NormalizedDiscountedCumulativeGain}(u) = \frac{\text{DCG}(u)}{\text{IDCG}(u)}

where IDCG stands for Ideal Discounted Cumulative Gain, computed as:

.. math::

\text{IDCG}(u) = \sum\limits_{j=1}^{\text{min}(K, |y^{true}_u|)} \frac{1}{\log_2 (j + 1)}

:param K: Size of the recommendation list consisting of the Top-K item predictions.

:type K: int

This code is adapted from RecPack :cite:`recpack`.

Source code in src/streamsight/metrics/ranking/ndcg.py

class NDCGK(ListwiseMetricK):
    """Normalized Discounted Cumulative Gain over a Top-K recommendation list.

    nDCG takes the discounted cumulative gain (DCG) of a user's
    recommendation list and divides it by the ideal DCG (IDCG), i.e. the
    best DCG attainable for a list of length K given a user whose history
    has length N.

    Scores are always in the interval [0, 1]

    .. math::

        \\text{NormalizedDiscountedCumulativeGain}(u) = \\frac{\\text{DCG}(u)}{\\text{IDCG}(u)}

    where IDCG stands for Ideal Discounted Cumulative Gain, computed as:

    .. math::

        \\text{IDCG}(u) = \\sum\\limits_{j=1}^{\\text{min}(K, |y^{true}_u|)} \\frac{1}{\\log_2 (j + 1)}

    :param K: Size of the recommendation list consisting of the Top-K item predictions.
    :type K: int

    This code is adapted from RecPack :cite:`recpack`
    """

    IS_BASE: bool = False

    def _calculate(self, y_true: csr_matrix, y_pred: csr_matrix) -> None:
        """Compute per-user nDCG and store the result in ``self._scores``.

        Also (re)builds ``self.discount_template`` and ``self.IDCG_cache``
        from ``self.K`` on every call.

        :param y_true: Ground-truth interactions; treated as binary relevance.
        :type y_true: csr_matrix
        :param y_pred: Predictions; the values are used as ranks in the
            discount term ``log2(rank + 1)`` (presumably the Top-K ranks
            produced by the matrix preparation step -- confirm with caller).
        :type y_pred: csr_matrix
        """
        logger.debug(f"NDCGK compute started - {self.name}")
        logger.debug(f"Number of users: {y_true.shape[0]}")
        logger.debug(f"Number of ground truth interactions: {y_true.nnz}")

        # Per-position discounts 1 / log2(j + 1) for j = 1..K.
        positions = np.arange(2, self.K + 2)
        self.discount_template = 1.0 / np.log2(positions)

        # IDCG lookup indexed by (capped) history length: entry n is the sum
        # of the first n discounts. Entry 0 is set to 1, so a history length
        # of 0 maps to a divisor of 1 rather than 0.
        running_ideal = np.cumsum(self.discount_template)
        self.IDCG_cache = np.concatenate([[1], running_ideal])

        # Keep only the predictions that are also in the ground truth, then
        # turn each surviving value r into its discount term log2(r + 1).
        hit_discounts = y_pred.multiply(y_true)
        hit_discounts.data = np.log2(hit_discounts.data + 1)

        # Binary relevance: the gain of every hit is the y_true entry itself.
        user_dcg = sparse_divide_nonzero(y_true, hit_discounts).sum(axis=1)

        # History length per user, capped at K so it is a valid cache index.
        capped_len = y_true.sum(axis=1).astype(np.int32)
        capped_len[capped_len > self.K] = self.K
        user_idcg = self.IDCG_cache[capped_len]

        self._scores = sparse_divide_nonzero(
            csr_matrix(user_dcg),
            csr_matrix(user_idcg),
        )

        logger.debug(f"NDCGK compute complete - {self.name}")

`IS_BASE = False` `class-attribute` `instance-attribute` ¶

`name` `property` ¶

Name of the metric.

`params` `property` ¶

Parameters of the metric.

`identifier` `property` ¶

Identifier of the object.

Identifier is made by combining the class name with the parameters passed at construction time.

Constructed by recreating the initialisation call. Example: Algorithm(param_1=value)

:return: Identifier of the object

`micro_result` `property` ¶

User level results for the metric.

Contains an entry for every user.

:return: The results DataFrame with columns: user_id, score

:rtype: pd.DataFrame

`macro_result` `property` ¶

Global metric value obtained by taking the average over all users.

:raises ValueError: If the metric has not been calculated yet.

:return: The global metric value.

:rtype: float, optional

`is_time_aware` `property` ¶

Whether the metric is time-aware.

`timestamp_limit` `property` ¶

The timestamp limit for the metric.

`num_items` `property` ¶

Dimension of the item-space in both y_true and y_pred

`num_users` `property` ¶

Dimension of the user-space in both y_true and y_pred after elimination of users without interactions in y_true.

`K = K` `instance-attribute` ¶

`col_names` `property` ¶

The names of the columns in the results DataFrame.

`get_params()` ¶

Get the parameters of the metric.

Source code in src/streamsight/metrics/core/base.py

def get_params(self) -> dict[str, int | None]:
    """Get the parameters of the metric."""
    if not self.is_time_aware:
        return {}
    return {"timestamp_limit": self._timestamp_limit}

`calculate(y_true, y_pred)` ¶

Calculates this metric for all nonzero users in y_true, given true labels and predicted scores.

Source code in src/streamsight/metrics/core/base.py

def calculate(self, y_true: csr_matrix, y_pred: csr_matrix) -> None:
    """Prepare the input matrices and compute this metric from them.

    The inputs are first passed through ``_prepare_matrix``; the two
    matrices it returns are then handed to ``_calculate``. Nothing is
    returned by this method.

    :param y_true: True labels.
    :param y_pred: Predicted scores.
    """
    prepared_true, prepared_pred = self._prepare_matrix(y_true, y_pred)
    self._calculate(prepared_true, prepared_pred)

`prepare_matrix(y_true, y_pred)` ¶

Source code in src/streamsight/metrics/core/top_k.py

def prepare_matrix(self, y_true: csr_matrix, y_pred: csr_matrix) -> tuple[csr_matrix, csr_matrix]:
    """Run the parent's matrix preparation, then convert predictions to Top-K ranks.

    :param y_true: True labels.
    :param y_pred: Predicted scores.
    :return: The prepared ``y_true`` and the result of
        ``get_top_K_ranks`` applied to the prepared ``y_pred`` with ``self.K``.
    """
    # NOTE(review): the base-class ``calculate`` listing calls
    # ``self._prepare_matrix``, while this method is named ``prepare_matrix``
    # -- confirm that this override is actually reached.
    base_true, base_pred = super()._prepare_matrix(y_true, y_pred)
    return base_true, get_top_K_ranks(base_pred, self.K)

ndcg

logger = logging.getLogger(__name__) module-attribute ¶

NDCGK ¶

IS_BASE = False class-attribute instance-attribute ¶

name property ¶

params property ¶

identifier property ¶

micro_result property ¶

macro_result property ¶

is_time_aware property ¶

timestamp_limit property ¶

num_items property ¶

num_users property ¶

K = K instance-attribute ¶

col_names property ¶

get_params() ¶

calculate(y_true, y_pred) ¶

prepare_matrix(y_true, y_pred) ¶