dcg

`logger = logging.getLogger(__name__)` `module-attribute` ¶

`DCGK` ¶

Bases: ListwiseMetricK

Computes the sum of gains of all items in a recommendation list.

Relevant items that are ranked higher in the Top-K recommendations have a higher gain.

The Discounted Cumulative Gain (DCG) is computed for every user as

.. math::

\text{DiscountedCumulativeGain}(u) = \sum\limits_{i \in Top-K(u)} \frac{y^{true}_{u,i}}{\log_2 (\text{rank}(u,i) + 1)}

:param K: Size of the recommendation list consisting of the Top-K item predictions. :type K: int

This code is adapted from RecPack :cite:`recpack`

Source code in src/streamsight/metrics/ranking/dcg.py

class DCGK(ListwiseMetricK):
    """Computes the sum of gains of all items in a recommendation list.

    Relevant items that are ranked higher in the Top-K recommendations have a higher gain.

    The Discounted Cumulative Gain (DCG) is computed for every user as

    .. math::

        \\text{DiscountedCumulativeGain}(u) = \\sum\\limits_{i \\in Top-K(u)} \\frac{y^{true}_{u,i}}{\\log_2 (\\text{rank}(u,i) + 1)}


    :param K: Size of the recommendation list consisting of the Top-K item predictions.
    :type K: int

    This code is adapted from RecPack :cite:`recpack`
    """
    IS_BASE: bool = False

    def _calculate(self, y_true: csr_matrix, y_pred: csr_matrix) -> None:
        """Compute the per-user DCG and store it in ``self._scores``.

        :param y_true: Ground-truth interaction matrix. The computation treats
            it as binary relevance.
        :type y_true: csr_matrix
        :param y_pred: Prediction matrix whose stored values are used as ranks
            (presumably the Top-K rank matrix built by the caller; confirm
            against the caller).
        :type y_pred: csr_matrix
        """
        logger.debug("DCGK compute started - %s", self.name)
        logger.debug("Shape of matrix: (%d, %d)", y_true.shape[0], y_true.shape[1])
        logger.debug("Number of ground truth interactions: %d", y_true.nnz)

        # Keep only the ranks of entries that also appear in the ground truth.
        denominator = y_pred.multiply(y_true)
        # Denominator: log2(rank_i + 1)
        denominator.data = np.log2(denominator.data + 1)
        # Binary relevance
        # Numerator: rel_i
        numerator = y_true

        # NOTE(review): sparse_divide_nonzero is assumed to divide only where
        # the denominator has stored entries -- confirm against its definition.
        dcg = sparse_divide_nonzero(numerator, denominator)

        # One row per user: sum of discounted gains over that user's items.
        self._scores = csr_matrix(dcg.sum(axis=1))

        logger.debug("DCGK compute complete - %s", self.name)

`IS_BASE = False` `class-attribute` `instance-attribute` ¶

`name` `property` ¶

Name of the metric.

`params` `property` ¶

Parameters of the metric.

`identifier` `property` ¶

Identifier of the object.

Identifier is made by combining the class name with the parameters passed at construction time.

Constructed by recreating the initialisation call. Example: Algorithm(param_1=value)

:return: Identifier of the object

`micro_result` `property` ¶

User level results for the metric.

Contains an entry for every user.

:return: The results DataFrame with columns: user_id, score :rtype: pd.DataFrame

`macro_result` `property` ¶

Global metric value obtained by taking the average over all users.

:raises ValueError: If the metric has not been calculated yet. :return: The global metric value. :rtype: float, optional

`is_time_aware` `property` ¶

Whether the metric is time-aware.

`timestamp_limit` `property` ¶

The timestamp limit for the metric.

`num_items` `property` ¶

Dimension of the item-space in both y_true and y_pred

`num_users` `property` ¶

Dimension of the user-space in both y_true and y_pred after elimination of users without interactions in y_true.

`K = K` `instance-attribute` ¶

`col_names` `property` ¶

The names of the columns in the results DataFrame.

`get_params()` ¶

Get the parameters of the metric.

Source code in src/streamsight/metrics/core/base.py

def get_params(self) -> dict[str, int | None]:
    """Get the parameters of the metric."""
    if not self.is_time_aware:
        return {}
    return {"timestamp_limit": self._timestamp_limit}

`calculate(y_true, y_pred)` ¶

Calculates this metric for all nonzero users in y_true, given true labels and predicted scores.

Source code in src/streamsight/metrics/core/base.py

def calculate(self, y_true: csr_matrix, y_pred: csr_matrix) -> None:
    """Run the metric on the given true labels and predicted scores.

    Both matrices first go through ``self._prepare_matrix``; the prepared
    pair is then handed to ``self._calculate``.
    """
    prepared_true, prepared_pred = self._prepare_matrix(y_true, y_pred)
    self._calculate(prepared_true, prepared_pred)

`prepare_matrix(y_true, y_pred)` ¶

Source code in src/streamsight/metrics/core/top_k.py

def prepare_matrix(self, y_true: csr_matrix, y_pred: csr_matrix) -> tuple[csr_matrix, csr_matrix]:
    """Prepare both matrices via the parent class, then rank the predictions.

    The parent's ``_prepare_matrix`` output for ``y_pred`` is passed through
    ``get_top_K_ranks`` with ``self.K``; the prepared ``y_true`` is returned
    alongside it unchanged.
    """
    # NOTE(review): this method has no leading underscore, unlike the parent
    # hook ``_prepare_matrix`` it delegates to -- confirm which name callers use.
    prepared_true, prepared_pred = super()._prepare_matrix(y_true, y_pred)
    return prepared_true, get_top_K_ranks(prepared_pred, self.K)

dcg

logger = logging.getLogger(__name__) module-attribute ¶

DCGK ¶

IS_BASE = False class-attribute instance-attribute ¶

name property ¶

params property ¶

identifier property ¶

micro_result property ¶

macro_result property ¶

is_time_aware property ¶

timestamp_limit property ¶

num_items property ¶

num_users property ¶

K = K instance-attribute ¶

col_names property ¶

get_params() ¶

calculate(y_true, y_pred) ¶

prepare_matrix(y_true, y_pred) ¶