leave_n_out_setting

`logger = logging.getLogger(__name__)` `module-attribute` ¶

`LeaveNOutSetting` ¶

Bases: Setting

Leave-N-Out setting for splitting data.

Splits the dataset into training and test sets by leaving out the last N interactions for each user as test data, using the previous n_seq_data interactions as context.

Source code in src/recnexteval/settings/strategy/leave_n_out_setting.py

class LeaveNOutSetting(Setting):
    """Leave-N-Out evaluation setting.

    For every user, the last N interactions are held out as test data and the
    preceding ``n_seq_data`` interactions serve as context. The remaining
    interactions form the training set.
    """

    IS_BASE: bool = False

    def __init__(self, n_seq_data: int = 1, N: int = 1, seed: int = 42) -> None:
        """Configure the setting and build its splitter.

        Args:
            n_seq_data: Number of previous interactions used as context;
                passed to ``NLastInteractionSplitter``.
            N: Number of last interactions left out per user; also stored
                as ``top_K``.
            seed: Seed forwarded to the ``Setting`` base class.
        """
        super().__init__(seed=seed)
        self.n_seq_data = n_seq_data
        # top_K is the name used for the number of items to predict
        self.top_K = N
        logger.info("Splitting data")
        self._splitter = NLastInteractionSplitter(N, n_seq_data)

    def _split(self, data: InteractionMatrix) -> None:
        """Populate training, unlabeled and ground-truth data from ``data``.

        Args:
            data: Interaction matrix to be split. Must contain timestamps.
        """
        background, held_out = self._splitter.split(data)
        self._training_data = background

        # Work on a copy so the stored background data is not modified
        # by the prediction data processor.
        context = self._training_data.copy()

        processed = self.prediction_data_processor.process(
            past_interaction=context,
            future_interaction=held_out,
            top_K=self.top_K,
        )
        self._unlabeled_data, self._ground_truth_data = processed

        # This setting has no timestamp window.
        self._t_window = None

`IS_BASE = False` `class-attribute` `instance-attribute` ¶

`n_seq_data = n_seq_data` `instance-attribute` ¶

`top_K = N` `instance-attribute` ¶

`name` `property` ¶

Name of the object's class.

:return: Name of the object's class :rtype: str

`params` `property` ¶

Parameters of the object.

:return: Parameters of the object :rtype: dict

`identifier` `property` ¶

Name of the setting.

`seed = seed` `instance-attribute` ¶

`prediction_data_processor = PredictionDataProcessor()` `instance-attribute` ¶

`num_split` `property` ¶

Get number of splits created from dataset.

This property defaults to 1 (no splits on training set) for typical settings. For SlidingWindowSetting, this is typically greater than 1 if there are multiple splits created from the sliding window.

Returns:

Type	Description
`int`	Number of splits created from dataset.

`is_ready` `property` ¶

Check if setting is ready for evaluation.

Returns:

Type	Description
`bool`	True if the setting has been split and is ready to use.

`is_sliding_window_setting` `property` ¶

Check if setting is SlidingWindowSetting.

Returns:

Type	Description
`bool`	True if this is a SlidingWindowSetting instance.

`training_data` `property` ¶

Get background data for initial model training.

Returns:

Type	Description
`InteractionMatrix`	InteractionMatrix of training interactions.

`t_window` `property` ¶

Get the upper timestamp of the window in split.

In settings that respect the global timeline, returns a timestamp value. In SlidingWindowSetting, returns a list of timestamp values. In settings like LeaveNOutSetting, returns None.

Returns:

Type	Description
`Union[None, int, list[int]]`	Timestamp limit for the data (int, list of ints, or None).

`unlabeled_data` `property` ¶

Get unlabeled data for model predictions.

Contains the user/item ID for prediction along with previous sequential interactions. Used to make predictions on ground truth data.

Returns:

Type	Description
`InteractionMatrix \| list[InteractionMatrix]`	Single InteractionMatrix or list of InteractionMatrix for sliding window setting.

`ground_truth_data` `property` ¶

Get ground truth data for model evaluation.

Contains the actual user-item interactions that the model should predict.

Returns:

Type	Description
`InteractionMatrix \| list[InteractionMatrix]`	Single InteractionMatrix or list of InteractionMatrix for sliding window.

`incremental_data` `property` ¶

Get data for incrementally updating the model.

Only available for SlidingWindowSetting.

Returns:

Type	Description
`list[InteractionMatrix]`	List of InteractionMatrix objects for incremental updates.

Raises:

Type	Description
`AttributeError`	If setting is not SlidingWindowSetting.

`get_params()` ¶

Get the parameters of the setting.

Source code in src/recnexteval/settings/base.py

def get_params(self) -> dict[str, Any]:
    """Return the public instance attributes of the setting as a dict.

    Attributes whose name starts with an underscore are skipped, as is
    ``prediction_data_processor``.
    """
    hidden = {"prediction_data_processor"}
    return {
        key: value
        for key, value in vars(self).items()
        if not (key.startswith("_") or key in hidden)
    }

`split(data)` ¶

Split data according to the setting.

Calling this method changes the state of the setting object to be ready for evaluation. The method splits data into training_data, ground_truth_data, and unlabeled_data.

Note

SlidingWindowSetting will have an additional attribute incremental_data.

Parameters:

Name	Type	Description	Default
`data`	`InteractionMatrix`	Interaction matrix to be split.	required

Source code in src/recnexteval/settings/base.py

def split(self, data: InteractionMatrix) -> None:
    """Split data according to the setting.

    Calling this method changes the state of the setting object to be ready
    for evaluation. The method splits data into training_data, ground_truth_data,
    and unlabeled_data.

    Note:
        SlidingWindowSetting will have an additional attribute incremental_data.

    Args:
        data: Interaction matrix to be split.
    """
    logger.debug("Splitting data...")
    self._num_full_interactions = data.num_interactions

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # (or go negative) when the system clock is adjusted mid-split.
    start = time.perf_counter()
    self._split(data)
    elapsed = time.perf_counter() - start
    # Fixed-point formatting: a bare ".3" precision switches to scientific
    # notation once the duration reaches 1000 seconds.
    logger.info(f"{self.name} data split - Took {elapsed:.3f}s")

    logger.debug("Checking split attribute and sizes.")
    self._check_split()

    # Only flag the setting as split once the checks have passed.
    self._split_complete = True
    logger.info(f"{self.name} data split complete.")

`restore(n=0)` ¶

Restore last run.

Parameters:

Name	Type	Description	Default
`n`	`int`	Iteration number to restore to. The default, 0, restores to the beginning.	`0`

Source code in src/recnexteval/settings/base.py

def restore(self, n: int = 0) -> None:
    """Reset the setting's current iteration index.

    Args:
        n: Iteration number to restore to. The default, 0, restores to
            the beginning.
    """
    logger.debug(f"Restoring setting to iteration {n}")
    self.current_index = n

`get_split_at(index)` ¶

Get the split data at a specific index.

Parameters:

Name	Type	Description	Default
`index`	`int`	The index of the split to retrieve.	required

Returns:

Type	Description
`SplitResult`	SplitResult with keys: 'unlabeled', 'ground_truth', 't_window', 'incremental'.

Raises:

Type	Description
`IndexError`	If index is out of range.

Source code in src/recnexteval/settings/base.py

def get_split_at(self, index: int) -> SplitResult:
    """Get the split data at a specific index.

    Args:
        index: The index of the split to retrieve. Valid values are
            ``0 <= index < self.num_split``.

    Returns:
        SplitResult with keys: 'unlabeled', 'ground_truth', 't_window', 'incremental'.

    Raises:
        IndexError: If index is out of range.
        ValueError: If the stored split data does not have the shape expected
            for the kind of setting (lists for sliding window, single values
            otherwise).
    """
    # Indices are zero-based, so num_split itself is already out of range.
    if index < 0 or index >= self.num_split:
        raise IndexError(f"Index {index} out of range for {self.num_split} splits")

    if self._sliding_window_setting:
        if not (
            isinstance(self._unlabeled_data, list)
            and isinstance(self._ground_truth_data, list)
            and isinstance(self._t_window, list)
        ):
            raise ValueError("Expected list of InteractionMatrix for sliding window setting.")
        result = SplitResult(
            unlabeled=self._unlabeled_data[index],
            ground_truth=self._ground_truth_data[index],
            # TODO change this variable to training_data when refactoring
            # The first split (index 0) has no incremental data; later splits
            # use the entry at index - 1.
            # NOTE(review): the guard compares index (not index - 1) against
            # len(self._incremental_data) - confirm this bound is intended.
            incremental=(
                self._incremental_data[index - 1] if index < len(self._incremental_data) and index > 0 else None
            ),
            t_window=self._t_window[index],
        )
    else:
        if index != 0:
            raise IndexError("Non-sliding setting has only one split at index 0")
        if (
            isinstance(self._unlabeled_data, list)
            or isinstance(self._ground_truth_data, list)
            or isinstance(self._t_window, list)
        ):
            raise ValueError("Expected single data for non-sliding setting.")
        result = SplitResult(
            unlabeled=self._unlabeled_data,
            ground_truth=self._ground_truth_data,
            incremental=None,
            t_window=self._t_window,
        )

    return result

leave_n_out_setting

logger = logging.getLogger(__name__) module-attribute ¶