Skip to content

leave_n_out_setting

logger = logging.getLogger(__name__) module-attribute

LeaveNOutSetting

Bases: Setting

Leave-N-Out setting for splitting data.

Splits the dataset into training and test sets by leaving out the last N interactions for each user as test data, using the previous n_seq_data interactions as context.

Source code in src/recnexteval/settings/strategy/leave_n_out_setting.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
class LeaveNOutSetting(Setting):
    """Per-user leave-N-out data split.

    For every user, the final N interactions are held out as test data, and the
    ``n_seq_data`` interactions preceding them are used as context.
    """

    IS_BASE: bool = False

    def __init__(self, n_seq_data: int = 1, N: int = 1, seed: int = 42) -> None:
        """Store the configuration and build the underlying splitter.

        Args:
            n_seq_data: Number of previous interactions used as context.
            N: Number of trailing interactions left out per user.
            seed: Seed passed on to the base ``Setting`` initializer.
        """
        super().__init__(seed=seed)
        self.n_seq_data = n_seq_data
        # top_K is the name used for "number of items to predict", which is N here
        self.top_K = N
        logger.info("Splitting data")
        self._splitter = NLastInteractionSplitter(N, n_seq_data)

    def _split(self, data: InteractionMatrix) -> None:
        """Populate training, unlabeled and ground-truth data from ``data``.

        Args:
            data: Interaction matrix to be split. Must contain timestamps.
        """
        training, held_out = self._splitter.split(data)
        self._training_data = training
        # hand a copy to the processor so the background data is not modified
        history = self._training_data.copy()

        processed = self.prediction_data_processor.process(
            past_interaction=history,
            future_interaction=held_out,
            top_K=self.top_K,
        )
        self._unlabeled_data, self._ground_truth_data = processed
        # this setting has no timestamp window
        self._t_window = None

IS_BASE = False class-attribute instance-attribute

n_seq_data = n_seq_data instance-attribute

top_K = N instance-attribute

name property

Name of the object's class.

Returns the name of the object's class as a str.

params property

Parameters of the object.

Returns the parameters of the object as a dict.

identifier property

Name of the setting.

seed = seed instance-attribute

prediction_data_processor = PredictionDataProcessor() instance-attribute

num_split property

Get number of splits created from dataset.

This property defaults to 1 (no splits on training set) for typical settings. For SlidingWindowSetting, this is typically greater than 1 if there are multiple splits created from the sliding window.

Returns:

Type Description
int

Number of splits created from dataset.

is_ready property

Check if setting is ready for evaluation.

Returns:

Type Description
bool

True if the setting has been split and is ready to use.

is_sliding_window_setting property

Check if setting is SlidingWindowSetting.

Returns:

Type Description
bool

True if this is a SlidingWindowSetting instance.

training_data property

Get background data for initial model training.

Returns:

Type Description
InteractionMatrix

InteractionMatrix of training interactions.

t_window property

Get the upper timestamp of the window in split.

In settings that respect the global timeline, returns a timestamp value. In SlidingWindowSetting, returns a list of timestamp values. In settings like LeaveNOutSetting, returns None.

Returns:

Type Description
Union[None, int, list[int]]

Timestamp limit for the data (int, list of ints, or None).

unlabeled_data property

Get unlabeled data for model predictions.

Contains the user/item ID for prediction along with previous sequential interactions. Used to make predictions on ground truth data.

Returns:

Type Description
InteractionMatrix | list[InteractionMatrix]

Single InteractionMatrix or list of InteractionMatrix for sliding window setting.

ground_truth_data property

Get ground truth data for model evaluation.

Contains the actual user-item interactions that the model should predict.

Returns:

Type Description
InteractionMatrix | list[InteractionMatrix]

Single InteractionMatrix or list of InteractionMatrix for sliding window.

incremental_data property

Get data for incrementally updating the model.

Only available for SlidingWindowSetting.

Returns:

Type Description
list[InteractionMatrix]

List of InteractionMatrix objects for incremental updates.

Raises:

Type Description
AttributeError

If setting is not SlidingWindowSetting.

get_params()

Get the parameters of the setting.

Source code in src/recnexteval/settings/base.py
74
75
76
77
78
79
80
81
82
83
84
85
def get_params(self) -> dict[str, Any]:
    """Return the public instance attributes of the setting as a dict.

    Attributes whose name starts with an underscore are skipped, as is
    ``prediction_data_processor``. Keys appear in the order the attributes
    were set on the instance.
    """
    hidden = {"prediction_data_processor"}
    return {
        key: value
        for key, value in vars(self).items()
        if not (key.startswith("_") or key in hidden)
    }

split(data)

Split data according to the setting.

Calling this method changes the state of the setting object to be ready for evaluation. The method splits data into training_data, ground_truth_data, and unlabeled_data.

Note

SlidingWindowSetting will have an additional attribute incremental_data.

Parameters:

Name Type Description Default
data InteractionMatrix

Interaction matrix to be split.

required
Source code in src/recnexteval/settings/base.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def split(self, data: InteractionMatrix) -> None:
    """Split data according to the setting.

    Calling this method changes the state of the setting object to be ready
    for evaluation. The method splits data into training_data, ground_truth_data,
    and unlabeled_data.

    Note:
        SlidingWindowSetting will have an additional attribute incremental_data.

    Args:
        data: Interaction matrix to be split.
    """
    logger.debug("Splitting data...")
    # Remember the size of the full dataset before splitting.
    self._num_full_interactions = data.num_interactions

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # (or go negative) when the system clock is adjusted mid-run.
    start = time.perf_counter()
    self._split(data)
    elapsed = time.perf_counter() - start
    # ".3f" keeps fixed-point output; a bare ".3" would switch to scientific
    # notation (e.g. "1.23e+03s") for long-running splits.
    logger.info(f"{self.name} data split - Took {elapsed:.3f}s")

    logger.debug("Checking split attribute and sizes.")
    self._check_split()

    # Only mark the setting as split once the checks above have returned.
    self._split_complete = True
    logger.info(f"{self.name} data split complete.")

restore(n=0)

Restore last run.

Parameters:

Name Type Description Default
n int

Iteration number to restore to. If None, restores to beginning.

0
Source code in src/recnexteval/settings/base.py
303
304
305
306
307
308
309
310
def restore(self, n: int = 0) -> None:
    """Restore last run.

    Args:
        n: Iteration number to restore to. If None, restores to beginning.
    """
    logger.debug(f"Restoring setting to iteration {n}")
    self.current_index = n

get_split_at(index)

Get the split data at a specific index.

Parameters:

Name Type Description Default
index int

The index of the split to retrieve.

required

Returns:

Type Description
SplitResult

SplitResult with keys: 'unlabeled', 'ground_truth', 't_window', 'incremental'.

Raises:

Type Description
IndexError

If index is out of range.

Source code in src/recnexteval/settings/base.py
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
def get_split_at(self, index: int) -> SplitResult:
    """Get the split data at a specific index.

    Args:
        index: The zero-based index of the split to retrieve.

    Returns:
        SplitResult with keys: 'unlabeled', 'ground_truth', 't_window', 'incremental'.

    Raises:
        IndexError: If index is out of range.
        ValueError: If the stored split data does not have the shape expected
            for the kind of setting (lists for sliding window, single objects
            otherwise).
    """
    # Valid indices are 0 .. num_split - 1, so index == num_split is already
    # out of range.
    if index < 0 or index >= self.num_split:
        raise IndexError(f"Index {index} out of range for {self.num_split} splits")

    if self._sliding_window_setting:
        if not (
            isinstance(self._unlabeled_data, list)
            and isinstance(self._ground_truth_data, list)
            and isinstance(self._t_window, list)
        ):
            raise ValueError("Expected list of InteractionMatrix for sliding window setting.")
        result = SplitResult(
            unlabeled=self._unlabeled_data[index],
            ground_truth=self._ground_truth_data[index],
            # TODO change this variable to training_data when refactoring
            # The first split (index 0) has no incremental data; later splits
            # use the entry at index - 1 when index is within bounds.
            incremental=(
                self._incremental_data[index - 1] if 0 < index < len(self._incremental_data) else None
            ),
            t_window=self._t_window[index],
        )
    else:
        # Defensive: a non-sliding setting exposes exactly one split.
        if index != 0:
            raise IndexError("Non-sliding setting has only one split at index 0")
        if (
            isinstance(self._unlabeled_data, list)
            or isinstance(self._ground_truth_data, list)
            or isinstance(self._t_window, list)
        ):
            raise ValueError("Expected single data for non-sliding setting.")
        result = SplitResult(
            unlabeled=self._unlabeled_data,
            ground_truth=self._ground_truth_data,
            incremental=None,
            t_window=self._t_window,
        )

    return result