Skip to content

single_time_point_setting

logger = logging.getLogger(__name__) module-attribute

SingleTimePointSetting

Bases: Setting

Single time point setting for data split.

Splits an interaction dataset at a single timestamp into training data and evaluation data. The evaluation data can be further processed to produce unlabeled inputs and ground-truth targets for model evaluation.

Parameters:

Name Type Description Default
training_t int

Time point to split the data. The training split covers interactions with timestamps in [0, training_t).

required
n_seq_data int

Number of last sequential interactions to provide as input for prediction. Defaults to 1.

1
top_K int

Number of interactions per user to select for evaluation purposes. Defaults to 1.

1
t_upper int

Upper bound on the timestamp of interactions included in evaluation. Defaults to the maximum 32-bit integer value (acts like infinity).

max
include_all_past_data bool

If True, include all past interactions when constructing input sequences. Defaults to False.

False
seed int

Random seed for reproducible behavior. If None, a seed will be generated.

42
Source code in src/recnexteval/settings/strategy/single_time_point_setting.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
class SingleTimePointSetting(Setting):
    """Setting that cuts an interaction dataset at one timestamp.

    Interactions are divided around `training_t` into a training part and
    an evaluation part. The evaluation part is then handed to the
    prediction data processor, which yields the unlabeled inputs and the
    ground-truth targets used for model evaluation.

    Args:
        training_t: Time point at which the data is split. The training
            split covers interactions with timestamps in `[0, training_t)`.
        n_seq_data: Number of last sequential interactions to provide as
            input for prediction. Defaults to `1`.
        top_K: Number of interactions per user to select for evaluation
            purposes. Defaults to `1`.
        t_upper: Upper bound on the timestamp of interactions included in
            evaluation. Defaults to the maximum 32-bit integer value
            (acts like infinity).
        include_all_past_data: If True, include all past interactions when
            constructing input sequences. Defaults to False.
        seed: Random seed for reproducible behavior; forwarded to the
            base `Setting` constructor.
    """

    IS_BASE: bool = False

    def __init__(
        self,
        training_t: int,
        n_seq_data: int = 1,
        top_K: int = 1,
        t_upper: int = np.iinfo(np.int32).max,
        include_all_past_data: bool = False,
        seed: int = 42,
    ):
        super().__init__(seed=seed)

        # Public attributes (assignment order is kept stable on purpose).
        self.t = training_t
        """Epoch timestamp value to be used in for training set."""
        self.t_upper = t_upper
        """Epoch value to be added to `t` as upper bound for evaluation data."""
        self.n_seq_data = n_seq_data
        self.top_K = top_K

        logger.info("Splitting data at time %s with t_upper interval %s", training_t, t_upper)

        # Splitter whose first output becomes the training data.
        self._training_data_splitter = TimestampSplitter(t=training_t, t_lower=None, t_upper=t_upper)
        # Splitter producing the (past, future) pair used for evaluation.
        # NOTE(review): `include_all_past_data` is only forwarded here and is
        # not kept as a public attribute on the instance - confirm intended.
        self._splitter = NLastInteractionTimestampSplitter(
            t=training_t,
            t_upper=t_upper,
            n_seq_data=n_seq_data,
            include_all_past_data=include_all_past_data,
        )
        self._t_window = training_t

    def _split(self, data: InteractionMatrix) -> None:
        """Populate training, unlabeled and ground-truth data from `data`.

        A warning is emitted when the earliest timestamp in `data` is later
        than the split time, since the training set will then be empty.
        Empty resulting splits are reported through the module logger.

        Args:
            data: Interaction matrix to split. Must have timestamps.

        Raises:
            TimestampAttributeMissingError: If `data` has no timestamp attribute.
        """
        if not data.has_timestamps:
            raise TimestampAttributeMissingError()

        if data.min_timestamp > self.t:
            warn(
                f"Splitting at time {self.t} is before the first timestamp"
                " in the data. No data will be in the training set."
            )

        # Only the first element of the timestamp split is kept for training.
        training_part, _ = self._training_data_splitter.split(data)
        self._training_data = training_part

        history, upcoming = self._splitter.split(data)
        processed = self.prediction_data_processor.process(
            past_interaction=history,
            future_interaction=upcoming,
            top_K=self.top_K,
        )
        self._unlabeled_data, self._ground_truth_data = processed

        # Report every split that ended up empty, in a fixed order.
        emptiness_reports = (
            (self._training_data, "Training data is empty after splitting at time %s"),
            (self._unlabeled_data, "Unlabeled data is empty after splitting at time %s"),
            (self._ground_truth_data, "Ground truth data is empty after splitting at time %s"),
        )
        for split_part, message in emptiness_reports:
            if len(split_part) == 0:
                logger.info(message, self.t)

        logger.info("Finished splitting data at time %s", self.t)

IS_BASE = False class-attribute instance-attribute

t = training_t instance-attribute

Epoch timestamp value to be used for the training set.

t_upper = t_upper instance-attribute

Epoch value to be added to t as upper bound for evaluation data.

n_seq_data = n_seq_data instance-attribute

top_K = top_K instance-attribute

name property

Name of the object's class.

:return: Name of the object's class :rtype: str

params property

Parameters of the object.

:return: Parameters of the object :rtype: dict

identifier property

Name of the setting.

seed = seed instance-attribute

prediction_data_processor = PredictionDataProcessor() instance-attribute

num_split property

Get number of splits created from dataset.

This property defaults to 1 (no splits on training set) for typical settings. For SlidingWindowSetting, this is typically greater than 1 if there are multiple splits created from the sliding window.

Returns:

Type Description
int

Number of splits created from dataset.

is_ready property

Check if setting is ready for evaluation.

Returns:

Type Description
bool

True if the setting has been split and is ready to use.

is_sliding_window_setting property

Check if setting is SlidingWindowSetting.

Returns:

Type Description
bool

True if this is a SlidingWindowSetting instance.

training_data property

Get background data for initial model training.

Returns:

Type Description
InteractionMatrix

InteractionMatrix of training interactions.

t_window property

Get the upper timestamp of the window in split.

In settings that respect the global timeline, returns a timestamp value. In SlidingWindowSetting, returns a list of timestamp values. In settings like LeaveNOutSetting, returns None.

Returns:

Type Description
Union[None, int, list[int]]

Timestamp limit for the data (int, list of ints, or None).

unlabeled_data property

Get unlabeled data for model predictions.

Contains the user/item ID for prediction along with previous sequential interactions. Used to make predictions on ground truth data.

Returns:

Type Description
InteractionMatrix | list[InteractionMatrix]

Single InteractionMatrix or list of InteractionMatrix for sliding window setting.

ground_truth_data property

Get ground truth data for model evaluation.

Contains the actual interactions of user-item that the model should predict.

Returns:

Type Description
InteractionMatrix | list[InteractionMatrix]

Single InteractionMatrix or list of InteractionMatrix for sliding window.

incremental_data property

Get data for incrementally updating the model.

Only available for SlidingWindowSetting.

Returns:

Type Description
list[InteractionMatrix]

List of InteractionMatrix objects for incremental updates.

Raises:

Type Description
AttributeError

If setting is not SlidingWindowSetting.

get_params()

Get the parameters of the setting.

Source code in src/recnexteval/settings/base.py
74
75
76
77
78
79
80
81
82
83
84
85
def get_params(self) -> dict[str, Any]:
    """Collect the setting's public instance attributes as its parameters.

    Returns:
        A dict mapping attribute name to value for every entry of
        `vars(self)` whose name does not start with an underscore and is
        not explicitly excluded. Entries keep the order of `vars(self)`.
    """
    # Public attributes that are deliberately left out of the parameters.
    hidden = {"prediction_data_processor"}

    return {
        key: value
        for key, value in vars(self).items()
        if not (key.startswith("_") or key in hidden)
    }

split(data)

Split data according to the setting.

Calling this method changes the state of the setting object to be ready for evaluation. The method splits data into training_data, ground_truth_data, and unlabeled_data.

Note

SlidingWindowSetting will have an additional attribute incremental_data.

Parameters:

Name Type Description Default
data InteractionMatrix

Interaction matrix to be split.

required
Source code in src/recnexteval/settings/base.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def split(self, data: InteractionMatrix) -> None:
    """Split data according to the setting.

    Calling this method changes the state of the setting object to be ready
    for evaluation. It records the number of interactions in `data`,
    delegates the actual work to `self._split`, validates the outcome with
    `self._check_split` and finally marks the split as complete.

    Note:
        SlidingWindowSetting will have an additional attribute incremental_data.

    Args:
        data: Interaction matrix to be split.
    """
    logger.debug("Splitting data...")
    self._num_full_interactions = data.num_interactions

    # Use a monotonic clock for the duration: wall-clock time (`time.time`)
    # can jump on system clock adjustments and yield wrong or negative values.
    start = time.perf_counter()
    self._split(data)
    elapsed = time.perf_counter() - start
    logger.info(f"{self.name} data split - Took {elapsed:.3}s")

    logger.debug("Checking split attribute and sizes.")
    self._check_split()

    # Only flagged as complete once the split passed the check above.
    self._split_complete = True
    logger.info(f"{self.name} data split complete.")

restore(n=0)

Restore last run.

Parameters:

Name Type Description Default
n int

Iteration number to restore to. If None, restores to beginning.

0
Source code in src/recnexteval/settings/base.py
303
304
305
306
307
308
309
310
def restore(self, n: int = 0) -> None:
    """Restore last run.

    Logs the target iteration at debug level and stores `n` in
    `self.current_index`.

    Args:
        n: Iteration number to restore to. Defaults to `0`.
            NOTE(review): the earlier documentation said that `None`
            restores to the beginning, but `n` is stored as-is with no
            `None` handling - confirm the intended behaviour.
    """
    logger.debug(f"Restoring setting to iteration {n}")
    self.current_index = n

get_split_at(index)

Get the split data at a specific index.

Parameters:

Name Type Description Default
index int

The index of the split to retrieve.

required

Returns:

Type Description
SplitResult

SplitResult with keys: 'unlabeled', 'ground_truth', 't_window', 'incremental'.

Raises:

Type Description
IndexError

If index is out of range.

Source code in src/recnexteval/settings/base.py
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
def get_split_at(self, index: int) -> SplitResult:
    """Get the split data at a specific index.

    Args:
        index: The index of the split to retrieve. Valid values are
            `0 <= index < self.num_split`.

    Returns:
        SplitResult with keys: 'unlabeled', 'ground_truth', 't_window', 'incremental'.

    Raises:
        IndexError: If index is out of range.
        ValueError: If the stored split data does not have the container
            shape expected for the kind of setting (lists for a sliding
            window setting, single objects otherwise).
    """
    # Indices are zero-based, so `index == num_split` is already out of range.
    if index < 0 or index >= self.num_split:
        raise IndexError(f"Index {index} out of range for {self.num_split} splits")

    if self._sliding_window_setting:
        # Sliding window settings keep one entry per window in parallel lists.
        if not (
            isinstance(self._unlabeled_data, list)
            and isinstance(self._ground_truth_data, list)
            and isinstance(self._t_window, list)
        ):
            raise ValueError("Expected list of InteractionMatrix for sliding window setting.")
        result = SplitResult(
            unlabeled=self._unlabeled_data[index],
            ground_truth=self._ground_truth_data[index],
            # TODO change this variable to training_data when refactoring
            # Incremental data is looked up at `index - 1`; the first split
            # (index 0) has no incremental data.
            incremental=(
                self._incremental_data[index - 1] if index < len(self._incremental_data) and index > 0 else None
            ),
            t_window=self._t_window[index],
        )
    else:
        # Non-sliding settings hold exactly one split, stored as single objects.
        if index != 0:
            raise IndexError("Non-sliding setting has only one split at index 0")
        if (
            isinstance(self._unlabeled_data, list)
            or isinstance(self._ground_truth_data, list)
            or isinstance(self._t_window, list)
        ):
            raise ValueError("Expected single data for non-sliding setting.")
        result = SplitResult(
            unlabeled=self._unlabeled_data,
            ground_truth=self._ground_truth_data,
            incremental=None,
            t_window=self._t_window,
        )

    return result