single_time_point_setting

`logger = logging.getLogger(__name__)` `module-attribute` ¶

`SingleTimePointSetting` ¶

Bases: Setting

Single time point setting for data split.

Splits an interaction dataset at a single timestamp into training data and evaluation data. The evaluation data can be further processed to produce unlabeled inputs and ground-truth targets for model evaluation.

Parameters:

Name	Type	Description	Default
`training_t`	`int`	Time point to split the data. The training split covers interactions with timestamps in `[0, training_t)`.	required
`n_seq_data`	`int`	Number of last sequential interactions to provide as input for prediction. Defaults to `1`.	`1`
`top_K`	`int`	Number of interactions per user to select for evaluation purposes. Defaults to `1`.	`1`
`t_upper`	`int`	Upper bound on the timestamp of interactions included in evaluation. Defaults to the maximum 32-bit integer value (acts like infinity).	`max`
`include_all_past_data`	`bool`	If True, include all past interactions when constructing input sequences. Defaults to False.	`False`
`seed`	`int`	Random seed for reproducible behavior. If None, a seed will be generated.	`42`

Source code in src/recnexteval/settings/strategy/single_time_point_setting.py

class SingleTimePointSetting(Setting):
    """Data split setting that cuts the dataset at one point in time.

    Interactions are divided at a single timestamp: one part becomes the
    training data, the other feeds evaluation. The evaluation part is then
    handed to the prediction data processor, which produces the unlabeled
    inputs and the ground-truth targets used for model evaluation.

    Args:
        training_t: Timestamp at which the data is split. The training
            split covers interactions with timestamps in `[0, training_t)`.
        n_seq_data: How many of the last sequential interactions are
            supplied as input for prediction. Defaults to `1`.
        top_K: Number of interactions per user selected for evaluation.
            Defaults to `1`.
        t_upper: Upper bound on the timestamp of interactions included in
            evaluation. Defaults to the largest 32-bit signed integer,
            which acts like infinity.
        include_all_past_data: Forwarded to the evaluation splitter; if
            True, all past interactions are included when constructing
            input sequences. Defaults to False.
        seed: Random seed forwarded to the base `Setting`.
            Defaults to `42`.
    """

    IS_BASE: bool = False

    def __init__(
        self,
        training_t: int,
        n_seq_data: int = 1,
        top_K: int = 1,
        t_upper: int = np.iinfo(np.int32).max,
        include_all_past_data: bool = False,
        seed: int = 42,
    ):
        super().__init__(seed=seed)
        # NOTE: public attributes are assigned in this order on purpose;
        # anything iterating over `vars(self)` sees them in the same order.
        self.t = training_t
        """Epoch timestamp value to be used for the training set."""
        self.t_upper = t_upper
        """Upper bound passed as `t_upper` to both splitters."""
        self.n_seq_data = n_seq_data
        self.top_K = top_K

        logger.info("Splitting data at time %s with t_upper interval %s", training_t, t_upper)

        # Both splitters cut at the same time point and share the same upper bound.
        shared_bounds = {"t": training_t, "t_upper": t_upper}
        self._training_data_splitter = TimestampSplitter(t_lower=None, **shared_bounds)
        self._splitter = NLastInteractionTimestampSplitter(
            n_seq_data=n_seq_data,
            include_all_past_data=include_all_past_data,
            **shared_bounds,
        )
        self._t_window = training_t

    def _split(self, data: InteractionMatrix) -> None:
        """Populate training, unlabeled and ground-truth data from `data`.

        The training data is the first element returned by the training
        splitter. The evaluation splitter yields a past/future pair that
        the prediction data processor turns into unlabeled data and
        ground-truth data. A warning is emitted when the earliest timestamp
        in `data` lies after the split time, and an info message is logged
        for every resulting dataset that is empty.

        Args:
            data: Interaction matrix to split. Must have timestamps.

        Raises:
            TimestampAttributeMissingError: If `data` has no timestamp attribute.
        """
        if not data.has_timestamps:
            raise TimestampAttributeMissingError()

        if data.min_timestamp > self.t:
            message = (
                f"Splitting at time {self.t} is before the first timestamp"
                " in the data. No data will be in the training set."
            )
            warn(message)

        # Only the first element of the training split is kept.
        training_part, _ = self._training_data_splitter.split(data)
        self._training_data = training_part

        history, upcoming = self._splitter.split(data)
        unlabeled, ground_truth = self.prediction_data_processor.process(
            past_interaction=history,
            future_interaction=upcoming,
            top_K=self.top_K,
        )
        self._unlabeled_data = unlabeled
        self._ground_truth_data = ground_truth

        # Report every dataset that ended up empty, in a fixed order.
        emptiness_checks = (
            (self._training_data, "Training data is empty after splitting at time %s"),
            (self._unlabeled_data, "Unlabeled data is empty after splitting at time %s"),
            (self._ground_truth_data, "Ground truth data is empty after splitting at time %s"),
        )
        for dataset, empty_message in emptiness_checks:
            if len(dataset) == 0:
                logger.info(empty_message, self.t)

        logger.info("Finished splitting data at time %s", self.t)

`IS_BASE = False` `class-attribute` `instance-attribute` ¶

`t = training_t` `instance-attribute` ¶

Epoch timestamp value to be used for the training set.

`t_upper = t_upper` `instance-attribute` ¶

Epoch value to be added to t as upper bound for evaluation data.

`n_seq_data = n_seq_data` `instance-attribute` ¶

`top_K = top_K` `instance-attribute` ¶

`name` `property` ¶

Name of the object's class.

Returns the name of the object's class as a `str`.

`params` `property` ¶

Parameters of the object.

Returns the parameters of the object as a `dict`.

`identifier` `property` ¶

Name of the setting.

`seed = seed` `instance-attribute` ¶

`prediction_data_processor = PredictionDataProcessor()` `instance-attribute` ¶

`num_split` `property` ¶

Get number of splits created from dataset.

This property defaults to 1 (no splits on training set) for typical settings. For SlidingWindowSetting, this is typically greater than 1 if there are multiple splits created from the sliding window.

Returns:

Type	Description
`int`	Number of splits created from dataset.

`is_ready` `property` ¶

Check if setting is ready for evaluation.

Returns:

Type	Description
`bool`	True if the setting has been split and is ready to use.

`is_sliding_window_setting` `property` ¶

Check if setting is SlidingWindowSetting.

Returns:

Type	Description
`bool`	True if this is a SlidingWindowSetting instance.

`training_data` `property` ¶

Get background data for initial model training.

Returns:

Type	Description
`InteractionMatrix`	InteractionMatrix of training interactions.

`t_window` `property` ¶

Get the upper timestamp of the window in split.

In settings that respect the global timeline, returns a timestamp value. In SlidingWindowSetting, returns a list of timestamp values. In settings like LeaveNOutSetting, returns None.

Returns:

Type	Description
`Union[None, int, list[int]]`	Timestamp limit for the data (int, list of ints, or None).

`unlabeled_data` `property` ¶

Get unlabeled data for model predictions.

Contains the user/item ID for prediction along with previous sequential interactions. Used to make predictions on ground truth data.

Returns:

Type	Description
`InteractionMatrix \| list[InteractionMatrix]`	Single InteractionMatrix or list of InteractionMatrix for sliding window setting.

`ground_truth_data` `property` ¶

Get ground truth data for model evaluation.

Contains the actual interactions of user-item that the model should predict.

Returns:

Type	Description
`InteractionMatrix \| list[InteractionMatrix]`	Single InteractionMatrix or list of InteractionMatrix for sliding window.

`incremental_data` `property` ¶

Get data for incrementally updating the model.

Only available for SlidingWindowSetting.

Returns:

Type	Description
`list[InteractionMatrix]`	List of InteractionMatrix objects for incremental updates.

Raises:

Type	Description
`AttributeError`	If setting is not SlidingWindowSetting.

`get_params()` ¶

Get the parameters of the setting.

Source code in src/recnexteval/settings/base.py

def get_params(self) -> dict[str, Any]:
    """Collect the public instance attributes of the setting.

    Every entry of `vars(self)` is returned except names that start with
    an underscore and the `prediction_data_processor` attribute.

    Returns:
        Mapping of attribute name to attribute value, in the order the
        attributes appear in `vars(self)`.
    """
    # Public attributes that are still not considered parameters.
    hidden = {"prediction_data_processor"}

    return {
        key: value
        for key, value in vars(self).items()
        if not (key.startswith("_") or key in hidden)
    }

`split(data)` ¶

Split data according to the setting.

Calling this method changes the state of the setting object to be ready for evaluation. The method splits data into training_data, ground_truth_data, and unlabeled_data.

Note

SlidingWindowSetting will have an additional attribute incremental_data.

Parameters:

Name	Type	Description	Default
`data`	`InteractionMatrix`	Interaction matrix to be split.	required

Source code in src/recnexteval/settings/base.py

def split(self, data: InteractionMatrix) -> None:
    """Split data according to the setting.

    Calling this method changes the state of the setting object to be ready
    for evaluation. The actual work is delegated to `_split`, after which
    `_check_split` is run and the setting is flagged as complete.

    Note:
        SlidingWindowSetting will have an additional attribute incremental_data.

    Args:
        data: Interaction matrix to be split.
    """
    logger.debug("Splitting data...")
    self._num_full_interactions = data.num_interactions

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # (or turn negative) when the system clock is adjusted mid-split.
    start = time.perf_counter()
    self._split(data)
    elapsed = time.perf_counter() - start
    # Fixed-point formatting: a bare ".3" precision means significant digits
    # and renders long runs in scientific notation (e.g. "1.23e+03s").
    logger.info("%s data split - Took %.3fs", self.name, elapsed)

    logger.debug("Checking split attribute and sizes.")
    self._check_split()

    # Only reached when neither _split nor _check_split raised.
    self._split_complete = True
    logger.info("%s data split complete.", self.name)

`restore(n=0)` ¶

Restore last run.

Parameters:

Name	Type	Description	Default
`n`	`int`	Iteration number to restore to. If None, restores to beginning.	`0`

Source code in src/recnexteval/settings/base.py

def restore(self, n: int = 0) -> None:
    """Reset the setting's position to a given iteration.

    Args:
        n: Iteration number stored in `current_index`. Defaults to `0`.
    """
    message = f"Restoring setting to iteration {n}"
    logger.debug(message)
    self.current_index = n

`get_split_at(index)` ¶

Get the split data at a specific index.

Parameters:

Name	Type	Description	Default
`index`	`int`	The index of the split to retrieve.	required

Returns:

Type	Description
`SplitResult`	SplitResult with keys: 'unlabeled', 'ground_truth', 't_window', 'incremental'.

Raises:

Type	Description
`IndexError`	If index is out of range.

Source code in src/recnexteval/settings/base.py

def get_split_at(self, index: int) -> SplitResult:
    """Get the split data at a specific index.

    For a sliding window setting the unlabeled data, ground truth data and
    window timestamps are expected to be lists and are indexed by `index`.
    For any other setting only index 0 exists and the single stored values
    are returned.

    Args:
        index: The index of the split to retrieve. Valid values are
            `0 <= index < num_split`.

    Returns:
        SplitResult with keys: 'unlabeled', 'ground_truth', 't_window', 'incremental'.

    Raises:
        IndexError: If index is out of range.
        ValueError: If the stored data does not have the shape expected
            for the kind of setting (lists for sliding window, single
            objects otherwise).
    """
    # Valid indices are 0 .. num_split - 1, so index == num_split must be
    # rejected as well (the previous `>` comparison let it through).
    if index < 0 or index >= self.num_split:
        raise IndexError(f"Index {index} out of range for {self.num_split} splits")

    if self._sliding_window_setting:
        if not (
            isinstance(self._unlabeled_data, list)
            and isinstance(self._ground_truth_data, list)
            and isinstance(self._t_window, list)
        ):
            raise ValueError("Expected list of InteractionMatrix for sliding window setting.")
        result = SplitResult(
            unlabeled=self._unlabeled_data[index],
            ground_truth=self._ground_truth_data[index],
            # TODO change this variable to training_data when refactoring
            # Split 0 has no incremental data; later splits read the entry
            # at index - 1, guarded by index < len(_incremental_data).
            incremental=(
                self._incremental_data[index - 1] if index < len(self._incremental_data) and index > 0 else None
            ),
            t_window=self._t_window[index],
        )
    else:
        # Kept as a safeguard in case num_split is ever > 1 for a
        # non-sliding setting.
        if index != 0:
            raise IndexError("Non-sliding setting has only one split at index 0")
        if (
            isinstance(self._unlabeled_data, list)
            or isinstance(self._ground_truth_data, list)
            or isinstance(self._t_window, list)
        ):
            raise ValueError("Expected single data for non-sliding setting.")
        result = SplitResult(
            unlabeled=self._unlabeled_data,
            ground_truth=self._ground_truth_data,
            incremental=None,
            t_window=self._t_window,
        )

    return result

single_time_point_setting

logger = logging.getLogger(__name__) module-attribute ¶