evaluator

`logger = logging.getLogger(name)` `module-attribute` ¶

`EvaluatorPipeline` `dataclass` ¶

Bases: EvaluatorBase

Evaluation via pipeline.

Source code in src/recnexteval/evaluators/pipeline/evaluator.py

@dataclass(kw_only=True)
class EvaluatorPipeline(EvaluatorBase):
    """Evaluation via pipeline."""

    algo_state_mgr: AlgorithmStateManager

    def _ready_evaluator(self) -> None:
        self._data_release_step()
        logger.debug("Algorithms trained with background data...")

        self._acc = MetricAccumulator()
        logger.debug("Metric accumulator instantiated...")

        self.setting.restore()
        logger.debug("Setting data generators ready...")

    def _evaluate_step(self) -> None:
        logger.info("Phase 2: Evaluating the algorithms...")
        try:
            unlabeled_data, ground_truth_data, _ = self._get_evaluation_data()
        except EOWSettingError as e:
            raise e

        # get the top k interaction per user
        # X_true = ground_truth_data.get_users_n_first_interaction(self.metric_k)
        y_true = ground_truth_data.item_interaction_sequence_matrix
        for algo_state in self.algo_state_mgr.values():
            y_pred = algo_state.algorithm_ptr.predict(unlabeled_data)
            logger.debug("Shape of prediction matrix: %s", y_pred.shape)
            logger.debug("Shape of ground truth matrix: %s", y_true.shape)

            if not self.ignore_unknown_item:
                y_pred = self._prediction_unknown_item_handler(y_true=y_true, y_pred=y_pred)

            self._add_metric_results_for_prediction(
                ground_truth_data=ground_truth_data,
                y_pred=y_pred,
                algorithm_name=self.algo_state_mgr.get_algorithm_identifier(algo_id=algo_state.algorithm_uuid),
            )

    def _data_release_step(self) -> None:
        if self._run_step != 0 and not self.setting.is_sliding_window_setting:
            return
        training_data = self._get_training_data()
        for algo_state in self.algo_state_mgr.values():
            algo_state.algorithm_ptr.fit(training_data)

    def reset(self) -> None:
        """Reset the evaluator to initial state."""
        logger.info("Resetting the evaluator for a new run...")
        self._run_step = 0

    def run_step(self) -> None:
        """Run a single step of the evaluator."""
        if self._run_step == 0:
            logger.info(f"There is a total of {self.setting.num_split} steps. Running step {self._run_step}")
            self._ready_evaluator()

        if self._run_step > self.setting.num_split:
            logger.info("Finished running all steps, call `run_step(reset=True)` to run the evaluation again")
            warn("Running this method again will not have any effect.")
            return
        logger.info("Running step %d", self._run_step)
        self._evaluate_step()
        self._data_release_step()

    def run_steps(self, num_steps: int) -> None:
        """Run multiple steps of the evaluator.

        Effectively runs the run_step method num_steps times. Call
        this method to run multiple steps of the evaluator at once.

        Args:
            num_steps: Number of steps to run.

        Raises:
            ValueError: If cannot run the specified number of steps.
        """
        if self._run_step + num_steps > self.setting.num_split:
            raise ValueError(f"Cannot run {num_steps} steps, only {self.setting.num_split - self._run_step} steps left")
        for _ in tqdm(range(num_steps)):
            self.run_step()

    def run(self) -> None:
        """Run the evaluator across all steps and splits.

        This method should be called when the programmer wants to step through
        all phases and splits to arrive to the metrics computed. An alternative
        to running through all splits is to call the run_step method which runs
        only one step at a time.
        """
        self._ready_evaluator()

        with tqdm(total=self.setting.num_split, desc="Evaluating steps") as pbar:
            while self._run_step <= self.setting.num_split:
                logger.info("Running step %d", self._run_step)
                self._evaluate_step()
                pbar.update(1)
                # if is last step, no need to release data anymore
                # since there is no more evaluation that can be done
                # break out of the loop
                if self._run_step == self.setting.num_split:
                    break
                self._data_release_step()

`algo_state_mgr` `instance-attribute` ¶

`metric_entries` `instance-attribute` ¶

`setting` `instance-attribute` ¶

`metric_k` `instance-attribute` ¶

`ignore_unknown_user = False` `class-attribute` `instance-attribute` ¶

`ignore_unknown_item = False` `class-attribute` `instance-attribute` ¶

`seed = 42` `class-attribute` `instance-attribute` ¶

`user_item_base = field(default_factory=UserItemKnowledgeBase)` `class-attribute` `instance-attribute` ¶

`reset()` ¶

Reset the evaluator to initial state.

Source code in src/recnexteval/evaluators/pipeline/evaluator.py

def reset(self) -> None:
    """Reset the evaluator to initial state."""
    logger.info("Resetting the evaluator for a new run...")
    self._run_step = 0

`run_step()` ¶

Run a single step of the evaluator.

Source code in src/recnexteval/evaluators/pipeline/evaluator.py

def run_step(self) -> None:
    """Run a single step of the evaluator."""
    if self._run_step == 0:
        logger.info(f"There is a total of {self.setting.num_split} steps. Running step {self._run_step}")
        self._ready_evaluator()

    if self._run_step > self.setting.num_split:
        logger.info("Finished running all steps, call `run_step(reset=True)` to run the evaluation again")
        warn("Running this method again will not have any effect.")
        return
    logger.info("Running step %d", self._run_step)
    self._evaluate_step()
    self._data_release_step()

`run_steps(num_steps)` ¶

Run multiple steps of the evaluator.

Effectively runs the run_step method num_steps times. Call this method to run multiple steps of the evaluator at once.

Parameters:

Name	Type	Description	Default
`num_steps`	`int`	Number of steps to run.	required

Raises:

Type	Description
`ValueError`	If cannot run the specified number of steps.

Source code in src/recnexteval/evaluators/pipeline/evaluator.py

def run_steps(self, num_steps: int) -> None:
    """Run multiple steps of the evaluator.

    Effectively runs the run_step method num_steps times. Call
    this method to run multiple steps of the evaluator at once.

    Args:
        num_steps: Number of steps to run.

    Raises:
        ValueError: If cannot run the specified number of steps.
    """
    if self._run_step + num_steps > self.setting.num_split:
        raise ValueError(f"Cannot run {num_steps} steps, only {self.setting.num_split - self._run_step} steps left")
    for _ in tqdm(range(num_steps)):
        self.run_step()

`run()` ¶

Run the evaluator across all steps and splits.

This method should be called when the programmer wants to step through all phases and splits to arrive to the metrics computed. An alternative to running through all splits is to call the run_step method which runs only one step at a time.

Source code in src/recnexteval/evaluators/pipeline/evaluator.py

def run(self) -> None:
    """Run the evaluator across all steps and splits.

    This method should be called when the programmer wants to step through
    all phases and splits to arrive to the metrics computed. An alternative
    to running through all splits is to call the run_step method which runs
    only one step at a time.
    """
    self._ready_evaluator()

    with tqdm(total=self.setting.num_split, desc="Evaluating steps") as pbar:
        while self._run_step <= self.setting.num_split:
            logger.info("Running step %d", self._run_step)
            self._evaluate_step()
            pbar.update(1)
            # if is last step, no need to release data anymore
            # since there is no more evaluation that can be done
            # break out of the loop
            if self._run_step == self.setting.num_split:
                break
            self._data_release_step()

`metric_results(level=MetricLevelEnum.MACRO, only_current_timestamp=False, filter_timestamp=None, filter_algo=None)` ¶

Results of the metrics computed.

Computes the metrics of all algorithms based on the level specified and return the results in a pandas DataFrame. The results can be filtered based on the algorithm name and the current timestamp.

Specifics¶

User level: User level metrics computed across all timestamps.
Window level: Window level metrics computed across all timestamps. This can be viewed as a macro level metric in the context of a single window, where the scores of each user is averaged within the window.
Macro level: Macro level metrics computed for entire timeline. This score is computed by averaging the scores of all windows, treating each window equally.
Micro level: Micro level metrics computed for entire timeline. This score is computed by averaging the scores of all users, treating each user and the timestamp the user is in as unique contribution to the overall score.

Parameters:

Name	Type	Description	Default
`level`	`MetricLevelEnum \| Literal['macro', 'micro', 'window', 'user']`	Level of the metric to compute, defaults to "macro".	`MACRO`
`only_current_timestamp`	`None \| bool`	Filter only the current timestamp, defaults to False.	`False`
`filter_timestamp`	`None \| int`	Timestamp value to filter on, defaults to None. If both `only_current_timestamp` and `filter_timestamp` are provided, `filter_timestamp` will be used.	`None`
`filter_algo`	`None \| str`	Algorithm name to filter on, defaults to None.	`None`

Returns:

Type	Description
`DataFrame`	Dataframe representation of the metric.