Skip to content

amazon

logger = logging.getLogger(__name__) module-attribute

AmazonItemMetadata

Bases: Metadata

Source code in src/recnexteval/datasets/metadata/amazon.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
class AmazonItemMetadata(Metadata):
    """Item metadata for an Amazon dataset stored as a JSON Lines file.

    The constructor keeps the supplied ``item_id_mapping`` so that the item
    identifiers found in the metadata can be translated when it is loaded.
    """

    config: ClassVar[AmazonItemMetadataConfig] = AmazonItemMetadataConfig()

    def __init__(self, item_id_mapping: pd.DataFrame) -> None:
        """Initialise the base class and remember the item-id mapping.

        Args:
            item_id_mapping: DataFrame consulted while loading; its
                ``config.item_ix`` column and its ``"iid"`` column are read.
        """
        super().__init__()
        self.item_id_mapping = item_id_mapping

    def _load_dataframe(self) -> pd.DataFrame:
        """Read the metadata file and translate its item identifiers.

        Returns:
            The metadata DataFrame, with the ``config.item_ix`` column
            replaced by the matching ``"iid"`` value wherever the mapping
            has one.
        """
        # Make sure the file is available before trying to parse it.
        self.fetch_dataset()

        source_path = self.file_path
        column_dtypes = self.config.dtype_dict
        # lines=True: the file is parsed as one JSON document per line.
        frame = pd.read_json(source_path, dtype=column_dtypes, lines=True)

        item_col = self.config.item_ix
        raw_ids = self.item_id_mapping[item_col]
        internal_ids = self.item_id_mapping["iid"]
        iid_by_raw_id = dict(zip(raw_ids, internal_ids))

        def _translate(raw_id):
            # Identifiers missing from the mapping pass through unchanged.
            return iid_by_raw_id.get(raw_id, raw_id)

        frame[item_col] = frame[item_col].map(_translate)
        return frame

    def _download_dataset(self) -> None:
        """Request the remote metadata file for this dataset.

        Passes ``config.dataset_url`` to ``self._fetch_remote`` together with
        a destination path made of ``self.base_path`` and
        ``config.remote_filename``.

        Raises:
            ValueError: If ``config.dataset_url`` is empty or unset.
        """
        url = self.config.dataset_url
        if not url:
            raise ValueError(f"{self.name} does not have URL specified.")

        destination = os.path.join(self.base_path, f"{self.config.remote_filename}")
        self._fetch_remote(url, destination)

config = AmazonItemMetadataConfig() class-attribute

item_id_mapping = item_id_mapping instance-attribute

IS_BASE = True class-attribute instance-attribute

name property

Name of the object's class.

Returns: the name of the object's class (`str`).

file_path property

File path of the dataset.

processed_cache_path property

Path for cached processed data.

base_path = base_path if base_path else self.config.default_base_path instance-attribute

filename = filename if filename else self.config.default_filename instance-attribute

fetch_dataset()

Check whether the dataset is present and download it if it is not.

Source code in src/recnexteval/datasets/base.py
39
40
41
42
43
44
45
def fetch_dataset(self) -> None:
    """Make sure the dataset file exists, downloading it when absent.

    Nothing is downloaded if ``self.file_path`` already exists; otherwise
    the work is delegated to ``self._download_dataset``.
    """
    already_present = os.path.exists(self.file_path)
    if not already_present:
        logger.debug(f"{self.name} dataset not found in {self.file_path}.")
        self._download_dataset()
        return
    logger.debug("Data file is in memory and in dir specified.")

fetch_dataset_force()

Force re-download of the dataset.

Source code in src/recnexteval/datasets/base.py
47
48
49
50
def fetch_dataset_force(self) -> None:
    """Download the dataset again without checking for an existing file.

    Always calls ``self._download_dataset`` after logging the request.
    """
    notice = f"{self.name} force re-download of dataset."
    logger.debug(notice)
    self._download_dataset()

load()

Load the metadata from file and return it as a DataFrame.

Returns:

Type Description
DataFrame

DataFrame containing the metadata.

Source code in src/recnexteval/datasets/metadata/base.py
32
33
34
35
36
37
38
def load(self) -> pd.DataFrame:
    """Read the metadata and hand it back as a DataFrame.

    All of the work is delegated to ``self._load_dataframe``.

    Returns:
        Whatever ``_load_dataframe`` produced.
    """
    metadata_frame = self._load_dataframe()
    return metadata_frame

AmazonMusicItemMetadata

Bases: AmazonItemMetadata

Source code in src/recnexteval/datasets/metadata/amazon.py
55
56
class AmazonMusicItemMetadata(AmazonItemMetadata):
    """Amazon item metadata bound to ``AmazonDigitalMusicItemMetadataConfig``.

    Only the class-level ``config`` is overridden; every method is inherited
    from ``AmazonItemMetadata``.
    """

    config: ClassVar[AmazonDigitalMusicItemMetadataConfig] = AmazonDigitalMusicItemMetadataConfig()

config = AmazonDigitalMusicItemMetadataConfig() class-attribute

IS_BASE = True class-attribute instance-attribute

name property

Name of the object's class.

Returns: the name of the object's class (`str`).

file_path property

File path of the dataset.

processed_cache_path property

Path for cached processed data.

base_path = base_path if base_path else self.config.default_base_path instance-attribute

filename = filename if filename else self.config.default_filename instance-attribute

item_id_mapping = item_id_mapping instance-attribute

fetch_dataset()

Check whether the dataset is present and download it if it is not.

Source code in src/recnexteval/datasets/base.py
39
40
41
42
43
44
45
def fetch_dataset(self) -> None:
    """Make sure the dataset file exists, downloading it when absent.

    Nothing is downloaded if ``self.file_path`` already exists; otherwise
    the work is delegated to ``self._download_dataset``.
    """
    already_present = os.path.exists(self.file_path)
    if not already_present:
        logger.debug(f"{self.name} dataset not found in {self.file_path}.")
        self._download_dataset()
        return
    logger.debug("Data file is in memory and in dir specified.")

fetch_dataset_force()

Force re-download of the dataset.

Source code in src/recnexteval/datasets/base.py
47
48
49
50
def fetch_dataset_force(self) -> None:
    """Download the dataset again without checking for an existing file.

    Always calls ``self._download_dataset`` after logging the request.
    """
    notice = f"{self.name} force re-download of dataset."
    logger.debug(notice)
    self._download_dataset()

load()

Load the metadata from file and return it as a DataFrame.

Returns:

Type Description
DataFrame

DataFrame containing the metadata.

Source code in src/recnexteval/datasets/metadata/base.py
32
33
34
35
36
37
38
def load(self) -> pd.DataFrame:
    """Read the metadata and hand it back as a DataFrame.

    All of the work is delegated to ``self._load_dataframe``.

    Returns:
        Whatever ``_load_dataframe`` produced.
    """
    metadata_frame = self._load_dataframe()
    return metadata_frame

AmazonMovieItemMetadata

Bases: AmazonItemMetadata

Source code in src/recnexteval/datasets/metadata/amazon.py
59
60
class AmazonMovieItemMetadata(AmazonItemMetadata):
    """Amazon item metadata bound to ``AmazonMoviesAndTVItemMetadataConfig``.

    Only the class-level ``config`` is overridden; every method is inherited
    from ``AmazonItemMetadata``.
    """

    config: ClassVar[AmazonMoviesAndTVItemMetadataConfig] = AmazonMoviesAndTVItemMetadataConfig()

config = AmazonMoviesAndTVItemMetadataConfig() class-attribute

IS_BASE = True class-attribute instance-attribute

name property

Name of the object's class.

Returns: the name of the object's class (`str`).

file_path property

File path of the dataset.

processed_cache_path property

Path for cached processed data.

base_path = base_path if base_path else self.config.default_base_path instance-attribute

filename = filename if filename else self.config.default_filename instance-attribute

item_id_mapping = item_id_mapping instance-attribute

fetch_dataset()

Check whether the dataset is present and download it if it is not.

Source code in src/recnexteval/datasets/base.py
39
40
41
42
43
44
45
def fetch_dataset(self) -> None:
    """Make sure the dataset file exists, downloading it when absent.

    Nothing is downloaded if ``self.file_path`` already exists; otherwise
    the work is delegated to ``self._download_dataset``.
    """
    already_present = os.path.exists(self.file_path)
    if not already_present:
        logger.debug(f"{self.name} dataset not found in {self.file_path}.")
        self._download_dataset()
        return
    logger.debug("Data file is in memory and in dir specified.")

fetch_dataset_force()

Force re-download of the dataset.

Source code in src/recnexteval/datasets/base.py
47
48
49
50
def fetch_dataset_force(self) -> None:
    """Download the dataset again without checking for an existing file.

    Always calls ``self._download_dataset`` after logging the request.
    """
    notice = f"{self.name} force re-download of dataset."
    logger.debug(notice)
    self._download_dataset()

load()

Load the metadata from file and return it as a DataFrame.

Returns:

Type Description
DataFrame

DataFrame containing the metadata.

Source code in src/recnexteval/datasets/metadata/base.py
32
33
34
35
36
37
38
def load(self) -> pd.DataFrame:
    """Read the metadata and hand it back as a DataFrame.

    All of the work is delegated to ``self._load_dataframe``.

    Returns:
        Whatever ``_load_dataframe`` produced.
    """
    metadata_frame = self._load_dataframe()
    return metadata_frame

AmazonSubscriptionBoxesItemMetadata

Bases: AmazonItemMetadata

Source code in src/recnexteval/datasets/metadata/amazon.py
63
64
class AmazonSubscriptionBoxesItemMetadata(AmazonItemMetadata):
    """Amazon item metadata bound to ``AmazonSubscriptionBoxesItemMetadataConfig``.

    Only the class-level ``config`` is overridden; every method is inherited
    from ``AmazonItemMetadata``.
    """

    config: ClassVar[AmazonSubscriptionBoxesItemMetadataConfig] = AmazonSubscriptionBoxesItemMetadataConfig()

config = AmazonSubscriptionBoxesItemMetadataConfig() class-attribute

IS_BASE = True class-attribute instance-attribute

name property

Name of the object's class.

Returns: the name of the object's class (`str`).

file_path property

File path of the dataset.

processed_cache_path property

Path for cached processed data.

base_path = base_path if base_path else self.config.default_base_path instance-attribute

filename = filename if filename else self.config.default_filename instance-attribute

item_id_mapping = item_id_mapping instance-attribute

fetch_dataset()

Check whether the dataset is present and download it if it is not.

Source code in src/recnexteval/datasets/base.py
39
40
41
42
43
44
45
def fetch_dataset(self) -> None:
    """Make sure the dataset file exists, downloading it when absent.

    Nothing is downloaded if ``self.file_path`` already exists; otherwise
    the work is delegated to ``self._download_dataset``.
    """
    already_present = os.path.exists(self.file_path)
    if not already_present:
        logger.debug(f"{self.name} dataset not found in {self.file_path}.")
        self._download_dataset()
        return
    logger.debug("Data file is in memory and in dir specified.")

fetch_dataset_force()

Force re-download of the dataset.

Source code in src/recnexteval/datasets/base.py
47
48
49
50
def fetch_dataset_force(self) -> None:
    """Download the dataset again without checking for an existing file.

    Always calls ``self._download_dataset`` after logging the request.
    """
    notice = f"{self.name} force re-download of dataset."
    logger.debug(notice)
    self._download_dataset()

load()

Load the metadata from file and return it as a DataFrame.

Returns:

Type Description
DataFrame

DataFrame containing the metadata.

Source code in src/recnexteval/datasets/metadata/base.py
32
33
34
35
36
37
38
def load(self) -> pd.DataFrame:
    """Read the metadata and hand it back as a DataFrame.

    All of the work is delegated to ``self._load_dataframe``.

    Returns:
        Whatever ``_load_dataframe`` produced.
    """
    metadata_frame = self._load_dataframe()
    return metadata_frame

AmazonBookItemMetadata

Bases: AmazonItemMetadata

Source code in src/recnexteval/datasets/metadata/amazon.py
67
68
class AmazonBookItemMetadata(AmazonItemMetadata):
    """Amazon item metadata bound to ``AmazonBooksItemMetadataConfig``.

    Only the class-level ``config`` is overridden; every method is inherited
    from ``AmazonItemMetadata``.
    """

    config: ClassVar[AmazonBooksItemMetadataConfig] = AmazonBooksItemMetadataConfig()

config = AmazonBooksItemMetadataConfig() class-attribute

IS_BASE = True class-attribute instance-attribute

name property

Name of the object's class.

Returns: the name of the object's class (`str`).

file_path property

File path of the dataset.

processed_cache_path property

Path for cached processed data.

base_path = base_path if base_path else self.config.default_base_path instance-attribute

filename = filename if filename else self.config.default_filename instance-attribute

item_id_mapping = item_id_mapping instance-attribute

fetch_dataset()

Check whether the dataset is present and download it if it is not.

Source code in src/recnexteval/datasets/base.py
39
40
41
42
43
44
45
def fetch_dataset(self) -> None:
    """Make sure the dataset file exists, downloading it when absent.

    Nothing is downloaded if ``self.file_path`` already exists; otherwise
    the work is delegated to ``self._download_dataset``.
    """
    already_present = os.path.exists(self.file_path)
    if not already_present:
        logger.debug(f"{self.name} dataset not found in {self.file_path}.")
        self._download_dataset()
        return
    logger.debug("Data file is in memory and in dir specified.")

fetch_dataset_force()

Force re-download of the dataset.

Source code in src/recnexteval/datasets/base.py
47
48
49
50
def fetch_dataset_force(self) -> None:
    """Download the dataset again without checking for an existing file.

    Always calls ``self._download_dataset`` after logging the request.
    """
    notice = f"{self.name} force re-download of dataset."
    logger.debug(notice)
    self._download_dataset()

load()

Load the metadata from file and return it as a DataFrame.

Returns:

Type Description
DataFrame

DataFrame containing the metadata.

Source code in src/recnexteval/datasets/metadata/base.py
32
33
34
35
36
37
38
def load(self) -> pd.DataFrame:
    """Read the metadata and hand it back as a DataFrame.

    All of the work is delegated to ``self._load_dataframe``.

    Returns:
        Whatever ``_load_dataframe`` produced.
    """
    metadata_frame = self._load_dataframe()
    return metadata_frame