Skip to content

amazon

AmazonDatasetConfig dataclass

Bases: DatasetConfig

Amazon dataset base configuration.

Source code in src/recnexteval/datasets/config/amazon.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
@dataclass
class AmazonDatasetConfig(DatasetConfig):
    """Amazon dataset base configuration.

    Declares the defaults shared by the Amazon dataset configs that subclass
    it. ``dataset_url`` and ``remote_filename`` are empty strings here and are
    meant to be overridden by each concrete subclass.
    """

    # Column-name defaults for the interaction data (presumably the keys of
    # the raw review records -- confirm against the loader).
    user_ix: str = "user_id"
    item_ix: str = "parent_asin"
    timestamp_ix: str = "timestamp"
    rating_ix: str = "rating"
    helpful_vote_ix: str = "helpful_vote"
    # Download location and remote file name; both are placeholders at this level.
    dataset_url: str = ""  # Set per subclass
    remote_filename: str = ""  # Set per subclass
    # Appends "/amazon" to the base path declared on DatasetConfig.
    default_base_path: str = DatasetConfig.default_base_path + "/amazon"

    @property
    def default_filename(self) -> str:
        """Return just the filename for Amazon datasets (no zipname prefix).

        Returns:
            The current value of ``remote_filename``, unchanged.
        """
        return self.remote_filename

user_ix = 'user_id' class-attribute instance-attribute

item_ix = 'parent_asin' class-attribute instance-attribute

timestamp_ix = 'timestamp' class-attribute instance-attribute

rating_ix = 'rating' class-attribute instance-attribute

helpful_vote_ix = 'helpful_vote' class-attribute instance-attribute

dataset_url = '' class-attribute instance-attribute

remote_filename = '' class-attribute instance-attribute

default_base_path = DatasetConfig.default_base_path + '/amazon' class-attribute instance-attribute

default_filename property

Return just the filename for Amazon datasets (no zipname prefix).

remote_zipname = '' class-attribute instance-attribute

AmazonMusicDatasetConfig dataclass

Bases: AmazonDatasetConfig

Amazon Music dataset configuration.

Source code in src/recnexteval/datasets/config/amazon.py
25
26
27
28
29
30
@dataclass
class AmazonMusicDatasetConfig(AmazonDatasetConfig):
    """Amazon Music dataset configuration.

    Overrides only the remote filename and the download URL so that they
    point at the ``Digital_Music`` review-category file; every other field
    keeps the default inherited from ``AmazonDatasetConfig``.
    """

    # Name of the ``.jsonl.gz`` review file on the remote host.
    remote_filename: str = "Digital_Music.jsonl.gz"
    # Full download URL, written as adjacent literals to keep lines short.
    dataset_url: str = (
        "https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/"
        "raw/review_categories/Digital_Music.jsonl.gz"
    )

remote_filename = 'Digital_Music.jsonl.gz' class-attribute instance-attribute

dataset_url = 'https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/review_categories/Digital_Music.jsonl.gz' class-attribute instance-attribute

user_ix = 'user_id' class-attribute instance-attribute

item_ix = 'parent_asin' class-attribute instance-attribute

timestamp_ix = 'timestamp' class-attribute instance-attribute

default_base_path = DatasetConfig.default_base_path + '/amazon' class-attribute instance-attribute

remote_zipname = '' class-attribute instance-attribute

default_filename property

Return just the filename for Amazon datasets (no zipname prefix).

rating_ix = 'rating' class-attribute instance-attribute

helpful_vote_ix = 'helpful_vote' class-attribute instance-attribute

AmazonMovieDatasetConfig dataclass

Bases: AmazonDatasetConfig

Amazon Movie dataset configuration.

Source code in src/recnexteval/datasets/config/amazon.py
33
34
35
36
37
38
@dataclass
class AmazonMovieDatasetConfig(AmazonDatasetConfig):
    """Amazon Movie dataset configuration.

    Overrides only the remote filename and the download URL so that they
    point at the ``Movies_and_TV`` review-category file; every other field
    keeps the default inherited from ``AmazonDatasetConfig``.
    """

    # Name of the ``.jsonl.gz`` review file on the remote host.
    remote_filename: str = "Movies_and_TV.jsonl.gz"
    # Full download URL, written as adjacent literals to keep lines short.
    dataset_url: str = (
        "https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/"
        "raw/review_categories/Movies_and_TV.jsonl.gz"
    )

remote_filename = 'Movies_and_TV.jsonl.gz' class-attribute instance-attribute

dataset_url = 'https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/review_categories/Movies_and_TV.jsonl.gz' class-attribute instance-attribute

user_ix = 'user_id' class-attribute instance-attribute

item_ix = 'parent_asin' class-attribute instance-attribute

timestamp_ix = 'timestamp' class-attribute instance-attribute

default_base_path = DatasetConfig.default_base_path + '/amazon' class-attribute instance-attribute

remote_zipname = '' class-attribute instance-attribute

default_filename property

Return just the filename for Amazon datasets (no zipname prefix).

rating_ix = 'rating' class-attribute instance-attribute

helpful_vote_ix = 'helpful_vote' class-attribute instance-attribute

AmazonSubscriptionBoxesDatasetConfig dataclass

Bases: AmazonDatasetConfig

Amazon Subscription Boxes dataset configuration.

Source code in src/recnexteval/datasets/config/amazon.py
41
42
43
44
45
46
@dataclass
class AmazonSubscriptionBoxesDatasetConfig(AmazonDatasetConfig):
    """Amazon Subscription Boxes dataset configuration.

    Overrides only the remote filename and the download URL so that they
    point at the ``Subscription_Boxes`` review-category file; every other
    field keeps the default inherited from ``AmazonDatasetConfig``.
    """

    # Name of the ``.jsonl.gz`` review file on the remote host.
    remote_filename: str = "Subscription_Boxes.jsonl.gz"
    # Full download URL, written as adjacent literals to keep lines short.
    dataset_url: str = (
        "https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/"
        "raw/review_categories/Subscription_Boxes.jsonl.gz"
    )

remote_filename = 'Subscription_Boxes.jsonl.gz' class-attribute instance-attribute

dataset_url = 'https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/review_categories/Subscription_Boxes.jsonl.gz' class-attribute instance-attribute

user_ix = 'user_id' class-attribute instance-attribute

item_ix = 'parent_asin' class-attribute instance-attribute

timestamp_ix = 'timestamp' class-attribute instance-attribute

default_base_path = DatasetConfig.default_base_path + '/amazon' class-attribute instance-attribute

remote_zipname = '' class-attribute instance-attribute

default_filename property

Return just the filename for Amazon datasets (no zipname prefix).

rating_ix = 'rating' class-attribute instance-attribute

helpful_vote_ix = 'helpful_vote' class-attribute instance-attribute

AmazonBookDatasetConfig dataclass

Bases: AmazonDatasetConfig

Amazon Books dataset configuration.

Source code in src/recnexteval/datasets/config/amazon.py
49
50
51
52
53
54
@dataclass
class AmazonBookDatasetConfig(AmazonDatasetConfig):
    """Amazon Books dataset configuration.

    Overrides only the remote filename and the download URL so that they
    point at the ``Books`` review-category file; every other field keeps the
    default inherited from ``AmazonDatasetConfig``.
    """

    # Name of the ``.jsonl.gz`` review file on the remote host.
    remote_filename: str = "Books.jsonl.gz"
    # Full download URL, written as adjacent literals to keep lines short.
    dataset_url: str = (
        "https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/"
        "raw/review_categories/Books.jsonl.gz"
    )

remote_filename = 'Books.jsonl.gz' class-attribute instance-attribute

dataset_url = 'https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/review_categories/Books.jsonl.gz' class-attribute instance-attribute

user_ix = 'user_id' class-attribute instance-attribute

item_ix = 'parent_asin' class-attribute instance-attribute

timestamp_ix = 'timestamp' class-attribute instance-attribute

default_base_path = DatasetConfig.default_base_path + '/amazon' class-attribute instance-attribute

remote_zipname = '' class-attribute instance-attribute

default_filename property

Return just the filename for Amazon datasets (no zipname prefix).

rating_ix = 'rating' class-attribute instance-attribute

helpful_vote_ix = 'helpful_vote' class-attribute instance-attribute

AmazonItemMetadataConfig dataclass

Bases: MetadataConfig, AmazonDatasetConfig

Amazon Item Metadata Base Configuration.

Handles configuration for Amazon product metadata including:

- Product identifiers (ASIN)
- Product information (title, category, price, rating)
- Rich content (features, description, images, videos)
- Relational data (store, details, bought together)

All properties are computed from base fields to ensure consistency.

Source code in src/recnexteval/datasets/config/amazon.py
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
@dataclass
class AmazonItemMetadataConfig(MetadataConfig, AmazonDatasetConfig):
    """
    Amazon Item Metadata Base Configuration.

    Handles configuration for Amazon product metadata including:
    - Product identifiers (ASIN)
    - Product information (title, category, price, rating)
    - Rich content (features, description, images, videos)
    - Relational data (store, details, bought together)

    All properties are computed from base fields to ensure consistency.
    """

    item_ix: str = "parent_asin"
    """Name of the column containing product identifiers (parent ASIN)."""
    main_category_ix: str = "main_category"
    """Name of the column containing the main product category."""
    title_ix: str = "title"
    """Name of the column containing product title."""
    average_rating_ix: str = "average_rating"
    """Name of the column containing average product rating (0-5)."""
    rating_number_ix: str = "rating_number"
    """Name of the column containing number of ratings received."""
    features_ix: str = "features"
    """Name of the column containing product features (list)."""
    description_ix: str = "description"
    """Name of the column containing product description (list)."""
    price_ix: str = "price"
    """Name of the column containing product price."""
    images_ix: str = "images"
    """Name of the column containing product images URLs (list)."""
    videos_ix: str = "videos"
    """Name of the column containing product videos URLs (list)."""
    store_ix: str = "store"
    """Name of the column containing store/seller information."""
    categories_ix: str = "categories"
    """Name of the column containing category hierarchy (list)."""
    details_ix: str = "details"
    """Name of the column containing product details (dict)."""
    bought_together_ix: str = "bought_together"
    """Name of the column containing products bought together (list)."""

    @property
    def column_names(self) -> list[str]:
        """List the metadata column names in a fixed order.

        The names are read from the ``*_ix`` fields on every access, so
        per-instance overrides are reflected. A new list is returned each time.
        """
        ordered_columns = (
            self.main_category_ix,
            self.title_ix,
            self.average_rating_ix,
            self.rating_number_ix,
            self.features_ix,
            self.description_ix,
            self.price_ix,
            self.images_ix,
            self.videos_ix,
            self.store_ix,
            self.categories_ix,
            self.details_ix,
            self.item_ix,
            self.bought_together_ix,
        )
        return list(ordered_columns)

    @property
    def dtype_dict(self) -> dict:
        """Map every metadata column name to its dtype.

        Values are either Python types (``str``, ``list``, ``dict``) or dtype
        names given as strings (``"float32"``, ``"int64"``). Keys are inserted
        in the same order as ``column_names``; a new dict is built per access.
        """
        column_dtypes = [
            (self.main_category_ix, str),
            (self.title_ix, str),
            (self.average_rating_ix, "float32"),
            (self.rating_number_ix, "int64"),
            (self.features_ix, list),
            (self.description_ix, list),
            (self.price_ix, "float32"),
            (self.images_ix, list),
            (self.videos_ix, list),
            (self.store_ix, str),
            (self.categories_ix, list),
            (self.details_ix, dict),
            (self.item_ix, str),
            (self.bought_together_ix, list),
        ]
        return dict(column_dtypes)

item_ix = 'parent_asin' class-attribute instance-attribute

Name of the column containing product identifiers (parent ASIN).

main_category_ix = 'main_category' class-attribute instance-attribute

Name of the column containing the main product category.

title_ix = 'title' class-attribute instance-attribute

Name of the column containing product title.

average_rating_ix = 'average_rating' class-attribute instance-attribute

Name of the column containing average product rating (0-5).

rating_number_ix = 'rating_number' class-attribute instance-attribute

Name of the column containing number of ratings received.

features_ix = 'features' class-attribute instance-attribute

Name of the column containing product features (list).

description_ix = 'description' class-attribute instance-attribute

Name of the column containing product description (list).

price_ix = 'price' class-attribute instance-attribute

Name of the column containing product price.

images_ix = 'images' class-attribute instance-attribute

Name of the column containing product images URLs (list).

videos_ix = 'videos' class-attribute instance-attribute

Name of the column containing product videos URLs (list).

store_ix = 'store' class-attribute instance-attribute

Name of the column containing store/seller information.

categories_ix = 'categories' class-attribute instance-attribute

Name of the column containing category hierarchy (list).

details_ix = 'details' class-attribute instance-attribute

Name of the column containing product details (dict).

bought_together_ix = 'bought_together' class-attribute instance-attribute

Name of the column containing products bought together (list).

column_names property

dtype_dict property

user_ix = 'user_id' class-attribute instance-attribute

timestamp_ix = 'timestamp' class-attribute instance-attribute

dataset_url = '' class-attribute instance-attribute

default_base_path = DatasetConfig.default_base_path + '/amazon' class-attribute instance-attribute

remote_zipname = '' class-attribute instance-attribute

remote_filename = '' class-attribute instance-attribute

default_filename property

Return just the filename for Amazon datasets (no zipname prefix).

rating_ix = 'rating' class-attribute instance-attribute

helpful_vote_ix = 'helpful_vote' class-attribute instance-attribute

sep = '|' class-attribute instance-attribute

Column separator in the data file.

AmazonDigitalMusicItemMetadataConfig dataclass

Bases: AmazonItemMetadataConfig

Amazon Digital Music metadata configuration.

Source code in src/recnexteval/datasets/config/amazon.py
139
140
141
142
143
144
145
146
147
148
149
@dataclass
class AmazonDigitalMusicItemMetadataConfig(AmazonItemMetadataConfig):
    """Amazon Digital Music metadata configuration.

    Points the inherited item-metadata configuration at the
    ``meta_Digital_Music`` file; only the filename and URL are overridden.
    """

    remote_filename: str = "meta_Digital_Music.jsonl.gz"
    """Filename for Digital Music metadata."""

    dataset_url: str = (
        "https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/"
        "meta_categories/meta_Digital_Music.jsonl.gz"
    )
    """Download URL for Digital Music metadata."""

remote_filename = 'meta_Digital_Music.jsonl.gz' class-attribute instance-attribute

Filename for Digital Music metadata.

dataset_url = 'https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/meta_categories/meta_Digital_Music.jsonl.gz' class-attribute instance-attribute

user_ix = 'user_id' class-attribute instance-attribute

item_ix = 'parent_asin' class-attribute instance-attribute

Name of the column containing product identifiers (parent ASIN).

timestamp_ix = 'timestamp' class-attribute instance-attribute

default_base_path = DatasetConfig.default_base_path + '/amazon' class-attribute instance-attribute

remote_zipname = '' class-attribute instance-attribute

default_filename property

Return just the filename for Amazon datasets (no zipname prefix).

rating_ix = 'rating' class-attribute instance-attribute

helpful_vote_ix = 'helpful_vote' class-attribute instance-attribute

sep = '|' class-attribute instance-attribute

Column separator in the data file.

column_names property

dtype_dict property

main_category_ix = 'main_category' class-attribute instance-attribute

Name of the column containing the main product category.

title_ix = 'title' class-attribute instance-attribute

Name of the column containing product title.

average_rating_ix = 'average_rating' class-attribute instance-attribute

Name of the column containing average product rating (0-5).

rating_number_ix = 'rating_number' class-attribute instance-attribute

Name of the column containing number of ratings received.

features_ix = 'features' class-attribute instance-attribute

Name of the column containing product features (list).

description_ix = 'description' class-attribute instance-attribute

Name of the column containing product description (list).

price_ix = 'price' class-attribute instance-attribute

Name of the column containing product price.

images_ix = 'images' class-attribute instance-attribute

Name of the column containing product images URLs (list).

videos_ix = 'videos' class-attribute instance-attribute

Name of the column containing product videos URLs (list).

store_ix = 'store' class-attribute instance-attribute

Name of the column containing store/seller information.

categories_ix = 'categories' class-attribute instance-attribute

Name of the column containing category hierarchy (list).

details_ix = 'details' class-attribute instance-attribute

Name of the column containing product details (dict).

bought_together_ix = 'bought_together' class-attribute instance-attribute

Name of the column containing products bought together (list).

AmazonMoviesAndTVItemMetadataConfig dataclass

Bases: AmazonItemMetadataConfig

Amazon Movies and TV metadata configuration.

Source code in src/recnexteval/datasets/config/amazon.py
152
153
154
155
156
157
158
159
160
161
162
@dataclass
class AmazonMoviesAndTVItemMetadataConfig(AmazonItemMetadataConfig):
    """Amazon Movies and TV metadata configuration.

    Points the inherited item-metadata configuration at the
    ``meta_Movies_and_TV`` file; only the filename and URL are overridden.
    """

    remote_filename: str = "meta_Movies_and_TV.jsonl.gz"
    """Filename for Movies and TV metadata."""

    dataset_url: str = (
        "https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/"
        "meta_categories/meta_Movies_and_TV.jsonl.gz"
    )
    """Download URL for Movies and TV metadata."""

remote_filename = 'meta_Movies_and_TV.jsonl.gz' class-attribute instance-attribute

Filename for Movies and TV metadata.

dataset_url = 'https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/meta_categories/meta_Movies_and_TV.jsonl.gz' class-attribute instance-attribute

user_ix = 'user_id' class-attribute instance-attribute

item_ix = 'parent_asin' class-attribute instance-attribute

Name of the column containing product identifiers (parent ASIN).

timestamp_ix = 'timestamp' class-attribute instance-attribute

default_base_path = DatasetConfig.default_base_path + '/amazon' class-attribute instance-attribute

remote_zipname = '' class-attribute instance-attribute

default_filename property

Return just the filename for Amazon datasets (no zipname prefix).

rating_ix = 'rating' class-attribute instance-attribute

helpful_vote_ix = 'helpful_vote' class-attribute instance-attribute

sep = '|' class-attribute instance-attribute

Column separator in the data file.

column_names property

dtype_dict property

main_category_ix = 'main_category' class-attribute instance-attribute

Name of the column containing the main product category.

title_ix = 'title' class-attribute instance-attribute

Name of the column containing product title.

average_rating_ix = 'average_rating' class-attribute instance-attribute

Name of the column containing average product rating (0-5).

rating_number_ix = 'rating_number' class-attribute instance-attribute

Name of the column containing number of ratings received.

features_ix = 'features' class-attribute instance-attribute

Name of the column containing product features (list).

description_ix = 'description' class-attribute instance-attribute

Name of the column containing product description (list).

price_ix = 'price' class-attribute instance-attribute

Name of the column containing product price.

images_ix = 'images' class-attribute instance-attribute

Name of the column containing product images URLs (list).

videos_ix = 'videos' class-attribute instance-attribute

Name of the column containing product videos URLs (list).

store_ix = 'store' class-attribute instance-attribute

Name of the column containing store/seller information.

categories_ix = 'categories' class-attribute instance-attribute

Name of the column containing category hierarchy (list).

details_ix = 'details' class-attribute instance-attribute

Name of the column containing product details (dict).

bought_together_ix = 'bought_together' class-attribute instance-attribute

Name of the column containing products bought together (list).

AmazonSubscriptionBoxesItemMetadataConfig dataclass

Bases: AmazonItemMetadataConfig

Amazon Subscription Boxes metadata configuration.

Source code in src/recnexteval/datasets/config/amazon.py
165
166
167
168
169
170
171
172
173
174
175
@dataclass
class AmazonSubscriptionBoxesItemMetadataConfig(AmazonItemMetadataConfig):
    """Amazon Subscription Boxes metadata configuration.

    Points the inherited item-metadata configuration at the
    ``meta_Subscription_Boxes`` file; only the filename and URL are overridden.
    """

    remote_filename: str = "meta_Subscription_Boxes.jsonl.gz"
    """Filename for Subscription Boxes metadata."""

    dataset_url: str = (
        "https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/"
        "meta_categories/meta_Subscription_Boxes.jsonl.gz"
    )
    """Download URL for Subscription Boxes metadata."""

remote_filename = 'meta_Subscription_Boxes.jsonl.gz' class-attribute instance-attribute

Filename for Subscription Boxes metadata.

dataset_url = 'https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/meta_categories/meta_Subscription_Boxes.jsonl.gz' class-attribute instance-attribute

user_ix = 'user_id' class-attribute instance-attribute

item_ix = 'parent_asin' class-attribute instance-attribute

Name of the column containing product identifiers (parent ASIN).

timestamp_ix = 'timestamp' class-attribute instance-attribute

default_base_path = DatasetConfig.default_base_path + '/amazon' class-attribute instance-attribute

remote_zipname = '' class-attribute instance-attribute

default_filename property

Return just the filename for Amazon datasets (no zipname prefix).

rating_ix = 'rating' class-attribute instance-attribute

helpful_vote_ix = 'helpful_vote' class-attribute instance-attribute

sep = '|' class-attribute instance-attribute

Column separator in the data file.

column_names property

dtype_dict property

main_category_ix = 'main_category' class-attribute instance-attribute

Name of the column containing the main product category.

title_ix = 'title' class-attribute instance-attribute

Name of the column containing product title.

average_rating_ix = 'average_rating' class-attribute instance-attribute

Name of the column containing average product rating (0-5).

rating_number_ix = 'rating_number' class-attribute instance-attribute

Name of the column containing number of ratings received.

features_ix = 'features' class-attribute instance-attribute

Name of the column containing product features (list).

description_ix = 'description' class-attribute instance-attribute

Name of the column containing product description (list).

price_ix = 'price' class-attribute instance-attribute

Name of the column containing product price.

images_ix = 'images' class-attribute instance-attribute

Name of the column containing product images URLs (list).

videos_ix = 'videos' class-attribute instance-attribute

Name of the column containing product videos URLs (list).

store_ix = 'store' class-attribute instance-attribute

Name of the column containing store/seller information.

categories_ix = 'categories' class-attribute instance-attribute

Name of the column containing category hierarchy (list).

details_ix = 'details' class-attribute instance-attribute

Name of the column containing product details (dict).

bought_together_ix = 'bought_together' class-attribute instance-attribute

Name of the column containing products bought together (list).

AmazonBooksItemMetadataConfig dataclass

Bases: AmazonItemMetadataConfig

Amazon Books metadata configuration.

Source code in src/recnexteval/datasets/config/amazon.py
178
179
180
181
182
183
184
185
186
187
188
@dataclass
class AmazonBooksItemMetadataConfig(AmazonItemMetadataConfig):
    """Amazon Books metadata configuration.

    Points the inherited item-metadata configuration at the ``meta_Books``
    file; only the filename and URL are overridden.
    """

    remote_filename: str = "meta_Books.jsonl.gz"
    """Filename for Books metadata."""

    dataset_url: str = (
        "https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/"
        "meta_categories/meta_Books.jsonl.gz"
    )
    """Download URL for Books metadata."""

remote_filename = 'meta_Books.jsonl.gz' class-attribute instance-attribute

Filename for Books metadata.

dataset_url = 'https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/meta_categories/meta_Books.jsonl.gz' class-attribute instance-attribute

user_ix = 'user_id' class-attribute instance-attribute

item_ix = 'parent_asin' class-attribute instance-attribute

Name of the column containing product identifiers (parent ASIN).

timestamp_ix = 'timestamp' class-attribute instance-attribute

default_base_path = DatasetConfig.default_base_path + '/amazon' class-attribute instance-attribute

remote_zipname = '' class-attribute instance-attribute

default_filename property

Return just the filename for Amazon datasets (no zipname prefix).

rating_ix = 'rating' class-attribute instance-attribute

helpful_vote_ix = 'helpful_vote' class-attribute instance-attribute

sep = '|' class-attribute instance-attribute

Column separator in the data file.

column_names property

dtype_dict property

main_category_ix = 'main_category' class-attribute instance-attribute

Name of the column containing the main product category.

title_ix = 'title' class-attribute instance-attribute

Name of the column containing product title.

average_rating_ix = 'average_rating' class-attribute instance-attribute

Name of the column containing average product rating (0-5).

rating_number_ix = 'rating_number' class-attribute instance-attribute

Name of the column containing number of ratings received.

features_ix = 'features' class-attribute instance-attribute

Name of the column containing product features (list).

description_ix = 'description' class-attribute instance-attribute

Name of the column containing product description (list).

price_ix = 'price' class-attribute instance-attribute

Name of the column containing product price.

images_ix = 'images' class-attribute instance-attribute

Name of the column containing product images URLs (list).

videos_ix = 'videos' class-attribute instance-attribute

Name of the column containing product videos URLs (list).

store_ix = 'store' class-attribute instance-attribute

Name of the column containing store/seller information.

categories_ix = 'categories' class-attribute instance-attribute

Name of the column containing category hierarchy (list).

details_ix = 'details' class-attribute instance-attribute

Name of the column containing product details (dict).

bought_together_ix = 'bought_together' class-attribute instance-attribute

Name of the column containing products bought together (list).