Skip to content

base

DatasetConfig dataclass

Base configuration for datasets.

Source code in src/recnexteval/datasets/config/base.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
@dataclass
class DatasetConfig:
    """Shared settings that dataset configurations start from.

    Groups the DataFrame column names, the URL the dataset is fetched from,
    and the default storage location. Every field has a default value.
    """

    user_ix: str = "user_id"
    """Column in the DataFrame that holds the user identifiers."""
    item_ix: str = "item_id"
    """Column in the DataFrame that holds the item identifiers."""
    timestamp_ix: str = "timestamp"
    """Column in the DataFrame that holds the interaction time, in seconds since epoch."""
    dataset_url: str = "http://example.com"
    """URL the dataset is fetched from."""
    default_base_path: str = str(get_data_dir())
    """Base path under which the dataset is stored by default."""
    # NOTE(review): presumably the name of the remote zip archive — confirm.
    remote_zipname: str = ""
    remote_filename: str = ""
    """Name of the file containing user interaction."""

    @property
    def default_filename(self) -> str:
        """Filename built from the remote zip name and the remote file name.

        Returns:
            ``"<remote_zipname>_<remote_filename>"`` when both parts are
            non-empty; otherwise the fallback ``"dataset.csv"``.
        """
        if self.remote_zipname and self.remote_filename:
            return f"{self.remote_zipname}_{self.remote_filename}"
        return "dataset.csv"

user_ix = 'user_id' class-attribute instance-attribute

Name of the column in the DataFrame with user identifiers

item_ix = 'item_id' class-attribute instance-attribute

Name of the column in the DataFrame with item identifiers

timestamp_ix = 'timestamp' class-attribute instance-attribute

Name of the column in the DataFrame that contains time of interaction in seconds since epoch.

dataset_url = 'http://example.com' class-attribute instance-attribute

URL to fetch the dataset from.

default_base_path = str(get_data_dir()) class-attribute instance-attribute

Default base path where the dataset will be stored.

remote_zipname = '' class-attribute instance-attribute

remote_filename = '' class-attribute instance-attribute

Name of the file containing user interaction.

default_filename property

Derived filename from remote components.

MetadataConfig dataclass

Bases: DatasetConfig

Source code in src/recnexteval/datasets/config/base.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
@dataclass
class MetadataConfig(DatasetConfig):
    """Base configuration for dataset metadata files.

    Adds the column separator plus two hooks, ``column_names`` and
    ``dtype_dict``, that describe the columns of the data file. Both hooks
    return empty containers here; presumably concrete configurations
    override them.
    """

    sep: str = "|"
    """Column separator in the data file."""

    def __post_init__(self) -> None:
        """Redirect the default storage location to a ``metadata`` subfolder.

        When ``default_base_path`` still holds the default inherited from
        ``DatasetConfig``, ``"/metadata"`` is appended to it. Any other value
        (passed to the constructor or set as a subclass default) is kept
        verbatim instead of being silently overwritten.
        """
        inherited_default = super().default_base_path
        # Only rewrite the untouched default; an explicit path is the
        # caller's decision and must not be discarded.
        if self.default_base_path == inherited_default:
            self.default_base_path = inherited_default + "/metadata"

    @property
    def column_names(self) -> list[str]:
        """
        Ordered list of column names for pd.read_table.

        Returns:
            Column names in file order [user_id, age, gender, ...].
            Empty in this base class.

        Example:
            ["userId", "age", "gender", "occupation", "zipcode"]
        """
        return []

    @property
    def dtype_dict(self) -> dict:
        """
        Data type mapping for all columns.

        Used in pd.read_table() dtype parameter to ensure correct
        column types are loaded from file.

        Returns:
            Mapping of column names to numpy dtypes. Empty in this base class.

        Example:
            {
                "age": "int64",
                "gender": "<U1",  # string
                "occupation": "object",
                "zipcode": "object"
            }
        """
        return {}

sep = '|' class-attribute instance-attribute

Column separator in the data file.

column_names property

Ordered list of column names for pd.read_table.

Returns:

Type Description
list[str]

list[str]: Column names in file order [user_id, age, gender, ...]

Example

["userId", "age", "gender", "occupation", "zipcode"]

dtype_dict property

Data type mapping for all columns.

Used in pd.read_table() dtype parameter to ensure correct column types are loaded from file.

Returns:

Name Type Description
dict dict

Mapping of column names to numpy dtypes

Example

{
    "age": "int64",
    "gender": "<U1",  # string
    "occupation": "object",
    "zipcode": "object"
}

user_ix = 'user_id' class-attribute instance-attribute

Name of the column in the DataFrame with user identifiers

item_ix = 'item_id' class-attribute instance-attribute

Name of the column in the DataFrame with item identifiers

timestamp_ix = 'timestamp' class-attribute instance-attribute

Name of the column in the DataFrame that contains time of interaction in seconds since epoch.

dataset_url = 'http://example.com' class-attribute instance-attribute

URL to fetch the dataset from.

default_base_path = str(get_data_dir()) class-attribute instance-attribute

Default base path where the dataset will be stored.

remote_zipname = '' class-attribute instance-attribute

remote_filename = '' class-attribute instance-attribute

Name of the file containing user interaction.

default_filename property

Derived filename from remote components.