Skip to content

movielens

MovieLens100kMetadata

Bases: Metadata

Source code in src/recnexteval/datasets/metadata/movielens.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
class MovieLens100kMetadata(Metadata):
    """Common download step for metadata files shipped in the MovieLens 100K archive.

    The class reads ``dataset_url``, ``remote_zipname`` and ``remote_filename``
    from ``self.config`` and writes the wanted archive member to
    ``self.file_path``.
    """

    def _download_dataset(self) -> None:
        """Fetch the archive, extract one member and move it to ``self.file_path``.

        The archive ``<remote_zipname>.zip`` is requested from
        ``config.dataset_url`` and stored under ``self.base_path``. The member
        ``<remote_zipname>/<remote_filename>`` is then extracted below
        ``self.base_path`` and moved to ``self.file_path``.
        """
        archive_name = f"{self.config.remote_zipname}.zip"
        archive_path = os.path.join(self.base_path, archive_name)
        member = f"{self.config.remote_zipname}/{self.config.remote_filename}"

        # Download the zip into the data directory
        self._fetch_remote(f"{self.config.dataset_url}/{archive_name}", archive_path)

        # Extract only the member we will use
        with zipfile.ZipFile(archive_path, "r") as zip_ref:
            zip_ref.extract(member, self.base_path)

        # Move the extracted member to the configured filename.
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename raises FileExistsError on Windows. That matters
        # when a forced re-download runs while the file is already present.
        os.replace(os.path.join(self.base_path, member), self.file_path)

IS_BASE = True class-attribute instance-attribute

name property

Name of the object's class.

Returns: the name of the object's class (`str`).

config = MetadataConfig() class-attribute

file_path property

File path of the dataset.

processed_cache_path property

Path for cached processed data.

base_path = base_path if base_path else self.config.default_base_path instance-attribute

filename = filename if filename else self.config.default_filename instance-attribute

fetch_dataset()

Check whether the dataset is present; if it is not, download it.

Source code in src/recnexteval/datasets/base.py
39
40
41
42
43
44
45
def fetch_dataset(self) -> None:
    """Make sure the dataset file exists, downloading it when it is absent.

    If ``self.file_path`` already exists nothing is downloaded; otherwise
    ``self._download_dataset()`` is invoked.
    """
    if not os.path.exists(self.file_path):
        # Nothing on disk at the expected location: go and get it.
        logger.debug(f"{self.name} dataset not found in {self.file_path}.")
        self._download_dataset()
        return
    logger.debug("Data file is in memory and in dir specified.")

fetch_dataset_force()

Force re-download of the dataset.

Source code in src/recnexteval/datasets/base.py
47
48
49
50
def fetch_dataset_force(self) -> None:
    """Download the dataset without first checking whether it already exists."""
    message = f"{self.name} force re-download of dataset."
    logger.debug(message)
    # No existence check here: the download step always runs.
    self._download_dataset()

load()

Load the metadata from file and return it as a DataFrame.

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | DataFrame containing the metadata. |

Source code in src/recnexteval/datasets/metadata/base.py
32
33
34
35
36
37
38
def load(self) -> pd.DataFrame:
    """Return the metadata as a DataFrame.

    The actual reading is delegated to ``self._load_dataframe()``.

    Returns:
        DataFrame containing the metadata.
    """
    frame = self._load_dataframe()
    return frame

MovieLens100kUserMetadata

Bases: MovieLens100kMetadata

Source code in src/recnexteval/datasets/metadata/movielens.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
class MovieLens100kUserMetadata(MovieLens100kMetadata):
    """User metadata reader that rewrites raw user ids through ``user_id_mapping``.

    ``user_id_mapping`` must expose a column named ``config.user_ix`` (the raw
    ids) and a column named ``"uid"`` (the replacement ids).
    """

    config: ClassVar[MovieLens100kUserMetadataConfig] = MovieLens100kUserMetadataConfig()  # type: ignore

    def __init__(self, user_id_mapping: pd.DataFrame) -> None:
        """Store the raw-id to ``uid`` mapping used while loading.

        Args:
            user_id_mapping: Frame with the ``config.user_ix`` and ``"uid"`` columns.
        """
        super().__init__()
        self.user_id_mapping = user_id_mapping

    def _load_dataframe(self) -> pd.DataFrame:
        """Read the metadata file, converting the user id column on the fly.

        Returns:
            The parsed frame; values of the ``config.user_ix`` column are
            replaced by their ``uid`` when the mapping knows them and are left
            as read otherwise.
        """
        self.fetch_dataset()

        # Build the lookup once per load. The converter runs for every value
        # of the column, so rebuilding the dict there made the load cost
        # proportional to rows x mapping size.
        lookup = self._user_id_lookup()

        def to_uid(raw_user_id):
            return lookup.get(int(raw_user_id), raw_user_id)

        return pd.read_table(
            self.file_path,
            dtype=self.config.dtype_dict,
            sep=self.config.sep,
            names=self.config.column_names,
            converters={self.config.user_ix: to_uid},
        )

    def _user_id_lookup(self) -> dict:
        """Return a dict from raw user id to ``uid`` built from ``user_id_mapping``."""
        return dict(
            zip(self.user_id_mapping[self.config.user_ix], self.user_id_mapping["uid"])
        )

    def _map_user_id(self, user_id):
        """Translate a single raw user id.

        Args:
            user_id: Raw id; it is passed through ``int()`` before the lookup.

        Returns:
            The mapped ``uid``, or ``user_id`` unchanged when the mapping has
            no entry for it.
        """
        return self._user_id_lookup().get(int(user_id), user_id)

config = MovieLens100kUserMetadataConfig() class-attribute

user_id_mapping = user_id_mapping instance-attribute

IS_BASE = True class-attribute instance-attribute

name property

Name of the object's class.

Returns: the name of the object's class (`str`).

file_path property

File path of the dataset.

processed_cache_path property

Path for cached processed data.

base_path = base_path if base_path else self.config.default_base_path instance-attribute

filename = filename if filename else self.config.default_filename instance-attribute

fetch_dataset()

Check whether the dataset is present; if it is not, download it.

Source code in src/recnexteval/datasets/base.py
39
40
41
42
43
44
45
def fetch_dataset(self) -> None:
    """Make sure the dataset file exists, downloading it when it is absent.

    If ``self.file_path`` already exists nothing is downloaded; otherwise
    ``self._download_dataset()`` is invoked.
    """
    if not os.path.exists(self.file_path):
        # Nothing on disk at the expected location: go and get it.
        logger.debug(f"{self.name} dataset not found in {self.file_path}.")
        self._download_dataset()
        return
    logger.debug("Data file is in memory and in dir specified.")

fetch_dataset_force()

Force re-download of the dataset.

Source code in src/recnexteval/datasets/base.py
47
48
49
50
def fetch_dataset_force(self) -> None:
    """Download the dataset without first checking whether it already exists."""
    message = f"{self.name} force re-download of dataset."
    logger.debug(message)
    # No existence check here: the download step always runs.
    self._download_dataset()

load()

Load the metadata from file and return it as a DataFrame.

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | DataFrame containing the metadata. |

Source code in src/recnexteval/datasets/metadata/base.py
32
33
34
35
36
37
38
def load(self) -> pd.DataFrame:
    """Return the metadata as a DataFrame.

    The actual reading is delegated to ``self._load_dataframe()``.

    Returns:
        DataFrame containing the metadata.
    """
    frame = self._load_dataframe()
    return frame

MovieLens100kItemMetadata

Bases: MovieLens100kMetadata

Source code in src/recnexteval/datasets/metadata/movielens.py
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
class MovieLens100kItemMetadata(MovieLens100kMetadata):
    """Item metadata reader that rewrites raw item ids through ``item_id_mapping``.

    ``item_id_mapping`` must expose a column named ``config.item_ix`` (the raw
    ids) and a column named ``"iid"`` (the replacement ids).
    """

    config: ClassVar[MovieLens100kItemMetadataConfig] = MovieLens100kItemMetadataConfig()  # type: ignore

    def __init__(self, item_id_mapping: pd.DataFrame) -> None:
        """Store the raw-id to ``iid`` mapping used while loading.

        Args:
            item_id_mapping: Frame with the ``config.item_ix`` and ``"iid"`` columns.
        """
        super().__init__()
        self.item_id_mapping = item_id_mapping

    def _load_dataframe(self) -> pd.DataFrame:
        """Read the metadata file, converting the item id column on the fly.

        Returns:
            The parsed frame; values of the ``config.item_ix`` column are
            replaced by their ``iid`` when the mapping knows them and are left
            as read otherwise.
        """
        self.fetch_dataset()

        # Build the lookup once per load. The converter runs for every value
        # of the column, so rebuilding the dict there made the load cost
        # proportional to rows x mapping size.
        lookup = self._item_id_lookup()

        def to_iid(raw_item_id):
            return lookup.get(int(raw_item_id), raw_item_id)

        return pd.read_table(
            self.file_path,
            dtype=self.config.dtype_dict,
            sep=self.config.sep,
            names=self.config.column_names,
            converters={self.config.item_ix: to_iid},
            encoding=self.config.encoding,
        )

    def _item_id_lookup(self) -> dict:
        """Return a dict from raw item id to ``iid`` built from ``item_id_mapping``."""
        return dict(
            zip(self.item_id_mapping[self.config.item_ix], self.item_id_mapping["iid"])
        )

    def _map_item_id(self, item_id):
        """Translate a single raw item id.

        Args:
            item_id: Raw id; it is passed through ``int()`` before the lookup.

        Returns:
            The mapped ``iid``, or ``item_id`` unchanged when the mapping has
            no entry for it.
        """
        return self._item_id_lookup().get(int(item_id), item_id)

config = MovieLens100kItemMetadataConfig() class-attribute

item_id_mapping = item_id_mapping instance-attribute

IS_BASE = True class-attribute instance-attribute

name property

Name of the object's class.

Returns: the name of the object's class (`str`).

file_path property

File path of the dataset.

processed_cache_path property

Path for cached processed data.

base_path = base_path if base_path else self.config.default_base_path instance-attribute

filename = filename if filename else self.config.default_filename instance-attribute

fetch_dataset()

Check whether the dataset is present; if it is not, download it.

Source code in src/recnexteval/datasets/base.py
39
40
41
42
43
44
45
def fetch_dataset(self) -> None:
    """Make sure the dataset file exists, downloading it when it is absent.

    If ``self.file_path`` already exists nothing is downloaded; otherwise
    ``self._download_dataset()`` is invoked.
    """
    if not os.path.exists(self.file_path):
        # Nothing on disk at the expected location: go and get it.
        logger.debug(f"{self.name} dataset not found in {self.file_path}.")
        self._download_dataset()
        return
    logger.debug("Data file is in memory and in dir specified.")

fetch_dataset_force()

Force re-download of the dataset.

Source code in src/recnexteval/datasets/base.py
47
48
49
50
def fetch_dataset_force(self) -> None:
    """Download the dataset without first checking whether it already exists."""
    message = f"{self.name} force re-download of dataset."
    logger.debug(message)
    # No existence check here: the download step always runs.
    self._download_dataset()

load()

Load the metadata from file and return it as a DataFrame.

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | DataFrame containing the metadata. |

Source code in src/recnexteval/datasets/metadata/base.py
32
33
34
35
36
37
38
def load(self) -> pd.DataFrame:
    """Return the metadata as a DataFrame.

    The actual reading is delegated to ``self._load_dataframe()``.

    Returns:
        DataFrame containing the metadata.
    """
    frame = self._load_dataframe()
    return frame