Source code for flwr_datasets.partitioner.iid_partitioner

# Copyright 2023 Flower Labs GmbH. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""IID partitioner class that works with Hugging Face Datasets."""


import datasets
from flwr_datasets.partitioner.partitioner import Partitioner


class IidPartitioner(Partitioner):
    """Partitioner that divides the dataset into a fixed number of shards.

    Each partition is obtained by calling ``shard`` on the assigned dataset
    with ``contiguous=True``; this class itself performs no shuffling.

    NOTE(review): the partitions were previously described as "sampled
    randomly from the dataset". Any randomness presumably comes from the
    dataset being shuffled before it reaches this partitioner -- confirm
    against the caller.

    Parameters
    ----------
    num_partitions : int
        The total number of partitions that the data will be divided into.

    Raises
    ------
    ValueError
        If ``num_partitions`` is not greater than zero.

    Examples
    --------
    >>> from flwr_datasets import FederatedDataset
    >>> from flwr_datasets.partitioner import IidPartitioner
    >>>
    >>> partitioner = IidPartitioner(num_partitions=10)
    >>> fds = FederatedDataset(dataset="mnist", partitioners={"train": partitioner})
    >>> partition = fds.load_partition(0)
    """

    def __init__(self, num_partitions: int) -> None:
        super().__init__()
        # Reject non-positive counts up front, before any state is stored.
        if num_partitions <= 0:
            error_message = "The number of partitions must be greater than zero."
            raise ValueError(error_message)
        self._num_partitions = num_partitions

    def load_partition(self, partition_id: int) -> datasets.Dataset:
        """Return the shard of the dataset selected by ``partition_id``.

        Parameters
        ----------
        partition_id : int
            The index that corresponds to the requested partition.

        Returns
        -------
        dataset_partition : Dataset
            A single dataset partition.
        """
        full_dataset = self.dataset
        total_shards = self._num_partitions
        # ``partition_id`` is forwarded unchanged as the shard index; no range
        # check is performed here.
        return full_dataset.shard(
            num_shards=total_shards,
            index=partition_id,
            contiguous=True,
        )

    @property
    def num_partitions(self) -> int:
        """Total number of partitions."""
        partition_count = self._num_partitions
        return partition_count